Bug Summary

File: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Warning: line 12566, column 48
The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'unsigned long long'
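
The flagged line (12566) lies far beyond the excerpt reproduced below; the checker is pointing at the general C++ rule that shifting a 64-bit integer by 64 or more bits is undefined behaviour. The following is a minimal sketch of the diagnosed pattern and one common guard — the names (makeLowMaskUB, makeLowMaskSafe, NumBits) are illustrative only and are not the actual code at line 12566:

#include <cstdint>

// Hypothetical illustration of the diagnosed pattern, not the code at
// AArch64ISelLowering.cpp:12566. If NumBits can reach 64, the shift
// below is undefined behaviour ([expr.shift]).
uint64_t makeLowMaskUB(unsigned NumBits) {
  return (1ULL << NumBits) - 1; // UB when NumBits == 64
}

// Guarded version: handle the full-width case explicitly so the shift
// amount stays strictly below 64. LLVM code commonly uses
// llvm::maskTrailingOnes<uint64_t>(NumBits) from Support/MathExtras.h
// for the same purpose.
uint64_t makeLowMaskSafe(unsigned NumBits) {
  return NumBits >= 64 ? ~0ULL : (1ULL << NumBits) - 1;
}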

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AArch64 -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64ISelLowering.h"
14#include "AArch64CallingConvention.h"
15#include "AArch64ExpandImm.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/ObjCARCUtil.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/Analysis.h"
35#include "llvm/CodeGen/CallingConvLower.h"
36#include "llvm/CodeGen/MachineBasicBlock.h"
37#include "llvm/CodeGen/MachineFrameInfo.h"
38#include "llvm/CodeGen/MachineFunction.h"
39#include "llvm/CodeGen/MachineInstr.h"
40#include "llvm/CodeGen/MachineInstrBuilder.h"
41#include "llvm/CodeGen/MachineMemOperand.h"
42#include "llvm/CodeGen/MachineRegisterInfo.h"
43#include "llvm/CodeGen/RuntimeLibcalls.h"
44#include "llvm/CodeGen/SelectionDAG.h"
45#include "llvm/CodeGen/SelectionDAGNodes.h"
46#include "llvm/CodeGen/TargetCallingConv.h"
47#include "llvm/CodeGen/TargetInstrInfo.h"
48#include "llvm/CodeGen/ValueTypes.h"
49#include "llvm/IR/Attributes.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugLoc.h"
53#include "llvm/IR/DerivedTypes.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/GetElementPtrTypeIterator.h"
56#include "llvm/IR/GlobalValue.h"
57#include "llvm/IR/IRBuilder.h"
58#include "llvm/IR/Instruction.h"
59#include "llvm/IR/Instructions.h"
60#include "llvm/IR/IntrinsicInst.h"
61#include "llvm/IR/Intrinsics.h"
62#include "llvm/IR/IntrinsicsAArch64.h"
63#include "llvm/IR/Module.h"
64#include "llvm/IR/OperandTraits.h"
65#include "llvm/IR/PatternMatch.h"
66#include "llvm/IR/Type.h"
67#include "llvm/IR/Use.h"
68#include "llvm/IR/Value.h"
69#include "llvm/MC/MCRegisterInfo.h"
70#include "llvm/Support/Casting.h"
71#include "llvm/Support/CodeGen.h"
72#include "llvm/Support/CommandLine.h"
73#include "llvm/Support/Compiler.h"
74#include "llvm/Support/Debug.h"
75#include "llvm/Support/ErrorHandling.h"
76#include "llvm/Support/KnownBits.h"
77#include "llvm/Support/MachineValueType.h"
78#include "llvm/Support/MathExtras.h"
79#include "llvm/Support/raw_ostream.h"
80#include "llvm/Target/TargetMachine.h"
81#include "llvm/Target/TargetOptions.h"
82#include <algorithm>
83#include <bitset>
84#include <cassert>
85#include <cctype>
86#include <cstdint>
87#include <cstdlib>
88#include <iterator>
89#include <limits>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace llvm::PatternMatch;
96
97#define DEBUG_TYPE "aarch64-lower"
98
99STATISTIC(NumTailCalls, "Number of tail calls");
100STATISTIC(NumShiftInserts, "Number of vector shift inserts");
101STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
102
103// FIXME: The necessary dtprel relocations don't seem to be supported
104// well in the GNU bfd and gold linkers at the moment. Therefore, by
105// default, for now, fall back to GeneralDynamic code generation.
106cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
107 "aarch64-elf-ldtls-generation", cl::Hidden,
108 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
109 cl::init(false));
110
111static cl::opt<bool>
112EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
113 cl::desc("Enable AArch64 logical imm instruction "
114 "optimization"),
115 cl::init(true));
116
117// Temporary option added for the purpose of testing functionality added
118// to DAGCombiner.cpp in D92230. It is expected that this can be removed
119// in future when both implementations will be based off MGATHER rather
120// than the GLD1 nodes added for the SVE gather load intrinsics.
121static cl::opt<bool>
122EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
123 cl::desc("Combine extends of AArch64 masked "
124 "gather intrinsics"),
125 cl::init(true));
126
127/// Value type used for condition codes.
128static const MVT MVT_CC = MVT::i32;
129
130static inline EVT getPackedSVEVectorVT(EVT VT) {
131 switch (VT.getSimpleVT().SimpleTy) {
132 default:
133 llvm_unreachable("unexpected element type for vector");
134 case MVT::i8:
135 return MVT::nxv16i8;
136 case MVT::i16:
137 return MVT::nxv8i16;
138 case MVT::i32:
139 return MVT::nxv4i32;
140 case MVT::i64:
141 return MVT::nxv2i64;
142 case MVT::f16:
143 return MVT::nxv8f16;
144 case MVT::f32:
145 return MVT::nxv4f32;
146 case MVT::f64:
147 return MVT::nxv2f64;
148 case MVT::bf16:
149 return MVT::nxv8bf16;
150 }
151}
152
153// NOTE: Currently there's only a need to return integer vector types. If this
154// changes then just add an extra "type" parameter.
155static inline EVT getPackedSVEVectorVT(ElementCount EC) {
156 switch (EC.getKnownMinValue()) {
157 default:
158 llvm_unreachable("unexpected element count for vector");
159 case 16:
160 return MVT::nxv16i8;
161 case 8:
162 return MVT::nxv8i16;
163 case 4:
164 return MVT::nxv4i32;
165 case 2:
166 return MVT::nxv2i64;
167 }
168}
169
170static inline EVT getPromotedVTForPredicate(EVT VT) {
171 assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
172 "Expected scalable predicate vector type!");
173 switch (VT.getVectorMinNumElements()) {
174 default:
175 llvm_unreachable("unexpected element count for vector");
176 case 2:
177 return MVT::nxv2i64;
178 case 4:
179 return MVT::nxv4i32;
180 case 8:
181 return MVT::nxv8i16;
182 case 16:
183 return MVT::nxv16i8;
184 }
185}
186
187/// Returns true if VT's elements occupy the lowest bit positions of its
188/// associated register class without any intervening space.
189///
190/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
191/// same register class, but only nxv8f16 can be treated as a packed vector.
192static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
193 assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
194 "Expected legal vector type!");
195 return VT.isFixedLengthVector() ||
196 VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
197}
198
199// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
200// predicate and end with a passthru value matching the result type.
201static bool isMergePassthruOpcode(unsigned Opc) {
202 switch (Opc) {
203 default:
204 return false;
205 case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
206 case AArch64ISD::BSWAP_MERGE_PASSTHRU:
207 case AArch64ISD::CTLZ_MERGE_PASSTHRU:
208 case AArch64ISD::CTPOP_MERGE_PASSTHRU:
209 case AArch64ISD::DUP_MERGE_PASSTHRU:
210 case AArch64ISD::ABS_MERGE_PASSTHRU:
211 case AArch64ISD::NEG_MERGE_PASSTHRU:
212 case AArch64ISD::FNEG_MERGE_PASSTHRU:
213 case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
214 case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
215 case AArch64ISD::FCEIL_MERGE_PASSTHRU:
216 case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
217 case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
218 case AArch64ISD::FRINT_MERGE_PASSTHRU:
219 case AArch64ISD::FROUND_MERGE_PASSTHRU:
220 case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
221 case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
222 case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
223 case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
224 case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
225 case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
226 case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
227 case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
228 case AArch64ISD::FSQRT_MERGE_PASSTHRU:
229 case AArch64ISD::FRECPX_MERGE_PASSTHRU:
230 case AArch64ISD::FABS_MERGE_PASSTHRU:
231 return true;
232 }
233}
234
235AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
236 const AArch64Subtarget &STI)
237 : TargetLowering(TM), Subtarget(&STI) {
238 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
239 // we have to make something up. Arbitrarily, choose ZeroOrOne.
240 setBooleanContents(ZeroOrOneBooleanContent);
241 // When comparing vectors the result sets the different elements in the
242 // vector to all-one or all-zero.
243 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
244
245 // Set up the register classes.
246 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
247 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
248
249 if (Subtarget->hasLS64()) {
250 addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
251 setOperationAction(ISD::LOAD, MVT::i64x8, Custom);
252 setOperationAction(ISD::STORE, MVT::i64x8, Custom);
253 }
254
255 if (Subtarget->hasFPARMv8()) {
256 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
257 addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
258 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
259 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
260 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
261 }
262
263 if (Subtarget->hasNEON()) {
264 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
265 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
266 // Someone set us up the NEON.
267 addDRTypeForNEON(MVT::v2f32);
268 addDRTypeForNEON(MVT::v8i8);
269 addDRTypeForNEON(MVT::v4i16);
270 addDRTypeForNEON(MVT::v2i32);
271 addDRTypeForNEON(MVT::v1i64);
272 addDRTypeForNEON(MVT::v1f64);
273 addDRTypeForNEON(MVT::v4f16);
274 if (Subtarget->hasBF16())
275 addDRTypeForNEON(MVT::v4bf16);
276
277 addQRTypeForNEON(MVT::v4f32);
278 addQRTypeForNEON(MVT::v2f64);
279 addQRTypeForNEON(MVT::v16i8);
280 addQRTypeForNEON(MVT::v8i16);
281 addQRTypeForNEON(MVT::v4i32);
282 addQRTypeForNEON(MVT::v2i64);
283 addQRTypeForNEON(MVT::v8f16);
284 if (Subtarget->hasBF16())
285 addQRTypeForNEON(MVT::v8bf16);
286 }
287
288 if (Subtarget->hasSVE()) {
289 // Add legal sve predicate types
290 addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
291 addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
292 addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
293 addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
294
295 // Add legal sve data types
296 addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
297 addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
298 addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
299 addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
300
301 addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
302 addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
303 addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
304 addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
305 addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
306 addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
307
308 if (Subtarget->hasBF16()) {
309 addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
310 addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
311 addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
312 }
313
314 if (Subtarget->useSVEForFixedLengthVectors()) {
315 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
316 if (useSVEForFixedLengthVectorVT(VT))
317 addRegisterClass(VT, &AArch64::ZPRRegClass);
318
319 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
320 if (useSVEForFixedLengthVectorVT(VT))
321 addRegisterClass(VT, &AArch64::ZPRRegClass);
322 }
323
324 for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
325 setOperationAction(ISD::SADDSAT, VT, Legal);
326 setOperationAction(ISD::UADDSAT, VT, Legal);
327 setOperationAction(ISD::SSUBSAT, VT, Legal);
328 setOperationAction(ISD::USUBSAT, VT, Legal);
329 setOperationAction(ISD::UREM, VT, Expand);
330 setOperationAction(ISD::SREM, VT, Expand);
331 setOperationAction(ISD::SDIVREM, VT, Expand);
332 setOperationAction(ISD::UDIVREM, VT, Expand);
333 }
334
335 for (auto VT :
336 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
337 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
338 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
339
340 for (auto VT :
341 { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
342 MVT::nxv2f64 }) {
343 setCondCodeAction(ISD::SETO, VT, Expand);
344 setCondCodeAction(ISD::SETOLT, VT, Expand);
345 setCondCodeAction(ISD::SETLT, VT, Expand);
346 setCondCodeAction(ISD::SETOLE, VT, Expand);
347 setCondCodeAction(ISD::SETLE, VT, Expand);
348 setCondCodeAction(ISD::SETULT, VT, Expand);
349 setCondCodeAction(ISD::SETULE, VT, Expand);
350 setCondCodeAction(ISD::SETUGE, VT, Expand);
351 setCondCodeAction(ISD::SETUGT, VT, Expand);
352 setCondCodeAction(ISD::SETUEQ, VT, Expand);
353 setCondCodeAction(ISD::SETUNE, VT, Expand);
354
355 setOperationAction(ISD::FREM, VT, Expand);
356 setOperationAction(ISD::FPOW, VT, Expand);
357 setOperationAction(ISD::FPOWI, VT, Expand);
358 setOperationAction(ISD::FCOS, VT, Expand);
359 setOperationAction(ISD::FSIN, VT, Expand);
360 setOperationAction(ISD::FSINCOS, VT, Expand);
361 setOperationAction(ISD::FEXP, VT, Expand);
362 setOperationAction(ISD::FEXP2, VT, Expand);
363 setOperationAction(ISD::FLOG, VT, Expand);
364 setOperationAction(ISD::FLOG2, VT, Expand);
365 setOperationAction(ISD::FLOG10, VT, Expand);
366 }
367 }
368
369 // Compute derived properties from the register classes
370 computeRegisterProperties(Subtarget->getRegisterInfo());
371
372 // Provide all sorts of operation actions
373 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
374 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
375 setOperationAction(ISD::SETCC, MVT::i32, Custom);
376 setOperationAction(ISD::SETCC, MVT::i64, Custom);
377 setOperationAction(ISD::SETCC, MVT::f16, Custom);
378 setOperationAction(ISD::SETCC, MVT::f32, Custom);
379 setOperationAction(ISD::SETCC, MVT::f64, Custom);
380 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
381 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
382 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
383 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
384 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
385 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
386 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
387 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
388 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
389 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
390 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
391 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
392 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
393 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
394 setOperationAction(ISD::SELECT, MVT::i32, Custom);
395 setOperationAction(ISD::SELECT, MVT::i64, Custom);
396 setOperationAction(ISD::SELECT, MVT::f16, Custom);
397 setOperationAction(ISD::SELECT, MVT::f32, Custom);
398 setOperationAction(ISD::SELECT, MVT::f64, Custom);
399 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
400 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
401 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
402 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
403 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
404 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
405 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
406
407 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
408 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
409 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
410
411 setOperationAction(ISD::FREM, MVT::f32, Expand);
412 setOperationAction(ISD::FREM, MVT::f64, Expand);
413 setOperationAction(ISD::FREM, MVT::f80, Expand);
414
415 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
416
417 // Custom lowering hooks are needed for XOR
418 // to fold it into CSINC/CSINV.
419 setOperationAction(ISD::XOR, MVT::i32, Custom);
420 setOperationAction(ISD::XOR, MVT::i64, Custom);
421
422 // Virtually no operation on f128 is legal, but LLVM can't expand them when
423 // there's a valid register class, so we need custom operations in most cases.
424 setOperationAction(ISD::FABS, MVT::f128, Expand);
425 setOperationAction(ISD::FADD, MVT::f128, LibCall);
426 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
427 setOperationAction(ISD::FCOS, MVT::f128, Expand);
428 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
429 setOperationAction(ISD::FMA, MVT::f128, Expand);
430 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
431 setOperationAction(ISD::FNEG, MVT::f128, Expand);
432 setOperationAction(ISD::FPOW, MVT::f128, Expand);
433 setOperationAction(ISD::FREM, MVT::f128, Expand);
434 setOperationAction(ISD::FRINT, MVT::f128, Expand);
435 setOperationAction(ISD::FSIN, MVT::f128, Expand);
436 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
437 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
438 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
439 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
440 setOperationAction(ISD::SETCC, MVT::f128, Custom);
441 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
442 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
443 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
444 setOperationAction(ISD::SELECT, MVT::f128, Custom);
445 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
446 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
447
448 // Lowering for many of the conversions is actually specified by the non-f128
449 // type. The LowerXXX function will be trivial when f128 isn't involved.
450 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
451 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
452 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
453 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
454 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
455 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
456 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
457 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
458 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
459 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
460 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
461 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
462 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
463 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
464 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
465 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
466 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
467 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
468 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
469 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
470 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
471 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
472 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
473 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
474 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
475 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
476 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
477 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
478 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
479 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
480
481 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
482 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
483 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
484 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
485
486 // Variable arguments.
487 setOperationAction(ISD::VASTART, MVT::Other, Custom);
488 setOperationAction(ISD::VAARG, MVT::Other, Custom);
489 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
490 setOperationAction(ISD::VAEND, MVT::Other, Expand);
491
492 // Variable-sized objects.
493 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
494 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
495
496 if (Subtarget->isTargetWindows())
497 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
498 else
499 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
500
501 // Constant pool entries
502 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
503
504 // BlockAddress
505 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
506
507 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
508 setOperationAction(ISD::ADDC, MVT::i32, Custom);
509 setOperationAction(ISD::ADDE, MVT::i32, Custom);
510 setOperationAction(ISD::SUBC, MVT::i32, Custom);
511 setOperationAction(ISD::SUBE, MVT::i32, Custom);
512 setOperationAction(ISD::ADDC, MVT::i64, Custom);
513 setOperationAction(ISD::ADDE, MVT::i64, Custom);
514 setOperationAction(ISD::SUBC, MVT::i64, Custom);
515 setOperationAction(ISD::SUBE, MVT::i64, Custom);
516
517 // AArch64 lacks both left-rotate and popcount instructions.
518 setOperationAction(ISD::ROTL, MVT::i32, Expand);
519 setOperationAction(ISD::ROTL, MVT::i64, Expand);
520 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
521 setOperationAction(ISD::ROTL, VT, Expand);
522 setOperationAction(ISD::ROTR, VT, Expand);
523 }
524
525 // AArch64 doesn't have i32 MULH{S|U}.
526 setOperationAction(ISD::MULHU, MVT::i32, Expand);
527 setOperationAction(ISD::MULHS, MVT::i32, Expand);
528
529 // AArch64 doesn't have {U|S}MUL_LOHI.
530 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
531 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
532
533 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
534 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
535 setOperationAction(ISD::CTPOP, MVT::i128, Custom);
536
537 setOperationAction(ISD::ABS, MVT::i32, Custom);
538 setOperationAction(ISD::ABS, MVT::i64, Custom);
539
540 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
541 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
542 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
543 setOperationAction(ISD::SDIVREM, VT, Expand);
544 setOperationAction(ISD::UDIVREM, VT, Expand);
545 }
546 setOperationAction(ISD::SREM, MVT::i32, Expand);
547 setOperationAction(ISD::SREM, MVT::i64, Expand);
548 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
549 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
550 setOperationAction(ISD::UREM, MVT::i32, Expand);
551 setOperationAction(ISD::UREM, MVT::i64, Expand);
552
553 // Custom lower Add/Sub/Mul with overflow.
554 setOperationAction(ISD::SADDO, MVT::i32, Custom);
555 setOperationAction(ISD::SADDO, MVT::i64, Custom);
556 setOperationAction(ISD::UADDO, MVT::i32, Custom);
557 setOperationAction(ISD::UADDO, MVT::i64, Custom);
558 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
559 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
560 setOperationAction(ISD::USUBO, MVT::i32, Custom);
561 setOperationAction(ISD::USUBO, MVT::i64, Custom);
562 setOperationAction(ISD::SMULO, MVT::i32, Custom);
563 setOperationAction(ISD::SMULO, MVT::i64, Custom);
564 setOperationAction(ISD::UMULO, MVT::i32, Custom);
565 setOperationAction(ISD::UMULO, MVT::i64, Custom);
566
567 setOperationAction(ISD::FSIN, MVT::f32, Expand);
568 setOperationAction(ISD::FSIN, MVT::f64, Expand);
569 setOperationAction(ISD::FCOS, MVT::f32, Expand);
570 setOperationAction(ISD::FCOS, MVT::f64, Expand);
571 setOperationAction(ISD::FPOW, MVT::f32, Expand);
572 setOperationAction(ISD::FPOW, MVT::f64, Expand);
573 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
574 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
575 if (Subtarget->hasFullFP16())
576 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
577 else
578 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
579
580 setOperationAction(ISD::FREM, MVT::f16, Promote);
581 setOperationAction(ISD::FREM, MVT::v4f16, Expand);
582 setOperationAction(ISD::FREM, MVT::v8f16, Expand);
583 setOperationAction(ISD::FPOW, MVT::f16, Promote);
584 setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
585 setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
586 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
587 setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
588 setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
589 setOperationAction(ISD::FCOS, MVT::f16, Promote);
590 setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
591 setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
592 setOperationAction(ISD::FSIN, MVT::f16, Promote);
593 setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
594 setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
595 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
596 setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
597 setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
598 setOperationAction(ISD::FEXP, MVT::f16, Promote);
599 setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
600 setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
601 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
602 setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
603 setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
604 setOperationAction(ISD::FLOG, MVT::f16, Promote);
605 setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
606 setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
607 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
608 setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
609 setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
610 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
611 setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
612 setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
613
614 if (!Subtarget->hasFullFP16()) {
615 setOperationAction(ISD::SELECT, MVT::f16, Promote);
616 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
617 setOperationAction(ISD::SETCC, MVT::f16, Promote);
618 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
619 setOperationAction(ISD::FADD, MVT::f16, Promote);
620 setOperationAction(ISD::FSUB, MVT::f16, Promote);
621 setOperationAction(ISD::FMUL, MVT::f16, Promote);
622 setOperationAction(ISD::FDIV, MVT::f16, Promote);
623 setOperationAction(ISD::FMA, MVT::f16, Promote);
624 setOperationAction(ISD::FNEG, MVT::f16, Promote);
625 setOperationAction(ISD::FABS, MVT::f16, Promote);
626 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
627 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
628 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
629 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
630 setOperationAction(ISD::FRINT, MVT::f16, Promote);
631 setOperationAction(ISD::FROUND, MVT::f16, Promote);
632 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
633 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
634 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
635 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
636 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
637 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
638
639 // promote v4f16 to v4f32 when that is known to be safe.
640 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
641 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
642 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
643 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
644 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
645 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
646 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
647 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
648
649 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
650 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
651 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
652 setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
653 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
654 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
655 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
656 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
657 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
658 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
659 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
660 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
661 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
662 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
663 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
664 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
665
666 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
667 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
668 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
669 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
670 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
671 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
672 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
673 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
674 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
675 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
676 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
677 setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
678 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
679 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
680 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
681 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
682 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
683 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
684 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
685 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
686 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
687 }
688
689 // AArch64 has implementations of a lot of rounding-like FP operations.
690 for (MVT Ty : {MVT::f32, MVT::f64}) {
691 setOperationAction(ISD::FFLOOR, Ty, Legal);
692 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
693 setOperationAction(ISD::FCEIL, Ty, Legal);
694 setOperationAction(ISD::FRINT, Ty, Legal);
695 setOperationAction(ISD::FTRUNC, Ty, Legal);
696 setOperationAction(ISD::FROUND, Ty, Legal);
697 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
698 setOperationAction(ISD::FMINNUM, Ty, Legal);
699 setOperationAction(ISD::FMAXNUM, Ty, Legal);
700 setOperationAction(ISD::FMINIMUM, Ty, Legal);
701 setOperationAction(ISD::FMAXIMUM, Ty, Legal);
702 setOperationAction(ISD::LROUND, Ty, Legal);
703 setOperationAction(ISD::LLROUND, Ty, Legal);
704 setOperationAction(ISD::LRINT, Ty, Legal);
705 setOperationAction(ISD::LLRINT, Ty, Legal);
706 }
707
708 if (Subtarget->hasFullFP16()) {
709 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
710 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
711 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
712 setOperationAction(ISD::FRINT, MVT::f16, Legal);
713 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
714 setOperationAction(ISD::FROUND, MVT::f16, Legal);
715 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
716 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
717 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
718 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
719 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
720 }
721
722 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
723
724 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
725 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
726
727 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
728 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
729 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
730 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
731 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
732
733 // Generate outline atomics library calls only if LSE was not specified for
734 // subtarget
735 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
736 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
737 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
738 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
739 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
740 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
741 setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
742 setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
743 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
744 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
745 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
746 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
747 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
748 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
749 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
750 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
751 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
752 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
753 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
754 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
755 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
756 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
757 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
758 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
759 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
760 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
761#define LCALLNAMES(A, B, N) \
762 setLibcallName(A##N##_RELAX, #B #N "_relax"); \
763 setLibcallName(A##N##_ACQ, #B #N "_acq"); \
764 setLibcallName(A##N##_REL, #B #N "_rel"); \
765 setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
766#define LCALLNAME4(A, B) \
767 LCALLNAMES(A, B, 1) \
768 LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
769#define LCALLNAME5(A, B) \
770 LCALLNAMES(A, B, 1) \
771 LCALLNAMES(A, B, 2) \
772 LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
773 LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
774 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
775 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
776 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
777 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
778 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
779#undef LCALLNAMES
780#undef LCALLNAME4
781#undef LCALLNAME5
782 }
783
784 // 128-bit loads and stores can be done without expanding
785 setOperationAction(ISD::LOAD, MVT::i128, Custom);
786 setOperationAction(ISD::STORE, MVT::i128, Custom);
787
788 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
789 // custom lowering, as there are no un-paired non-temporal stores and
790 // legalization will break up 256 bit inputs.
791 setOperationAction(ISD::STORE, MVT::v32i8, Custom);
792 setOperationAction(ISD::STORE, MVT::v16i16, Custom);
793 setOperationAction(ISD::STORE, MVT::v16f16, Custom);
794 setOperationAction(ISD::STORE, MVT::v8i32, Custom);
795 setOperationAction(ISD::STORE, MVT::v8f32, Custom);
796 setOperationAction(ISD::STORE, MVT::v4f64, Custom);
797 setOperationAction(ISD::STORE, MVT::v4i64, Custom);
798
799 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
800 // This requires the Performance Monitors extension.
801 if (Subtarget->hasPerfMon())
802 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
803
804 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
805 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
806 // Issue __sincos_stret if available.
807 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
808 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
809 } else {
810 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
811 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
812 }
813
814 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
815 // MSVCRT doesn't have powi; fall back to pow
816 setLibcallName(RTLIB::POWI_F32, nullptr);
817 setLibcallName(RTLIB::POWI_F64, nullptr);
818 }
819
820 // Make floating-point constants legal for the large code model, so they don't
821 // become loads from the constant pool.
822 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
823 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
824 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
825 }
826
827 // AArch64 does not have floating-point extending loads, i1 sign-extending
828 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
829 for (MVT VT : MVT::fp_valuetypes()) {
830 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
831 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
832 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
833 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
834 }
835 for (MVT VT : MVT::integer_valuetypes())
836 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
837
838 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
839 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
840 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
841 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
842 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
843 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
844 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
845
846 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
847 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
848 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
849
850 // Indexed loads and stores are supported.
851 for (unsigned im = (unsigned)ISD::PRE_INC;
852 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
853 setIndexedLoadAction(im, MVT::i8, Legal);
854 setIndexedLoadAction(im, MVT::i16, Legal);
855 setIndexedLoadAction(im, MVT::i32, Legal);
856 setIndexedLoadAction(im, MVT::i64, Legal);
857 setIndexedLoadAction(im, MVT::f64, Legal);
858 setIndexedLoadAction(im, MVT::f32, Legal);
859 setIndexedLoadAction(im, MVT::f16, Legal);
860 setIndexedLoadAction(im, MVT::bf16, Legal);
861 setIndexedStoreAction(im, MVT::i8, Legal);
862 setIndexedStoreAction(im, MVT::i16, Legal);
863 setIndexedStoreAction(im, MVT::i32, Legal);
864 setIndexedStoreAction(im, MVT::i64, Legal);
865 setIndexedStoreAction(im, MVT::f64, Legal);
866 setIndexedStoreAction(im, MVT::f32, Legal);
867 setIndexedStoreAction(im, MVT::f16, Legal);
868 setIndexedStoreAction(im, MVT::bf16, Legal);
869 }
870
871 // Trap.
872 setOperationAction(ISD::TRAP, MVT::Other, Legal);
873 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
874 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
875
876 // We combine OR nodes for bitfield operations.
877 setTargetDAGCombine(ISD::OR);
878 // Try to create BICs for vector ANDs.
879 setTargetDAGCombine(ISD::AND);
880
881 // Vector add and sub nodes may conceal a high-half opportunity.
882 // Also, try to fold ADD into CSINC/CSINV..
883 setTargetDAGCombine(ISD::ADD);
884 setTargetDAGCombine(ISD::ABS);
885 setTargetDAGCombine(ISD::SUB);
886 setTargetDAGCombine(ISD::SRL);
887 setTargetDAGCombine(ISD::XOR);
888 setTargetDAGCombine(ISD::SINT_TO_FP);
889 setTargetDAGCombine(ISD::UINT_TO_FP);
890
891 // TODO: Do the same for FP_TO_*INT_SAT.
892 setTargetDAGCombine(ISD::FP_TO_SINT);
893 setTargetDAGCombine(ISD::FP_TO_UINT);
894 setTargetDAGCombine(ISD::FDIV);
895
896 // Try and combine setcc with csel
897 setTargetDAGCombine(ISD::SETCC);
898
899 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
900
901 setTargetDAGCombine(ISD::ANY_EXTEND);
902 setTargetDAGCombine(ISD::ZERO_EXTEND);
903 setTargetDAGCombine(ISD::SIGN_EXTEND);
904 setTargetDAGCombine(ISD::VECTOR_SPLICE);
905 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
906 setTargetDAGCombine(ISD::TRUNCATE);
907 setTargetDAGCombine(ISD::CONCAT_VECTORS);
908 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
909 setTargetDAGCombine(ISD::STORE);
910 if (Subtarget->supportsAddressTopByteIgnored())
911 setTargetDAGCombine(ISD::LOAD);
912
913 setTargetDAGCombine(ISD::MUL);
914
915 setTargetDAGCombine(ISD::SELECT);
916 setTargetDAGCombine(ISD::VSELECT);
917
918 setTargetDAGCombine(ISD::INTRINSIC_VOID);
919 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
920 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
921 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
922 setTargetDAGCombine(ISD::VECREDUCE_ADD);
923 setTargetDAGCombine(ISD::STEP_VECTOR);
924
925 setTargetDAGCombine(ISD::GlobalAddress);
926
927 // In case of strict alignment, avoid an excessive number of byte wide stores.
928 MaxStoresPerMemsetOptSize = 8;
929 MaxStoresPerMemset = Subtarget->requiresStrictAlign()
930 ? MaxStoresPerMemsetOptSize : 32;
931
932 MaxGluedStoresPerMemcpy = 4;
933 MaxStoresPerMemcpyOptSize = 4;
934 MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
935 ? MaxStoresPerMemcpyOptSize : 16;
936
937 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
938
939 MaxLoadsPerMemcmpOptSize = 4;
940 MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
941 ? MaxLoadsPerMemcmpOptSize : 8;
942
943 setStackPointerRegisterToSaveRestore(AArch64::SP);
944
945 setSchedulingPreference(Sched::Hybrid);
946
947 EnableExtLdPromotion = true;
948
949 // Set required alignment.
950 setMinFunctionAlignment(Align(4));
951 // Set preferred alignments.
952 setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
953 setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
954
955 // Only change the limit for entries in a jump table if specified by
956 // the sub target, but not at the command line.
957 unsigned MaxJT = STI.getMaximumJumpTableSize();
958 if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
959 setMaximumJumpTableSize(MaxJT);
960
961 setHasExtractBitsInsn(true);
962
963 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
964
965 if (Subtarget->hasNEON()) {
966 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
967 // silliness like this:
968 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
969 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
970 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
971 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
972 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
973 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
974 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
975 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
976 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
977 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
978 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
979 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
980 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
981 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
982 setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand);
983 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
984 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
985 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
986 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
987 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
988 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
989 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
990 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
991 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
992 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
993 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
994
995 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
996 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
997 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
998 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
999 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
1000
1001 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v1i64, Expand);
1002 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v1i64, Expand);
1003
1004 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
1005
1006 // AArch64 doesn't have a direct vector ->f32 conversion instructions for
1007 // elements smaller than i32, so promote the input to i32 first.
1008 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
1009 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
1010 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
1011 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
1012 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
1013 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
1014
1015 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
1016 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1017 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1018 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
1019 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
1020 // Or, direct i32 -> f16 vector conversion. Set it so custom, so the
1021 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
1022 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
1023 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1024
1025 if (Subtarget->hasFullFP16()) {
1026 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1027 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1028 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
1029 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1030 } else {
1031 // when AArch64 doesn't have fullfp16 support, promote the input
1032 // to i32 first.
1033 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
1034 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
1035 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
1036 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
1037 }
1038
1039 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
1040 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
1041 setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
1042 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
1043 setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
1044 setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
1045 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1046 setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
1047 for (auto VT : {MVT::v1i64, MVT::v2i64}) {
1048 setOperationAction(ISD::UMAX, VT, Custom);
1049 setOperationAction(ISD::SMAX, VT, Custom);
1050 setOperationAction(ISD::UMIN, VT, Custom);
1051 setOperationAction(ISD::SMIN, VT, Custom);
1052 }
1053
1054 // AArch64 doesn't have MUL.2d:
1055 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
1056 // Custom handling for some quad-vector types to detect MULL.
1057 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
1058 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1059 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1060
1061 // Saturates
1062 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1063 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1064 setOperationAction(ISD::SADDSAT, VT, Legal);
1065 setOperationAction(ISD::UADDSAT, VT, Legal);
1066 setOperationAction(ISD::SSUBSAT, VT, Legal);
1067 setOperationAction(ISD::USUBSAT, VT, Legal);
1068 }
1069
1070 for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1071 MVT::v4i32}) {
1072 setOperationAction(ISD::ABDS, VT, Legal);
1073 setOperationAction(ISD::ABDU, VT, Legal);
1074 }
1075
1076 // Vector reductions
1077 for (MVT VT : { MVT::v4f16, MVT::v2f32,
1078 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1079 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1080 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1081 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1082
1083 setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
1084 }
1085 }
1086 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1087 MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1088 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1089 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1090 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1091 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1092 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1093 }
1094 setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
1095
1096 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
1097 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
1098 // Likewise, narrowing and extending vector loads/stores aren't handled
1099 // directly.
1100 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1101 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
1102
1103 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
1104 setOperationAction(ISD::MULHS, VT, Legal);
1105 setOperationAction(ISD::MULHU, VT, Legal);
1106 } else {
1107 setOperationAction(ISD::MULHS, VT, Expand);
1108 setOperationAction(ISD::MULHU, VT, Expand);
1109 }
1110 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1111 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1112
1113 setOperationAction(ISD::BSWAP, VT, Expand);
1114 setOperationAction(ISD::CTTZ, VT, Expand);
1115
1116 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1117 setTruncStoreAction(VT, InnerVT, Expand);
1118 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1119 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1120 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1121 }
1122 }
1123
1124 // AArch64 has implementations of a lot of rounding-like FP operations.
1125 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
1126 setOperationAction(ISD::FFLOOR, Ty, Legal);
1127 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1128 setOperationAction(ISD::FCEIL, Ty, Legal);
1129 setOperationAction(ISD::FRINT, Ty, Legal);
1130 setOperationAction(ISD::FTRUNC, Ty, Legal);
1131 setOperationAction(ISD::FROUND, Ty, Legal);
1132 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
1133 }
1134
1135 if (Subtarget->hasFullFP16()) {
1136 for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
1137 setOperationAction(ISD::FFLOOR, Ty, Legal);
1138 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1139 setOperationAction(ISD::FCEIL, Ty, Legal);
1140 setOperationAction(ISD::FRINT, Ty, Legal);
1141 setOperationAction(ISD::FTRUNC, Ty, Legal);
1142 setOperationAction(ISD::FROUND, Ty, Legal);
1143 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
1144 }
1145 }
1146
1147 if (Subtarget->hasSVE())
1148 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
1149
1150 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
1151
1152 setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1153 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1154 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1155 setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1156 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1157 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1158 }
1159
1160 if (Subtarget->hasSVE()) {
1161 for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
1162 setOperationAction(ISD::BITREVERSE, VT, Custom);
1163 setOperationAction(ISD::BSWAP, VT, Custom);
1164 setOperationAction(ISD::CTLZ, VT, Custom);
1165 setOperationAction(ISD::CTPOP, VT, Custom);
1166 setOperationAction(ISD::CTTZ, VT, Custom);
1167 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1168 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1169 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1170 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1171 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1172 setOperationAction(ISD::MGATHER, VT, Custom);
1173 setOperationAction(ISD::MSCATTER, VT, Custom);
1174 setOperationAction(ISD::MLOAD, VT, Custom);
1175 setOperationAction(ISD::MUL, VT, Custom);
1176 setOperationAction(ISD::MULHS, VT, Custom);
1177 setOperationAction(ISD::MULHU, VT, Custom);
1178 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1179 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1180 setOperationAction(ISD::SELECT, VT, Custom);
1181 setOperationAction(ISD::SETCC, VT, Custom);
1182 setOperationAction(ISD::SDIV, VT, Custom);
1183 setOperationAction(ISD::UDIV, VT, Custom);
1184 setOperationAction(ISD::SMIN, VT, Custom);
1185 setOperationAction(ISD::UMIN, VT, Custom);
1186 setOperationAction(ISD::SMAX, VT, Custom);
1187 setOperationAction(ISD::UMAX, VT, Custom);
1188 setOperationAction(ISD::SHL, VT, Custom);
1189 setOperationAction(ISD::SRL, VT, Custom);
1190 setOperationAction(ISD::SRA, VT, Custom);
1191 setOperationAction(ISD::ABS, VT, Custom);
1192 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1193 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1194 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1195 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1196 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1197 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1198 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1199 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1200
1201 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1202 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1203 setOperationAction(ISD::SELECT_CC, VT, Expand);
1204 setOperationAction(ISD::ROTL, VT, Expand);
1205 setOperationAction(ISD::ROTR, VT, Expand);
1206 }
1207
1208 // Illegal unpacked integer vector types.
1209 for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
1210 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1211 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1212 }
1213
1214 // Legalize unpacked bitcasts to REINTERPRET_CAST.
1215 for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
1216 MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
1217 setOperationAction(ISD::BITCAST, VT, Custom);
1218
1219 for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
1220 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1221 setOperationAction(ISD::SELECT, VT, Custom);
1222 setOperationAction(ISD::SETCC, VT, Custom);
1223 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1224 setOperationAction(ISD::TRUNCATE, VT, Custom);
1225 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1226 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1227 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1228
1229 setOperationAction(ISD::SELECT_CC, VT, Expand);
1230 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1231 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1232
1233 // There are no legal MVT::nxv16f## based types.
1234 if (VT != MVT::nxv16i1) {
1235 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1236 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1237 }
1238 }
1239
1240 // NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
1241 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
1242 MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1243 MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1244 setOperationAction(ISD::MLOAD, VT, Custom);
1245 setOperationAction(ISD::MSTORE, VT, Custom);
1246 setOperationAction(ISD::MGATHER, VT, Custom);
1247 setOperationAction(ISD::MSCATTER, VT, Custom);
1248 }
1249
1250 for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
1251 for (MVT InnerVT : MVT::fp_scalable_vector_valuetypes()) {
1252 // Avoid marking truncating FP stores as legal to prevent the
1253 // DAGCombiner from creating unsupported truncating stores.
1254 setTruncStoreAction(VT, InnerVT, Expand);
1255 // SVE does not have floating-point extending loads.
1256 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1257 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1258 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1259 }
1260 }
1261
1262  // SVE supports truncating stores of 64- and 128-bit vectors.
1263 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
1264 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
1265 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
1266 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
1267 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
1268
1269 for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
1270 MVT::nxv4f32, MVT::nxv2f64}) {
1271 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1272 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1273 setOperationAction(ISD::MGATHER, VT, Custom);
1274 setOperationAction(ISD::MSCATTER, VT, Custom);
1275 setOperationAction(ISD::MLOAD, VT, Custom);
1276 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1277 setOperationAction(ISD::SELECT, VT, Custom);
1278 setOperationAction(ISD::FADD, VT, Custom);
1279 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1280 setOperationAction(ISD::FDIV, VT, Custom);
1281 setOperationAction(ISD::FMA, VT, Custom);
1282 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1283 setOperationAction(ISD::FMAXNUM, VT, Custom);
1284 setOperationAction(ISD::FMINIMUM, VT, Custom);
1285 setOperationAction(ISD::FMINNUM, VT, Custom);
1286 setOperationAction(ISD::FMUL, VT, Custom);
1287 setOperationAction(ISD::FNEG, VT, Custom);
1288 setOperationAction(ISD::FSUB, VT, Custom);
1289 setOperationAction(ISD::FCEIL, VT, Custom);
1290 setOperationAction(ISD::FFLOOR, VT, Custom);
1291 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1292 setOperationAction(ISD::FRINT, VT, Custom);
1293 setOperationAction(ISD::FROUND, VT, Custom);
1294 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1295 setOperationAction(ISD::FTRUNC, VT, Custom);
1296 setOperationAction(ISD::FSQRT, VT, Custom);
1297 setOperationAction(ISD::FABS, VT, Custom);
1298 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1299 setOperationAction(ISD::FP_ROUND, VT, Custom);
1300 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1301 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1302 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1303 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1304 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1305
1306 setOperationAction(ISD::SELECT_CC, VT, Expand);
1307 }
1308
1309 for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1310 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1311 setOperationAction(ISD::MGATHER, VT, Custom);
1312 setOperationAction(ISD::MSCATTER, VT, Custom);
1313 setOperationAction(ISD::MLOAD, VT, Custom);
1314 }
1315
1316 setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
1317
1318 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
1319 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
1320
1321 // NOTE: Currently this has to happen after computeRegisterProperties rather
1322 // than the preferred option of combining it with the addRegisterClass call.
1323 if (Subtarget->useSVEForFixedLengthVectors()) {
1324 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
1325 if (useSVEForFixedLengthVectorVT(VT))
1326 addTypeForFixedLengthSVE(VT);
1327 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
1328 if (useSVEForFixedLengthVectorVT(VT))
1329 addTypeForFixedLengthSVE(VT);
1330
1331    // 64-bit results can mean a bigger-than-NEON input.
1332 for (auto VT : {MVT::v8i8, MVT::v4i16})
1333 setOperationAction(ISD::TRUNCATE, VT, Custom);
1334 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
1335
1336    // 128-bit results imply a bigger-than-NEON input.
1337 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
1338 setOperationAction(ISD::TRUNCATE, VT, Custom);
1339 for (auto VT : {MVT::v8f16, MVT::v4f32})
1340 setOperationAction(ISD::FP_ROUND, VT, Custom);
1341
1342 // These operations are not supported on NEON but SVE can do them.
1343 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1344 setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
1345 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1346 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
1347 setOperationAction(ISD::MUL, MVT::v1i64, Custom);
1348 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1349 setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
1350 setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
1351 setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
1352 setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
1353 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
1354 setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
1355 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
1356 setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
1357 setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
1358 setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
1359 setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
1360 setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
1361 setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
1362 setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
1363 setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
1364 setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
1365 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
1366 setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
1367 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
1368 setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
1369 setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
1370 setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
1371 setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
1372 setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
1373 setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
1374 setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
1375 setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
1376 setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
1377 setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
1378 setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
1379 setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
1380 setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
1381
1382 // Int operations with no NEON support.
1383 for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1384 MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
1385 setOperationAction(ISD::BITREVERSE, VT, Custom);
1386 setOperationAction(ISD::CTTZ, VT, Custom);
1387 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1388 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1389 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1390 }
1391
1392 // FP operations with no NEON support.
1393 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
1394 MVT::v1f64, MVT::v2f64})
1395 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1396
1397 // Use SVE for vectors with more than 2 elements.
1398 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
1399 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1400 }
1401
1402 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64);
1403 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32);
1404 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16);
1405 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
1406 }
1407
1408 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
1409}
1410
1411void AArch64TargetLowering::addTypeForNEON(MVT VT) {
1412  assert(VT.isVector() && "VT should be a vector type");
1413
1414 if (VT.isFloatingPoint()) {
1415 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
1416 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
1417 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
1418 }
1419
1420 // Mark vector float intrinsics as expand.
1421 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
1422 setOperationAction(ISD::FSIN, VT, Expand);
1423 setOperationAction(ISD::FCOS, VT, Expand);
1424 setOperationAction(ISD::FPOW, VT, Expand);
1425 setOperationAction(ISD::FLOG, VT, Expand);
1426 setOperationAction(ISD::FLOG2, VT, Expand);
1427 setOperationAction(ISD::FLOG10, VT, Expand);
1428 setOperationAction(ISD::FEXP, VT, Expand);
1429 setOperationAction(ISD::FEXP2, VT, Expand);
1430 }
1431
1432 // But we do support custom-lowering for FCOPYSIGN.
1433 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
1434 ((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
1435 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1436
1437 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1438 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1439 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1440 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1441 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1442 setOperationAction(ISD::SRA, VT, Custom);
1443 setOperationAction(ISD::SRL, VT, Custom);
1444 setOperationAction(ISD::SHL, VT, Custom);
1445 setOperationAction(ISD::OR, VT, Custom);
1446 setOperationAction(ISD::SETCC, VT, Custom);
1447 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
1448
1449 setOperationAction(ISD::SELECT, VT, Expand);
1450 setOperationAction(ISD::SELECT_CC, VT, Expand);
1451 setOperationAction(ISD::VSELECT, VT, Expand);
1452 for (MVT InnerVT : MVT::all_valuetypes())
1453 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
1454
1455  // CNT supports only B element sizes; wider elements are handled with CNT followed by UADDLP to widen the result.
1456 if (VT != MVT::v8i8 && VT != MVT::v16i8)
1457 setOperationAction(ISD::CTPOP, VT, Custom);
1458
1459 setOperationAction(ISD::UDIV, VT, Expand);
1460 setOperationAction(ISD::SDIV, VT, Expand);
1461 setOperationAction(ISD::UREM, VT, Expand);
1462 setOperationAction(ISD::SREM, VT, Expand);
1463 setOperationAction(ISD::FREM, VT, Expand);
1464
1465 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1466 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1467 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
1468 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
1469
1470 if (!VT.isFloatingPoint())
1471 setOperationAction(ISD::ABS, VT, Legal);
1472
1473 // [SU][MIN|MAX] are available for all NEON types apart from i64.
1474 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
1475 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
1476 setOperationAction(Opcode, VT, Legal);
1477
1478 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
1479 if (VT.isFloatingPoint() &&
1480 VT.getVectorElementType() != MVT::bf16 &&
1481 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
1482 for (unsigned Opcode :
1483 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
1484 setOperationAction(Opcode, VT, Legal);
1485
1486 if (Subtarget->isLittleEndian()) {
1487 for (unsigned im = (unsigned)ISD::PRE_INC;
1488 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1489 setIndexedLoadAction(im, VT, Legal);
1490 setIndexedStoreAction(im, VT, Legal);
1491 }
1492 }
1493}
1494
1495void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
1496  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
1497
1498 // By default everything must be expanded.
1499 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1500 setOperationAction(Op, VT, Expand);
1501
1502 // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
1503 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1504
1505 if (VT.isFloatingPoint()) {
1506 setCondCodeAction(ISD::SETO, VT, Expand);
1507 setCondCodeAction(ISD::SETOLT, VT, Expand);
1508 setCondCodeAction(ISD::SETLT, VT, Expand);
1509 setCondCodeAction(ISD::SETOLE, VT, Expand);
1510 setCondCodeAction(ISD::SETLE, VT, Expand);
1511 setCondCodeAction(ISD::SETULT, VT, Expand);
1512 setCondCodeAction(ISD::SETULE, VT, Expand);
1513 setCondCodeAction(ISD::SETUGE, VT, Expand);
1514 setCondCodeAction(ISD::SETUGT, VT, Expand);
1515 setCondCodeAction(ISD::SETUEQ, VT, Expand);
1516 setCondCodeAction(ISD::SETUNE, VT, Expand);
1517 }
1518
1519 // Mark integer truncating stores as having custom lowering
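      // (for example, with VT == v8i32 the loop below marks the v8i32->v8i8
      // and v8i32->v8i16 truncating stores, and the matching sign/zero
      // extending loads, as Custom).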
1520 if (VT.isInteger()) {
1521 MVT InnerVT = VT.changeVectorElementType(MVT::i8);
1522 while (InnerVT != VT) {
1523 setTruncStoreAction(VT, InnerVT, Custom);
1524 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom);
1525 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom);
1526 InnerVT = InnerVT.changeVectorElementType(
1527 MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
1528 }
1529 }
1530
1531 // Lower fixed length vector operations to scalable equivalents.
1532 setOperationAction(ISD::ABS, VT, Custom);
1533 setOperationAction(ISD::ADD, VT, Custom);
1534 setOperationAction(ISD::AND, VT, Custom);
1535 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1536 setOperationAction(ISD::BITCAST, VT, Custom);
1537 setOperationAction(ISD::BITREVERSE, VT, Custom);
1538 setOperationAction(ISD::BSWAP, VT, Custom);
1539 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1540 setOperationAction(ISD::CTLZ, VT, Custom);
1541 setOperationAction(ISD::CTPOP, VT, Custom);
1542 setOperationAction(ISD::CTTZ, VT, Custom);
1543 setOperationAction(ISD::FABS, VT, Custom);
1544 setOperationAction(ISD::FADD, VT, Custom);
1545 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1546 setOperationAction(ISD::FCEIL, VT, Custom);
1547 setOperationAction(ISD::FDIV, VT, Custom);
1548 setOperationAction(ISD::FFLOOR, VT, Custom);
1549 setOperationAction(ISD::FMA, VT, Custom);
1550 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1551 setOperationAction(ISD::FMAXNUM, VT, Custom);
1552 setOperationAction(ISD::FMINIMUM, VT, Custom);
1553 setOperationAction(ISD::FMINNUM, VT, Custom);
1554 setOperationAction(ISD::FMUL, VT, Custom);
1555 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1556 setOperationAction(ISD::FNEG, VT, Custom);
1557 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1558 setOperationAction(ISD::FP_ROUND, VT, Custom);
1559 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1560 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1561 setOperationAction(ISD::FRINT, VT, Custom);
1562 setOperationAction(ISD::FROUND, VT, Custom);
1563 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1564 setOperationAction(ISD::FSQRT, VT, Custom);
1565 setOperationAction(ISD::FSUB, VT, Custom);
1566 setOperationAction(ISD::FTRUNC, VT, Custom);
1567 setOperationAction(ISD::LOAD, VT, Custom);
1568 setOperationAction(ISD::MGATHER, VT, Custom);
1569 setOperationAction(ISD::MLOAD, VT, Custom);
1570 setOperationAction(ISD::MSCATTER, VT, Custom);
1571 setOperationAction(ISD::MSTORE, VT, Custom);
1572 setOperationAction(ISD::MUL, VT, Custom);
1573 setOperationAction(ISD::MULHS, VT, Custom);
1574 setOperationAction(ISD::MULHU, VT, Custom);
1575 setOperationAction(ISD::OR, VT, Custom);
1576 setOperationAction(ISD::SDIV, VT, Custom);
1577 setOperationAction(ISD::SELECT, VT, Custom);
1578 setOperationAction(ISD::SETCC, VT, Custom);
1579 setOperationAction(ISD::SHL, VT, Custom);
1580 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1581 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1582 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1583 setOperationAction(ISD::SMAX, VT, Custom);
1584 setOperationAction(ISD::SMIN, VT, Custom);
1585 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1586 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1587 setOperationAction(ISD::SRA, VT, Custom);
1588 setOperationAction(ISD::SRL, VT, Custom);
1589 setOperationAction(ISD::STORE, VT, Custom);
1590 setOperationAction(ISD::SUB, VT, Custom);
1591 setOperationAction(ISD::TRUNCATE, VT, Custom);
1592 setOperationAction(ISD::UDIV, VT, Custom);
1593 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1594 setOperationAction(ISD::UMAX, VT, Custom);
1595 setOperationAction(ISD::UMIN, VT, Custom);
1596 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1597 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1598 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1599 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1600 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1601 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1602 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1603 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1604 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1605 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1606 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1607 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1608 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1609 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1610 setOperationAction(ISD::VSELECT, VT, Custom);
1611 setOperationAction(ISD::XOR, VT, Custom);
1612 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1613}
1614
1615void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
1616 addRegisterClass(VT, &AArch64::FPR64RegClass);
1617 addTypeForNEON(VT);
1618}
1619
1620void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
1621 addRegisterClass(VT, &AArch64::FPR128RegClass);
1622 addTypeForNEON(VT);
1623}
1624
1625EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
1626 LLVMContext &C, EVT VT) const {
1627 if (!VT.isVector())
1628 return MVT::i32;
1629 if (VT.isScalableVector())
1630 return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
1631 return VT.changeVectorElementTypeToInteger();
1632}
1633
1634static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
1635 const APInt &Demanded,
1636 TargetLowering::TargetLoweringOpt &TLO,
1637 unsigned NewOpc) {
1638 uint64_t OldImm = Imm, NewImm, Enc;
1639 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
1640
1641 // Return if the immediate is already all zeros, all ones, a bimm32 or a
1642 // bimm64.
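      // (e.g. 0x00FF00FF00FF00FF, a replicated 16-bit element containing a run
      // of eight ones, is already a valid bimm64 and needs no optimization).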
1643 if (Imm == 0 || Imm == Mask ||
1644 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
1645 return false;
1646
1647 unsigned EltSize = Size;
1648 uint64_t DemandedBits = Demanded.getZExtValue();
1649
1650 // Clear bits that are not demanded.
1651 Imm &= DemandedBits;
1652
1653 while (true) {
1654 // The goal here is to set the non-demanded bits in a way that minimizes
1655 // the number of switching between 0 and 1. In order to achieve this goal,
1656 // we set the non-demanded bits to the value of the preceding demanded bits.
1657 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
1658 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
1659 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
1660 // The final result is 0b11000011.
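        // Tracing that example through the code below, with DemandedBits ==
        // 0b01100101 and Imm masked to 0b01000001: InvertedImm == 0b00100100
        // marks the demanded zero bits, RotatedImm == 0b00001000 marks the
        // non-demanded bit just above a demanded zero, the carry in Sum clears
        // the run of non-demanded bits above it, and Ones == 0b10000010 keeps
        // only the 'x' bits that copy a demanded one, giving
        // NewImm == 0b11000011.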
1661 uint64_t NonDemandedBits = ~DemandedBits;
1662 uint64_t InvertedImm = ~Imm & DemandedBits;
1663 uint64_t RotatedImm =
1664 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
1665 NonDemandedBits;
1666 uint64_t Sum = RotatedImm + NonDemandedBits;
1667 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
1668 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
1669 NewImm = (Imm | Ones) & Mask;
1670
1671 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
1672 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
1673 // we halve the element size and continue the search.
1674 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
1675 break;
1676
1677    // We cannot shrink the element size any further if it is 2 bits.
1678 if (EltSize == 2)
1679 return false;
1680
1681 EltSize /= 2;
1682 Mask >>= EltSize;
1683 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
1684
1685    // Return if there is a mismatch in any of the demanded bits of Imm and Hi.
1686 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
1687 return false;
1688
1689 // Merge the upper and lower halves of Imm and DemandedBits.
1690 Imm |= Hi;
1691 DemandedBits |= DemandedBitsHi;
1692 }
1693
1694 ++NumOptimizedImms;
1695
1696 // Replicate the element across the register width.
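      // (e.g. an 8-bit element of 0xC3 becomes 0xC3C3C3C3C3C3C3C3 when
      // Size == 64).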
1697 while (EltSize < Size) {
1698 NewImm |= NewImm << EltSize;
1699 EltSize *= 2;
1700 }
1701
1702 (void)OldImm;
1703  assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
1704         "demanded bits should never be altered");
1705  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
1706
1707 // Create the new constant immediate node.
1708 EVT VT = Op.getValueType();
1709 SDLoc DL(Op);
1710 SDValue New;
1711
1712 // If the new constant immediate is all-zeros or all-ones, let the target
1713 // independent DAG combine optimize this node.
1714 if (NewImm == 0 || NewImm == OrigMask) {
1715 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
1716 TLO.DAG.getConstant(NewImm, DL, VT));
1717 // Otherwise, create a machine node so that target independent DAG combine
1718 // doesn't undo this optimization.
1719 } else {
1720 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
1721 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
1722 New = SDValue(
1723 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
1724 }
1725
1726 return TLO.CombineTo(Op, New);
1727}
1728
1729bool AArch64TargetLowering::targetShrinkDemandedConstant(
1730 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1731 TargetLoweringOpt &TLO) const {
1732 // Delay this optimization to as late as possible.
1733 if (!TLO.LegalOps)
1734 return false;
1735
1736 if (!EnableOptimizeLogicalImm)
1737 return false;
1738
1739 EVT VT = Op.getValueType();
1740 if (VT.isVector())
1741 return false;
1742
1743 unsigned Size = VT.getSizeInBits();
1744  assert((Size == 32 || Size == 64) &&
1745         "i32 or i64 is expected after legalization.");
1746
1747 // Exit early if we demand all bits.
1748 if (DemandedBits.countPopulation() == Size)
1749 return false;
1750
1751 unsigned NewOpc;
1752 switch (Op.getOpcode()) {
1753 default:
1754 return false;
1755 case ISD::AND:
1756 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
1757 break;
1758 case ISD::OR:
1759 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1760 break;
1761 case ISD::XOR:
1762 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1763 break;
1764 }
1765 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1766 if (!C)
1767 return false;
1768 uint64_t Imm = C->getZExtValue();
1769 return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
1770}
1771
1772/// computeKnownBitsForTargetNode - Determine which of the bits specified in
1773/// Mask are known to be either zero or one and return them in Known.
1774void AArch64TargetLowering::computeKnownBitsForTargetNode(
1775 const SDValue Op, KnownBits &Known,
1776 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
1777 switch (Op.getOpcode()) {
1778 default:
1779 break;
1780 case AArch64ISD::CSEL: {
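        // CSEL produces either its first or its second operand, so only the
        // bits known in both operands are known in the result.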
1781 KnownBits Known2;
1782 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1783 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1784 Known = KnownBits::commonBits(Known, Known2);
1785 break;
1786 }
1787 case AArch64ISD::LOADgot:
1788 case AArch64ISD::ADDlow: {
1789 if (!Subtarget->isTargetILP32())
1790 break;
1791 // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
1792 Known.Zero = APInt::getHighBitsSet(64, 32);
1793 break;
1794 }
1795 case ISD::INTRINSIC_W_CHAIN: {
1796 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1797 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1798 switch (IntID) {
1799 default: return;
1800 case Intrinsic::aarch64_ldaxr:
1801 case Intrinsic::aarch64_ldxr: {
1802 unsigned BitWidth = Known.getBitWidth();
1803 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1804 unsigned MemBits = VT.getScalarSizeInBits();
1805 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1806 return;
1807 }
1808 }
1809 break;
1810 }
1811 case ISD::INTRINSIC_WO_CHAIN:
1812 case ISD::INTRINSIC_VOID: {
1813 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1814 switch (IntNo) {
1815 default:
1816 break;
1817 case Intrinsic::aarch64_neon_umaxv:
1818 case Intrinsic::aarch64_neon_uminv: {
1819 // Figure out the datatype of the vector operand. The UMINV instruction
1820 // will zero extend the result, so we can mark as known zero all the
1821      // bits larger than the element datatype. 32-bit or larger doesn't need
1822 // this as those are legal types and will be handled by isel directly.
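          // (e.g. a umaxv of a v16i8 vector returning i32 has bits 8..31 of
          // the result known to be zero).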
1823 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1824 unsigned BitWidth = Known.getBitWidth();
1825 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1826        assert(BitWidth >= 8 && "Unexpected width!");
1827 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1828 Known.Zero |= Mask;
1829 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1830        assert(BitWidth >= 16 && "Unexpected width!");
1831 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1832 Known.Zero |= Mask;
1833 }
1834 break;
1835 } break;
1836 }
1837 }
1838 }
1839}
1840
1841MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1842 EVT) const {
1843 return MVT::i64;
1844}
1845
1846bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1847 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1848 bool *Fast) const {
1849 if (Subtarget->requiresStrictAlign())
1850 return false;
1851
1852 if (Fast) {
1853 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1854 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1855 // See comments in performSTORECombine() for more details about
1856 // these conditions.
1857
1858 // Code that uses clang vector extensions can mark that it
1859 // wants unaligned accesses to be treated as fast by
1860 // underspecifying alignment to be 1 or 2.
1861 Alignment <= 2 ||
1862
1863 // Disregard v2i64. Memcpy lowering produces those and splitting
1864 // them regresses performance on micro-benchmarks and olden/bh.
1865 VT == MVT::v2i64;
1866 }
1867 return true;
1868}
1869
1870// Same as above but handling LLTs instead.
1871bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1872 LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1873 bool *Fast) const {
1874 if (Subtarget->requiresStrictAlign())
1875 return false;
1876
1877 if (Fast) {
1878 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1879 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
1880 Ty.getSizeInBytes() != 16 ||
1881 // See comments in performSTORECombine() for more details about
1882 // these conditions.
1883
1884 // Code that uses clang vector extensions can mark that it
1885 // wants unaligned accesses to be treated as fast by
1886 // underspecifying alignment to be 1 or 2.
1887 Alignment <= 2 ||
1888
1889 // Disregard v2i64. Memcpy lowering produces those and splitting
1890 // them regresses performance on micro-benchmarks and olden/bh.
1891 Ty == LLT::fixed_vector(2, 64);
1892 }
1893 return true;
1894}
1895
1896FastISel *
1897AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1898 const TargetLibraryInfo *libInfo) const {
1899 return AArch64::createFastISel(funcInfo, libInfo);
1900}
1901
1902const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1903#define MAKE_CASE(V) \
1904 case V: \
1905 return #V;
1906 switch ((AArch64ISD::NodeType)Opcode) {
1907 case AArch64ISD::FIRST_NUMBER:
1908 break;
1909 MAKE_CASE(AArch64ISD::CALL)
1910 MAKE_CASE(AArch64ISD::ADRP)
1911 MAKE_CASE(AArch64ISD::ADR)
1912 MAKE_CASE(AArch64ISD::ADDlow)
1913 MAKE_CASE(AArch64ISD::LOADgot)
1914 MAKE_CASE(AArch64ISD::RET_FLAG)
1915 MAKE_CASE(AArch64ISD::BRCOND)
1916 MAKE_CASE(AArch64ISD::CSEL)
1917 MAKE_CASE(AArch64ISD::CSINV)
1918 MAKE_CASE(AArch64ISD::CSNEG)
1919 MAKE_CASE(AArch64ISD::CSINC)
1920 MAKE_CASE(AArch64ISD::THREAD_POINTER)
1921 MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
1922 MAKE_CASE(AArch64ISD::ADD_PRED)
1923 MAKE_CASE(AArch64ISD::MUL_PRED)
1924 MAKE_CASE(AArch64ISD::MULHS_PRED)
1925 MAKE_CASE(AArch64ISD::MULHU_PRED)
1926 MAKE_CASE(AArch64ISD::SDIV_PRED)
1927 MAKE_CASE(AArch64ISD::SHL_PRED)
1928 MAKE_CASE(AArch64ISD::SMAX_PRED)
1929 MAKE_CASE(AArch64ISD::SMIN_PRED)
1930 MAKE_CASE(AArch64ISD::SRA_PRED)
1931 MAKE_CASE(AArch64ISD::SRL_PRED)
1932 MAKE_CASE(AArch64ISD::SUB_PRED)
1933 MAKE_CASE(AArch64ISD::UDIV_PRED)
1934 MAKE_CASE(AArch64ISD::UMAX_PRED)
1935 MAKE_CASE(AArch64ISD::UMIN_PRED)
1936 MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
1937 MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
1938 MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
1939 MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
1940 MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
1941 MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
1942 MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
1943 MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
1944 MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
1945 MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
1946 MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
1947 MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
1948 MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
1949 MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
1950 MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
1951 MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
1952 MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
1953 MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
1954 MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
1955 MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
1956 MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
1957 MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
1958 MAKE_CASE(AArch64ISD::ADC)
1959 MAKE_CASE(AArch64ISD::SBC)
1960 MAKE_CASE(AArch64ISD::ADDS)
1961 MAKE_CASE(AArch64ISD::SUBS)
1962 MAKE_CASE(AArch64ISD::ADCS)
1963 MAKE_CASE(AArch64ISD::SBCS)
1964 MAKE_CASE(AArch64ISD::ANDS)
1965 MAKE_CASE(AArch64ISD::CCMP)
1966 MAKE_CASE(AArch64ISD::CCMN)
1967 MAKE_CASE(AArch64ISD::FCCMP)
1968 MAKE_CASE(AArch64ISD::FCMP)
1969 MAKE_CASE(AArch64ISD::STRICT_FCMP)
1970 MAKE_CASE(AArch64ISD::STRICT_FCMPE)
1971 MAKE_CASE(AArch64ISD::DUP)
1972 MAKE_CASE(AArch64ISD::DUPLANE8)
1973 MAKE_CASE(AArch64ISD::DUPLANE16)
1974 MAKE_CASE(AArch64ISD::DUPLANE32)
1975 MAKE_CASE(AArch64ISD::DUPLANE64)
1976 MAKE_CASE(AArch64ISD::MOVI)
1977 MAKE_CASE(AArch64ISD::MOVIshift)
1978 MAKE_CASE(AArch64ISD::MOVIedit)
1979 MAKE_CASE(AArch64ISD::MOVImsl)
1980 MAKE_CASE(AArch64ISD::FMOV)
1981 MAKE_CASE(AArch64ISD::MVNIshift)
1982 MAKE_CASE(AArch64ISD::MVNImsl)
1983 MAKE_CASE(AArch64ISD::BICi)
1984 MAKE_CASE(AArch64ISD::ORRi)
1985 MAKE_CASE(AArch64ISD::BSP)
1986 MAKE_CASE(AArch64ISD::EXTR)
1987 MAKE_CASE(AArch64ISD::ZIP1)
1988 MAKE_CASE(AArch64ISD::ZIP2)
1989 MAKE_CASE(AArch64ISD::UZP1)
1990 MAKE_CASE(AArch64ISD::UZP2)
1991 MAKE_CASE(AArch64ISD::TRN1)
1992 MAKE_CASE(AArch64ISD::TRN2)
1993 MAKE_CASE(AArch64ISD::REV16)
1994 MAKE_CASE(AArch64ISD::REV32)
1995 MAKE_CASE(AArch64ISD::REV64)
1996 MAKE_CASE(AArch64ISD::EXT)
1997 MAKE_CASE(AArch64ISD::SPLICE)
1998 MAKE_CASE(AArch64ISD::VSHL)
1999 MAKE_CASE(AArch64ISD::VLSHR)
2000 MAKE_CASE(AArch64ISD::VASHR)
2001 MAKE_CASE(AArch64ISD::VSLI)
2002 MAKE_CASE(AArch64ISD::VSRI)
2003 MAKE_CASE(AArch64ISD::CMEQ)
2004 MAKE_CASE(AArch64ISD::CMGE)
2005 MAKE_CASE(AArch64ISD::CMGT)
2006 MAKE_CASE(AArch64ISD::CMHI)
2007 MAKE_CASE(AArch64ISD::CMHS)
2008 MAKE_CASE(AArch64ISD::FCMEQ)
2009 MAKE_CASE(AArch64ISD::FCMGE)
2010 MAKE_CASE(AArch64ISD::FCMGT)
2011 MAKE_CASE(AArch64ISD::CMEQz)
2012 MAKE_CASE(AArch64ISD::CMGEz)
2013 MAKE_CASE(AArch64ISD::CMGTz)
2014 MAKE_CASE(AArch64ISD::CMLEz)
2015 MAKE_CASE(AArch64ISD::CMLTz)
2016 MAKE_CASE(AArch64ISD::FCMEQz)
2017 MAKE_CASE(AArch64ISD::FCMGEz)
2018 MAKE_CASE(AArch64ISD::FCMGTz)
2019 MAKE_CASE(AArch64ISD::FCMLEz)
2020 MAKE_CASE(AArch64ISD::FCMLTz)
2021 MAKE_CASE(AArch64ISD::SADDV)
2022 MAKE_CASE(AArch64ISD::UADDV)
2023 MAKE_CASE(AArch64ISD::SRHADD)
2024 MAKE_CASE(AArch64ISD::URHADD)
2025 MAKE_CASE(AArch64ISD::SHADD)
2026 MAKE_CASE(AArch64ISD::UHADD)
2027 MAKE_CASE(AArch64ISD::SDOT)
2028 MAKE_CASE(AArch64ISD::UDOT)
2029 MAKE_CASE(AArch64ISD::SMINV)
2030 MAKE_CASE(AArch64ISD::UMINV)
2031 MAKE_CASE(AArch64ISD::SMAXV)
2032 MAKE_CASE(AArch64ISD::UMAXV)
2033 MAKE_CASE(AArch64ISD::SADDV_PRED)
2034 MAKE_CASE(AArch64ISD::UADDV_PRED)
2035 MAKE_CASE(AArch64ISD::SMAXV_PRED)
2036 MAKE_CASE(AArch64ISD::UMAXV_PRED)
2037 MAKE_CASE(AArch64ISD::SMINV_PRED)
2038 MAKE_CASE(AArch64ISD::UMINV_PRED)
2039 MAKE_CASE(AArch64ISD::ORV_PRED)
2040 MAKE_CASE(AArch64ISD::EORV_PRED)
2041 MAKE_CASE(AArch64ISD::ANDV_PRED)
2042 MAKE_CASE(AArch64ISD::CLASTA_N)
2043 MAKE_CASE(AArch64ISD::CLASTB_N)
2044 MAKE_CASE(AArch64ISD::LASTA)
2045 MAKE_CASE(AArch64ISD::LASTB)
2046 MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
2047 MAKE_CASE(AArch64ISD::LS64_BUILD)
2048 MAKE_CASE(AArch64ISD::LS64_EXTRACT)
2049 MAKE_CASE(AArch64ISD::TBL)
2050 MAKE_CASE(AArch64ISD::FADD_PRED)
2051 MAKE_CASE(AArch64ISD::FADDA_PRED)
2052 MAKE_CASE(AArch64ISD::FADDV_PRED)
2053 MAKE_CASE(AArch64ISD::FDIV_PRED)
2054 MAKE_CASE(AArch64ISD::FMA_PRED)
2055 MAKE_CASE(AArch64ISD::FMAX_PRED)
2056 MAKE_CASE(AArch64ISD::FMAXV_PRED)
2057 MAKE_CASE(AArch64ISD::FMAXNM_PRED)
2058 MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
2059 MAKE_CASE(AArch64ISD::FMIN_PRED)
2060 MAKE_CASE(AArch64ISD::FMINV_PRED)
2061 MAKE_CASE(AArch64ISD::FMINNM_PRED)
2062 MAKE_CASE(AArch64ISD::FMINNMV_PRED)
2063 MAKE_CASE(AArch64ISD::FMUL_PRED)
2064 MAKE_CASE(AArch64ISD::FSUB_PRED)
2065 MAKE_CASE(AArch64ISD::BIC)
2066 MAKE_CASE(AArch64ISD::BIT)
2067 MAKE_CASE(AArch64ISD::CBZ)
2068 MAKE_CASE(AArch64ISD::CBNZ)
2069 MAKE_CASE(AArch64ISD::TBZ)
2070 MAKE_CASE(AArch64ISD::TBNZ)
2071 MAKE_CASE(AArch64ISD::TC_RETURN)
2072 MAKE_CASE(AArch64ISD::PREFETCH)
2073 MAKE_CASE(AArch64ISD::SITOF)
2074 MAKE_CASE(AArch64ISD::UITOF)
2075 MAKE_CASE(AArch64ISD::NVCAST)
2076 MAKE_CASE(AArch64ISD::MRS)
2077 MAKE_CASE(AArch64ISD::SQSHL_I)
2078 MAKE_CASE(AArch64ISD::UQSHL_I)
2079 MAKE_CASE(AArch64ISD::SRSHR_I)
2080 MAKE_CASE(AArch64ISD::URSHR_I)
2081 MAKE_CASE(AArch64ISD::SQSHLU_I)
2082 MAKE_CASE(AArch64ISD::WrapperLarge)
2083 MAKE_CASE(AArch64ISD::LD2post)
2084 MAKE_CASE(AArch64ISD::LD3post)
2085 MAKE_CASE(AArch64ISD::LD4post)
2086 MAKE_CASE(AArch64ISD::ST2post)
2087 MAKE_CASE(AArch64ISD::ST3post)
2088 MAKE_CASE(AArch64ISD::ST4post)
2089 MAKE_CASE(AArch64ISD::LD1x2post)
2090 MAKE_CASE(AArch64ISD::LD1x3post)
2091 MAKE_CASE(AArch64ISD::LD1x4post)
2092 MAKE_CASE(AArch64ISD::ST1x2post)
2093 MAKE_CASE(AArch64ISD::ST1x3post)
2094 MAKE_CASE(AArch64ISD::ST1x4post)
2095 MAKE_CASE(AArch64ISD::LD1DUPpost)
2096 MAKE_CASE(AArch64ISD::LD2DUPpost)
2097 MAKE_CASE(AArch64ISD::LD3DUPpost)
2098 MAKE_CASE(AArch64ISD::LD4DUPpost)
2099 MAKE_CASE(AArch64ISD::LD1LANEpost)
2100 MAKE_CASE(AArch64ISD::LD2LANEpost)
2101 MAKE_CASE(AArch64ISD::LD3LANEpost)
2102 MAKE_CASE(AArch64ISD::LD4LANEpost)
2103 MAKE_CASE(AArch64ISD::ST2LANEpost)
2104 MAKE_CASE(AArch64ISD::ST3LANEpost)
2105 MAKE_CASE(AArch64ISD::ST4LANEpost)
2106 MAKE_CASE(AArch64ISD::SMULL)
2107 MAKE_CASE(AArch64ISD::UMULL)
2108 MAKE_CASE(AArch64ISD::FRECPE)
2109 MAKE_CASE(AArch64ISD::FRECPS)
2110 MAKE_CASE(AArch64ISD::FRSQRTE)
2111 MAKE_CASE(AArch64ISD::FRSQRTS)
2112 MAKE_CASE(AArch64ISD::STG)
2113 MAKE_CASE(AArch64ISD::STZG)
2114 MAKE_CASE(AArch64ISD::ST2G)
2115 MAKE_CASE(AArch64ISD::STZ2G)
2116 MAKE_CASE(AArch64ISD::SUNPKHI)
2117 MAKE_CASE(AArch64ISD::SUNPKLO)
2118 MAKE_CASE(AArch64ISD::UUNPKHI)
2119 MAKE_CASE(AArch64ISD::UUNPKLO)
2120 MAKE_CASE(AArch64ISD::INSR)
2121 MAKE_CASE(AArch64ISD::PTEST)
2122 MAKE_CASE(AArch64ISD::PTRUE)
2123 MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
2124 MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
2125 MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
2126 MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
2127 MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
2128 MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
2129 MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
2130 MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
2131 MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
2132 MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
2133 MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
2134 MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
2135 MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
2136 MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
2137 MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
2138 MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
2139 MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
2140 MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
2141 MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
2142 MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
2143 MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
2144 MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
2145 MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
2146 MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
2147 MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
2148 MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
2149 MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
2150 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
2151 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
2152 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
2153 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
2154 MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
2155 MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
2156 MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
2157 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
2158 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
2159 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
2160 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
2161 MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
2162 MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
2163 MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
2164 MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
2165 MAKE_CASE(AArch64ISD::ST1_PRED)
2166 MAKE_CASE(AArch64ISD::SST1_PRED)
2167 MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
2168 MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
2169 MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
2170 MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
2171 MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
2172 MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
2173 MAKE_CASE(AArch64ISD::SSTNT1_PRED)
2174 MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
2175 MAKE_CASE(AArch64ISD::LDP)
2176 MAKE_CASE(AArch64ISD::STP)
2177 MAKE_CASE(AArch64ISD::STNP)
2178 MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
2179 MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
2180 MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
2181 MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
2182 MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
2183 MAKE_CASE(AArch64ISD::INDEX_VECTOR)
2184 MAKE_CASE(AArch64ISD::UADDLP)
2185 MAKE_CASE(AArch64ISD::CALL_RVMARKER)
2186 }
2187#undef MAKE_CASE
2188 return nullptr;
2189}
2190
2191MachineBasicBlock *
2192AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
2193 MachineBasicBlock *MBB) const {
2194 // We materialise the F128CSEL pseudo-instruction as some control flow and a
2195 // phi node:
2196
2197 // OrigBB:
2198 // [... previous instrs leading to comparison ...]
2199 // b.ne TrueBB
2200 // b EndBB
2201 // TrueBB:
2202 // ; Fallthrough
2203 // EndBB:
2204 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
2205
2206 MachineFunction *MF = MBB->getParent();
2207 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2208 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2209 DebugLoc DL = MI.getDebugLoc();
2210 MachineFunction::iterator It = ++MBB->getIterator();
2211
2212 Register DestReg = MI.getOperand(0).getReg();
2213 Register IfTrueReg = MI.getOperand(1).getReg();
2214 Register IfFalseReg = MI.getOperand(2).getReg();
2215 unsigned CondCode = MI.getOperand(3).getImm();
2216 bool NZCVKilled = MI.getOperand(4).isKill();
2217
2218 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
2219 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
2220 MF->insert(It, TrueBB);
2221 MF->insert(It, EndBB);
2222
2223 // Transfer rest of current basic-block to EndBB
2224 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
2225 MBB->end());
2226 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
2227
2228 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
2229 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
2230 MBB->addSuccessor(TrueBB);
2231 MBB->addSuccessor(EndBB);
2232
2233 // TrueBB falls through to the end.
2234 TrueBB->addSuccessor(EndBB);
2235
2236 if (!NZCVKilled) {
2237 TrueBB->addLiveIn(AArch64::NZCV);
2238 EndBB->addLiveIn(AArch64::NZCV);
2239 }
2240
2241 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
2242 .addReg(IfTrueReg)
2243 .addMBB(TrueBB)
2244 .addReg(IfFalseReg)
2245 .addMBB(MBB);
2246
2247 MI.eraseFromParent();
2248 return EndBB;
2249}
2250
2251MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
2252 MachineInstr &MI, MachineBasicBlock *BB) const {
2253  assert(!isAsynchronousEHPersonality(classifyEHPersonality(
2254             BB->getParent()->getFunction().getPersonalityFn())) &&
2255         "SEH does not use catchret!");
2256 return BB;
2257}
2258
2259MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
2260 MachineInstr &MI, MachineBasicBlock *BB) const {
2261 switch (MI.getOpcode()) {
2262 default:
2263#ifndef NDEBUG
2264 MI.dump();
2265#endif
2266    llvm_unreachable("Unexpected instruction for custom inserter!");
2267
2268 case AArch64::F128CSEL:
2269 return EmitF128CSEL(MI, BB);
2270
2271 case TargetOpcode::STACKMAP:
2272 case TargetOpcode::PATCHPOINT:
2273 case TargetOpcode::STATEPOINT:
2274 return emitPatchPoint(MI, BB);
2275
2276 case AArch64::CATCHRET:
2277 return EmitLoweredCatchRet(MI, BB);
2278 }
2279}
2280
2281//===----------------------------------------------------------------------===//
2282// AArch64 Lowering private implementation.
2283//===----------------------------------------------------------------------===//
2284
2285//===----------------------------------------------------------------------===//
2286// Lowering Code
2287//===----------------------------------------------------------------------===//
2288
2289// Forward declarations of SVE fixed length lowering helpers
2290static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT);
2291static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
2292static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
2293static SDValue convertFixedMaskToScalableVector(SDValue Mask,
2294 SelectionDAG &DAG);
2295
2296/// isZerosVector - Check whether SDNode N is a zero-filled vector.
2297static bool isZerosVector(const SDNode *N) {
2298 // Look through a bit convert.
2299 while (N->getOpcode() == ISD::BITCAST)
2300 N = N->getOperand(0).getNode();
2301
2302 if (ISD::isConstantSplatVectorAllZeros(N))
2303 return true;
2304
2305 if (N->getOpcode() != AArch64ISD::DUP)
2306 return false;
2307
2308 auto Opnd0 = N->getOperand(0);
2309 auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
2310 auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
2311 return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero());
2312}
2313
2314/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
2315/// CC
2316static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
2317 switch (CC) {
2318 default:
2319    llvm_unreachable("Unknown condition code!");
2320 case ISD::SETNE:
2321 return AArch64CC::NE;
2322 case ISD::SETEQ:
2323 return AArch64CC::EQ;
2324 case ISD::SETGT:
2325 return AArch64CC::GT;
2326 case ISD::SETGE:
2327 return AArch64CC::GE;
2328 case ISD::SETLT:
2329 return AArch64CC::LT;
2330 case ISD::SETLE:
2331 return AArch64CC::LE;
2332 case ISD::SETUGT:
2333 return AArch64CC::HI;
2334 case ISD::SETUGE:
2335 return AArch64CC::HS;
2336 case ISD::SETULT:
2337 return AArch64CC::LO;
2338 case ISD::SETULE:
2339 return AArch64CC::LS;
2340 }
2341}
2342
2343/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
2344static void changeFPCCToAArch64CC(ISD::CondCode CC,
2345 AArch64CC::CondCode &CondCode,
2346 AArch64CC::CondCode &CondCode2) {
2347 CondCode2 = AArch64CC::AL;
2348 switch (CC) {
2349 default:
2350    llvm_unreachable("Unknown FP condition!");
2351 case ISD::SETEQ:
2352 case ISD::SETOEQ:
2353 CondCode = AArch64CC::EQ;
2354 break;
2355 case ISD::SETGT:
2356 case ISD::SETOGT:
2357 CondCode = AArch64CC::GT;
2358 break;
2359 case ISD::SETGE:
2360 case ISD::SETOGE:
2361 CondCode = AArch64CC::GE;
2362 break;
2363 case ISD::SETOLT:
2364 CondCode = AArch64CC::MI;
2365 break;
2366 case ISD::SETOLE:
2367 CondCode = AArch64CC::LS;
2368 break;
2369 case ISD::SETONE:
2370 CondCode = AArch64CC::MI;
2371 CondCode2 = AArch64CC::GT;
2372 break;
2373 case ISD::SETO:
2374 CondCode = AArch64CC::VC;
2375 break;
2376 case ISD::SETUO:
2377 CondCode = AArch64CC::VS;
2378 break;
2379 case ISD::SETUEQ:
2380 CondCode = AArch64CC::EQ;
2381 CondCode2 = AArch64CC::VS;
2382 break;
2383 case ISD::SETUGT:
2384 CondCode = AArch64CC::HI;
2385 break;
2386 case ISD::SETUGE:
2387 CondCode = AArch64CC::PL;
2388 break;
2389 case ISD::SETLT:
2390 case ISD::SETULT:
2391 CondCode = AArch64CC::LT;
2392 break;
2393 case ISD::SETLE:
2394 case ISD::SETULE:
2395 CondCode = AArch64CC::LE;
2396 break;
2397 case ISD::SETNE:
2398 case ISD::SETUNE:
2399 CondCode = AArch64CC::NE;
2400 break;
2401 }
2402}
2403
2404/// Convert a DAG fp condition code to an AArch64 CC.
2405/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
2406/// should be AND'ed instead of OR'ed.
2407static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
2408 AArch64CC::CondCode &CondCode,
2409 AArch64CC::CondCode &CondCode2) {
2410 CondCode2 = AArch64CC::AL;
2411 switch (CC) {
2412 default:
2413 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2414    assert(CondCode2 == AArch64CC::AL);
2415 break;
2416 case ISD::SETONE:
2417 // (a one b)
2418 // == ((a olt b) || (a ogt b))
2419 // == ((a ord b) && (a une b))
2420 CondCode = AArch64CC::VC;
2421 CondCode2 = AArch64CC::NE;
2422 break;
2423 case ISD::SETUEQ:
2424 // (a ueq b)
2425 // == ((a uno b) || (a oeq b))
2426 // == ((a ule b) && (a uge b))
2427 CondCode = AArch64CC::PL;
2428 CondCode2 = AArch64CC::LE;
2429 break;
2430 }
2431}
2432
2433/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
2434/// CC usable with the vector instructions. Fewer operations are available
2435/// without a real NZCV register, so we have to use less efficient combinations
2436/// to get the same effect.
2437static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
2438 AArch64CC::CondCode &CondCode,
2439 AArch64CC::CondCode &CondCode2,
2440 bool &Invert) {
2441 Invert = false;
2442 switch (CC) {
2443 default:
2444 // Mostly the scalar mappings work fine.
2445 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2446 break;
2447 case ISD::SETUO:
2448 Invert = true;
2449    LLVM_FALLTHROUGH;
2450 case ISD::SETO:
2451 CondCode = AArch64CC::MI;
2452 CondCode2 = AArch64CC::GE;
2453 break;
2454 case ISD::SETUEQ:
2455 case ISD::SETULT:
2456 case ISD::SETULE:
2457 case ISD::SETUGT:
2458 case ISD::SETUGE:
2459 // All of the compare-mask comparisons are ordered, but we can switch
2460 // between the two by a double inversion. E.g. ULE == !OGT.
2461 Invert = true;
2462 changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
2463 CondCode, CondCode2);
2464 break;
2465 }
2466}
2467
2468static bool isLegalArithImmed(uint64_t C) {
2469 // Matches AArch64DAGToDAGISel::SelectArithImmed().
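      // i.e. a 12-bit unsigned immediate, optionally shifted left by 12 bits.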
2470 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
2471  LLVM_DEBUG(dbgs() << "Is imm " << C
2472                    << " legal: " << (IsLegal ? "yes\n" : "no\n"));
2473 return IsLegal;
2474}
2475
2476// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
2477// the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
2478// can be set differently by this operation. It comes down to whether
2479// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
2480// everything is fine. If not then the optimization is wrong. Thus general
2481// comparisons are only valid if op2 != 0.
2482//
2483// So, finally, the only LLVM-native comparisons that don't mention C and V
2484// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
2485// the absence of information about op2.
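    // (Concretely, for op2 == 0 the carry flag differs: SUBS op1, #0 always
    // sets C, while ADDS op1, #0 never does, so unsigned comparisons would be
    // evaluated incorrectly.)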
2486static bool isCMN(SDValue Op, ISD::CondCode CC) {
2487 return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
2488 (CC == ISD::SETEQ || CC == ISD::SETNE);
2489}
2490
2491static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
2492 SelectionDAG &DAG, SDValue Chain,
2493 bool IsSignaling) {
2494 EVT VT = LHS.getValueType();
2495  assert(VT != MVT::f128);
2496  assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
2497 unsigned Opcode =
2498 IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
2499 return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
2500}
2501
2502static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2503 const SDLoc &dl, SelectionDAG &DAG) {
2504 EVT VT = LHS.getValueType();
2505 const bool FullFP16 =
2506 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2507
2508 if (VT.isFloatingPoint()) {
2509    assert(VT != MVT::f128);
2510 if (VT == MVT::f16 && !FullFP16) {
2511 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
2512 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
2513 VT = MVT::f32;
2514 }
2515 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
2516 }
2517
2518 // The CMP instruction is just an alias for SUBS, and representing it as
2519 // SUBS means that it's possible to get CSE with subtract operations.
2520 // A later phase can perform the optimization of setting the destination
2521 // register to WZR/XZR if it ends up being unused.
2522 unsigned Opcode = AArch64ISD::SUBS;
2523
2524 if (isCMN(RHS, CC)) {
2525    // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
2526 Opcode = AArch64ISD::ADDS;
2527 RHS = RHS.getOperand(1);
2528 } else if (isCMN(LHS, CC)) {
2529    // As we are looking for EQ/NE compares, the operands can be commuted; can
2530    // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
2531 Opcode = AArch64ISD::ADDS;
2532 LHS = LHS.getOperand(1);
2533 } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
2534 if (LHS.getOpcode() == ISD::AND) {
2535 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
2536 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
2537 // of the signed comparisons.
2538 const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
2539 DAG.getVTList(VT, MVT_CC),
2540 LHS.getOperand(0),
2541 LHS.getOperand(1));
2542 // Replace all users of (and X, Y) with newly generated (ands X, Y)
2543 DAG.ReplaceAllUsesWith(LHS, ANDSNode);
2544 return ANDSNode.getValue(1);
2545 } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
2546 // Use result of ANDS
2547 return LHS.getValue(1);
2548 }
2549 }
2550
2551 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
2552 .getValue(1);
2553}
2554
2555/// \defgroup AArch64CCMP CMP;CCMP matching
2556///
2557/// These functions deal with the formation of CMP;CCMP;... sequences.
2558/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
2559/// a comparison. They set the NZCV flags to a predefined value if their
2560/// predicate is false. This allows us to express arbitrary conjunctions, for
2561/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
2562/// expressed as:
2563/// cmp A
2564/// ccmp B, inv(CB), CA
2565/// check for CB flags
2566///
2567/// This naturally lets us implement chains of AND operations with SETCC
2568/// operands. And we can even implement some other situations by transforming
2569/// them:
2570/// - We can implement (NEG SETCC) i.e. negating a single comparison by
2571/// negating the flags used in a CCMP/FCCMP operations.
2572/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
2573/// by negating the flags we test for afterwards. i.e.
2574/// NEG (CMP CCMP CCCMP ...) can be implemented.
2575/// - Note that we can only ever negate all previously processed results.
2576/// What we can not implement by flipping the flags to test is a negation
2577/// of two sub-trees (because the negation affects all sub-trees emitted so
2578/// far, so the 2nd sub-tree we emit would also affect the first).
2579/// With those tools we can implement some OR operations:
2580/// - (OR (SETCC A) (SETCC B)) can be implemented via:
2581/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
2582/// - After transforming OR to NEG/AND combinations we may be able to use NEG
2583/// elimination rules from earlier to implement the whole thing as a
2584/// CCMP/FCCMP chain.
2585///
2586/// As a complete example:
2587/// or (or (setCA (cmp A)) (setCB (cmp B)))
2588/// (and (setCC (cmp C)) (setCD (cmp D)))"
2589/// can be reassociated to:
2590///    or (and (setCC (cmp C)) (setCD (cmp D)))
2591///       (or (setCA (cmp A)) (setCB (cmp B)))
2592/// can be transformed to:
2593/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
2594/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
2595/// which can be implemented as:
2596/// cmp C
2597/// ccmp D, inv(CD), CC
2598/// ccmp A, CA, inv(CD)
2599/// ccmp B, CB, inv(CA)
2600/// check for CB flags
2601///
2602/// A counterexample is "or (and A B) (and C D)" which translates to
2603 /// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))); we
2604 /// can only implement one of the inner (not) operations, but not both!
2605/// @{
2606
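// Editor's sketch (not part of the original source): the boolean identity the
// block comment above relies on. An OR of two tests is rewritten as a negated
// AND of the negated tests, which is what allows a disjunction to be emitted
// as a CMP/CCMP chain whose final condition is simply inverted.
#include <cassert>
static void checkOrViaNegatedAnd() {
  for (int A = 0; A <= 1; ++A)
    for (int B = 0; B <= 1; ++B)
      assert((A || B) == !(!A && !B)); // De Morgan, verified exhaustively
}
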
2607/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
2608static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
2609 ISD::CondCode CC, SDValue CCOp,
2610 AArch64CC::CondCode Predicate,
2611 AArch64CC::CondCode OutCC,
2612 const SDLoc &DL, SelectionDAG &DAG) {
2613 unsigned Opcode = 0;
2614 const bool FullFP16 =
2615 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2616
2617 if (LHS.getValueType().isFloatingPoint()) {
2618 assert(LHS.getValueType() != MVT::f128);
2619 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
2620 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
2621 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
2622 }
2623 Opcode = AArch64ISD::FCCMP;
2624 } else if (RHS.getOpcode() == ISD::SUB) {
2625 SDValue SubOp0 = RHS.getOperand(0);
2626 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2627 // See emitComparison() on why we can only do this for SETEQ and SETNE.
2628 Opcode = AArch64ISD::CCMN;
2629 RHS = RHS.getOperand(1);
2630 }
2631 }
2632 if (Opcode == 0)
2633 Opcode = AArch64ISD::CCMP;
2634
2635 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
2636 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
2637 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
2638 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
2639 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
2640}
2641
2642/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
2643/// expressed as a conjunction. See \ref AArch64CCMP.
2644/// \param CanNegate Set to true if we can negate the whole sub-tree just by
2645/// changing the conditions on the SETCC tests.
2646/// (this means we can call emitConjunctionRec() with
2647/// Negate==true on this sub-tree)
2648/// \param MustBeFirst Set to true if this subtree needs to be negated and we
2649/// cannot do the negation naturally. We are required to
2650/// emit the subtree first in this case.
2651 /// \param WillNegate Is true if we are called when the result of this
2652/// subexpression must be negated. This happens when the
2653/// outer expression is an OR. We can use this fact to know
2654/// that we have a double negation (or (or ...) ...) that
2655/// can be implemented for free.
2656static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
2657 bool &MustBeFirst, bool WillNegate,
2658 unsigned Depth = 0) {
2659 if (!Val.hasOneUse())
2660 return false;
2661 unsigned Opcode = Val->getOpcode();
2662 if (Opcode == ISD::SETCC) {
2663 if (Val->getOperand(0).getValueType() == MVT::f128)
2664 return false;
2665 CanNegate = true;
2666 MustBeFirst = false;
2667 return true;
2668 }
2669 // Protect against exponential runtime and stack overflow.
2670 if (Depth > 6)
2671 return false;
2672 if (Opcode == ISD::AND || Opcode == ISD::OR) {
2673 bool IsOR = Opcode == ISD::OR;
2674 SDValue O0 = Val->getOperand(0);
2675 SDValue O1 = Val->getOperand(1);
2676 bool CanNegateL;
2677 bool MustBeFirstL;
2678 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
2679 return false;
2680 bool CanNegateR;
2681 bool MustBeFirstR;
2682 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
2683 return false;
2684
2685 if (MustBeFirstL && MustBeFirstR)
2686 return false;
2687
2688 if (IsOR) {
2689 // For an OR expression we need to be able to naturally negate at least
2690 // one side or we cannot do the transformation at all.
2691 if (!CanNegateL && !CanNegateR)
2692 return false;
2693 // If the result of the OR will be negated and we can naturally negate
2694 // the leaves, then this sub-tree as a whole negates naturally.
2695 CanNegate = WillNegate && CanNegateL && CanNegateR;
2696 // If we cannot naturally negate the whole sub-tree, then this must be
2697 // emitted first.
2698 MustBeFirst = !CanNegate;
2699 } else {
2700 assert(Opcode == ISD::AND && "Must be OR or AND");
2701 // We cannot naturally negate an AND operation.
2702 CanNegate = false;
2703 MustBeFirst = MustBeFirstL || MustBeFirstR;
2704 }
2705 return true;
2706 }
2707 return false;
2708}
2709
2710/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
2711 /// of CCMP/FCCMP ops. See @ref AArch64CCMP.
2712 /// Tries to transform the given i1 producing node @p Val to a series of compare
2713 /// and conditional compare operations. @returns an NZCV flags producing node
2714 /// and sets @p OutCC to the flags that should be tested or returns SDValue() if
2715 /// the transformation was not possible.
2716 /// \p Negate is true if we want this sub-tree to be negated just by changing
2717/// SETCC conditions.
2718static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
2719 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
2720 AArch64CC::CondCode Predicate) {
2721 // We're at a tree leaf, produce a conditional comparison operation.
2722 unsigned Opcode = Val->getOpcode();
2723 if (Opcode == ISD::SETCC) {
2724 SDValue LHS = Val->getOperand(0);
2725 SDValue RHS = Val->getOperand(1);
2726 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
2727 bool isInteger = LHS.getValueType().isInteger();
2728 if (Negate)
2729 CC = getSetCCInverse(CC, LHS.getValueType());
2730 SDLoc DL(Val);
2731 // Determine OutCC and handle FP special case.
2732 if (isInteger) {
2733 OutCC = changeIntCCToAArch64CC(CC);
2734 } else {
2735 assert(LHS.getValueType().isFloatingPoint());
2736 AArch64CC::CondCode ExtraCC;
2737 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
2738 // Some floating point conditions can't be tested with a single condition
2739 // code. Construct an additional comparison in this case.
2740 if (ExtraCC != AArch64CC::AL) {
2741 SDValue ExtraCmp;
2742 if (!CCOp.getNode())
2743 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
2744 else
2745 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
2746 ExtraCC, DL, DAG);
2747 CCOp = ExtraCmp;
2748 Predicate = ExtraCC;
2749 }
2750 }
2751
2752 // Produce a normal comparison if we are first in the chain
2753 if (!CCOp)
2754 return emitComparison(LHS, RHS, CC, DL, DAG);
2755 // Otherwise produce a ccmp.
2756 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
2757 DAG);
2758 }
2759 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
2760
2761 bool IsOR = Opcode == ISD::OR;
2762
2763 SDValue LHS = Val->getOperand(0);
2764 bool CanNegateL;
2765 bool MustBeFirstL;
2766 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
2767 assert(ValidL && "Valid conjunction/disjunction tree");
2768 (void)ValidL;
2769
2770 SDValue RHS = Val->getOperand(1);
2771 bool CanNegateR;
2772 bool MustBeFirstR;
2773 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
2774 assert(ValidR && "Valid conjunction/disjunction tree");
2775 (void)ValidR;
2776
2777 // Swap sub-tree that must come first to the right side.
2778 if (MustBeFirstL) {
2779 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
2780 std::swap(LHS, RHS);
2781 std::swap(CanNegateL, CanNegateR);
2782 std::swap(MustBeFirstL, MustBeFirstR);
2783 }
2784
2785 bool NegateR;
2786 bool NegateAfterR;
2787 bool NegateL;
2788 bool NegateAfterAll;
2789 if (Opcode == ISD::OR) {
2790 // Swap the sub-tree that we can negate naturally to the left.
2791 if (!CanNegateL) {
2792 assert(CanNegateR && "at least one side must be negatable");
2793 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
2794 assert(!Negate);
2795 std::swap(LHS, RHS);
2796 NegateR = false;
2797 NegateAfterR = true;
2798 } else {
2799 // Negate the left sub-tree if possible, otherwise negate the result.
2800 NegateR = CanNegateR;
2801 NegateAfterR = !CanNegateR;
2802 }
2803 NegateL = true;
2804 NegateAfterAll = !Negate;
2805 } else {
2806 assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
2807 assert(!Negate && "Valid conjunction/disjunction tree");
2808
2809 NegateL = false;
2810 NegateR = false;
2811 NegateAfterR = false;
2812 NegateAfterAll = false;
2813 }
2814
2815 // Emit sub-trees.
2816 AArch64CC::CondCode RHSCC;
2817 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
2818 if (NegateAfterR)
2819 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
2820 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
2821 if (NegateAfterAll)
2822 OutCC = AArch64CC::getInvertedCondCode(OutCC);
2823 return CmpL;
2824}
2825
2826 /// Emit expression as a conjunction (a series of CCMP/FCCMP ops).
2827/// In some cases this is even possible with OR operations in the expression.
2828/// See \ref AArch64CCMP.
2829/// \see emitConjunctionRec().
2830static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
2831 AArch64CC::CondCode &OutCC) {
2832 bool DummyCanNegate;
2833 bool DummyMustBeFirst;
2834 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
2835 return SDValue();
2836
2837 return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
2838}
2839
2840/// @}
2841
2842/// Returns how profitable it is to fold a comparison's operand's shift and/or
2843/// extension operations.
2844static unsigned getCmpOperandFoldingProfit(SDValue Op) {
2845 auto isSupportedExtend = [&](SDValue V) {
2846 if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
2847 return true;
2848
2849 if (V.getOpcode() == ISD::AND)
2850 if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
2851 uint64_t Mask = MaskCst->getZExtValue();
2852 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
2853 }
2854
2855 return false;
2856 };
2857
2858 if (!Op.hasOneUse())
2859 return 0;
2860
2861 if (isSupportedExtend(Op))
2862 return 1;
2863
2864 unsigned Opc = Op.getOpcode();
2865 if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
2866 if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2867 uint64_t Shift = ShiftCst->getZExtValue();
2868 if (isSupportedExtend(Op.getOperand(0)))
2869 return (Shift <= 4) ? 2 : 1;
2870 EVT VT = Op.getValueType();
2871 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
2872 return 1;
2873 }
2874
2875 return 0;
2876}
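
// Editor's sketch (not part of the original source): the mask test inside
// isSupportedExtend() above. A compare operand of the form
// (and X, 0xFF/0xFFFF/0xFFFFFFFF) corresponds to the UXTB/UXTH/UXTW extended
// register forms that AArch64 CMP/CMN can fold for free, which is why such
// operands are scored as more profitable than a plain shifted operand.
#include <cstdint>
static bool isZeroExtendMask(uint64_t Mask) {
  return Mask == 0xFFull || Mask == 0xFFFFull || Mask == 0xFFFFFFFFull;
}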
2877
2878static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2879 SDValue &AArch64cc, SelectionDAG &DAG,
2880 const SDLoc &dl) {
2881 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2882 EVT VT = RHS.getValueType();
2883 uint64_t C = RHSC->getZExtValue();
2884 if (!isLegalArithImmed(C)) {
2885 // Constant does not fit, try adjusting it by one?
2886 switch (CC) {
2887 default:
2888 break;
2889 case ISD::SETLT:
2890 case ISD::SETGE:
2891 if ((VT == MVT::i32 && C != 0x80000000 &&
2892 isLegalArithImmed((uint32_t)(C - 1))) ||
2893 (VT == MVT::i64 && C != 0x80000000ULL &&
2894 isLegalArithImmed(C - 1ULL))) {
2895 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2896 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2897 RHS = DAG.getConstant(C, dl, VT);
2898 }
2899 break;
2900 case ISD::SETULT:
2901 case ISD::SETUGE:
2902 if ((VT == MVT::i32 && C != 0 &&
2903 isLegalArithImmed((uint32_t)(C - 1))) ||
2904 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
2905 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2906 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2907 RHS = DAG.getConstant(C, dl, VT);
2908 }
2909 break;
2910 case ISD::SETLE:
2911 case ISD::SETGT:
2912 if ((VT == MVT::i32 && C != INT32_MAX &&
2913 isLegalArithImmed((uint32_t)(C + 1))) ||
2914 (VT == MVT::i64 && C != INT64_MAX &&
2915 isLegalArithImmed(C + 1ULL))) {
2916 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2917 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2918 RHS = DAG.getConstant(C, dl, VT);
2919 }
2920 break;
2921 case ISD::SETULE:
2922 case ISD::SETUGT:
2923 if ((VT == MVT::i32 && C != UINT32_MAX &&
2924 isLegalArithImmed((uint32_t)(C + 1))) ||
2925 (VT == MVT::i64 && C != UINT64_MAX &&
2926 isLegalArithImmed(C + 1ULL))) {
2927 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2928 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2929 RHS = DAG.getConstant(C, dl, VT);
2930 }
2931 break;
2932 }
2933 }
2934 }
2935
2936 // Comparisons are canonicalized so that the RHS operand is simpler than the
2937 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
2938 // can fold some shift+extend operations on the RHS operand, so swap the
2939 // operands if that can be done.
2940 //
2941 // For example:
2942 // lsl w13, w11, #1
2943 // cmp w13, w12
2944 // can be turned into:
2945 // cmp w12, w11, lsl #1
2946 if (!isa<ConstantSDNode>(RHS) ||
2947 !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
2948 SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
2949
2950 if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
2951 std::swap(LHS, RHS);
2952 CC = ISD::getSetCCSwappedOperands(CC);
2953 }
2954 }
2955
2956 SDValue Cmp;
2957 AArch64CC::CondCode AArch64CC;
2958 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
2959 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
2960
2961 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
2962 // For the i8 operand, the largest immediate is 255, so this can be easily
2963 // encoded in the compare instruction. For the i16 operand, however, the
2964 // largest immediate cannot be encoded in the compare.
2965 // Therefore, use a sign extending load and cmn to avoid materializing the
2966 // -1 constant. For example,
2967 // movz w1, #65535
2968 // ldrh w0, [x0, #0]
2969 // cmp w0, w1
2970 // >
2971 // ldrsh w0, [x0, #0]
2972 // cmn w0, #1
2973 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
2974 // if and only if (sext LHS) == (sext RHS). The checks are in place to
2975 // ensure both the LHS and RHS are truly zero extended and to make sure the
2976 // transformation is profitable.
2977 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
2978 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
2979 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
2980 LHS.getNode()->hasNUsesOfValue(1, 0)) {
2981 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
2982 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
2983 SDValue SExt =
2984 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
2985 DAG.getValueType(MVT::i16));
2986 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
2987 RHS.getValueType()),
2988 CC, dl, DAG);
2989 AArch64CC = changeIntCCToAArch64CC(CC);
2990 }
2991 }
2992
2993 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
2994 if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
2995 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
2996 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
2997 }
2998 }
2999 }
3000
3001 if (!Cmp) {
3002 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
3003 AArch64CC = changeIntCCToAArch64CC(CC);
3004 }
3005 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
3006 return Cmp;
3007}
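
// Editor's sketch (not part of the original source) of the immediate
// adjustment performed at the top of getAArch64Cmp(). AArch64 ADDS/SUBS
// immediates are 12 bits, optionally shifted left by 12; the helper below is
// an assumed approximation of that legality test. Nudging the constant by one
// uses the identities
//   x <  C  <=>  x <= C - 1   (when C is not the minimum value)
//   x <= C  <=>  x <  C + 1   (when C is not the maximum value)
// to turn an unencodable immediate into an encodable one.
#include <cstdint>
static bool looksLikeLegalArithImmed(uint64_t C) {
  return (C >> 12) == 0 || ((C & 0xFFFull) == 0 && (C >> 24) == 0);
}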
3008
3009static std::pair<SDValue, SDValue>
3010getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
3011 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
3012 "Unsupported value type");
3013 SDValue Value, Overflow;
3014 SDLoc DL(Op);
3015 SDValue LHS = Op.getOperand(0);
3016 SDValue RHS = Op.getOperand(1);
3017 unsigned Opc = 0;
3018 switch (Op.getOpcode()) {
3019 default:
3020 llvm_unreachable("Unknown overflow instruction!");
3021 case ISD::SADDO:
3022 Opc = AArch64ISD::ADDS;
3023 CC = AArch64CC::VS;
3024 break;
3025 case ISD::UADDO:
3026 Opc = AArch64ISD::ADDS;
3027 CC = AArch64CC::HS;
3028 break;
3029 case ISD::SSUBO:
3030 Opc = AArch64ISD::SUBS;
3031 CC = AArch64CC::VS;
3032 break;
3033 case ISD::USUBO:
3034 Opc = AArch64ISD::SUBS;
3035 CC = AArch64CC::LO;
3036 break;
3037 // Multiply needs a little extra work.
3038 case ISD::SMULO:
3039 case ISD::UMULO: {
3040 CC = AArch64CC::NE;
3041 bool IsSigned = Op.getOpcode() == ISD::SMULO;
3042 if (Op.getValueType() == MVT::i32) {
3043 // Extend to 64-bits, then perform a 64-bit multiply.
3044 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3045 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
3046 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
3047 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3048 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3049
3050 // Check that the result fits into a 32-bit integer.
3051 SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
3052 if (IsSigned) {
3053 // cmp xreg, wreg, sxtw
3054 SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
3055 Overflow =
3056 DAG.getNode(AArch64ISD::SUBS, DL, VTs, Mul, SExtMul).getValue(1);
3057 } else {
3058 // tst xreg, #0xffffffff00000000
3059 SDValue UpperBits = DAG.getConstant(0xFFFFFFFF00000000, DL, MVT::i64);
3060 Overflow =
3061 DAG.getNode(AArch64ISD::ANDS, DL, VTs, Mul, UpperBits).getValue(1);
3062 }
3063 break;
3064 }
3065 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
3066 // For the 64 bit multiply
3067 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3068 if (IsSigned) {
3069 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
3070 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
3071 DAG.getConstant(63, DL, MVT::i64));
3072 // It is important that LowerBits is last, otherwise the arithmetic
3073 // shift will not be folded into the compare (SUBS).
3074 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3075 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
3076 .getValue(1);
3077 } else {
3078 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
3079 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3080 Overflow =
3081 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
3082 DAG.getConstant(0, DL, MVT::i64),
3083 UpperBits).getValue(1);
3084 }
3085 break;
3086 }
3087 } // switch (...)
3088
3089 if (Opc) {
3090 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
3091
3092 // Emit the AArch64 operation with overflow check.
3093 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
3094 Overflow = Value.getValue(1);
3095 }
3096 return std::make_pair(Value, Overflow);
3097}
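
// Editor's sketch (not part of the original source) of the i32 SMULO strategy
// used above: widen both operands, multiply once in 64 bits, and report
// overflow when sign-extending the truncated result no longer reproduces the
// full product (this is what the SUBS against the sxtw'd value checks).
#include <cstdint>
static bool smul32Overflows(int32_t A, int32_t B, int32_t &Result) {
  int64_t Wide = (int64_t)A * (int64_t)B; // single 64-bit multiply
  Result = (int32_t)Wide;                 // truncated value that is returned
  return (int64_t)Result != Wide;         // overflow iff truncation lost bits
}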
3098
3099SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
3100 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
3101 return LowerToScalableOp(Op, DAG);
3102
3103 SDValue Sel = Op.getOperand(0);
3104 SDValue Other = Op.getOperand(1);
3105 SDLoc dl(Sel);
3106
3107 // If the operand is an overflow checking operation, invert the condition
3108 // code and kill the Not operation. I.e., transform:
3109 // (xor (overflow_op_bool, 1))
3110 // -->
3111 // (csel 1, 0, invert(cc), overflow_op_bool)
3112 // ... which later gets transformed to just a cset instruction with an
3113 // inverted condition code, rather than a cset + eor sequence.
3114 if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
3115 // Only lower legal XALUO ops.
3116 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
3117 return SDValue();
3118
3119 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3120 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3121 AArch64CC::CondCode CC;
3122 SDValue Value, Overflow;
3123 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
3124 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3125 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
3126 CCVal, Overflow);
3127 }
3128 // If neither operand is a SELECT_CC, give up.
3129 if (Sel.getOpcode() != ISD::SELECT_CC)
3130 std::swap(Sel, Other);
3131 if (Sel.getOpcode() != ISD::SELECT_CC)
3132 return Op;
3133
3134 // The folding we want to perform is:
3135 // (xor x, (select_cc a, b, cc, 0, -1) )
3136 // -->
3137 // (csel x, (xor x, -1), cc ...)
3138 //
3139 // The latter will get matched to a CSINV instruction.
3140
3141 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
3142 SDValue LHS = Sel.getOperand(0);
3143 SDValue RHS = Sel.getOperand(1);
3144 SDValue TVal = Sel.getOperand(2);
3145 SDValue FVal = Sel.getOperand(3);
3146
3147 // FIXME: This could be generalized to non-integer comparisons.
3148 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
3149 return Op;
3150
3151 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
3152 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
3153
3154 // The values aren't constants, this isn't the pattern we're looking for.
3155 if (!CFVal || !CTVal)
3156 return Op;
3157
3158 // We can commute the SELECT_CC by inverting the condition. This
3159 // might be needed to make this fit into a CSINV pattern.
3160 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
3161 std::swap(TVal, FVal);
3162 std::swap(CTVal, CFVal);
3163 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
3164 }
3165
3166 // If the constants line up, perform the transform!
3167 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
3168 SDValue CCVal;
3169 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
3170
3171 FVal = Other;
3172 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
3173 DAG.getConstant(-1ULL, dl, Other.getValueType()));
3174
3175 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
3176 CCVal, Cmp);
3177 }
3178
3179 return Op;
3180}
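
// Editor's sketch (not part of the original source) of what the CSINV pattern
// produced above computes. Once the SELECT_CC constants are 0 and -1,
// "xor x, (select_cc a, b, cc, 0, -1)" is just "x when cc holds, ~x otherwise",
// which a single conditional select-and-invert implements.
#include <cstdint>
static uint64_t cselInvert(bool CondHolds, uint64_t X) {
  return CondHolds ? X : ~X; // csinv: X, or the bitwise NOT of X
}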
3181
3182static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
3183 EVT VT = Op.getValueType();
3184
3185 // Let legalize expand this if it isn't a legal type yet.
3186 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3187 return SDValue();
3188
3189 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3190
3191 unsigned Opc;
3192 bool ExtraOp = false;
3193 switch (Op.getOpcode()) {
3194 default:
3195 llvm_unreachable("Invalid code");
3196 case ISD::ADDC:
3197 Opc = AArch64ISD::ADDS;
3198 break;
3199 case ISD::SUBC:
3200 Opc = AArch64ISD::SUBS;
3201 break;
3202 case ISD::ADDE:
3203 Opc = AArch64ISD::ADCS;
3204 ExtraOp = true;
3205 break;
3206 case ISD::SUBE:
3207 Opc = AArch64ISD::SBCS;
3208 ExtraOp = true;
3209 break;
3210 }
3211
3212 if (!ExtraOp)
3213 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
3214 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
3215 Op.getOperand(2));
3216}
3217
3218static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
3219 // Let legalize expand this if it isn't a legal type yet.
3220 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3221 return SDValue();
3222
3223 SDLoc dl(Op);
3224 AArch64CC::CondCode CC;
3225 // The actual operation that sets the overflow or carry flag.
3226 SDValue Value, Overflow;
3227 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
3228
3229 // We use 0 and 1 as false and true values.
3230 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3231 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3232
3233 // We use an inverted condition, because the conditional select is inverted
3234 // too. This will allow it to be selected to a single instruction:
3235 // CSINC Wd, WZR, WZR, invert(cond).
3236 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3237 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
3238 CCVal, Overflow);
3239
3240 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3241 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3242}
3243
3244// Prefetch operands are:
3245// 1: Address to prefetch
3246// 2: bool isWrite
3247// 3: int locality (0 = no locality ... 3 = extreme locality)
3248// 4: bool isDataCache
3249static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
3250 SDLoc DL(Op);
3251 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3252 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
3253 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3254
3255 bool IsStream = !Locality;
3256 // When the locality number is set
3257 if (Locality) {
3258 // The front-end should have filtered out the out-of-range values
3259 assert(Locality <= 3 && "Prefetch locality out-of-range");
3260 // The locality degree is the inverse of the cache level:
3261 // flip the number around.
3262 // The encoding starts at 0 for level 1.
3263 Locality = 3 - Locality;
3264 }
3265
3266 // Build the mask value encoding the expected behavior.
3267 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
3268 (!IsData << 3) | // IsDataCache bit
3269 (Locality << 1) | // Cache level bits
3270 (unsigned)IsStream; // Stream bit
3271 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
3272 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
3273}
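
// Editor's sketch (not part of the original source) of the PRFM operand byte
// assembled above from the llvm.prefetch arguments: the locality argument
// (0 = none ... 3 = extremely local) is flipped into a cache level, and
// locality 0 selects the streaming (non-temporal) hint.
static unsigned encodePrfOp(bool IsWrite, unsigned Locality, bool IsData) {
  unsigned IsStream = (Locality == 0);
  unsigned Level = Locality ? (3 - Locality) : 0; // level 1 encodes as 0
  return (IsWrite << 4) | ((!IsData) << 3) | (Level << 1) | IsStream;
}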
3274
3275SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
3276 SelectionDAG &DAG) const {
3277 EVT VT = Op.getValueType();
3278 if (VT.isScalableVector())
3279 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
3280
3281 if (useSVEForFixedLengthVectorVT(VT))
3282 return LowerFixedLengthFPExtendToSVE(Op, DAG);
3283
3284 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
3285 return SDValue();
3286}
3287
3288SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
3289 SelectionDAG &DAG) const {
3290 if (Op.getValueType().isScalableVector())
3291 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
3292
3293 bool IsStrict = Op->isStrictFPOpcode();
3294 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3295 EVT SrcVT = SrcVal.getValueType();
3296
3297 if (useSVEForFixedLengthVectorVT(SrcVT))
3298 return LowerFixedLengthFPRoundToSVE(Op, DAG);
3299
3300 if (SrcVT != MVT::f128) {
3301 // Expand cases where the input is a vector bigger than NEON.
3302 if (useSVEForFixedLengthVectorVT(SrcVT))
3303 return SDValue();
3304
3305 // It's legal except when f128 is involved
3306 return Op;
3307 }
3308
3309 return SDValue();
3310}
3311
3312SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
3313 SelectionDAG &DAG) const {
3314 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3315 // Any additional optimization in this function should be recorded
3316 // in the cost tables.
3317 EVT InVT = Op.getOperand(0).getValueType();
3318 EVT VT = Op.getValueType();
3319
3320 if (VT.isScalableVector()) {
3321 unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
3322 ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
3323 : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
3324 return LowerToPredicatedOp(Op, DAG, Opcode);
3325 }
3326
3327 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3328 return LowerFixedLengthFPToIntToSVE(Op, DAG);
3329
3330 unsigned NumElts = InVT.getVectorNumElements();
3331
3332 // f16 conversions are promoted to f32 when full fp16 is not supported.
3333 if (InVT.getVectorElementType() == MVT::f16 &&
3334 !Subtarget->hasFullFP16()) {
3335 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
3336 SDLoc dl(Op);
3337 return DAG.getNode(
3338 Op.getOpcode(), dl, Op.getValueType(),
3339 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
3340 }
3341
3342 uint64_t VTSize = VT.getFixedSizeInBits();
3343 uint64_t InVTSize = InVT.getFixedSizeInBits();
3344 if (VTSize < InVTSize) {
3345 SDLoc dl(Op);
3346 SDValue Cv =
3347 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
3348 Op.getOperand(0));
3349 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
3350 }
3351
3352 if (VTSize > InVTSize) {
3353 SDLoc dl(Op);
3354 MVT ExtVT =
3355 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
3356 VT.getVectorNumElements());
3357 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
3358 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
3359 }
3360
3361 // Type changing conversions are illegal.
3362 return Op;
3363}
3364
3365SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
3366 SelectionDAG &DAG) const {
3367 bool IsStrict = Op->isStrictFPOpcode();
3368 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3369
3370 if (SrcVal.getValueType().isVector())
3371 return LowerVectorFP_TO_INT(Op, DAG);
3372
3373 // f16 conversions are promoted to f32 when full fp16 is not supported.
3374 if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
3375 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3376 SDLoc dl(Op);
3377 return DAG.getNode(
3378 Op.getOpcode(), dl, Op.getValueType(),
3379 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
3380 }
3381
3382 if (SrcVal.getValueType() != MVT::f128) {
3383 // It's legal except when f128 is involved
3384 return Op;
3385 }
3386
3387 return SDValue();
3388}
3389
3390SDValue
3391AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
3392 SelectionDAG &DAG) const {
3393 // AArch64 FP-to-int conversions saturate to the destination element size, so
3394 // we can lower common saturating conversions to simple instructions.
3395 SDValue SrcVal = Op.getOperand(0);
3396 EVT SrcVT = SrcVal.getValueType();
3397 EVT DstVT = Op.getValueType();
3398 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3399
3400 uint64_t SrcElementWidth = SrcVT.getScalarSizeInBits();
3401 uint64_t DstElementWidth = DstVT.getScalarSizeInBits();
3402 uint64_t SatWidth = SatVT.getScalarSizeInBits();
3403 assert(SatWidth <= DstElementWidth &&
3404 "Saturation width cannot exceed result width");
3405
3406 // TODO: Consider lowering to SVE operations, as in LowerVectorFP_TO_INT.
3407 // Currently, the `llvm.fpto[su]i.sat.*` intrinsics don't accept scalable
3408 // types, so this is hard to reach.
3409 if (DstVT.isScalableVector())
3410 return SDValue();
3411
3412 // TODO: Saturate to SatWidth explicitly.
3413 if (SatWidth != DstElementWidth)
3414 return SDValue();
3415
3416 EVT SrcElementVT = SrcVT.getVectorElementType();
3417
3418 // In the absence of FP16 support, promote f16 to f32, like
3419 // LowerVectorFP_TO_INT().
3420 if (SrcElementVT == MVT::f16 && !Subtarget->hasFullFP16()) {
3421 MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
3422 return DAG.getNode(Op.getOpcode(), SDLoc(Op), DstVT,
3423 DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal),
3424 Op.getOperand(1));
3425 }
3426
3427 // Cases that we can emit directly.
3428 if ((SrcElementWidth == DstElementWidth) &&
3429 (SrcElementVT == MVT::f64 || SrcElementVT == MVT::f32 ||
3430 (SrcElementVT == MVT::f16 && Subtarget->hasFullFP16()))) {
3431 return Op;
3432 }
3433
3434 // For all other cases, fall back on the expanded form.
3435 return SDValue();
3436}
3437
3438SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
3439 SelectionDAG &DAG) const {
3440 // AArch64 FP-to-int conversions saturate to the destination register size, so
3441 // we can lower common saturating conversions to simple instructions.
3442 SDValue SrcVal = Op.getOperand(0);
3443 EVT SrcVT = SrcVal.getValueType();
3444
3445 if (SrcVT.isVector())
3446 return LowerVectorFP_TO_INT_SAT(Op, DAG);
3447
3448 EVT DstVT = Op.getValueType();
3449 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3450 uint64_t SatWidth = SatVT.getScalarSizeInBits();
3451 uint64_t DstWidth = DstVT.getScalarSizeInBits();
3452 assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
3453
3454 // TODO: Saturate to SatWidth explicitly.
3455 if (SatWidth != DstWidth)
3456 return SDValue();
3457
3458 // In the absence of FP16 support, promote f16 to f32, like LowerFP_TO_INT().
3459 if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16())
3460 return DAG.getNode(Op.getOpcode(), SDLoc(Op), DstVT,
3461 DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal),
3462 Op.getOperand(1));
3463
3464 // Cases that we can emit directly.
3465 if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
3466 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
3467 (DstVT == MVT::i64 || DstVT == MVT::i32))
3468 return Op;
3469
3470 // For all other cases, fall back on the expanded form.
3471 return SDValue();
3472}
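
// Editor's sketch (not part of the original source) of the scalar saturating
// semantics this lowering leans on: AArch64 FCVTZS/FCVTZU already clamp
// out-of-range inputs to the destination limits and convert NaN to zero, so
// when the saturation width matches the destination width the node can be
// emitted as-is.
#include <cmath>
#include <cstdint>
#include <limits>
static int32_t fpToI32Sat(double V) {
  if (std::isnan(V))
    return 0;                                           // NaN -> 0
  if (V <= (double)std::numeric_limits<int32_t>::min())
    return std::numeric_limits<int32_t>::min();         // clamp low
  if (V >= (double)std::numeric_limits<int32_t>::max())
    return std::numeric_limits<int32_t>::max();         // clamp high
  return (int32_t)V;                                    // in-range truncation
}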
3473
3474SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
3475 SelectionDAG &DAG) const {
3476 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3477 // Any additional optimization in this function should be recorded
3478 // in the cost tables.
3479 EVT VT = Op.getValueType();
3480 SDLoc dl(Op);
3481 SDValue In = Op.getOperand(0);
3482 EVT InVT = In.getValueType();
3483 unsigned Opc = Op.getOpcode();
3484 bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
3485
3486 if (VT.isScalableVector()) {
3487 if (InVT.getVectorElementType() == MVT::i1) {
3488 // We can't directly extend an SVE predicate; extend it first.
3489 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3490 EVT CastVT = getPromotedVTForPredicate(InVT);
3491 In = DAG.getNode(CastOpc, dl, CastVT, In);
3492 return DAG.getNode(Opc, dl, VT, In);
3493 }
3494
3495 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
3496 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
3497 return LowerToPredicatedOp(Op, DAG, Opcode);
3498 }
3499
3500 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3501 return LowerFixedLengthIntToFPToSVE(Op, DAG);
3502
3503 uint64_t VTSize = VT.getFixedSizeInBits();
3504 uint64_t InVTSize = InVT.getFixedSizeInBits();
3505 if (VTSize < InVTSize) {
3506 MVT CastVT =
3507 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
3508 InVT.getVectorNumElements());
3509 In = DAG.getNode(Opc, dl, CastVT, In);
3510 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
3511 }
3512
3513 if (VTSize > InVTSize) {
3514 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3515 EVT CastVT = VT.changeVectorElementTypeToInteger();
3516 In = DAG.getNode(CastOpc, dl, CastVT, In);
3517 return DAG.getNode(Opc, dl, VT, In);
3518 }
3519
3520 return Op;
3521}
3522
3523SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
3524 SelectionDAG &DAG) const {
3525 if (Op.getValueType().isVector())
3526 return LowerVectorINT_TO_FP(Op, DAG);
3527
3528 bool IsStrict = Op->isStrictFPOpcode();
3529 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3530
3531 // f16 conversions are promoted to f32 when full fp16 is not supported.
3532 if (Op.getValueType() == MVT::f16 &&
3533 !Subtarget->hasFullFP16()) {
3534 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3535 SDLoc dl(Op);
3536 return DAG.getNode(
3537 ISD::FP_ROUND, dl, MVT::f16,
3538 DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
3539 DAG.getIntPtrConstant(0, dl));
3540 }
3541
3542 // i128 conversions are libcalls.
3543 if (SrcVal.getValueType() == MVT::i128)
3544 return SDValue();
3545
3546 // Other conversions are legal, unless it's to the completely software-based
3547 // fp128.
3548 if (Op.getValueType() != MVT::f128)
3549 return Op;
3550 return SDValue();
3551}
3552
3553SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
3554 SelectionDAG &DAG) const {
3555 // For iOS, we want to call an alternative entry point: __sincos_stret,
3556 // which returns the values in two S / D registers.
3557 SDLoc dl(Op);
3558 SDValue Arg = Op.getOperand(0);
3559 EVT ArgVT = Arg.getValueType();
3560 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
3561
3562 ArgListTy Args;
3563 ArgListEntry Entry;
3564
3565 Entry.Node = Arg;
3566 Entry.Ty = ArgTy;
3567 Entry.IsSExt = false;
3568 Entry.IsZExt = false;
3569 Args.push_back(Entry);
3570
3571 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
3572 : RTLIB::SINCOS_STRET_F32;
3573 const char *LibcallName = getLibcallName(LC);
3574 SDValue Callee =
3575 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
3576
3577 StructType *RetTy = StructType::get(ArgTy, ArgTy);
3578 TargetLowering::CallLoweringInfo CLI(DAG);
3579 CLI.setDebugLoc(dl)
3580 .setChain(DAG.getEntryNode())
3581 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
3582
3583 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3584 return CallResult.first;
3585}
3586
3587static MVT getSVEContainerType(EVT ContentTy);
3588
3589SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
3590 SelectionDAG &DAG) const {
3591 EVT OpVT = Op.getValueType();
3592 EVT ArgVT = Op.getOperand(0).getValueType();
3593
3594 if (useSVEForFixedLengthVectorVT(OpVT))
3595 return LowerFixedLengthBitcastToSVE(Op, DAG);
3596
3597 if (OpVT.isScalableVector()) {
3598 if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
3599 assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
3600 "Expected int->fp bitcast!");
3601 SDValue ExtResult =
3602 DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
3603 Op.getOperand(0));
3604 return getSVESafeBitCast(OpVT, ExtResult, DAG);
3605 }
3606 return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
3607 }
3608
3609 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
3610 return SDValue();
3611
3612 assert(ArgVT == MVT::i16);
3613 SDLoc DL(Op);
3614
3615 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
3616 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
3617 return SDValue(
3618 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
3619 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
3620 0);
3621}
3622
3623static EVT getExtensionTo64Bits(const EVT &OrigVT) {
3624 if (OrigVT.getSizeInBits() >= 64)
3625 return OrigVT;
3626
3627 assert(OrigVT.isSimple() && "Expecting a simple value type");
3628
3629 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
3630 switch (OrigSimpleTy) {
3631 default: llvm_unreachable("Unexpected Vector Type");
3632 case MVT::v2i8:
3633 case MVT::v2i16:
3634 return MVT::v2i32;
3635 case MVT::v4i8:
3636 return MVT::v4i16;
3637 }
3638}
3639
3640static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
3641 const EVT &OrigTy,
3642 const EVT &ExtTy,
3643 unsigned ExtOpcode) {
3644 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
3645 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
3646 // 64-bits we need to insert a new extension so that it will be 64-bits.
3647 assert(ExtTy.is128BitVector() && "Unexpected extension size");
3648 if (OrigTy.getSizeInBits() >= 64)
3649 return N;
3650
3651 // Must extend size to at least 64 bits to be used as an operand for VMULL.
3652 EVT NewVT = getExtensionTo64Bits(OrigTy);
3653
3654 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
3655}
3656
3657static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
3658 bool isSigned) {
3659 EVT VT = N->getValueType(0);
3660
3661 if (N->getOpcode() != ISD::BUILD_VECTOR)
3662 return false;
3663
3664 for (const SDValue &Elt : N->op_values()) {
3665 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
3666 unsigned EltSize = VT.getScalarSizeInBits();
3667 unsigned HalfSize = EltSize / 2;
3668 if (isSigned) {
3669 if (!isIntN(HalfSize, C->getSExtValue()))
3670 return false;
3671 } else {
3672 if (!isUIntN(HalfSize, C->getZExtValue()))
3673 return false;
3674 }
3675 continue;
3676 }
3677 return false;
3678 }
3679
3680 return true;
3681}
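
// Editor's sketch (not part of the original source) of the half-width range
// checks above, mirroring the semantics of LLVM's isIntN/isUIntN. A constant
// BUILD_VECTOR may feed S/UMULL only if every element already fits in half the
// element width, i.e. behaves as if extended from the narrower type.
// Both helpers assume 0 < N < 64, which holds for the half-widths used here.
#include <cstdint>
static bool fitsSignedBits(unsigned N, int64_t V) {
  return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
}
static bool fitsUnsignedBits(unsigned N, uint64_t V) {
  return V < (UINT64_C(1) << N);
}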
3682
3683static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
3684 if (N->getOpcode() == ISD::SIGN_EXTEND ||
3685 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
3686 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
3687 N->getOperand(0)->getValueType(0),
3688 N->getValueType(0),
3689 N->getOpcode());
3690
3691 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
3692 EVT VT = N->getValueType(0);
3693 SDLoc dl(N);
3694 unsigned EltSize = VT.getScalarSizeInBits() / 2;
3695 unsigned NumElts = VT.getVectorNumElements();
3696 MVT TruncVT = MVT::getIntegerVT(EltSize);
3697 SmallVector<SDValue, 8> Ops;
3698 for (unsigned i = 0; i != NumElts; ++i) {
3699 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
3700 const APInt &CInt = C->getAPIntValue();
3701 // Element types smaller than 32 bits are not legal, so use i32 elements.
3702 // The values are implicitly truncated so sext vs. zext doesn't matter.
3703 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
3704 }
3705 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
3706}
3707
3708static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
3709 return N->getOpcode() == ISD::SIGN_EXTEND ||
3710 N->getOpcode() == ISD::ANY_EXTEND ||
3711 isExtendedBUILD_VECTOR(N, DAG, true);
3712}
3713
3714static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
3715 return N->getOpcode() == ISD::ZERO_EXTEND ||
3716 N->getOpcode() == ISD::ANY_EXTEND ||
3717 isExtendedBUILD_VECTOR(N, DAG, false);
3718}
3719
3720static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
3721 unsigned Opcode = N->getOpcode();
3722 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3723 SDNode *N0 = N->getOperand(0).getNode();
3724 SDNode *N1 = N->getOperand(1).getNode();
3725 return N0->hasOneUse() && N1->hasOneUse() &&
3726 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
3727 }
3728 return false;
3729}
3730
3731static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
3732 unsigned Opcode = N->getOpcode();
3733 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3734 SDNode *N0 = N->getOperand(0).getNode();
3735 SDNode *N1 = N->getOperand(1).getNode();
3736 return N0->hasOneUse() && N1->hasOneUse() &&
3737 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
3738 }
3739 return false;
3740}
3741
3742SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3743 SelectionDAG &DAG) const {
3744 // The rounding mode is in bits 23:22 of the FPCR.
3745 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
3746 // The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3)
3747 // so that the shift + and get folded into a bitfield extract.
3748 SDLoc dl(Op);
3749
3750 SDValue Chain = Op.getOperand(0);
3751 SDValue FPCR_64 = DAG.getNode(
3752 ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
3753 {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
3754 Chain = FPCR_64.getValue(1);
3755 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
3756 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
3757 DAG.getConstant(1U << 22, dl, MVT::i32));
3758 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
3759 DAG.getConstant(22, dl, MVT::i32));
3760 SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
3761 DAG.getConstant(3, dl, MVT::i32));
3762 return DAG.getMergeValues({AND, Chain}, dl);
3763}
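
// Editor's sketch (not part of the original source) of the FLT_ROUNDS mapping
// computed above. FPCR[23:22] holds the hardware rounding mode and the C
// FLT_ROUNDS values are that mode rotated by one (0->1, 1->2, 2->3, 3->0);
// adding 1 << 22 before extracting the field performs the rotation in one add.
#include <cstdint>
static unsigned fltRoundsFromFPCR(uint64_t FPCR) {
  uint32_t Lo = (uint32_t)FPCR;         // only the low 32 bits are needed
  return ((Lo + (1u << 22)) >> 22) & 3; // 0->1, 1->2, 2->3, 3->0
}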
3764
3765SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
3766 SelectionDAG &DAG) const {
3767 SDLoc DL(Op);
3768 SDValue Chain = Op->getOperand(0);
3769 SDValue RMValue = Op->getOperand(1);
3770
3771 // The rounding mode is in bits 23:22 of the FPCR.
3772 // The llvm.set.rounding argument value to the rounding mode in FPCR mapping
3773 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
3774 // (((arg - 1) & 3) << 22).
3775 //
3776 // The argument of llvm.set.rounding must be within the range [0, 3], so
3777 // NearestTiesToAway (4) is not handled here. It is the responsibility of the
3778 // code that generates llvm.set.rounding to ensure this condition.
3779
3780 // Calculate new value of FPCR[23:22].
3781 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
3782 DAG.getConstant(1, DL, MVT::i32));
3783 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
3784 DAG.getConstant(0x3, DL, MVT::i32));
3785 RMValue =
3786 DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
3787 DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
3788 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
3789
3790 // Get current value of FPCR.
3791 SDValue Ops[] = {
3792 Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
3793 SDValue FPCR =
3794 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
3795 Chain = FPCR.getValue(1);
3796 FPCR = FPCR.getValue(0);
3797
3798 // Put the new rounding mode into FPCR[23:22].
3799 const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
3800 FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
3801 DAG.getConstant(RMMask, DL, MVT::i64));
3802 FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
3803 SDValue Ops2[] = {
3804 Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
3805 FPCR};
3806 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
3807}
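
// Editor's sketch (not part of the original source) of the FPCR update built
// above: the llvm.set.rounding argument is mapped back to the hardware
// encoding with ((arg - 1) & 3) and spliced into FPCR[23:22] via a
// read-modify-write of the register value.
#include <cstdint>
static uint64_t setRoundingInFPCR(uint64_t FPCR, uint32_t Arg /* 0..3 */) {
  uint64_t RM = ((uint64_t)((Arg - 1) & 3)) << 22;  // 0->3, 1->0, 2->1, 3->2
  return (FPCR & ~(UINT64_C(3) << 22)) | RM;        // replace bits 23:22
}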
3808
3809SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
3810 EVT VT = Op.getValueType();
3811
3812 // If SVE is available then i64 vector multiplications can also be made legal.
3813 bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
3814
3815 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
3816 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
3817
3818 // Multiplications are only custom-lowered for 128-bit vectors so that
3819 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
3820 assert(VT.is128BitVector() && VT.isInteger() &&
3821 "unexpected type for custom-lowering ISD::MUL");
3822 SDNode *N0 = Op.getOperand(0).getNode();
3823 SDNode *N1 = Op.getOperand(1).getNode();
3824 unsigned NewOpc = 0;
3825 bool isMLA = false;
3826 bool isN0SExt = isSignExtended(N0, DAG);
3827 bool isN1SExt = isSignExtended(N1, DAG);
3828 if (isN0SExt && isN1SExt)
3829 NewOpc = AArch64ISD::SMULL;
3830 else {
3831 bool isN0ZExt = isZeroExtended(N0, DAG);
3832 bool isN1ZExt = isZeroExtended(N1, DAG);
3833 if (isN0ZExt && isN1ZExt)
3834 NewOpc = AArch64ISD::UMULL;
3835 else if (isN1SExt || isN1ZExt) {
3836 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
3837 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
3838 if (isN1SExt && isAddSubSExt(N0, DAG)) {
3839 NewOpc = AArch64ISD::SMULL;
3840 isMLA = true;
3841 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
3842 NewOpc = AArch64ISD::UMULL;
3843 isMLA = true;
3844 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
3845 std::swap(N0, N1);
3846 NewOpc = AArch64ISD::UMULL;
3847 isMLA = true;
3848 }
3849 }
3850
3851 if (!NewOpc) {
3852 if (VT == MVT::v2i64)
3853 // Fall through to expand this. It is not legal.
3854 return SDValue();
3855 else
3856 // Other vector multiplications are legal.
3857 return Op;
3858 }
3859 }
3860
3861 // Legalize to a S/UMULL instruction
3862 SDLoc DL(Op);
3863 SDValue Op0;
3864 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
3865 if (!isMLA) {
3866 Op0 = skipExtensionForVectorMULL(N0, DAG);
3867 assert(Op0.getValueType().is64BitVector() &&
3868 Op1.getValueType().is64BitVector() &&
3869 "unexpected types for extended operands to VMULL");
3870 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
3871 }
3872 // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
3873 // isel lowering, to take advantage of no-stall back-to-back s/umul + s/umla.
3874 // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57.
3875 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
3876 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
3877 EVT Op1VT = Op1.getValueType();
3878 return DAG.getNode(N0->getOpcode(), DL, VT,
3879 DAG.getNode(NewOpc, DL, VT,
3880 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
3881 DAG.getNode(NewOpc, DL, VT,
3882 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
3883}
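
// Editor's sketch (not part of the original source) of the MLA distribution
// performed above. When one multiplicand is itself a sum of extended values,
// the widening multiply is distributed so each half becomes its own S/UMULL,
// and the final add can use accumulate forwarding:
//   (zext a + zext b) * zext c == (zext a * zext c) + (zext b * zext c)
// (equal modulo the wide element width).
#include <cstdint>
static uint64_t widenedMulOfSum(uint32_t A, uint32_t B, uint32_t C) {
  return (uint64_t)A * C + (uint64_t)B * C; // two UMULL-style products, one add
}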
3884
3885static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
3886 int Pattern) {
3887 return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
3888 DAG.getTargetConstant(Pattern, DL, MVT::i32));
3889}
3890
3891static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
3892 SDLoc DL(Op);
3893 EVT OutVT = Op.getValueType();
3894 SDValue InOp = Op.getOperand(1);
3895 EVT InVT = InOp.getValueType();
3896
3897 // Return the operand if the cast isn't changing type,
3898 // i.e. <n x 16 x i1> -> <n x 16 x i1>
3899 if (InVT == OutVT)
3900 return InOp;
3901
3902 SDValue Reinterpret =
3903 DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, InOp);
3904
3905 // If the argument converted to an svbool is a ptrue or a comparison, the
3906 // lanes introduced by the widening are zero by construction.
3907 switch (InOp.getOpcode()) {
3908 case AArch64ISD::SETCC_MERGE_ZERO:
3909 return Reinterpret;
3910 case ISD::INTRINSIC_WO_CHAIN:
3911 if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue)
3912 return Reinterpret;
3913 }
3914
3915 // Otherwise, zero the newly introduced lanes.
3916 SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
3917 SDValue MaskReinterpret =
3918 DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, Mask);
3919 return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
3920}
3921
3922SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3923 SelectionDAG &DAG) const {
3924 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3925 SDLoc dl(Op);
3926 switch (IntNo) {
3927 default: return SDValue(); // Don't custom lower most intrinsics.
3928 case Intrinsic::thread_pointer: {
3929 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3930 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
3931 }
3932 case Intrinsic::aarch64_neon_abs: {
3933 EVT Ty = Op.getValueType();
3934 if (Ty == MVT::i64) {
3935 SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
3936 Op.getOperand(1));
3937 Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
3938 return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
3939 } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
3940 return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
3941 } else {
3942 report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
3943 }
3944 }
3945 case Intrinsic::aarch64_neon_smax:
3946 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
3947 Op.getOperand(1), Op.getOperand(2));
3948 case Intrinsic::aarch64_neon_umax:
3949 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
3950 Op.getOperand(1), Op.getOperand(2));
3951 case Intrinsic::aarch64_neon_smin:
3952 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
3953 Op.getOperand(1), Op.getOperand(2));
3954 case Intrinsic::aarch64_neon_umin:
3955 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
3956 Op.getOperand(1), Op.getOperand(2));
3957
3958 case Intrinsic::aarch64_sve_sunpkhi:
3959 return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
3960 Op.getOperand(1));
3961 case Intrinsic::aarch64_sve_sunpklo:
3962 return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
3963 Op.getOperand(1));
3964 case Intrinsic::aarch64_sve_uunpkhi:
3965 return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
3966 Op.getOperand(1));
3967 case Intrinsic::aarch64_sve_uunpklo:
3968 return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
3969 Op.getOperand(1));
3970 case Intrinsic::aarch64_sve_clasta_n:
3971 return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
3972 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3973 case Intrinsic::aarch64_sve_clastb_n:
3974 return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
3975 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3976 case Intrinsic::aarch64_sve_lasta:
3977 return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
3978 Op.getOperand(1), Op.getOperand(2));
3979 case Intrinsic::aarch64_sve_lastb:
3980 return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
3981 Op.getOperand(1), Op.getOperand(2));
3982 case Intrinsic::aarch64_sve_rev:
3983 return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
3984 Op.getOperand(1));
3985 case Intrinsic::aarch64_sve_tbl:
3986 return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
3987 Op.getOperand(1), Op.getOperand(2));
3988 case Intrinsic::aarch64_sve_trn1:
3989 return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
3990 Op.getOperand(1), Op.getOperand(2));
3991 case Intrinsic::aarch64_sve_trn2:
3992 return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
3993 Op.getOperand(1), Op.getOperand(2));
3994 case Intrinsic::aarch64_sve_uzp1:
3995 return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
3996 Op.getOperand(1), Op.getOperand(2));
3997 case Intrinsic::aarch64_sve_uzp2:
3998 return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
3999 Op.getOperand(1), Op.getOperand(2));
4000 case Intrinsic::aarch64_sve_zip1:
4001 return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
4002 Op.getOperand(1), Op.getOperand(2));
4003 case Intrinsic::aarch64_sve_zip2:
4004 return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
4005 Op.getOperand(1), Op.getOperand(2));
4006 case Intrinsic::aarch64_sve_splice:
4007 return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
4008 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4009 case Intrinsic::aarch64_sve_ptrue:
4010 return getPTrue(DAG, dl, Op.getValueType(),
4011 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
4012 case Intrinsic::aarch64_sve_clz:
4013 return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
4014 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4015 case Intrinsic::aarch64_sve_cnt: {
4016 SDValue Data = Op.getOperand(3);
4017 // CTPOP only supports integer operands.
4018 if (Data.getValueType().isFloatingPoint())
4019 Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
4020 return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
4021 Op.getOperand(2), Data, Op.getOperand(1));
4022 }
4023 case Intrinsic::aarch64_sve_dupq_lane:
4024 return LowerDUPQLane(Op, DAG);
4025 case Intrinsic::aarch64_sve_convert_from_svbool:
4026 return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
4027 Op.getOperand(1));
4028 case Intrinsic::aarch64_sve_convert_to_svbool:
4029 return lowerConvertToSVBool(Op, DAG);
4030 case Intrinsic::aarch64_sve_fneg:
4031 return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
4032 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4033 case Intrinsic::aarch64_sve_frintp:
4034 return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
4035 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4036 case Intrinsic::aarch64_sve_frintm:
4037 return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
4038 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4039 case Intrinsic::aarch64_sve_frinti:
4040 return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
4041 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4042 case Intrinsic::aarch64_sve_frintx:
4043 return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
4044 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4045 case Intrinsic::aarch64_sve_frinta:
4046 return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
4047 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4048 case Intrinsic::aarch64_sve_frintn:
4049 return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
4050 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4051 case Intrinsic::aarch64_sve_frintz:
4052 return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
4053 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4054 case Intrinsic::aarch64_sve_ucvtf:
4055 return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
4056 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4057 Op.getOperand(1));
4058 case Intrinsic::aarch64_sve_scvtf:
4059 return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
4060 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4061 Op.getOperand(1));
4062 case Intrinsic::aarch64_sve_fcvtzu:
4063 return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
4064 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4065 Op.getOperand(1));
4066 case Intrinsic::aarch64_sve_fcvtzs:
4067 return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
4068 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4069 Op.getOperand(1));
4070 case Intrinsic::aarch64_sve_fsqrt:
4071 return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
4072 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4073 case Intrinsic::aarch64_sve_frecpx:
4074 return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
4075 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4076 case Intrinsic::aarch64_sve_fabs:
4077 return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4078 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4079 case Intrinsic::aarch64_sve_abs:
4080 return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4081 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4082 case Intrinsic::aarch64_sve_neg:
4083 return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
4084 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4085 case Intrinsic::aarch64_sve_insr: {
4086 SDValue Scalar = Op.getOperand(2);
4087 EVT ScalarTy = Scalar.getValueType();
4088 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
4089 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
4090
4091 return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
4092 Op.getOperand(1), Scalar);
4093 }
4094 case Intrinsic::aarch64_sve_rbit:
4095 return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
4096 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4097 Op.getOperand(1));
4098 case Intrinsic::aarch64_sve_revb:
4099 return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
4100 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4101 case Intrinsic::aarch64_sve_sxtb:
4102 return DAG.getNode(
4103 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4104 Op.getOperand(2), Op.getOperand(3),
4105 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4106 Op.getOperand(1));
4107 case Intrinsic::aarch64_sve_sxth:
4108 return DAG.getNode(
4109 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4110 Op.getOperand(2), Op.getOperand(3),
4111 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4112 Op.getOperand(1));
4113 case Intrinsic::aarch64_sve_sxtw:
4114 return DAG.getNode(
4115 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4116 Op.getOperand(2), Op.getOperand(3),
4117 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4118 Op.getOperand(1));
4119 case Intrinsic::aarch64_sve_uxtb:
4120 return DAG.getNode(
4121 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4122 Op.getOperand(2), Op.getOperand(3),
4123 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4124 Op.getOperand(1));
4125 case Intrinsic::aarch64_sve_uxth:
4126 return DAG.getNode(
4127 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4128 Op.getOperand(2), Op.getOperand(3),
4129 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4130 Op.getOperand(1));
4131 case Intrinsic::aarch64_sve_uxtw:
4132 return DAG.getNode(
4133 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4134 Op.getOperand(2), Op.getOperand(3),
4135 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4136 Op.getOperand(1));
4137
4138 case Intrinsic::localaddress: {
4139 const auto &MF = DAG.getMachineFunction();
4140 const auto *RegInfo = Subtarget->getRegisterInfo();
4141 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
4142 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
4143 Op.getSimpleValueType());
4144 }
4145
4146 case Intrinsic::eh_recoverfp: {
4147 // FIXME: This needs to be implemented to correctly handle highly aligned
4148 // stack objects. For now we simply return the incoming FP. Refer D53541
4149 // for more details.
4150 SDValue FnOp = Op.getOperand(1);
4151 SDValue IncomingFPOp = Op.getOperand(2);
4152 GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
4153 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
4154 if (!Fn)
4155 report_fatal_error(
4156 "llvm.eh.recoverfp must take a function as the first argument");
4157 return IncomingFPOp;
4158 }
4159
4160 case Intrinsic::aarch64_neon_vsri:
4161 case Intrinsic::aarch64_neon_vsli: {
4162 EVT Ty = Op.getValueType();
4163
4164 if (!Ty.isVector())
4165 report_fatal_error("Unexpected type for aarch64_neon_vsli");
4166
4167 assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
4168
4169 bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
4170 unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
4171 return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
4172 Op.getOperand(3));
4173 }
4174
4175 case Intrinsic::aarch64_neon_srhadd:
4176 case Intrinsic::aarch64_neon_urhadd:
4177 case Intrinsic::aarch64_neon_shadd:
4178 case Intrinsic::aarch64_neon_uhadd: {
4179 bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4180 IntNo == Intrinsic::aarch64_neon_shadd);
4181 bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4182 IntNo == Intrinsic::aarch64_neon_urhadd);
4183 unsigned Opcode =
4184 IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
4185 : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
4186 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4187 Op.getOperand(2));
4188 }
4189 case Intrinsic::aarch64_neon_sabd:
4190 case Intrinsic::aarch64_neon_uabd: {
4191 unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU
4192 : ISD::ABDS;
4193 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4194 Op.getOperand(2));
4195 }
4196 case Intrinsic::aarch64_neon_uaddlp: {
4197 unsigned Opcode = AArch64ISD::UADDLP;
4198 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
4199 }
4200 case Intrinsic::aarch64_neon_sdot:
4201 case Intrinsic::aarch64_neon_udot:
4202 case Intrinsic::aarch64_sve_sdot:
4203 case Intrinsic::aarch64_sve_udot: {
4204 unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
4205 IntNo == Intrinsic::aarch64_sve_udot)
4206 ? AArch64ISD::UDOT
4207 : AArch64ISD::SDOT;
4208 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4209 Op.getOperand(2), Op.getOperand(3));
4210 }
4211 }
4212}
4213
4214bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
4215 if (VT.getVectorElementType() == MVT::i8 ||
4216 VT.getVectorElementType() == MVT::i16) {
4217 EltTy = MVT::i32;
4218 return true;
4219 }
4220 return false;
4221}
4222
4223bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
4224 if (VT.getVectorElementType() == MVT::i32 &&
4225 VT.getVectorElementCount().getKnownMinValue() >= 4 &&
4226 !VT.isFixedLengthVector())
4227 return true;
4228
4229 return false;
4230}
4231
4232bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
4233 return ExtVal.getValueType().isScalableVector() ||
4234 useSVEForFixedLengthVectorVT(ExtVal.getValueType(),
4235 /*OverrideNEON=*/true);
4236}
4237
4238unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
4239 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
4240 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
4241 AArch64ISD::GLD1_MERGE_ZERO},
4242 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
4243 AArch64ISD::GLD1_UXTW_MERGE_ZERO},
4244 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
4245 AArch64ISD::GLD1_MERGE_ZERO},
4246 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
4247 AArch64ISD::GLD1_SXTW_MERGE_ZERO},
4248 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
4249 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
4250 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
4251 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
4252 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4253 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
4254 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4255 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
4256 };
4257 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4258 return AddrModes.find(Key)->second;
4259}
4260
4261unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
4262 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
4263 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
4264 AArch64ISD::SST1_PRED},
4265 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
4266 AArch64ISD::SST1_UXTW_PRED},
4267 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
4268 AArch64ISD::SST1_PRED},
4269 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
4270 AArch64ISD::SST1_SXTW_PRED},
4271 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
4272 AArch64ISD::SST1_SCALED_PRED},
4273 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
4274 AArch64ISD::SST1_UXTW_SCALED_PRED},
4275 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4276 AArch64ISD::SST1_SCALED_PRED},
4277 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4278 AArch64ISD::SST1_SXTW_SCALED_PRED},
4279 };
4280 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4281 return AddrModes.find(Key)->second;
4282}
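
// Editor's note: illustrative sketch, not part of the LLVM source. It restates
// the (Scaled, Signed, Extend) keyed lookup used by getGatherVecOpcode and
// getScatterVecOpcode above with placeholder strings standing in for the
// AArch64ISD opcodes; names here are hypothetical. Note how the signed and
// unsigned unscaled/scaled forms collapse onto the same entry whenever no
// index extension is needed.
#include <cassert>
#include <map>
#include <string>
#include <tuple>

static std::string pickGatherAddrMode(bool IsScaled, bool IsSigned,
                                      bool NeedsExtend) {
  static const std::map<std::tuple<bool, bool, bool>, std::string> AddrModes = {
      {std::make_tuple(false, false, false), "GLD1"},
      {std::make_tuple(false, false, true), "GLD1_UXTW"},
      {std::make_tuple(false, true, false), "GLD1"},
      {std::make_tuple(false, true, true), "GLD1_SXTW"},
      {std::make_tuple(true, false, false), "GLD1_SCALED"},
      {std::make_tuple(true, false, true), "GLD1_UXTW_SCALED"},
      {std::make_tuple(true, true, false), "GLD1_SCALED"},
      {std::make_tuple(true, true, true), "GLD1_SXTW_SCALED"},
  };
  auto It = AddrModes.find(std::make_tuple(IsScaled, IsSigned, NeedsExtend));
  assert(It != AddrModes.end() && "all eight combinations are covered");
  return It->second;
}
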
4283
4284unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
4285 switch (Opcode) {
4286 default:
4287 llvm_unreachable("unimplemented opcode");
4288 return Opcode;
4289 case AArch64ISD::GLD1_MERGE_ZERO:
4290 return AArch64ISD::GLD1S_MERGE_ZERO;
4291 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
4292 return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
4293 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
4294 return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
4295 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
4296 return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
4297 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
4298 return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
4299 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
4300 return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
4301 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
4302 return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
4303 }
4304}
4305
4306bool getGatherScatterIndexIsExtended(SDValue Index) {
4307 unsigned Opcode = Index.getOpcode();
4308 if (Opcode == ISD::SIGN_EXTEND_INREG)
4309 return true;
4310
4311 if (Opcode == ISD::AND) {
4312 SDValue Splat = Index.getOperand(1);
4313 if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
4314 return false;
4315 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
4316 if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
4317 return false;
4318 return true;
4319 }
4320
4321 return false;
4322}
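
// Editor's note: illustrative sketch, not part of the LLVM source. It shows
// why getGatherScatterIndexIsExtended above treats an AND with a splat of
// 0xFFFFFFFF as an extension of a 32-bit index: masking a 64-bit value with
// 0xFFFFFFFF is exactly a zero-extension of its low 32 bits.
#include <cassert>
#include <cstdint>

static void maskIsZeroExtendDemo(uint64_t X) {
  uint64_t Masked = X & 0xFFFFFFFFull;
  uint64_t Zext = static_cast<uint64_t>(static_cast<uint32_t>(X));
  assert(Masked == Zext && "AND with 0xFFFFFFFF behaves like a 32->64 zext");
  (void)Masked;
  (void)Zext;
}
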
4323
4324// If the base pointer of a masked gather or scatter is null, we
4325// may be able to swap BasePtr & Index and use the vector + register
4326// or vector + immediate addressing mode, e.g.
4327// VECTOR + REGISTER:
4328// getelementptr nullptr, <vscale x N x T> (splat(%offset)) + %indices)
4329// -> getelementptr %offset, <vscale x N x T> %indices
4330// VECTOR + IMMEDIATE:
4331// getelementptr nullptr, <vscale x N x T> (splat(#x)) + %indices)
4332// -> getelementptr #x, <vscale x N x T> %indices
4333void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
4334 unsigned &Opcode, bool IsGather,
4335 SelectionDAG &DAG) {
4336 if (!isNullConstant(BasePtr))
4337 return;
4338
4339 // FIXME: This will not match for fixed vector type codegen as the nodes in
4340 // question will have fixed<->scalable conversions around them. This should be
4341 // moved to a DAG combine or complex pattern so that it executes after all of
4342 // the fixed vector inserts and extracts have been removed. This deficiency
4343 // will result in a sub-optimal addressing mode being used, i.e. an ADD not
4344 // being folded into the scatter/gather.
4345 ConstantSDNode *Offset = nullptr;
4346 if (Index.getOpcode() == ISD::ADD)
4347 if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
4348 if (isa<ConstantSDNode>(SplatVal))
4349 Offset = cast<ConstantSDNode>(SplatVal);
4350 else {
4351 BasePtr = SplatVal;
4352 Index = Index->getOperand(0);
4353 return;
4354 }
4355 }
4356
4357 unsigned NewOp =
4358 IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
4359
4360 if (!Offset) {
4361 std::swap(BasePtr, Index);
4362 Opcode = NewOp;
4363 return;
4364 }
4365
4366 uint64_t OffsetVal = Offset->getZExtValue();
4367 unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
4368 auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
4369
4370 if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
4371 // Index is out of range for the immediate addressing mode
4372 BasePtr = ConstOffset;
4373 Index = Index->getOperand(0);
4374 return;
4375 }
4376
4377 // Immediate is in range
4378 Opcode = NewOp;
4379 BasePtr = Index->getOperand(0);
4380 Index = ConstOffset;
4381}
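
// Editor's note: illustrative sketch, not part of the LLVM source. It mirrors
// the range check in selectGatherScatterAddrMode above: a splatted offset can
// only use the vector+immediate form when it is a multiple of the element
// size and the scaled value fits the 5-bit immediate range 0..31.
#include <cstdint>

static bool fitsGatherScatterImm(uint64_t OffsetBytes, unsigned ElemSizeBytes) {
  if (OffsetBytes % ElemSizeBytes != 0)
    return false;                             // must address a whole element
  return OffsetBytes / ElemSizeBytes <= 31;   // #imm is limited to 5 bits
}

// For example, with 4-byte elements an offset of 124 (= 31 * 4) still fits,
// while 128 or any non-multiple of 4 falls back to the vector+register form.
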
4382
4383SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
4384 SelectionDAG &DAG) const {
4385 SDLoc DL(Op);
4386 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
4387 assert(MGT && "Can only custom lower gather load nodes");
4388
4389 bool IsFixedLength = MGT->getMemoryVT().isFixedLengthVector();
4390
4391 SDValue Index = MGT->getIndex();
4392 SDValue Chain = MGT->getChain();
4393 SDValue PassThru = MGT->getPassThru();
4394 SDValue Mask = MGT->getMask();
4395 SDValue BasePtr = MGT->getBasePtr();
4396 ISD::LoadExtType ExtTy = MGT->getExtensionType();
4397
4398 ISD::MemIndexType IndexType = MGT->getIndexType();
4399 bool IsScaled =
4400 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4401 bool IsSigned =
4402 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4403 bool IdxNeedsExtend =
4404 getGatherScatterIndexIsExtended(Index) ||
4405 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4406 bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
4407
4408 EVT VT = PassThru.getSimpleValueType();
4409 EVT IndexVT = Index.getSimpleValueType();
4410 EVT MemVT = MGT->getMemoryVT();
4411 SDValue InputVT = DAG.getValueType(MemVT);
4412
4413 if (VT.getVectorElementType() == MVT::bf16 &&
4414 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4415 return SDValue();
4416
4417 if (IsFixedLength) {
4418 assert(Subtarget->useSVEForFixedLengthVectors() &&
4419 "Cannot lower when not using SVE for fixed vectors");
4420 if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
4421 IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
4422 MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
4423 } else {
4424 MemVT = getContainerForFixedLengthVector(DAG, MemVT);
4425 IndexVT = MemVT.changeTypeToInteger();
4426 }
4427 InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
4428 Mask = DAG.getNode(
4429 ISD::ZERO_EXTEND, DL,
4430 VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
4431 }
4432
4433 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
4434 PassThru = SDValue();
4435
4436 if (VT.isFloatingPoint() && !IsFixedLength) {
4437 // Handle FP data by using an integer gather and casting the result.
4438 if (PassThru) {
4439 EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4440 PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
4441 }
4442 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4443 }
4444
4445 SDVTList VTs = DAG.getVTList(IndexVT, MVT::Other);
4446
4447 if (getGatherScatterIndexIsExtended(Index))
4448 Index = Index.getOperand(0);
4449
4450 unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
4451 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4452 /*isGather=*/true, DAG);
4453
4454 if (ResNeedsSignExtend)
4455 Opcode = getSignExtendedGatherOpcode(Opcode);
4456
4457 if (IsFixedLength) {
4458 if (Index.getSimpleValueType().isFixedLengthVector())
4459 Index = convertToScalableVector(DAG, IndexVT, Index);
4460 if (BasePtr.getSimpleValueType().isFixedLengthVector())
4461 BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
4462 Mask = convertFixedMaskToScalableVector(Mask, DAG);
4463 }
4464
4465 SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT};
4466 SDValue Result = DAG.getNode(Opcode, DL, VTs, Ops);
4467 Chain = Result.getValue(1);
4468
4469 if (IsFixedLength) {
4470 Result = convertFromScalableVector(
4471 DAG, VT.changeVectorElementType(IndexVT.getVectorElementType()),
4472 Result);
4473 Result = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Result);
4474 Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
4475
4476 if (PassThru)
4477 Result = DAG.getSelect(DL, VT, MGT->getMask(), Result, PassThru);
4478 } else {
4479 if (PassThru)
4480 Result = DAG.getSelect(DL, IndexVT, Mask, Result, PassThru);
4481
4482 if (VT.isFloatingPoint())
4483 Result = getSVESafeBitCast(VT, Result, DAG);
4484 }
4485
4486 return DAG.getMergeValues({Result, Chain}, DL);
4487}
4488
4489SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
4490 SelectionDAG &DAG) const {
4491 SDLoc DL(Op);
4492 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
4493 assert(MSC && "Can only custom lower scatter store nodes");
4494
4495 bool IsFixedLength = MSC->getMemoryVT().isFixedLengthVector();
4496
4497 SDValue Index = MSC->getIndex();
4498 SDValue Chain = MSC->getChain();
4499 SDValue StoreVal = MSC->getValue();
4500 SDValue Mask = MSC->getMask();
4501 SDValue BasePtr = MSC->getBasePtr();
4502
4503 ISD::MemIndexType IndexType = MSC->getIndexType();
4504 bool IsScaled =
4505 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4506 bool IsSigned =
4507 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4508 bool NeedsExtend =
4509 getGatherScatterIndexIsExtended(Index) ||
4510 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4511
4512 EVT VT = StoreVal.getSimpleValueType();
4513 EVT IndexVT = Index.getSimpleValueType();
4514 SDVTList VTs = DAG.getVTList(MVT::Other);
4515 EVT MemVT = MSC->getMemoryVT();
4516 SDValue InputVT = DAG.getValueType(MemVT);
4517
4518 if (VT.getVectorElementType() == MVT::bf16 &&
4519 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4520 return SDValue();
4521
4522 if (IsFixedLength) {
4523 assert(Subtarget->useSVEForFixedLengthVectors() &&
4524 "Cannot lower when not using SVE for fixed vectors");
4525 if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
4526 IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
4527 MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
4528 } else {
4529 MemVT = getContainerForFixedLengthVector(DAG, MemVT);
4530 IndexVT = MemVT.changeTypeToInteger();
4531 }
4532 InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
4533
4534 StoreVal =
4535 DAG.getNode(ISD::BITCAST, DL, VT.changeTypeToInteger(), StoreVal);
4536 StoreVal = DAG.getNode(
4537 ISD::ANY_EXTEND, DL,
4538 VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
4539 StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
4540 Mask = DAG.getNode(
4541 ISD::ZERO_EXTEND, DL,
4542 VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
4543 } else if (VT.isFloatingPoint()) {
4544 // Handle FP data by casting the data so an integer scatter can be used.
4545 EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4546 StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
4547 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4548 }
4549
4550 if (getGatherScatterIndexIsExtended(Index))
4551 Index = Index.getOperand(0);
4552
4553 unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
4554 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4555 /*isGather=*/false, DAG);
4556
4557 if (IsFixedLength) {
4558 if (Index.getSimpleValueType().isFixedLengthVector())
4559 Index = convertToScalableVector(DAG, IndexVT, Index);
4560 if (BasePtr.getSimpleValueType().isFixedLengthVector())
4561 BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
4562 Mask = convertFixedMaskToScalableVector(Mask, DAG);
4563 }
4564
4565 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
4566 return DAG.getNode(Opcode, DL, VTs, Ops);
4567}
4568
4569SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
4570 SDLoc DL(Op);
4571 MaskedLoadSDNode *LoadNode = cast<MaskedLoadSDNode>(Op);
4572 assert(LoadNode && "Expected custom lowering of a masked load node");
4573 EVT VT = Op->getValueType(0);
4574
4575 if (useSVEForFixedLengthVectorVT(VT, true))
4576 return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
4577
4578 SDValue PassThru = LoadNode->getPassThru();
4579 SDValue Mask = LoadNode->getMask();
4580
4581 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
4582 return Op;
4583
4584 SDValue Load = DAG.getMaskedLoad(
4585 VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
4586 LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
4587 LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
4588 LoadNode->getExtensionType());
4589
4590 SDValue Result = DAG.getSelect(DL, VT, Mask, Load, PassThru);
4591
4592 return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
4593}
4594
4595// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
4596static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
4597 EVT VT, EVT MemVT,
4598 SelectionDAG &DAG) {
4599 assert(VT.isVector() && "VT should be a vector type");
4600 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
4601
4602 SDValue Value = ST->getValue();
4603
4604 // It first extends the promoted v4i16 to v8i16, truncates to v8i8, and extracts
4605 // the word lane which represents the v4i8 subvector. It optimizes the store
4606 // to:
4607 //
4608 // xtn v0.8b, v0.8h
4609 // str s0, [x0]
4610
4611 SDValue Undef = DAG.getUNDEF(MVT::i16);
4612 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
4613 {Undef, Undef, Undef, Undef});
4614
4615 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
4616 Value, UndefVec);
4617 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
4618
4619 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
4620 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4621 Trunc, DAG.getConstant(0, DL, MVT::i64));
4622
4623 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
4624 ST->getBasePtr(), ST->getMemOperand());
4625}
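
// Editor's note: illustrative sketch, not part of the LLVM source. It models,
// in scalar code, what LowerTruncateVectorStore arranges for the vector case:
// each of the four promoted 16-bit lanes is truncated to its low byte and the
// four bytes are written as one 32-bit sized store (the "str s0" above).
#include <cstdint>
#include <cstring>

static void storeV4i16AsV4i8(const uint16_t Lanes[4], uint8_t *Dst) {
  uint8_t Bytes[4];
  for (int I = 0; I < 4; ++I)
    Bytes[I] = static_cast<uint8_t>(Lanes[I]); // truncate each lane to i8
  std::memcpy(Dst, Bytes, 4);                  // one 32-bit sized store
}
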
4626
4627 // Custom lowering for any store, vector or scalar, default or with a
4628 // truncating operation. Currently we only custom lower truncating stores
4629 // from vector v4i16 to v4i8 and volatile stores of i128.
4630SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
4631 SelectionDAG &DAG) const {
4632 SDLoc Dl(Op);
4633 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
4634 assert(StoreNode && "Can only custom lower store nodes");
4635
4636 SDValue Value = StoreNode->getValue();
4637
4638 EVT VT = Value.getValueType();
4639 EVT MemVT = StoreNode->getMemoryVT();
4640
4641 if (VT.isVector()) {
4642 if (useSVEForFixedLengthVectorVT(VT, true))
4643 return LowerFixedLengthVectorStoreToSVE(Op, DAG);
4644
4645 unsigned AS = StoreNode->getAddressSpace();
4646 Align Alignment = StoreNode->getAlign();
4647 if (Alignment < MemVT.getStoreSize() &&
4648 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
4649 StoreNode->getMemOperand()->getFlags(),
4650 nullptr)) {
4651 return scalarizeVectorStore(StoreNode, DAG);
4652 }
4653
4654 if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
4655 MemVT == MVT::v4i8) {
4656 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
4657 }
4658 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
4659 // the custom lowering, as there are no un-paired non-temporal stores and
4660 // legalization will break up 256 bit inputs.
4661 ElementCount EC = MemVT.getVectorElementCount();
4662 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
4663 EC.isKnownEven() &&
4664 ((MemVT.getScalarSizeInBits() == 8u ||
4665 MemVT.getScalarSizeInBits() == 16u ||
4666 MemVT.getScalarSizeInBits() == 32u ||
4667 MemVT.getScalarSizeInBits() == 64u))) {
4668 SDValue Lo =
4669 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4670 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4671 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
4672 SDValue Hi =
4673 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4674 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4675 StoreNode->getValue(),
4676 DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
4677 SDValue Result = DAG.getMemIntrinsicNode(
4678 AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
4679 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4680 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4681 return Result;
4682 }
4683 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
4684 assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
4685 SDValue Lo =
4686 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4687 DAG.getConstant(0, Dl, MVT::i64));
4688 SDValue Hi =
4689 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4690 DAG.getConstant(1, Dl, MVT::i64));
4691 SDValue Result = DAG.getMemIntrinsicNode(
4692 AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
4693 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4694 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4695 return Result;
4696 } else if (MemVT == MVT::i64x8) {
4697 SDValue Value = StoreNode->getValue();
4698 assert(Value->getValueType(0) == MVT::i64x8);
4699 SDValue Chain = StoreNode->getChain();
4700 SDValue Base = StoreNode->getBasePtr();
4701 EVT PtrVT = Base.getValueType();
4702 for (unsigned i = 0; i < 8; i++) {
4703 SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
4704 Value, DAG.getConstant(i, Dl, MVT::i32));
4705 SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
4706 DAG.getConstant(i * 8, Dl, PtrVT));
4707 Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
4708 StoreNode->getOriginalAlign());
4709 }
4710 return Chain;
4711 }
4712
4713 return SDValue();
4714}
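
// Editor's note: illustrative sketch, not part of the LLVM source. The i64x8
// branch of LowerSTORE above emits eight individual 8-byte stores at offsets
// 0, 8, ..., 56 from the base pointer; this scalar model shows the same
// address arithmetic.
#include <cstdint>
#include <cstring>

static void storeI64x8(const uint64_t Parts[8], unsigned char *Base) {
  for (unsigned I = 0; I < 8; ++I)
    std::memcpy(Base + I * 8, &Parts[I], sizeof(uint64_t)); // part i -> base + i*8
}
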
4715
4716SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
4717 SelectionDAG &DAG) const {
4718 SDLoc DL(Op);
4719 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
4720 assert(LoadNode && "Expected custom lowering of a load node");
4721
4722 if (LoadNode->getMemoryVT() == MVT::i64x8) {
4723 SmallVector<SDValue, 8> Ops;
4724 SDValue Base = LoadNode->getBasePtr();
4725 SDValue Chain = LoadNode->getChain();
4726 EVT PtrVT = Base.getValueType();
4727 for (unsigned i = 0; i < 8; i++) {
4728 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
4729 DAG.getConstant(i * 8, DL, PtrVT));
4730 SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
4731 LoadNode->getPointerInfo(),
4732 LoadNode->getOriginalAlign());
4733 Ops.push_back(Part);
4734 Chain = SDValue(Part.getNode(), 1);
4735 }
4736 SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
4737 return DAG.getMergeValues({Loaded, Chain}, DL);
4738 }
4739
4740 // Custom lowering for extending v4i8 vector loads.
4741 EVT VT = Op->getValueType(0);
4742 assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
4743
4744 if (LoadNode->getMemoryVT() != MVT::v4i8)
4745 return SDValue();
4746
4747 unsigned ExtType;
4748 if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
4749 ExtType = ISD::SIGN_EXTEND;
4750 else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
4751 LoadNode->getExtensionType() == ISD::EXTLOAD)
4752 ExtType = ISD::ZERO_EXTEND;
4753 else
4754 return SDValue();
4755
4756 SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
4757 LoadNode->getBasePtr(), MachinePointerInfo());
4758 SDValue Chain = Load.getValue(1);
4759 SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
4760 SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
4761 SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
4762 Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
4763 DAG.getConstant(0, DL, MVT::i64));
4764 if (VT == MVT::v4i32)
4765 Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
4766 return DAG.getMergeValues({Ext, Chain}, DL);
4767}
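
// Editor's note: illustrative sketch, not part of the LLVM source. The v4i8
// extending-load path above fetches all four bytes as one 32-bit unit (via an
// f32 load) and then widens each lane; this scalar model shows the zero- and
// sign-extending variants of that widening step.
#include <cstdint>
#include <cstring>

static void loadV4i8ExtToV4i16(const uint8_t *Src, uint16_t Out[4],
                               bool SignExtend) {
  uint8_t Bytes[4];
  std::memcpy(Bytes, Src, 4); // one 32-bit sized load covering all four lanes
  for (int I = 0; I < 4; ++I)
    Out[I] = SignExtend
                 ? static_cast<uint16_t>(static_cast<int16_t>(
                       static_cast<int8_t>(Bytes[I]))) // i8 -> i16 sext
                 : static_cast<uint16_t>(Bytes[I]);     // i8 -> i16 zext
}
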
4768
4769// Generate SUBS and CSEL for integer abs.
4770SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
4771 MVT VT = Op.getSimpleValueType();
4772
4773 if (VT.isVector())
4774 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
4775
4776 SDLoc DL(Op);
4777 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4778 Op.getOperand(0));
4779 // Generate SUBS & CSEL.
4780 SDValue Cmp =
4781 DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
4782 Op.getOperand(0), DAG.getConstant(0, DL, VT));
4783 return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
4784 DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
4785 Cmp.getValue(1));
4786}
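
// Editor's note: illustrative sketch, not part of the LLVM source. LowerABS
// above selects between the original value and its negation based on the
// flags of a compare with zero (SUBS + CSEL with the PL condition); this is
// the same decision in scalar form. Negation is done with wrapping
// semantics, as the SUB node does, so INT64_MIN maps to itself.
#include <cstdint>

static int64_t absLikeSubsCsel(int64_t X) {
  int64_t Neg = static_cast<int64_t>(0ull - static_cast<uint64_t>(X));
  return (X >= 0) ? X : Neg; // CSEL x, neg, pl ("plus or zero")
}
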
4787
4788SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
4789 SelectionDAG &DAG) const {
4790 LLVM_DEBUG(dbgs() << "Custom lowering: ");
4791 LLVM_DEBUG(Op.dump());
4792
4793 switch (Op.getOpcode()) {
4794 default:
4795 llvm_unreachable("unimplemented operand");
4796 return SDValue();
4797 case ISD::BITCAST:
4798 return LowerBITCAST(Op, DAG);
4799 case ISD::GlobalAddress:
4800 return LowerGlobalAddress(Op, DAG);
4801 case ISD::GlobalTLSAddress:
4802 return LowerGlobalTLSAddress(Op, DAG);
4803 case ISD::SETCC:
4804 case ISD::STRICT_FSETCC:
4805 case ISD::STRICT_FSETCCS:
4806 return LowerSETCC(Op, DAG);
4807 case ISD::BR_CC:
4808 return LowerBR_CC(Op, DAG);
4809 case ISD::SELECT:
4810 return LowerSELECT(Op, DAG);
4811 case ISD::SELECT_CC:
4812 return LowerSELECT_CC(Op, DAG);
4813 case ISD::JumpTable:
4814 return LowerJumpTable(Op, DAG);
4815 case ISD::BR_JT:
4816 return LowerBR_JT(Op, DAG);
4817 case ISD::ConstantPool:
4818 return LowerConstantPool(Op, DAG);
4819 case ISD::BlockAddress:
4820 return LowerBlockAddress(Op, DAG);
4821 case ISD::VASTART:
4822 return LowerVASTART(Op, DAG);
4823 case ISD::VACOPY:
4824 return LowerVACOPY(Op, DAG);
4825 case ISD::VAARG:
4826 return LowerVAARG(Op, DAG);
4827 case ISD::ADDC:
4828 case ISD::ADDE:
4829 case ISD::SUBC:
4830 case ISD::SUBE:
4831 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
4832 case ISD::SADDO:
4833 case ISD::UADDO:
4834 case ISD::SSUBO:
4835 case ISD::USUBO:
4836 case ISD::SMULO:
4837 case ISD::UMULO:
4838 return LowerXALUO(Op, DAG);
4839 case ISD::FADD:
4840 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
4841 case ISD::FSUB:
4842 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
4843 case ISD::FMUL:
4844 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
4845 case ISD::FMA:
4846 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
4847 case ISD::FDIV:
4848 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
4849 case ISD::FNEG:
4850 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
4851 case ISD::FCEIL:
4852 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
4853 case ISD::FFLOOR:
4854 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
4855 case ISD::FNEARBYINT:
4856 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
4857 case ISD::FRINT:
4858 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
4859 case ISD::FROUND:
4860 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
4861 case ISD::FROUNDEVEN:
4862 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
4863 case ISD::FTRUNC:
4864 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
4865 case ISD::FSQRT:
4866 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
4867 case ISD::FABS:
4868 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
4869 case ISD::FP_ROUND:
4870 case ISD::STRICT_FP_ROUND:
4871 return LowerFP_ROUND(Op, DAG);
4872 case ISD::FP_EXTEND:
4873 return LowerFP_EXTEND(Op, DAG);
4874 case ISD::FRAMEADDR:
4875 return LowerFRAMEADDR(Op, DAG);
4876 case ISD::SPONENTRY:
4877 return LowerSPONENTRY(Op, DAG);
4878 case ISD::RETURNADDR:
4879 return LowerRETURNADDR(Op, DAG);
4880 case ISD::ADDROFRETURNADDR:
4881 return LowerADDROFRETURNADDR(Op, DAG);
4882 case ISD::CONCAT_VECTORS:
4883 return LowerCONCAT_VECTORS(Op, DAG);
4884 case ISD::INSERT_VECTOR_ELT:
4885 return LowerINSERT_VECTOR_ELT(Op, DAG);
4886 case ISD::EXTRACT_VECTOR_ELT:
4887 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4888 case ISD::BUILD_VECTOR:
4889 return LowerBUILD_VECTOR(Op, DAG);
4890 case ISD::VECTOR_SHUFFLE:
4891 return LowerVECTOR_SHUFFLE(Op, DAG);
4892 case ISD::SPLAT_VECTOR:
4893 return LowerSPLAT_VECTOR(Op, DAG);
4894 case ISD::EXTRACT_SUBVECTOR:
4895 return LowerEXTRACT_SUBVECTOR(Op, DAG);
4896 case ISD::INSERT_SUBVECTOR:
4897 return LowerINSERT_SUBVECTOR(Op, DAG);
4898 case ISD::SDIV:
4899 case ISD::UDIV:
4900 return LowerDIV(Op, DAG);
4901 case ISD::SMIN:
4902 case ISD::UMIN:
4903 case ISD::SMAX:
4904 case ISD::UMAX:
4905 return LowerMinMax(Op, DAG);
4906 case ISD::SRA:
4907 case ISD::SRL:
4908 case ISD::SHL:
4909 return LowerVectorSRA_SRL_SHL(Op, DAG);
4910 case ISD::SHL_PARTS:
4911 case ISD::SRL_PARTS:
4912 case ISD::SRA_PARTS:
4913 return LowerShiftParts(Op, DAG);
4914 case ISD::CTPOP:
4915 return LowerCTPOP(Op, DAG);
4916 case ISD::FCOPYSIGN:
4917 return LowerFCOPYSIGN(Op, DAG);
4918 case ISD::OR:
4919 return LowerVectorOR(Op, DAG);
4920 case ISD::XOR:
4921 return LowerXOR(Op, DAG);
4922 case ISD::PREFETCH:
4923 return LowerPREFETCH(Op, DAG);
4924 case ISD::SINT_TO_FP:
4925 case ISD::UINT_TO_FP:
4926 case ISD::STRICT_SINT_TO_FP:
4927 case ISD::STRICT_UINT_TO_FP:
4928 return LowerINT_TO_FP(Op, DAG);
4929 case ISD::FP_TO_SINT:
4930 case ISD::FP_TO_UINT:
4931 case ISD::STRICT_FP_TO_SINT:
4932 case ISD::STRICT_FP_TO_UINT:
4933 return LowerFP_TO_INT(Op, DAG);
4934 case ISD::FP_TO_SINT_SAT:
4935 case ISD::FP_TO_UINT_SAT:
4936 return LowerFP_TO_INT_SAT(Op, DAG);
4937 case ISD::FSINCOS:
4938 return LowerFSINCOS(Op, DAG);
4939 case ISD::FLT_ROUNDS_:
4940 return LowerFLT_ROUNDS_(Op, DAG);
4941 case ISD::SET_ROUNDING:
4942 return LowerSET_ROUNDING(Op, DAG);
4943 case ISD::MUL:
4944 return LowerMUL(Op, DAG);
4945 case ISD::MULHS:
4946 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED,
4947 /*OverrideNEON=*/true);
4948 case ISD::MULHU:
4949 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
4950 /*OverrideNEON=*/true);
4951 case ISD::INTRINSIC_WO_CHAIN:
4952 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4953 case ISD::STORE:
4954 return LowerSTORE(Op, DAG);
4955 case ISD::MSTORE:
4956 return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
4957 case ISD::MGATHER:
4958 return LowerMGATHER(Op, DAG);
4959 case ISD::MSCATTER:
4960 return LowerMSCATTER(Op, DAG);
4961 case ISD::VECREDUCE_SEQ_FADD:
4962 return LowerVECREDUCE_SEQ_FADD(Op, DAG);
4963 case ISD::VECREDUCE_ADD:
4964 case ISD::VECREDUCE_AND:
4965 case ISD::VECREDUCE_OR:
4966 case ISD::VECREDUCE_XOR:
4967 case ISD::VECREDUCE_SMAX:
4968 case ISD::VECREDUCE_SMIN:
4969 case ISD::VECREDUCE_UMAX:
4970 case ISD::VECREDUCE_UMIN:
4971 case ISD::VECREDUCE_FADD:
4972 case ISD::VECREDUCE_FMAX:
4973 case ISD::VECREDUCE_FMIN:
4974 return LowerVECREDUCE(Op, DAG);
4975 case ISD::ATOMIC_LOAD_SUB:
4976 return LowerATOMIC_LOAD_SUB(Op, DAG);
4977 case ISD::ATOMIC_LOAD_AND:
4978 return LowerATOMIC_LOAD_AND(Op, DAG);
4979 case ISD::DYNAMIC_STACKALLOC:
4980 return LowerDYNAMIC_STACKALLOC(Op, DAG);
4981 case ISD::VSCALE:
4982 return LowerVSCALE(Op, DAG);
4983 case ISD::ANY_EXTEND:
4984 case ISD::SIGN_EXTEND:
4985 case ISD::ZERO_EXTEND:
4986 return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
4987 case ISD::SIGN_EXTEND_INREG: {
4988 // Only custom lower when ExtraVT has a legal byte based element type.
4989 EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4990 EVT ExtraEltVT = ExtraVT.getVectorElementType();
4991 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
4992 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
4993 return SDValue();
4994
4995 return LowerToPredicatedOp(Op, DAG,
4996 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
4997 }
4998 case ISD::TRUNCATE:
4999 return LowerTRUNCATE(Op, DAG);
5000 case ISD::MLOAD:
5001 return LowerMLOAD(Op, DAG);
5002 case ISD::LOAD:
5003 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
5004 return LowerFixedLengthVectorLoadToSVE(Op, DAG);
5005 return LowerLOAD(Op, DAG);
5006 case ISD::ADD:
5007 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
5008 case ISD::AND:
5009 return LowerToScalableOp(Op, DAG);
5010 case ISD::SUB:
5011 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
5012 case ISD::FMAXIMUM:
5013 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
5014 case ISD::FMAXNUM:
5015 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
5016 case ISD::FMINIMUM:
5017 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
5018 case ISD::FMINNUM:
5019 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
5020 case ISD::VSELECT:
5021 return LowerFixedLengthVectorSelectToSVE(Op, DAG);
5022 case ISD::ABS:
5023 return LowerABS(Op, DAG);
5024 case ISD::BITREVERSE:
5025 return LowerBitreverse(Op, DAG);
5026 case ISD::BSWAP:
5027 return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
5028 case ISD::CTLZ:
5029 return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
5030 /*OverrideNEON=*/true);
5031 case ISD::CTTZ:
5032 return LowerCTTZ(Op, DAG);
5033 case ISD::VECTOR_SPLICE:
5034 return LowerVECTOR_SPLICE(Op, DAG);
5035 }
5036}
5037
5038bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
5039 return !Subtarget->useSVEForFixedLengthVectors();
5040}
5041
5042bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
5043 EVT VT, bool OverrideNEON) const {
5044 if (!Subtarget->useSVEForFixedLengthVectors())
5045 return false;
5046
5047 if (!VT.isFixedLengthVector())
5048 return false;
5049
5050 // Don't use SVE for vectors we cannot scalarize if required.
5051 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
5052 // Fixed length predicates should be promoted to i8.
5053 // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
5054 case MVT::i1:
5055 default:
5056 return false;
5057 case MVT::i8:
5058 case MVT::i16:
5059 case MVT::i32:
5060 case MVT::i64:
5061 case MVT::f16:
5062 case MVT::f32:
5063 case MVT::f64:
5064 break;
5065 }
5066
5067 // All SVE implementations support NEON sized vectors.
5068 if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
5069 return true;
5070
5071 // Ensure NEON MVTs only belong to a single register class.
5072 if (VT.getFixedSizeInBits() <= 128)
5073 return false;
5074
5075 // Don't use SVE for types that don't fit.
5076 if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
5077 return false;
5078
5079 // TODO: Perhaps an artificial restriction, but worth having whilst getting
5080 // the base fixed length SVE support in place.
5081 if (!VT.isPow2VectorType())
5082 return false;
5083
5084 return true;
5085}
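
// Editor's note: illustrative sketch, not part of the LLVM source. It restates
// the size-based part of useSVEForFixedLengthVectorVT above as a standalone
// predicate: NEON-sized vectors (64/128 bits) are accepted only when NEON is
// overridden, the type must fit the guaranteed minimum SVE register width,
// and only power-of-two element counts are currently accepted.
static bool useSVEForFixedVector(unsigned SizeInBits, unsigned NumElts,
                                 unsigned MinSVEBits, bool OverrideNEON) {
  if (OverrideNEON && (SizeInBits == 64 || SizeInBits == 128))
    return true;                          // all SVE implementations cover NEON sizes
  if (SizeInBits <= 128)
    return false;                         // keep NEON-sized vectors in NEON classes
  if (SizeInBits > MinSVEBits)
    return false;                         // does not fit the guaranteed SVE width
  return (NumElts & (NumElts - 1)) == 0;  // power-of-two element count only
}
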
5086
5087//===----------------------------------------------------------------------===//
5088// Calling Convention Implementation
5089//===----------------------------------------------------------------------===//
5090
5091/// Selects the correct CCAssignFn for a given CallingConvention value.
5092CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
5093 bool IsVarArg) const {
5094 switch (CC) {
5095 default:
5096 report_fatal_error("Unsupported calling convention.");
5097 case CallingConv::WebKit_JS:
5098 return CC_AArch64_WebKit_JS;
5099 case CallingConv::GHC:
5100 return CC_AArch64_GHC;
5101 case CallingConv::C:
5102 case CallingConv::Fast:
5103 case CallingConv::PreserveMost:
5104 case CallingConv::CXX_FAST_TLS:
5105 case CallingConv::Swift:
5106 case CallingConv::SwiftTail:
5107 case CallingConv::Tail:
5108 if (Subtarget->isTargetWindows() && IsVarArg)
5109 return CC_AArch64_Win64_VarArg;
5110 if (!Subtarget->isTargetDarwin())
5111 return CC_AArch64_AAPCS;
5112 if (!IsVarArg)
5113 return CC_AArch64_DarwinPCS;
5114 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
5115 : CC_AArch64_DarwinPCS_VarArg;
5116 case CallingConv::Win64:
5117 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
5118 case CallingConv::CFGuard_Check:
5119 return CC_AArch64_Win64_CFGuard_Check;
5120 case CallingConv::AArch64_VectorCall:
5121 case CallingConv::AArch64_SVE_VectorCall:
5122 return CC_AArch64_AAPCS;
5123 }
5124}
5125
5126CCAssignFn *
5127AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
5128 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
5129 : RetCC_AArch64_AAPCS;
5130}
5131
5132SDValue AArch64TargetLowering::LowerFormalArguments(
5133 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
5134 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5135 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5136 MachineFunction &MF = DAG.getMachineFunction();
5137 MachineFrameInfo &MFI = MF.getFrameInfo();
5138 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
5139
5140 // Assign locations to all of the incoming arguments.
5141 SmallVector<CCValAssign, 16> ArgLocs;
5142 DenseMap<unsigned, SDValue> CopiedRegs;
5143 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
5144
5145 // At this point, Ins[].VT may already be promoted to i32. To correctly
5146 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5147 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5148 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
5149 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
5150 // LocVT.
5151 unsigned NumArgs = Ins.size();
5152 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
5153 unsigned CurArgIdx = 0;
5154 for (unsigned i = 0; i != NumArgs; ++i) {
5155 MVT ValVT = Ins[i].VT;
5156 if (Ins[i].isOrigArg()) {
5157 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
5158 CurArgIdx = Ins[i].getOrigArgIndex();
5159
5160 // Get type of the original argument.
5161 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
5162 /*AllowUnknown*/ true);
5163 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
5164 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5165 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5166 ValVT = MVT::i8;
5167 else if (ActualMVT == MVT::i16)
5168 ValVT = MVT::i16;
5169 }
5170 bool UseVarArgCC = false;
5171 if (IsWin64)
5172 UseVarArgCC = isVarArg;
5173 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
5174 bool Res =
5175 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
5176 assert(!Res && "Call operand has unhandled type");
5177 (void)Res;
5178 }
5179 SmallVector<SDValue, 16> ArgValues;
5180 unsigned ExtraArgLocs = 0;
5181 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5182 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5183
5184 if (Ins[i].Flags.isByVal()) {
5185 // Byval is used for HFAs in the PCS, but the system should work in a
5186 // non-compliant manner for larger structs.
5187 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5188 int Size = Ins[i].Flags.getByValSize();
5189 unsigned NumRegs = (Size + 7) / 8;
5190
5191 // FIXME: This works on big-endian for composite byvals, which are the common
5192 // case. It should work for fundamental types too.
5193 unsigned FrameIdx =
5194 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
5195 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
5196 InVals.push_back(FrameIdxN);
5197
5198 continue;
5199 }
5200
5201 if (Ins[i].Flags.isSwiftAsync())
5202 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5203
5204 SDValue ArgValue;
5205 if (VA.isRegLoc()) {
5206 // Arguments stored in registers.
5207 EVT RegVT = VA.getLocVT();
5208 const TargetRegisterClass *RC;
5209
5210 if (RegVT == MVT::i32)
5211 RC = &AArch64::GPR32RegClass;
5212 else if (RegVT == MVT::i64)
5213 RC = &AArch64::GPR64RegClass;
5214 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
5215 RC = &AArch64::FPR16RegClass;
5216 else if (RegVT == MVT::f32)
5217 RC = &AArch64::FPR32RegClass;
5218 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
5219 RC = &AArch64::FPR64RegClass;
5220 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
5221 RC = &AArch64::FPR128RegClass;
5222 else if (RegVT.isScalableVector() &&
5223 RegVT.getVectorElementType() == MVT::i1)
5224 RC = &AArch64::PPRRegClass;
5225 else if (RegVT.isScalableVector())
5226 RC = &AArch64::ZPRRegClass;
5227 else
5228 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
5229
5230 // Transform the arguments in physical registers into virtual ones.
5231 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
5232 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
5233
5234 // If this is an 8, 16 or 32-bit value, it is really passed promoted
5235 // to 64 bits. Insert an assert[sz]ext to capture this, then
5236 // truncate to the right size.
5237 switch (VA.getLocInfo()) {
5238 default:
5239 llvm_unreachable("Unknown loc info!");
5240 case CCValAssign::Full:
5241 break;
5242 case CCValAssign::Indirect:
5243 assert(VA.getValVT().isScalableVector() &&
5244 "Only scalable vectors can be passed indirectly");
5245 break;
5246 case CCValAssign::BCvt:
5247 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
5248 break;
5249 case CCValAssign::AExt:
5250 case CCValAssign::SExt:
5251 case CCValAssign::ZExt:
5252 break;
5253 case CCValAssign::AExtUpper:
5254 ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
5255 DAG.getConstant(32, DL, RegVT));
5256 ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
5257 break;
5258 }
5259 } else { // VA.isRegLoc()
5260 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
5261 unsigned ArgOffset = VA.getLocMemOffset();
5262 unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
5263 ? VA.getLocVT().getSizeInBits()
5264 : VA.getValVT().getSizeInBits()) / 8;
5265
5266 uint32_t BEAlign = 0;
5267 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
5268 !Ins[i].Flags.isInConsecutiveRegs())
5269 BEAlign = 8 - ArgSize;
5270
5271 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
5272
5273 // Create load nodes to retrieve arguments from the stack.
5274 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5275
5276 // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
5277 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
5278 MVT MemVT = VA.getValVT();
5279
5280 switch (VA.getLocInfo()) {
5281 default:
5282 break;
5283 case CCValAssign::Trunc:
5284 case CCValAssign::BCvt:
5285 MemVT = VA.getLocVT();
5286 break;
5287 case CCValAssign::Indirect:
5288 assert(VA.getValVT().isScalableVector() &&
5289 "Only scalable vectors can be passed indirectly");
5290 MemVT = VA.getLocVT();
5291 break;
5292 case CCValAssign::SExt:
5293 ExtType = ISD::SEXTLOAD;
5294 break;
5295 case CCValAssign::ZExt:
5296 ExtType = ISD::ZEXTLOAD;
5297 break;
5298 case CCValAssign::AExt:
5299 ExtType = ISD::EXTLOAD;
5300 break;
5301 }
5302
5303 ArgValue =
5304 DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
5305 MachinePointerInfo::getFixedStack(MF, FI), MemVT);
5306 }
5307
5308 if (VA.getLocInfo() == CCValAssign::Indirect) {
5309 assert(VA.getValVT().isScalableVector() &&
5310 "Only scalable vectors can be passed indirectly");
5311
5312 uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
5313 unsigned NumParts = 1;
5314 if (Ins[i].Flags.isInConsecutiveRegs()) {
5315 assert(!Ins[i].Flags.isInConsecutiveRegsLast());
5316 while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
5317 ++NumParts;
5318 }
5319
5320 MVT PartLoad = VA.getValVT();
5321 SDValue Ptr = ArgValue;
5322
5323 // Ensure we generate all loads for each tuple part, whilst updating the
5324 // pointer after each load correctly using vscale.
5325 while (NumParts > 0) {
5326 ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
5327 InVals.push_back(ArgValue);
5328 NumParts--;
5329 if (NumParts > 0) {
5330 SDValue BytesIncrement = DAG.getVScale(
5331 DL, Ptr.getValueType(),
5332 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
5333 SDNodeFlags Flags;
5334 Flags.setNoUnsignedWrap(true);
5335 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5336 BytesIncrement, Flags);
5337 ExtraArgLocs++;
5338 i++;
5339 }
5340 }
5341 } else {
5342 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
5343 ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
5344 ArgValue, DAG.getValueType(MVT::i32));
5345 InVals.push_back(ArgValue);
5346 }
5347 }
5348 assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
5349
5350 // varargs
5351 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5352 if (isVarArg) {
5353 if (!Subtarget->isTargetDarwin() || IsWin64) {
5354 // The AAPCS variadic function ABI is identical to the non-variadic
5355 // one. As a result there may be more arguments in registers and we should
5356 // save them for future reference.
5357 // Win64 variadic functions also pass arguments in registers, but all float
5358 // arguments are passed in integer registers.
5359 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
5360 }
5361
5362 // This will point to the next argument passed via stack.
5363 unsigned StackOffset = CCInfo.getNextStackOffset();
5364 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
5365 StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
5366 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
5367
5368 if (MFI.hasMustTailInVarArgFunc()) {
5369 SmallVector<MVT, 2> RegParmTypes;
5370 RegParmTypes.push_back(MVT::i64);
5371 RegParmTypes.push_back(MVT::f128);
5372 // Compute the set of forwarded registers. The rest are scratch.
5373 SmallVectorImpl<ForwardedRegister> &Forwards =
5374 FuncInfo->getForwardedMustTailRegParms();
5375 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
5376 CC_AArch64_AAPCS);
5377
5378 // Conservatively forward X8, since it might be used for aggregate return.
5379 if (!CCInfo.isAllocated(AArch64::X8)) {
5380 unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
5381 Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
5382 }
5383 }
5384 }
5385
5386 // On Windows, InReg pointers must be returned, so record the pointer in a
5387 // virtual register at the start of the function so it can be returned in the
5388 // epilogue.
5389 if (IsWin64) {
5390 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
5391 if (Ins[I].Flags.isInReg()) {
5392 assert(!FuncInfo->getSRetReturnReg());
5393
5394 MVT PtrTy = getPointerTy(DAG.getDataLayout());
5395 Register Reg =
5396 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
5397 FuncInfo->setSRetReturnReg(Reg);
5398
5399 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
5400 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
5401 break;
5402 }
5403 }
5404 }
5405
5406 unsigned StackArgSize = CCInfo.getNextStackOffset();
5407 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5408 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
5409 // This is a non-standard ABI so by fiat I say we're allowed to make full
5410 // use of the stack area to be popped, which must be aligned to 16 bytes in
5411 // any case:
5412 StackArgSize = alignTo(StackArgSize, 16);
5413
5414 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
5415 // a multiple of 16.
5416 FuncInfo->setArgumentStackToRestore(StackArgSize);
5417
5418 // This realignment carries over to the available bytes below. Our own
5419 // callers will guarantee the space is free by giving an aligned value to
5420 // CALLSEQ_START.
5421 }
5422 // Even if we're not expected to free up the space, it's useful to know how
5423 // much is there while considering tail calls (because we can reuse it).
5424 FuncInfo->setBytesInStackArgArea(StackArgSize);
5425
5426 if (Subtarget->hasCustomCallingConv())
5427 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
5428
5429 return Chain;
5430}
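
// Editor's note: illustrative sketch, not part of the LLVM source. On
// big-endian targets a small stack argument lives in the high bytes of its
// 8-byte slot, so LowerFormalArguments above biases the load offset by
// 8 - ArgSize; this helper shows that adjustment in isolation.
static unsigned bigEndianArgOffset(unsigned SlotOffset, unsigned ArgSizeBytes,
                                   bool IsLittleEndian) {
  unsigned BEAlign = 0;
  if (!IsLittleEndian && ArgSizeBytes < 8)
    BEAlign = 8 - ArgSizeBytes; // e.g. a 2-byte arg sits 6 bytes into the slot
  return SlotOffset + BEAlign;
}
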
5431
5432void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
5433 SelectionDAG &DAG,
5434 const SDLoc &DL,
5435 SDValue &Chain) const {
5436 MachineFunction &MF = DAG.getMachineFunction();
5437 MachineFrameInfo &MFI = MF.getFrameInfo();
5438 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5439 auto PtrVT = getPointerTy(DAG.getDataLayout());
5440 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
5441
5442 SmallVector<SDValue, 8> MemOps;
5443
5444 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
5445 AArch64::X3, AArch64::X4, AArch64::X5,
5446 AArch64::X6, AArch64::X7 };
5447 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
5448 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
5449
5450 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
5451 int GPRIdx = 0;
5452 if (GPRSaveSize != 0) {
5453 if (IsWin64) {
5454 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
5455 if (GPRSaveSize & 15)
5456 // The extra size here, if triggered, will always be 8.
5457 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
5458 } else
5459 GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
5460
5461 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
5462
5463 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
5464 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
5465 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
5466 SDValue Store =
5467 DAG.getStore(Val.getValue(1), DL, Val, FIN,
5468 IsWin64 ? MachinePointerInfo::getFixedStack(
5469 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
5470 : MachinePointerInfo::getStack(MF, i * 8));
5471 MemOps.push_back(Store);
5472 FIN =
5473 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
5474 }
5475 }
5476 FuncInfo->setVarArgsGPRIndex(GPRIdx);
5477 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
5478
5479 if (Subtarget->hasFPARMv8() && !IsWin64) {
5480 static const MCPhysReg FPRArgRegs[] = {
5481 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
5482 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
5483 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
5484 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
5485
5486 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
5487 int FPRIdx = 0;
5488 if (FPRSaveSize != 0) {
5489 FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
5490
5491 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
5492
5493 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
5494 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
5495 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
5496
5497 SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
5498 MachinePointerInfo::getStack(MF, i * 16));
5499 MemOps.push_back(Store);
5500 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
5501 DAG.getConstant(16, DL, PtrVT));
5502 }
5503 }
5504 FuncInfo->setVarArgsFPRIndex(FPRIdx);
5505 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
5506 }
5507
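       // Tie all of the register-save stores into the chain with a TokenFactor so
       // that later uses of Chain are ordered after them.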
5508 if (!MemOps.empty()) {
5509 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
5510 }
5511}
5512
5513/// LowerCallResult - Lower the result values of a call into the
5514/// appropriate copies out of appropriate physical registers.
5515SDValue AArch64TargetLowering::LowerCallResult(
5516 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5517 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5518 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
5519 SDValue ThisVal) const {
5520 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
5521 // Assign locations to each value returned by this call.
5522 SmallVector<CCValAssign, 16> RVLocs;
5523 DenseMap<unsigned, SDValue> CopiedRegs;
5524 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5525 *DAG.getContext());
5526 CCInfo.AnalyzeCallResult(Ins, RetCC);
5527
5528 // Copy all of the result registers out of their specified physreg.
5529 for (unsigned i = 0; i != RVLocs.size(); ++i) {
5530 CCValAssign VA = RVLocs[i];
5531
5532 // Pass 'this' value directly from the argument to return value, to avoid
5533 // reg unit interference
5534 if (i == 0 && isThisReturn) {
5535       assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
5536              "unexpected return calling convention register assignment");
5537 InVals.push_back(ThisVal);
5538 continue;
5539 }
5540
5541 // Avoid copying a physreg twice since RegAllocFast is incompetent and only
5542 // allows one use of a physreg per block.
5543 SDValue Val = CopiedRegs.lookup(VA.getLocReg());
5544 if (!Val) {
5545 Val =
5546 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
5547 Chain = Val.getValue(1);
5548 InFlag = Val.getValue(2);
5549 CopiedRegs[VA.getLocReg()] = Val;
5550 }
5551
5552 switch (VA.getLocInfo()) {
5553 default:
5554       llvm_unreachable("Unknown loc info!");
5555 case CCValAssign::Full:
5556 break;
5557 case CCValAssign::BCvt:
5558 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5559 break;
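         // AExtUpper: the 32-bit value was returned in the upper half of a 64-bit
         // register, so shift it down by 32 before the zero-extend/truncate below.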
5560 case CCValAssign::AExtUpper:
5561 Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
5562 DAG.getConstant(32, DL, VA.getLocVT()));
5563       LLVM_FALLTHROUGH;
5564 case CCValAssign::AExt:
5565       LLVM_FALLTHROUGH;
5566 case CCValAssign::ZExt:
5567 Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
5568 break;
5569 }
5570
5571 InVals.push_back(Val);
5572 }
5573
5574 return Chain;
5575}
5576
5577/// Return true if the calling convention is one that we can guarantee TCO for.
5578static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
5579 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
5580 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
5581}
5582
5583/// Return true if we might ever do TCO for calls with this calling convention.
5584static bool mayTailCallThisCC(CallingConv::ID CC) {
5585 switch (CC) {
5586 case CallingConv::C:
5587 case CallingConv::AArch64_SVE_VectorCall:
5588 case CallingConv::PreserveMost:
5589 case CallingConv::Swift:
5590 case CallingConv::SwiftTail:
5591 case CallingConv::Tail:
5592 case CallingConv::Fast:
5593 return true;
5594 default:
5595 return false;
5596 }
5597}
5598
5599bool AArch64TargetLowering::isEligibleForTailCallOptimization(
5600 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
5601 const SmallVectorImpl<ISD::OutputArg> &Outs,
5602 const SmallVectorImpl<SDValue> &OutVals,
5603 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
5604 if (!mayTailCallThisCC(CalleeCC))
5605 return false;
5606
5607 MachineFunction &MF = DAG.getMachineFunction();
5608 const Function &CallerF = MF.getFunction();
5609 CallingConv::ID CallerCC = CallerF.getCallingConv();
5610
5611 // Functions using the C or Fast calling convention that have an SVE signature
5612 // preserve more registers and should assume the SVE_VectorCall CC.
5613 // The check for matching callee-saved regs will determine whether it is
5614 // eligible for TCO.
5615 if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
5616 AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
5617 CallerCC = CallingConv::AArch64_SVE_VectorCall;
5618
5619 bool CCMatch = CallerCC == CalleeCC;
5620
5621 // When using the Windows calling convention on a non-windows OS, we want
5622 // to back up and restore X18 in such functions; we can't do a tail call
5623 // from those functions.
5624 if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
5625 CalleeCC != CallingConv::Win64)
5626 return false;
5627
5628 // Byval parameters hand the function a pointer directly into the stack area
5629 // we want to reuse during a tail call. Working around this *is* possible (see
5630 // X86) but less efficient and uglier in LowerCall.
5631 for (Function::const_arg_iterator i = CallerF.arg_begin(),
5632 e = CallerF.arg_end();
5633 i != e; ++i) {
5634 if (i->hasByValAttr())
5635 return false;
5636
5637 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
5638 // In this case, it is necessary to save/restore X0 in the callee. Tail
5639 // call opt interferes with this. So we disable tail call opt when the
5640 // caller has an argument with "inreg" attribute.
5641
5642 // FIXME: Check whether the callee also has an "inreg" argument.
5643 if (i->hasInRegAttr())
5644 return false;
5645 }
5646
5647 if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
5648 return CCMatch;
5649
5650 // Externally-defined functions with weak linkage should not be
5651 // tail-called on AArch64 when the OS does not support dynamic
5652 // pre-emption of symbols, as the AAELF spec requires normal calls
5653 // to undefined weak functions to be replaced with a NOP or jump to the
5654 // next instruction. The behaviour of branch instructions in this
5655 // situation (as used for tail calls) is implementation-defined, so we
5656 // cannot rely on the linker replacing the tail call with a return.
5657 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5658 const GlobalValue *GV = G->getGlobal();
5659 const Triple &TT = getTargetMachine().getTargetTriple();
5660 if (GV->hasExternalWeakLinkage() &&
5661 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
5662 return false;
5663 }
5664
5665 // Now we search for cases where we can use a tail call without changing the
5666 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
5667 // concept.
5668
5669 // I want anyone implementing a new calling convention to think long and hard
5670 // about this assert.
5671   assert((!isVarArg || CalleeCC == CallingConv::C) &&
5672          "Unexpected variadic calling convention");
5673
5674 LLVMContext &C = *DAG.getContext();
5675 if (isVarArg && !Outs.empty()) {
5676 // At least two cases here: if caller is fastcc then we can't have any
5677 // memory arguments (we'd be expected to clean up the stack afterwards). If
5678 // caller is C then we could potentially use its argument area.
5679
5680 // FIXME: for now we take the most conservative of these in both cases:
5681 // disallow all variadic memory operands.
5682 SmallVector<CCValAssign, 16> ArgLocs;
5683 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5684
5685 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
5686 for (const CCValAssign &ArgLoc : ArgLocs)
5687 if (!ArgLoc.isRegLoc())
5688 return false;
5689 }
5690
5691 // Check that the call results are passed in the same way.
5692 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
5693 CCAssignFnForCall(CalleeCC, isVarArg),
5694 CCAssignFnForCall(CallerCC, isVarArg)))
5695 return false;
5696 // The callee has to preserve all registers the caller needs to preserve.
5697 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5698 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5699 if (!CCMatch) {
5700 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5701 if (Subtarget->hasCustomCallingConv()) {
5702 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
5703 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
5704 }
5705 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5706 return false;
5707 }
5708
5709 // Nothing more to check if the callee is taking no arguments
5710 if (Outs.empty())
5711 return true;
5712
5713 SmallVector<CCValAssign, 16> ArgLocs;
5714 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5715
5716 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
5717
5718 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5719
5720 // If any of the arguments is passed indirectly, it must be SVE, so the
5721 // 'getBytesInStackArgArea' is not sufficient to determine whether we need to
5722   // allocate space on the stack. That is why we determine explicitly here that
5723   // the call cannot be a tailcall.
5724 if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
5725         assert((A.getLocInfo() != CCValAssign::Indirect ||
5726                 A.getValVT().isScalableVector()) &&
5727                "Expected value to be scalable");
5728 return A.getLocInfo() == CCValAssign::Indirect;
5729 }))
5730 return false;
5731
5732 // If the stack arguments for this call do not fit into our own save area then
5733 // the call cannot be made tail.
5734 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
5735 return false;
5736
5737 const MachineRegisterInfo &MRI = MF.getRegInfo();
5738 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5739 return false;
5740
5741 return true;
5742}
5743
5744SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
5745 SelectionDAG &DAG,
5746 MachineFrameInfo &MFI,
5747 int ClobberedFI) const {
5748 SmallVector<SDValue, 8> ArgChains;
5749 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
5750 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
5751
5752 // Include the original chain at the beginning of the list. When this is
5753 // used by target LowerCall hooks, this helps legalize find the
5754 // CALLSEQ_BEGIN node.
5755 ArgChains.push_back(Chain);
5756
5757   // Add a chain value for each load of an incoming stack argument that overlaps the clobbered object.
5758 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
5759 UE = DAG.getEntryNode().getNode()->use_end();
5760 U != UE; ++U)
5761 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
5762 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
5763 if (FI->getIndex() < 0) {
5764 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
5765 int64_t InLastByte = InFirstByte;
5766 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
5767
5768 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
5769 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
5770 ArgChains.push_back(SDValue(L, 1));
5771 }
5772
5773 // Build a tokenfactor for all the chains.
5774 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
5775}
5776
5777bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
5778 bool TailCallOpt) const {
5779 return (CallCC == CallingConv::Fast && TailCallOpt) ||
5780 CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
5781}
5782
5783/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
5784/// and add input and output parameter nodes.
5785SDValue
5786AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
5787 SmallVectorImpl<SDValue> &InVals) const {
5788 SelectionDAG &DAG = CLI.DAG;
5789 SDLoc &DL = CLI.DL;
5790 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
5791 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
5792 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
5793 SDValue Chain = CLI.Chain;
5794 SDValue Callee = CLI.Callee;
5795 bool &IsTailCall = CLI.IsTailCall;
5796 CallingConv::ID CallConv = CLI.CallConv;
5797 bool IsVarArg = CLI.IsVarArg;
5798
5799 MachineFunction &MF = DAG.getMachineFunction();
5800 MachineFunction::CallSiteInfo CSInfo;
5801 bool IsThisReturn = false;
5802
5803 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5804 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5805 bool IsSibCall = false;
5806 bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CallConv);
5807
5808 // Check callee args/returns for SVE registers and set calling convention
5809 // accordingly.
5810 if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
5811 bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
5812 return Out.VT.isScalableVector();
5813 });
5814 bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
5815 return In.VT.isScalableVector();
5816 });
5817
5818 if (CalleeInSVE || CalleeOutSVE)
5819 CallConv = CallingConv::AArch64_SVE_VectorCall;
5820 }
5821
5822 if (IsTailCall) {
5823 // Check if it's really possible to do a tail call.
5824 IsTailCall = isEligibleForTailCallOptimization(
5825 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
5826
5827 // A sibling call is one where we're under the usual C ABI and not planning
5828 // to change that but can still do a tail call:
5829 if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail &&
5830 CallConv != CallingConv::SwiftTail)
5831 IsSibCall = true;
5832
5833 if (IsTailCall)
5834 ++NumTailCalls;
5835 }
5836
5837 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
5838 report_fatal_error("failed to perform tail call elimination on a call "
5839 "site marked musttail");
5840
5841 // Analyze operands of the call, assigning locations to each operand.
5842 SmallVector<CCValAssign, 16> ArgLocs;
5843 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5844
5845 if (IsVarArg) {
5846 // Handle fixed and variable vector arguments differently.
5847 // Variable vector arguments always go into memory.
5848 unsigned NumArgs = Outs.size();
5849
5850 for (unsigned i = 0; i != NumArgs; ++i) {
5851 MVT ArgVT = Outs[i].VT;
5852 if (!Outs[i].IsFixed && ArgVT.isScalableVector())
5853 report_fatal_error("Passing SVE types to variadic functions is "
5854 "currently not supported");
5855
5856 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5857 bool UseVarArgCC = !Outs[i].IsFixed;
5858 // On Windows, the fixed arguments in a vararg call are passed in GPRs
5859 // too, so use the vararg CC to force them to integer registers.
5860 if (IsCalleeWin64)
5861 UseVarArgCC = true;
5862 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
5863 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
5864       assert(!Res && "Call operand has unhandled type");
5865 (void)Res;
5866 }
5867 } else {
5868 // At this point, Outs[].VT may already be promoted to i32. To correctly
5869 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5870 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5871 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
5872 // we use a special version of AnalyzeCallOperands to pass in ValVT and
5873 // LocVT.
5874 unsigned NumArgs = Outs.size();
5875 for (unsigned i = 0; i != NumArgs; ++i) {
5876 MVT ValVT = Outs[i].VT;
5877 // Get type of the original argument.
5878 EVT ActualVT = getValueType(DAG.getDataLayout(),
5879 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
5880 /*AllowUnknown*/ true);
5881 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
5882 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5883 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5884 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5885 ValVT = MVT::i8;
5886 else if (ActualMVT == MVT::i16)
5887 ValVT = MVT::i16;
5888
5889 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
5890 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
5891       assert(!Res && "Call operand has unhandled type");
5892 (void)Res;
5893 }
5894 }
5895
5896 // Get a count of how many bytes are to be pushed on the stack.
5897 unsigned NumBytes = CCInfo.getNextStackOffset();
5898
5899 if (IsSibCall) {
5900 // Since we're not changing the ABI to make this a tail call, the memory
5901 // operands are already available in the caller's incoming argument space.
5902 NumBytes = 0;
5903 }
5904
5905 // FPDiff is the byte offset of the call's argument area from the callee's.
5906 // Stores to callee stack arguments will be placed in FixedStackSlots offset
5907 // by this amount for a tail call. In a sibling call it must be 0 because the
5908 // caller will deallocate the entire stack and the callee still expects its
5909 // arguments to begin at SP+0. Completely unused for non-tail calls.
5910 int FPDiff = 0;
5911
5912 if (IsTailCall && !IsSibCall) {
5913 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
5914
5915 // Since callee will pop argument stack as a tail call, we must keep the
5916 // popped size 16-byte aligned.
5917 NumBytes = alignTo(NumBytes, 16);
5918
5919 // FPDiff will be negative if this tail call requires more space than we
5920 // would automatically have in our incoming argument space. Positive if we
5921 // can actually shrink the stack.
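         // E.g. a caller with 32 reusable bytes making a tail call that needs 48
         // bytes of argument stack gets FPDiff == -16, so 16 extra bytes must be
         // reserved.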
5922 FPDiff = NumReusableBytes - NumBytes;
5923
5924 // Update the required reserved area if this is the tail call requiring the
5925 // most argument stack space.
5926 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
5927 FuncInfo->setTailCallReservedStack(-FPDiff);
5928
5929 // The stack pointer must be 16-byte aligned at all times it's used for a
5930 // memory operation, which in practice means at *all* times and in
5931 // particular across call boundaries. Therefore our own arguments started at
5932 // a 16-byte aligned SP and the delta applied for the tail call should
5933 // satisfy the same constraint.
5934     assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
5935 }
5936
5937 // Adjust the stack pointer for the new arguments...
5938 // These operations are automatically eliminated by the prolog/epilog pass
5939 if (!IsSibCall)
5940 Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL);
5941
5942 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
5943 getPointerTy(DAG.getDataLayout()));
5944
5945 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5946 SmallSet<unsigned, 8> RegsUsed;
5947 SmallVector<SDValue, 8> MemOpChains;
5948 auto PtrVT = getPointerTy(DAG.getDataLayout());
5949
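       // For a variadic musttail call, pass the caller's own register arguments
       // straight through to the callee by copying each register recorded for
       // forwarding.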
5950 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
5951 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
5952 for (const auto &F : Forwards) {
5953 SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
5954 RegsToPass.emplace_back(F.PReg, Val);
5955 }
5956 }
5957
5958 // Walk the register/memloc assignments, inserting copies/loads.
5959 unsigned ExtraArgLocs = 0;
5960 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5961 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5962 SDValue Arg = OutVals[i];
5963 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5964
5965 // Promote the value if needed.
5966 switch (VA.getLocInfo()) {
5967 default:
5968       llvm_unreachable("Unknown loc info!");
5969 case CCValAssign::Full:
5970 break;
5971 case CCValAssign::SExt:
5972 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
5973 break;
5974 case CCValAssign::ZExt:
5975 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
5976 break;
5977 case CCValAssign::AExt:
5978 if (Outs[i].ArgVT == MVT::i1) {
5979 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
5980 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
5981 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
5982 }
5983 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5984 break;
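         // AExtUpper: this i32 half of an [N x i32] block goes in the upper 32 bits
         // of an X register; shift it into position here, and it is OR'ed with the
         // lower half below when the register is reused.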
5985 case CCValAssign::AExtUpper:
5986       assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
5987 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5988 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
5989 DAG.getConstant(32, DL, VA.getLocVT()));
5990 break;
5991 case CCValAssign::BCvt:
5992 Arg = DAG.getBitcast(VA.getLocVT(), Arg);
5993 break;
5994 case CCValAssign::Trunc:
5995 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
5996 break;
5997 case CCValAssign::FPExt:
5998 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
5999 break;
6000 case CCValAssign::Indirect:
6001       assert(VA.getValVT().isScalableVector() &&
6002              "Only scalable vectors can be passed indirectly");
6003
6004 uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
6005 uint64_t PartSize = StoreSize;
6006 unsigned NumParts = 1;
6007 if (Outs[i].Flags.isInConsecutiveRegs()) {
6008         assert(!Outs[i].Flags.isInConsecutiveRegsLast());
6009 while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
6010 ++NumParts;
6011 StoreSize *= NumParts;
6012 }
6013
6014 MachineFrameInfo &MFI = MF.getFrameInfo();
6015 Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
6016 Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
6017 int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
6018 MFI.setStackID(FI, TargetStackID::ScalableVector);
6019
6020 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
6021 SDValue Ptr = DAG.getFrameIndex(
6022 FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
6023 SDValue SpillSlot = Ptr;
6024
6025 // Ensure we generate all stores for each tuple part, whilst updating the
6026 // pointer after each store correctly using vscale.
6027 while (NumParts) {
6028 Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
6029 NumParts--;
6030 if (NumParts > 0) {
6031 SDValue BytesIncrement = DAG.getVScale(
6032 DL, Ptr.getValueType(),
6033 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
6034 SDNodeFlags Flags;
6035 Flags.setNoUnsignedWrap(true);
6036
6037 MPI = MachinePointerInfo(MPI.getAddrSpace());
6038 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
6039 BytesIncrement, Flags);
6040 ExtraArgLocs++;
6041 i++;
6042 }
6043 }
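           // The extra tuple parts have Outs/OutVals entries but share a single
           // CCValAssign, so bump ExtraArgLocs to keep ArgLocs[i - ExtraArgLocs]
           // in sync on later iterations.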
6044
6045 Arg = SpillSlot;
6046 break;
6047 }
6048
6049 if (VA.isRegLoc()) {
6050 if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
6051 Outs[0].VT == MVT::i64) {
6052         assert(VA.getLocVT() == MVT::i64 &&
6053                "unexpected calling convention register assignment");
6054         assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
6055                "unexpected use of 'returned'");
6056 IsThisReturn = true;
6057 }
6058 if (RegsUsed.count(VA.getLocReg())) {
6059 // If this register has already been used then we're trying to pack
6060 // parts of an [N x i32] into an X-register. The extension type will
6061 // take care of putting the two halves in the right place but we have to
6062 // combine them.
6063 SDValue &Bits =
6064 llvm::find_if(RegsToPass,
6065 [=](const std::pair<unsigned, SDValue> &Elt) {
6066 return Elt.first == VA.getLocReg();
6067 })
6068 ->second;
6069 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
6070 // Call site info is used for function's parameter entry value
6071         // tracking. For now we track only the simple cases where a parameter
6072         // is transferred through a whole register.
6073 llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
6074 return ArgReg.Reg == VA.getLocReg();
6075 });
6076 } else {
6077 RegsToPass.emplace_back(VA.getLocReg(), Arg);
6078 RegsUsed.insert(VA.getLocReg());
6079 const TargetOptions &Options = DAG.getTarget().Options;
6080 if (Options.EmitCallSiteInfo)
6081 CSInfo.emplace_back(VA.getLocReg(), i);
6082 }
6083 } else {
6084       assert(VA.isMemLoc());
6085
6086 SDValue DstAddr;
6087 MachinePointerInfo DstInfo;
6088
6089 // FIXME: This works on big-endian for composite byvals, which are the
6090       // common case. It should work for fundamental types too.
6091 uint32_t BEAlign = 0;
6092 unsigned OpSize;
6093 if (VA.getLocInfo() == CCValAssign::Indirect ||
6094 VA.getLocInfo() == CCValAssign::Trunc)
6095 OpSize = VA.getLocVT().getFixedSizeInBits();
6096 else
6097 OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
6098 : VA.getValVT().getSizeInBits();
6099 OpSize = (OpSize + 7) / 8;
6100 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
6101 !Flags.isInConsecutiveRegs()) {
6102 if (OpSize < 8)
6103 BEAlign = 8 - OpSize;
6104 }
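           // E.g. an i32 passed on the stack of a big-endian target has OpSize == 4,
           // so BEAlign == 4 and the value lands in the upper four bytes of its
           // 8-byte slot.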
6105 unsigned LocMemOffset = VA.getLocMemOffset();
6106 int32_t Offset = LocMemOffset + BEAlign;
6107 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
6108 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
6109
6110 if (IsTailCall) {
6111 Offset = Offset + FPDiff;
6112 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
6113
6114 DstAddr = DAG.getFrameIndex(FI, PtrVT);
6115 DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
6116
6117 // Make sure any stack arguments overlapping with where we're storing
6118 // are loaded before this eventual operation. Otherwise they'll be
6119 // clobbered.
6120 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
6121 } else {
6122 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
6123
6124 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
6125 DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
6126 }
6127
6128 if (Outs[i].Flags.isByVal()) {
6129 SDValue SizeNode =
6130 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
6131 SDValue Cpy = DAG.getMemcpy(
6132 Chain, DL, DstAddr, Arg, SizeNode,
6133 Outs[i].Flags.getNonZeroByValAlign(),
6134 /*isVol = */ false, /*AlwaysInline = */ false,
6135 /*isTailCall = */ false, DstInfo, MachinePointerInfo());
6136
6137 MemOpChains.push_back(Cpy);
6138 } else {
6139 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
6140 // promoted to a legal register type i32, we should truncate Arg back to
6141 // i1/i8/i16.
6142 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
6143 VA.getValVT() == MVT::i16)
6144 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
6145
6146 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
6147 MemOpChains.push_back(Store);
6148 }
6149 }
6150 }
6151
6152 if (!MemOpChains.empty())
6153 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
6154
6155 // Build a sequence of copy-to-reg nodes chained together with token chain
6156 // and flag operands which copy the outgoing args into the appropriate regs.
6157 SDValue InFlag;
6158 for (auto &RegToPass : RegsToPass) {
6159 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
6160 RegToPass.second, InFlag);
6161 InFlag = Chain.getValue(1);
6162 }
6163
6164 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
6165 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
6166 // node so that legalize doesn't hack it.
6167 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
6168 auto GV = G->getGlobal();
6169 unsigned OpFlags =
6170 Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
6171 if (OpFlags & AArch64II::MO_GOT) {
6172 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
6173 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
6174 } else {
6175 const GlobalValue *GV = G->getGlobal();
6176 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
6177 }
6178 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
6179 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
6180 Subtarget->isTargetMachO()) {
6181 const char *Sym = S->getSymbol();
6182 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
6183 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
6184 } else {
6185 const char *Sym = S->getSymbol();
6186 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
6187 }
6188 }
6189
6190 // We don't usually want to end the call-sequence here because we would tidy
6191   // the frame up *after* the call; however, in the ABI-changing tail-call case
6192 // we've carefully laid out the parameters so that when sp is reset they'll be
6193 // in the correct location.
6194 if (IsTailCall && !IsSibCall) {
6195 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
6196 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
6197 InFlag