Bug Summary

File: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1114, column 10
Called C++ object pointer is null
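
This diagnostic comes from the analyzer's core call-and-message checks (enabled via -analyzer-checker=core in the invocation below): on at least one path it explored through AArch64ISelLowering.cpp, a member function is called through an object pointer that the analyzer believes is null at the location shown above in SelectionDAGNodes.h. A minimal sketch of the kind of pattern being reported, using hypothetical names that are not taken from the LLVM headers:

  // Hypothetical illustration only; Node, lookup and opcodeOf do not exist in LLVM.
  struct Node {
    unsigned getOpcode() const { return Opcode; }
    unsigned Opcode = 0;
  };

  // A lookup that can fail, similar in shape to APIs such as dyn_cast<>:
  // the analyzer explores both the non-null and the null outcome.
  Node *lookup(Node *Candidate, bool Matches) {
    return Matches ? Candidate : nullptr;
  }

  unsigned opcodeOf(Node *Candidate, bool Matches) {
    Node *N = lookup(Candidate, Matches);
    // On the path where Matches is false, N is null here, and the call
    // below is reported as "Called C++ object pointer is null".
    return N->getOpcode(); // guarding with "if (!N) return 0;" would silence it
  }

Whether the flagged path through the real code is actually feasible has to be judged against the checks in AArch64ISelLowering.cpp leading up to the call; the sketch only illustrates what the checker objects to.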

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/lib/Target/AArch64 -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-07-26-235520-9401-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64ISelLowering.h"
14#include "AArch64CallingConvention.h"
15#include "AArch64ExpandImm.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/ObjCARCUtil.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/Analysis.h"
35#include "llvm/CodeGen/CallingConvLower.h"
36#include "llvm/CodeGen/MachineBasicBlock.h"
37#include "llvm/CodeGen/MachineFrameInfo.h"
38#include "llvm/CodeGen/MachineFunction.h"
39#include "llvm/CodeGen/MachineInstr.h"
40#include "llvm/CodeGen/MachineInstrBuilder.h"
41#include "llvm/CodeGen/MachineMemOperand.h"
42#include "llvm/CodeGen/MachineRegisterInfo.h"
43#include "llvm/CodeGen/RuntimeLibcalls.h"
44#include "llvm/CodeGen/SelectionDAG.h"
45#include "llvm/CodeGen/SelectionDAGNodes.h"
46#include "llvm/CodeGen/TargetCallingConv.h"
47#include "llvm/CodeGen/TargetInstrInfo.h"
48#include "llvm/CodeGen/ValueTypes.h"
49#include "llvm/IR/Attributes.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugLoc.h"
53#include "llvm/IR/DerivedTypes.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/GetElementPtrTypeIterator.h"
56#include "llvm/IR/GlobalValue.h"
57#include "llvm/IR/IRBuilder.h"
58#include "llvm/IR/Instruction.h"
59#include "llvm/IR/Instructions.h"
60#include "llvm/IR/IntrinsicInst.h"
61#include "llvm/IR/Intrinsics.h"
62#include "llvm/IR/IntrinsicsAArch64.h"
63#include "llvm/IR/Module.h"
64#include "llvm/IR/OperandTraits.h"
65#include "llvm/IR/PatternMatch.h"
66#include "llvm/IR/Type.h"
67#include "llvm/IR/Use.h"
68#include "llvm/IR/Value.h"
69#include "llvm/MC/MCRegisterInfo.h"
70#include "llvm/Support/Casting.h"
71#include "llvm/Support/CodeGen.h"
72#include "llvm/Support/CommandLine.h"
73#include "llvm/Support/Compiler.h"
74#include "llvm/Support/Debug.h"
75#include "llvm/Support/ErrorHandling.h"
76#include "llvm/Support/KnownBits.h"
77#include "llvm/Support/MachineValueType.h"
78#include "llvm/Support/MathExtras.h"
79#include "llvm/Support/raw_ostream.h"
80#include "llvm/Target/TargetMachine.h"
81#include "llvm/Target/TargetOptions.h"
82#include <algorithm>
83#include <bitset>
84#include <cassert>
85#include <cctype>
86#include <cstdint>
87#include <cstdlib>
88#include <iterator>
89#include <limits>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace llvm::PatternMatch;
96
97#define DEBUG_TYPE "aarch64-lower"
98
99STATISTIC(NumTailCalls, "Number of tail calls");
100STATISTIC(NumShiftInserts, "Number of vector shift inserts");
101STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
102
103// FIXME: The necessary dtprel relocations don't seem to be supported
104// well in the GNU bfd and gold linkers at the moment. Therefore, by
105// default, for now, fall back to GeneralDynamic code generation.
106cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
107 "aarch64-elf-ldtls-generation", cl::Hidden,
108 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
109 cl::init(false));
110
111static cl::opt<bool>
112EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
113 cl::desc("Enable AArch64 logical imm instruction "
114 "optimization"),
115 cl::init(true));
116
117// Temporary option added for the purpose of testing functionality added
118// to DAGCombiner.cpp in D92230. It is expected that this can be removed
119// in future when both implementations will be based off MGATHER rather
120// than the GLD1 nodes added for the SVE gather load intrinsics.
121static cl::opt<bool>
122EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
123 cl::desc("Combine extends of AArch64 masked "
124 "gather intrinsics"),
125 cl::init(true));
126
127/// Value type used for condition codes.
128static const MVT MVT_CC = MVT::i32;
129
130static inline EVT getPackedSVEVectorVT(EVT VT) {
131 switch (VT.getSimpleVT().SimpleTy) {
132 default:
133 llvm_unreachable("unexpected element type for vector");
134 case MVT::i8:
135 return MVT::nxv16i8;
136 case MVT::i16:
137 return MVT::nxv8i16;
138 case MVT::i32:
139 return MVT::nxv4i32;
140 case MVT::i64:
141 return MVT::nxv2i64;
142 case MVT::f16:
143 return MVT::nxv8f16;
144 case MVT::f32:
145 return MVT::nxv4f32;
146 case MVT::f64:
147 return MVT::nxv2f64;
148 case MVT::bf16:
149 return MVT::nxv8bf16;
150 }
151}
152
153// NOTE: Currently there's only a need to return integer vector types. If this
154// changes then just add an extra "type" parameter.
155static inline EVT getPackedSVEVectorVT(ElementCount EC) {
156 switch (EC.getKnownMinValue()) {
157 default:
158 llvm_unreachable("unexpected element count for vector");
159 case 16:
160 return MVT::nxv16i8;
161 case 8:
162 return MVT::nxv8i16;
163 case 4:
164 return MVT::nxv4i32;
165 case 2:
166 return MVT::nxv2i64;
167 }
168}
169
170static inline EVT getPromotedVTForPredicate(EVT VT) {
171 assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
172 "Expected scalable predicate vector type!");
173 switch (VT.getVectorMinNumElements()) {
174 default:
175 llvm_unreachable("unexpected element count for vector");
176 case 2:
177 return MVT::nxv2i64;
178 case 4:
179 return MVT::nxv4i32;
180 case 8:
181 return MVT::nxv8i16;
182 case 16:
183 return MVT::nxv16i8;
184 }
185}
186
187/// Returns true if VT's elements occupy the lowest bit positions of its
188/// associated register class without any intervening space.
189///
190/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
191/// same register class, but only nxv8f16 can be treated as a packed vector.
192static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
193 assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
194 "Expected legal vector type!");
195 return VT.isFixedLengthVector() ||
196 VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
197}
198
199// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
200// predicate and end with a passthru value matching the result type.
201static bool isMergePassthruOpcode(unsigned Opc) {
202 switch (Opc) {
203 default:
204 return false;
205 case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
206 case AArch64ISD::BSWAP_MERGE_PASSTHRU:
207 case AArch64ISD::CTLZ_MERGE_PASSTHRU:
208 case AArch64ISD::CTPOP_MERGE_PASSTHRU:
209 case AArch64ISD::DUP_MERGE_PASSTHRU:
210 case AArch64ISD::ABS_MERGE_PASSTHRU:
211 case AArch64ISD::NEG_MERGE_PASSTHRU:
212 case AArch64ISD::FNEG_MERGE_PASSTHRU:
213 case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
214 case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
215 case AArch64ISD::FCEIL_MERGE_PASSTHRU:
216 case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
217 case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
218 case AArch64ISD::FRINT_MERGE_PASSTHRU:
219 case AArch64ISD::FROUND_MERGE_PASSTHRU:
220 case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
221 case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
222 case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
223 case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
224 case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
225 case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
226 case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
227 case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
228 case AArch64ISD::FSQRT_MERGE_PASSTHRU:
229 case AArch64ISD::FRECPX_MERGE_PASSTHRU:
230 case AArch64ISD::FABS_MERGE_PASSTHRU:
231 return true;
232 }
233}
234
235AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
236 const AArch64Subtarget &STI)
237 : TargetLowering(TM), Subtarget(&STI) {
238 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
239 // we have to make something up. Arbitrarily, choose ZeroOrOne.
240 setBooleanContents(ZeroOrOneBooleanContent);
241 // When comparing vectors the result sets the different elements in the
242 // vector to all-one or all-zero.
243 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
244
245 // Set up the register classes.
246 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
247 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
248
249 if (Subtarget->hasFPARMv8()) {
250 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
251 addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
252 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
253 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
254 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
255 }
256
257 if (Subtarget->hasNEON()) {
258 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
259 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
260 // Someone set us up the NEON.
261 addDRTypeForNEON(MVT::v2f32);
262 addDRTypeForNEON(MVT::v8i8);
263 addDRTypeForNEON(MVT::v4i16);
264 addDRTypeForNEON(MVT::v2i32);
265 addDRTypeForNEON(MVT::v1i64);
266 addDRTypeForNEON(MVT::v1f64);
267 addDRTypeForNEON(MVT::v4f16);
268 if (Subtarget->hasBF16())
269 addDRTypeForNEON(MVT::v4bf16);
270
271 addQRTypeForNEON(MVT::v4f32);
272 addQRTypeForNEON(MVT::v2f64);
273 addQRTypeForNEON(MVT::v16i8);
274 addQRTypeForNEON(MVT::v8i16);
275 addQRTypeForNEON(MVT::v4i32);
276 addQRTypeForNEON(MVT::v2i64);
277 addQRTypeForNEON(MVT::v8f16);
278 if (Subtarget->hasBF16())
279 addQRTypeForNEON(MVT::v8bf16);
280 }
281
282 if (Subtarget->hasSVE()) {
283 // Add legal sve predicate types
284 addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
285 addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
286 addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
287 addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
288
289 // Add legal sve data types
290 addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
291 addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
292 addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
293 addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
294
295 addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
296 addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
297 addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
298 addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
299 addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
300 addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
301
302 if (Subtarget->hasBF16()) {
303 addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
304 addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
305 addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
306 }
307
308 if (Subtarget->useSVEForFixedLengthVectors()) {
309 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
310 if (useSVEForFixedLengthVectorVT(VT))
311 addRegisterClass(VT, &AArch64::ZPRRegClass);
312
313 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
314 if (useSVEForFixedLengthVectorVT(VT))
315 addRegisterClass(VT, &AArch64::ZPRRegClass);
316 }
317
318 for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
319 setOperationAction(ISD::SADDSAT, VT, Legal);
320 setOperationAction(ISD::UADDSAT, VT, Legal);
321 setOperationAction(ISD::SSUBSAT, VT, Legal);
322 setOperationAction(ISD::USUBSAT, VT, Legal);
323 setOperationAction(ISD::UREM, VT, Expand);
324 setOperationAction(ISD::SREM, VT, Expand);
325 setOperationAction(ISD::SDIVREM, VT, Expand);
326 setOperationAction(ISD::UDIVREM, VT, Expand);
327 }
328
329 for (auto VT :
330 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
331 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
332 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
333
334 for (auto VT :
335 { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
336 MVT::nxv2f64 }) {
337 setCondCodeAction(ISD::SETO, VT, Expand);
338 setCondCodeAction(ISD::SETOLT, VT, Expand);
339 setCondCodeAction(ISD::SETLT, VT, Expand);
340 setCondCodeAction(ISD::SETOLE, VT, Expand);
341 setCondCodeAction(ISD::SETLE, VT, Expand);
342 setCondCodeAction(ISD::SETULT, VT, Expand);
343 setCondCodeAction(ISD::SETULE, VT, Expand);
344 setCondCodeAction(ISD::SETUGE, VT, Expand);
345 setCondCodeAction(ISD::SETUGT, VT, Expand);
346 setCondCodeAction(ISD::SETUEQ, VT, Expand);
347 setCondCodeAction(ISD::SETUNE, VT, Expand);
348
349 setOperationAction(ISD::FREM, VT, Expand);
350 setOperationAction(ISD::FPOW, VT, Expand);
351 setOperationAction(ISD::FPOWI, VT, Expand);
352 setOperationAction(ISD::FCOS, VT, Expand);
353 setOperationAction(ISD::FSIN, VT, Expand);
354 setOperationAction(ISD::FSINCOS, VT, Expand);
355 setOperationAction(ISD::FEXP, VT, Expand);
356 setOperationAction(ISD::FEXP2, VT, Expand);
357 setOperationAction(ISD::FLOG, VT, Expand);
358 setOperationAction(ISD::FLOG2, VT, Expand);
359 setOperationAction(ISD::FLOG10, VT, Expand);
360 }
361 }
362
363 // Compute derived properties from the register classes
364 computeRegisterProperties(Subtarget->getRegisterInfo());
365
366 // Provide all sorts of operation actions
367 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
368 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
369 setOperationAction(ISD::SETCC, MVT::i32, Custom);
370 setOperationAction(ISD::SETCC, MVT::i64, Custom);
371 setOperationAction(ISD::SETCC, MVT::f16, Custom);
372 setOperationAction(ISD::SETCC, MVT::f32, Custom);
373 setOperationAction(ISD::SETCC, MVT::f64, Custom);
374 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
375 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
376 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
377 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
378 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
379 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
380 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
381 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
382 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
383 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
384 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
385 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
386 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
387 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
388 setOperationAction(ISD::SELECT, MVT::i32, Custom);
389 setOperationAction(ISD::SELECT, MVT::i64, Custom);
390 setOperationAction(ISD::SELECT, MVT::f16, Custom);
391 setOperationAction(ISD::SELECT, MVT::f32, Custom);
392 setOperationAction(ISD::SELECT, MVT::f64, Custom);
393 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
394 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
395 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
396 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
397 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
398 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
399 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
400
401 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
402 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
403 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
404
405 setOperationAction(ISD::FREM, MVT::f32, Expand);
406 setOperationAction(ISD::FREM, MVT::f64, Expand);
407 setOperationAction(ISD::FREM, MVT::f80, Expand);
408
409 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
410
411 // Custom lowering hooks are needed for XOR
412 // to fold it into CSINC/CSINV.
413 setOperationAction(ISD::XOR, MVT::i32, Custom);
414 setOperationAction(ISD::XOR, MVT::i64, Custom);
415
416 // Virtually no operation on f128 is legal, but LLVM can't expand them when
417 // there's a valid register class, so we need custom operations in most cases.
418 setOperationAction(ISD::FABS, MVT::f128, Expand);
419 setOperationAction(ISD::FADD, MVT::f128, LibCall);
420 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
421 setOperationAction(ISD::FCOS, MVT::f128, Expand);
422 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
423 setOperationAction(ISD::FMA, MVT::f128, Expand);
424 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
425 setOperationAction(ISD::FNEG, MVT::f128, Expand);
426 setOperationAction(ISD::FPOW, MVT::f128, Expand);
427 setOperationAction(ISD::FREM, MVT::f128, Expand);
428 setOperationAction(ISD::FRINT, MVT::f128, Expand);
429 setOperationAction(ISD::FSIN, MVT::f128, Expand);
430 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
431 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
432 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
433 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
434 setOperationAction(ISD::SETCC, MVT::f128, Custom);
435 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
436 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
437 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
438 setOperationAction(ISD::SELECT, MVT::f128, Custom);
439 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
440 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
441
442 // Lowering for many of the conversions is actually specified by the non-f128
443 // type. The LowerXXX function will be trivial when f128 isn't involved.
444 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
445 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
446 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
447 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
448 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
449 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
450 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
451 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
452 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
453 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
454 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
455 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
456 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
457 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
458 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
459 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
460 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
461 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
462 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
463 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
464 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
465 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
466 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
467 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
468 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
469 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
470 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
471 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
472 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
473 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
474
475 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
476 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
477 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
478 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
479
480 // Variable arguments.
481 setOperationAction(ISD::VASTART, MVT::Other, Custom);
482 setOperationAction(ISD::VAARG, MVT::Other, Custom);
483 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
484 setOperationAction(ISD::VAEND, MVT::Other, Expand);
485
486 // Variable-sized objects.
487 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
488 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
489
490 if (Subtarget->isTargetWindows())
491 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
492 else
493 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
494
495 // Constant pool entries
496 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
497
498 // BlockAddress
499 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
500
501 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
502 setOperationAction(ISD::ADDC, MVT::i32, Custom);
503 setOperationAction(ISD::ADDE, MVT::i32, Custom);
504 setOperationAction(ISD::SUBC, MVT::i32, Custom);
505 setOperationAction(ISD::SUBE, MVT::i32, Custom);
506 setOperationAction(ISD::ADDC, MVT::i64, Custom);
507 setOperationAction(ISD::ADDE, MVT::i64, Custom);
508 setOperationAction(ISD::SUBC, MVT::i64, Custom);
509 setOperationAction(ISD::SUBE, MVT::i64, Custom);
510
511 // AArch64 lacks both left-rotate and popcount instructions.
512 setOperationAction(ISD::ROTL, MVT::i32, Expand);
513 setOperationAction(ISD::ROTL, MVT::i64, Expand);
514 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
515 setOperationAction(ISD::ROTL, VT, Expand);
516 setOperationAction(ISD::ROTR, VT, Expand);
517 }
518
519 // AArch64 doesn't have i32 MULH{S|U}.
520 setOperationAction(ISD::MULHU, MVT::i32, Expand);
521 setOperationAction(ISD::MULHS, MVT::i32, Expand);
522
523 // AArch64 doesn't have {U|S}MUL_LOHI.
524 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
525 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
526
527 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
528 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
529 setOperationAction(ISD::CTPOP, MVT::i128, Custom);
530
531 setOperationAction(ISD::ABS, MVT::i32, Custom);
532 setOperationAction(ISD::ABS, MVT::i64, Custom);
533
534 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
535 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
536 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
537 setOperationAction(ISD::SDIVREM, VT, Expand);
538 setOperationAction(ISD::UDIVREM, VT, Expand);
539 }
540 setOperationAction(ISD::SREM, MVT::i32, Expand);
541 setOperationAction(ISD::SREM, MVT::i64, Expand);
542 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
543 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
544 setOperationAction(ISD::UREM, MVT::i32, Expand);
545 setOperationAction(ISD::UREM, MVT::i64, Expand);
546
547 // Custom lower Add/Sub/Mul with overflow.
548 setOperationAction(ISD::SADDO, MVT::i32, Custom);
549 setOperationAction(ISD::SADDO, MVT::i64, Custom);
550 setOperationAction(ISD::UADDO, MVT::i32, Custom);
551 setOperationAction(ISD::UADDO, MVT::i64, Custom);
552 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
553 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
554 setOperationAction(ISD::USUBO, MVT::i32, Custom);
555 setOperationAction(ISD::USUBO, MVT::i64, Custom);
556 setOperationAction(ISD::SMULO, MVT::i32, Custom);
557 setOperationAction(ISD::SMULO, MVT::i64, Custom);
558 setOperationAction(ISD::UMULO, MVT::i32, Custom);
559 setOperationAction(ISD::UMULO, MVT::i64, Custom);
560
561 setOperationAction(ISD::FSIN, MVT::f32, Expand);
562 setOperationAction(ISD::FSIN, MVT::f64, Expand);
563 setOperationAction(ISD::FCOS, MVT::f32, Expand);
564 setOperationAction(ISD::FCOS, MVT::f64, Expand);
565 setOperationAction(ISD::FPOW, MVT::f32, Expand);
566 setOperationAction(ISD::FPOW, MVT::f64, Expand);
567 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
568 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
569 if (Subtarget->hasFullFP16())
570 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
571 else
572 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
573
574 setOperationAction(ISD::FREM, MVT::f16, Promote);
575 setOperationAction(ISD::FREM, MVT::v4f16, Expand);
576 setOperationAction(ISD::FREM, MVT::v8f16, Expand);
577 setOperationAction(ISD::FPOW, MVT::f16, Promote);
578 setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
579 setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
580 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
581 setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
582 setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
583 setOperationAction(ISD::FCOS, MVT::f16, Promote);
584 setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
585 setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
586 setOperationAction(ISD::FSIN, MVT::f16, Promote);
587 setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
588 setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
589 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
590 setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
591 setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
592 setOperationAction(ISD::FEXP, MVT::f16, Promote);
593 setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
594 setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
595 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
596 setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
597 setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
598 setOperationAction(ISD::FLOG, MVT::f16, Promote);
599 setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
600 setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
601 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
602 setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
603 setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
604 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
605 setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
606 setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
607
608 if (!Subtarget->hasFullFP16()) {
609 setOperationAction(ISD::SELECT, MVT::f16, Promote);
610 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
611 setOperationAction(ISD::SETCC, MVT::f16, Promote);
612 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
613 setOperationAction(ISD::FADD, MVT::f16, Promote);
614 setOperationAction(ISD::FSUB, MVT::f16, Promote);
615 setOperationAction(ISD::FMUL, MVT::f16, Promote);
616 setOperationAction(ISD::FDIV, MVT::f16, Promote);
617 setOperationAction(ISD::FMA, MVT::f16, Promote);
618 setOperationAction(ISD::FNEG, MVT::f16, Promote);
619 setOperationAction(ISD::FABS, MVT::f16, Promote);
620 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
621 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
622 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
623 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
624 setOperationAction(ISD::FRINT, MVT::f16, Promote);
625 setOperationAction(ISD::FROUND, MVT::f16, Promote);
626 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
627 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
628 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
629 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
630 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
631 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
632
633 // promote v4f16 to v4f32 when that is known to be safe.
634 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
635 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
636 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
637 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
638 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
639 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
640 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
641 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
642
643 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
644 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
645 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
646 setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
647 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
648 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
649 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
650 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
651 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
652 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
653 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
654 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
655 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
656 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
657 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
658 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
659
660 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
661 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
662 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
663 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
664 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
665 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
666 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
667 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
668 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
669 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
670 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
671 setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
672 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
673 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
674 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
675 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
676 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
677 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
678 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
679 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
680 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
681 }
682
683 // AArch64 has implementations of a lot of rounding-like FP operations.
684 for (MVT Ty : {MVT::f32, MVT::f64}) {
685 setOperationAction(ISD::FFLOOR, Ty, Legal);
686 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
687 setOperationAction(ISD::FCEIL, Ty, Legal);
688 setOperationAction(ISD::FRINT, Ty, Legal);
689 setOperationAction(ISD::FTRUNC, Ty, Legal);
690 setOperationAction(ISD::FROUND, Ty, Legal);
691 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
692 setOperationAction(ISD::FMINNUM, Ty, Legal);
693 setOperationAction(ISD::FMAXNUM, Ty, Legal);
694 setOperationAction(ISD::FMINIMUM, Ty, Legal);
695 setOperationAction(ISD::FMAXIMUM, Ty, Legal);
696 setOperationAction(ISD::LROUND, Ty, Legal);
697 setOperationAction(ISD::LLROUND, Ty, Legal);
698 setOperationAction(ISD::LRINT, Ty, Legal);
699 setOperationAction(ISD::LLRINT, Ty, Legal);
700 }
701
702 if (Subtarget->hasFullFP16()) {
703 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
704 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
705 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
706 setOperationAction(ISD::FRINT, MVT::f16, Legal);
707 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
708 setOperationAction(ISD::FROUND, MVT::f16, Legal);
709 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
710 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
711 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
712 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
713 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
714 }
715
716 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
717
718 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
719 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
720
721 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
722 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
723 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
724 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
725 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
726
727 // Generate outline atomics library calls only if LSE was not specified for
728 // subtarget
729 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
730 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
731 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
732 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
733 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
734 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
735 setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
736 setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
737 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
738 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
739 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
740 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
741 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
742 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
743 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
744 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
745 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
746 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
747 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
748 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
749 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
750 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
751 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
752 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
753 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
754 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
755#define LCALLNAMES(A, B, N) \
756 setLibcallName(A##N##_RELAX, #B #N "_relax"); \
757 setLibcallName(A##N##_ACQ, #B #N "_acq"); \
758 setLibcallName(A##N##_REL, #B #N "_rel"); \
759 setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
760#define LCALLNAME4(A, B) \
761 LCALLNAMES(A, B, 1) \
762 LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
763#define LCALLNAME5(A, B) \
764 LCALLNAMES(A, B, 1) \
765 LCALLNAMES(A, B, 2) \
766 LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
767 LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
768 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
769 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
770 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
771 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
772 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
773#undef LCALLNAMES
774#undef LCALLNAME4
775#undef LCALLNAME5
776 }
777
778 // 128-bit loads and stores can be done without expanding
779 setOperationAction(ISD::LOAD, MVT::i128, Custom);
780 setOperationAction(ISD::STORE, MVT::i128, Custom);
781
782 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
783 // custom lowering, as there are no un-paired non-temporal stores and
784 // legalization will break up 256 bit inputs.
785 setOperationAction(ISD::STORE, MVT::v32i8, Custom);
786 setOperationAction(ISD::STORE, MVT::v16i16, Custom);
787 setOperationAction(ISD::STORE, MVT::v16f16, Custom);
788 setOperationAction(ISD::STORE, MVT::v8i32, Custom);
789 setOperationAction(ISD::STORE, MVT::v8f32, Custom);
790 setOperationAction(ISD::STORE, MVT::v4f64, Custom);
791 setOperationAction(ISD::STORE, MVT::v4i64, Custom);
792
793 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
794 // This requires the Performance Monitors extension.
795 if (Subtarget->hasPerfMon())
796 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
797
798 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
799 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
800 // Issue __sincos_stret if available.
801 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
802 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
803 } else {
804 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
805 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
806 }
807
808 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
809 // MSVCRT doesn't have powi; fall back to pow
810 setLibcallName(RTLIB::POWI_F32, nullptr);
811 setLibcallName(RTLIB::POWI_F64, nullptr);
812 }
813
814 // Make floating-point constants legal for the large code model, so they don't
815 // become loads from the constant pool.
816 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
817 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
818 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
819 }
820
821 // AArch64 does not have floating-point extending loads, i1 sign-extending
822 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
823 for (MVT VT : MVT::fp_valuetypes()) {
824 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
825 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
826 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
827 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
828 }
829 for (MVT VT : MVT::integer_valuetypes())
830 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
831
832 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
833 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
834 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
835 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
836 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
837 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
838 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
839
840 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
841 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
842 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
843
844 // Indexed loads and stores are supported.
845 for (unsigned im = (unsigned)ISD::PRE_INC;
846 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
847 setIndexedLoadAction(im, MVT::i8, Legal);
848 setIndexedLoadAction(im, MVT::i16, Legal);
849 setIndexedLoadAction(im, MVT::i32, Legal);
850 setIndexedLoadAction(im, MVT::i64, Legal);
851 setIndexedLoadAction(im, MVT::f64, Legal);
852 setIndexedLoadAction(im, MVT::f32, Legal);
853 setIndexedLoadAction(im, MVT::f16, Legal);
854 setIndexedLoadAction(im, MVT::bf16, Legal);
855 setIndexedStoreAction(im, MVT::i8, Legal);
856 setIndexedStoreAction(im, MVT::i16, Legal);
857 setIndexedStoreAction(im, MVT::i32, Legal);
858 setIndexedStoreAction(im, MVT::i64, Legal);
859 setIndexedStoreAction(im, MVT::f64, Legal);
860 setIndexedStoreAction(im, MVT::f32, Legal);
861 setIndexedStoreAction(im, MVT::f16, Legal);
862 setIndexedStoreAction(im, MVT::bf16, Legal);
863 }
864
865 // Trap.
866 setOperationAction(ISD::TRAP, MVT::Other, Legal);
867 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
868 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
869
870 // We combine OR nodes for bitfield operations.
871 setTargetDAGCombine(ISD::OR);
872 // Try to create BICs for vector ANDs.
873 setTargetDAGCombine(ISD::AND);
874
875 // Vector add and sub nodes may conceal a high-half opportunity.
876 // Also, try to fold ADD into CSINC/CSINV..
877 setTargetDAGCombine(ISD::ADD);
878 setTargetDAGCombine(ISD::ABS);
879 setTargetDAGCombine(ISD::SUB);
880 setTargetDAGCombine(ISD::SRL);
881 setTargetDAGCombine(ISD::XOR);
882 setTargetDAGCombine(ISD::SINT_TO_FP);
883 setTargetDAGCombine(ISD::UINT_TO_FP);
884
885 // TODO: Do the same for FP_TO_*INT_SAT.
886 setTargetDAGCombine(ISD::FP_TO_SINT);
887 setTargetDAGCombine(ISD::FP_TO_UINT);
888 setTargetDAGCombine(ISD::FDIV);
889
890 // Try and combine setcc with csel
891 setTargetDAGCombine(ISD::SETCC);
892
893 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
894
895 setTargetDAGCombine(ISD::ANY_EXTEND);
896 setTargetDAGCombine(ISD::ZERO_EXTEND);
897 setTargetDAGCombine(ISD::SIGN_EXTEND);
898 setTargetDAGCombine(ISD::VECTOR_SPLICE);
899 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
900 setTargetDAGCombine(ISD::TRUNCATE);
901 setTargetDAGCombine(ISD::CONCAT_VECTORS);
902 setTargetDAGCombine(ISD::STORE);
903 if (Subtarget->supportsAddressTopByteIgnored())
904 setTargetDAGCombine(ISD::LOAD);
905
906 setTargetDAGCombine(ISD::MUL);
907
908 setTargetDAGCombine(ISD::SELECT);
909 setTargetDAGCombine(ISD::VSELECT);
910
911 setTargetDAGCombine(ISD::INTRINSIC_VOID);
912 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
913 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
914 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
915 setTargetDAGCombine(ISD::VECREDUCE_ADD);
916 setTargetDAGCombine(ISD::STEP_VECTOR);
917
918 setTargetDAGCombine(ISD::GlobalAddress);
919
920 // In case of strict alignment, avoid an excessive number of byte wide stores.
921 MaxStoresPerMemsetOptSize = 8;
922 MaxStoresPerMemset = Subtarget->requiresStrictAlign()
923 ? MaxStoresPerMemsetOptSize : 32;
924
925 MaxGluedStoresPerMemcpy = 4;
926 MaxStoresPerMemcpyOptSize = 4;
927 MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
928 ? MaxStoresPerMemcpyOptSize : 16;
929
930 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
931
932 MaxLoadsPerMemcmpOptSize = 4;
933 MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
934 ? MaxLoadsPerMemcmpOptSize : 8;
935
936 setStackPointerRegisterToSaveRestore(AArch64::SP);
937
938 setSchedulingPreference(Sched::Hybrid);
939
940 EnableExtLdPromotion = true;
941
942 // Set required alignment.
943 setMinFunctionAlignment(Align(4));
944 // Set preferred alignments.
945 setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
946 setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
947
948 // Only change the limit for entries in a jump table if specified by
949 // the sub target, but not at the command line.
950 unsigned MaxJT = STI.getMaximumJumpTableSize();
951 if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
952 setMaximumJumpTableSize(MaxJT);
953
954 setHasExtractBitsInsn(true);
955
956 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
957
958 if (Subtarget->hasNEON()) {
959 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
960 // silliness like this:
961 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
962 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
963 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
964 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
965 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
966 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
967 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
968 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
969 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
970 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
971 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
972 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
973 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
974 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
975 setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand);
976 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
977 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
978 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
979 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
980 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
981 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
982 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
983 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
984 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
985 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
986 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
987
988 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
989 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
990 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
991 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
992 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
993
994 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
995
996 // AArch64 doesn't have a direct vector ->f32 conversion instructions for
997 // elements smaller than i32, so promote the input to i32 first.
998 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
999 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
1000 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
1001 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
1002 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
1003 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
1004
1005 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
1006 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1007 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1008 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
1009 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
1010 // Or, direct i32 -> f16 vector conversion. Set it so custom, so the
1011 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
1012 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
1013 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1014
1015 if (Subtarget->hasFullFP16()) {
1016 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1017 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1018 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
1019 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1020 } else {
1021 // when AArch64 doesn't have fullfp16 support, promote the input
1022 // to i32 first.
1023 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
1024 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
1025 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
1026 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
1027 }
1028
1029 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
1030 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
1031 setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
1032 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
1033 setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
1034 setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
1035 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1036 setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
1037
1038 // AArch64 doesn't have MUL.2d:
1039 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
1040 // Custom handling for some quad-vector types to detect MULL.
1041 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
1042 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1043 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1044
1045 // Saturates
1046 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1047 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1048 setOperationAction(ISD::SADDSAT, VT, Legal);
1049 setOperationAction(ISD::UADDSAT, VT, Legal);
1050 setOperationAction(ISD::SSUBSAT, VT, Legal);
1051 setOperationAction(ISD::USUBSAT, VT, Legal);
1052 }
1053
1054 for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1055 MVT::v4i32}) {
1056 setOperationAction(ISD::ABDS, VT, Legal);
1057 setOperationAction(ISD::ABDU, VT, Legal);
1058 }
1059
1060 // Vector reductions
1061 for (MVT VT : { MVT::v4f16, MVT::v2f32,
1062 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1063 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1064 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1065 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1066
1067 setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
1068 }
1069 }
1070 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1071 MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1072 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1073 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1074 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1075 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1076 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1077 }
1078 setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
1079
1080 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
1081 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
1082 // Likewise, narrowing and extending vector loads/stores aren't handled
1083 // directly.
1084 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1085 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
1086
1087 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
1088 setOperationAction(ISD::MULHS, VT, Legal);
1089 setOperationAction(ISD::MULHU, VT, Legal);
1090 } else {
1091 setOperationAction(ISD::MULHS, VT, Expand);
1092 setOperationAction(ISD::MULHU, VT, Expand);
1093 }
1094 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1095 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1096
1097 setOperationAction(ISD::BSWAP, VT, Expand);
1098 setOperationAction(ISD::CTTZ, VT, Expand);
1099
1100 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1101 setTruncStoreAction(VT, InnerVT, Expand);
1102 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1103 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1104 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1105 }
1106 }
1107
1108 // AArch64 has implementations of a lot of rounding-like FP operations.
1109 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
1110 setOperationAction(ISD::FFLOOR, Ty, Legal);
1111 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1112 setOperationAction(ISD::FCEIL, Ty, Legal);
1113 setOperationAction(ISD::FRINT, Ty, Legal);
1114 setOperationAction(ISD::FTRUNC, Ty, Legal);
1115 setOperationAction(ISD::FROUND, Ty, Legal);
1116 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
1117 }
1118
1119 if (Subtarget->hasFullFP16()) {
1120 for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
1121 setOperationAction(ISD::FFLOOR, Ty, Legal);
1122 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1123 setOperationAction(ISD::FCEIL, Ty, Legal);
1124 setOperationAction(ISD::FRINT, Ty, Legal);
1125 setOperationAction(ISD::FTRUNC, Ty, Legal);
1126 setOperationAction(ISD::FROUND, Ty, Legal);
1127 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
1128 }
1129 }
1130
1131 if (Subtarget->hasSVE())
1132 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
1133
1134 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
1135
1136 setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1137 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1138 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1139 setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1140 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1141 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1142 }
1143
1144 if (Subtarget->hasSVE()) {
1145 for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
1146 setOperationAction(ISD::BITREVERSE, VT, Custom);
1147 setOperationAction(ISD::BSWAP, VT, Custom);
1148 setOperationAction(ISD::CTLZ, VT, Custom);
1149 setOperationAction(ISD::CTPOP, VT, Custom);
1150 setOperationAction(ISD::CTTZ, VT, Custom);
1151 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1152 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1153 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1154 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1155 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1156 setOperationAction(ISD::MGATHER, VT, Custom);
1157 setOperationAction(ISD::MSCATTER, VT, Custom);
1158 setOperationAction(ISD::MLOAD, VT, Custom);
1159 setOperationAction(ISD::MUL, VT, Custom);
1160 setOperationAction(ISD::MULHS, VT, Custom);
1161 setOperationAction(ISD::MULHU, VT, Custom);
1162 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1163 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1164 setOperationAction(ISD::SELECT, VT, Custom);
1165 setOperationAction(ISD::SETCC, VT, Custom);
1166 setOperationAction(ISD::SDIV, VT, Custom);
1167 setOperationAction(ISD::UDIV, VT, Custom);
1168 setOperationAction(ISD::SMIN, VT, Custom);
1169 setOperationAction(ISD::UMIN, VT, Custom);
1170 setOperationAction(ISD::SMAX, VT, Custom);
1171 setOperationAction(ISD::UMAX, VT, Custom);
1172 setOperationAction(ISD::SHL, VT, Custom);
1173 setOperationAction(ISD::SRL, VT, Custom);
1174 setOperationAction(ISD::SRA, VT, Custom);
1175 setOperationAction(ISD::ABS, VT, Custom);
1176 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1177 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1178 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1179 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1180 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1181 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1182 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1183 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1184
1185 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1186 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1187 setOperationAction(ISD::SELECT_CC, VT, Expand);
1188 setOperationAction(ISD::ROTL, VT, Expand);
1189 setOperationAction(ISD::ROTR, VT, Expand);
1190 }
1191
1192 // Illegal unpacked integer vector types.
1193 for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
1194 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1195 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1196 }
1197
1198 // Legalize unpacked bitcasts to REINTERPRET_CAST.
1199 for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
1200 MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
1201 setOperationAction(ISD::BITCAST, VT, Custom);
1202
1203 for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
1204 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1205 setOperationAction(ISD::SELECT, VT, Custom);
1206 setOperationAction(ISD::SETCC, VT, Custom);
1207 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1208 setOperationAction(ISD::TRUNCATE, VT, Custom);
1209 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1210 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1211 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1212
1213 setOperationAction(ISD::SELECT_CC, VT, Expand);
1214 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1215 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1216
1217 // There are no legal MVT::nxv16f## based types.
1218 if (VT != MVT::nxv16i1) {
1219 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1220 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1221 }
1222 }
1223
1224 // NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
1225 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
1226 MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1227 MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1228 setOperationAction(ISD::MLOAD, VT, Custom);
1229 setOperationAction(ISD::MSTORE, VT, Custom);
1230 setOperationAction(ISD::MGATHER, VT, Custom);
1231 setOperationAction(ISD::MSCATTER, VT, Custom);
1232 }
1233
1234 for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
1235 for (MVT InnerVT : MVT::fp_scalable_vector_valuetypes()) {
1236 // Avoid marking truncating FP stores as legal to prevent the
1237 // DAGCombiner from creating unsupported truncating stores.
1238 setTruncStoreAction(VT, InnerVT, Expand);
1239 // SVE does not have floating-point extending loads.
1240 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1241 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1242 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1243 }
1244 }
1245
1246 // SVE supports truncating stores of 64-bit and 128-bit vectors.
1247 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
1248 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
1249 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
1250 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
1251 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
1252
1253 for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
1254 MVT::nxv4f32, MVT::nxv2f64}) {
1255 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1256 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1257 setOperationAction(ISD::MGATHER, VT, Custom);
1258 setOperationAction(ISD::MSCATTER, VT, Custom);
1259 setOperationAction(ISD::MLOAD, VT, Custom);
1260 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1261 setOperationAction(ISD::SELECT, VT, Custom);
1262 setOperationAction(ISD::FADD, VT, Custom);
1263 setOperationAction(ISD::FDIV, VT, Custom);
1264 setOperationAction(ISD::FMA, VT, Custom);
1265 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1266 setOperationAction(ISD::FMAXNUM, VT, Custom);
1267 setOperationAction(ISD::FMINIMUM, VT, Custom);
1268 setOperationAction(ISD::FMINNUM, VT, Custom);
1269 setOperationAction(ISD::FMUL, VT, Custom);
1270 setOperationAction(ISD::FNEG, VT, Custom);
1271 setOperationAction(ISD::FSUB, VT, Custom);
1272 setOperationAction(ISD::FCEIL, VT, Custom);
1273 setOperationAction(ISD::FFLOOR, VT, Custom);
1274 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1275 setOperationAction(ISD::FRINT, VT, Custom);
1276 setOperationAction(ISD::FROUND, VT, Custom);
1277 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1278 setOperationAction(ISD::FTRUNC, VT, Custom);
1279 setOperationAction(ISD::FSQRT, VT, Custom);
1280 setOperationAction(ISD::FABS, VT, Custom);
1281 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1282 setOperationAction(ISD::FP_ROUND, VT, Custom);
1283 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1284 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1285 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1286 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1287 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1288
1289 setOperationAction(ISD::SELECT_CC, VT, Expand);
1290 }
1291
1292 for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1293 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1294 setOperationAction(ISD::MGATHER, VT, Custom);
1295 setOperationAction(ISD::MSCATTER, VT, Custom);
1296 setOperationAction(ISD::MLOAD, VT, Custom);
1297 }
1298
1299 setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
1300
1301 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
1302 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
1303
1304 // NOTE: Currently this has to happen after computeRegisterProperties rather
1305 // than the preferred option of combining it with the addRegisterClass call.
1306 if (Subtarget->useSVEForFixedLengthVectors()) {
1307 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
1308 if (useSVEForFixedLengthVectorVT(VT))
1309 addTypeForFixedLengthSVE(VT);
1310 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
1311 if (useSVEForFixedLengthVectorVT(VT))
1312 addTypeForFixedLengthSVE(VT);
1313
1314 // 64-bit results can mean an input wider than NEON supports.
1315 for (auto VT : {MVT::v8i8, MVT::v4i16})
1316 setOperationAction(ISD::TRUNCATE, VT, Custom);
1317 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
1318
1319 // 128-bit results imply an input wider than NEON supports.
1320 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
1321 setOperationAction(ISD::TRUNCATE, VT, Custom);
1322 for (auto VT : {MVT::v8f16, MVT::v4f32})
1323 setOperationAction(ISD::FP_ROUND, VT, Custom);
1324
1325 // These operations are not supported on NEON but SVE can do them.
1326 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1327 setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
1328 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1329 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
1330 setOperationAction(ISD::MUL, MVT::v1i64, Custom);
1331 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1332 setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
1333 setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
1334 setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
1335 setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
1336 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
1337 setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
1338 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
1339 setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
1340 setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
1341 setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
1342 setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
1343 setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
1344 setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
1345 setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
1346 setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
1347 setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
1348 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
1349 setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
1350 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
1351 setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
1352 setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
1353 setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
1354 setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
1355 setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
1356 setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
1357 setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
1358 setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
1359 setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
1360 setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
1361 setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
1362 setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
1363 setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
1364
1365 // Int operations with no NEON support.
1366 for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1367 MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
1368 setOperationAction(ISD::BITREVERSE, VT, Custom);
1369 setOperationAction(ISD::CTTZ, VT, Custom);
1370 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1371 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1372 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1373 }
1374
1375 // FP operations with no NEON support.
1376 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
1377 MVT::v1f64, MVT::v2f64})
1378 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1379
1380 // Use SVE for vectors with more than 2 elements.
1381 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
1382 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1383 }
1384
1385 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64);
1386 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32);
1387 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16);
1388 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
1389 }
1390
1391 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
1392}
1393
1394void AArch64TargetLowering::addTypeForNEON(MVT VT) {
1395 assert(VT.isVector() && "VT should be a vector type");
1396
1397 if (VT.isFloatingPoint()) {
1398 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
1399 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
1400 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
1401 }
1402
1403 // Mark vector float intrinsics as expand.
1404 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
1405 setOperationAction(ISD::FSIN, VT, Expand);
1406 setOperationAction(ISD::FCOS, VT, Expand);
1407 setOperationAction(ISD::FPOW, VT, Expand);
1408 setOperationAction(ISD::FLOG, VT, Expand);
1409 setOperationAction(ISD::FLOG2, VT, Expand);
1410 setOperationAction(ISD::FLOG10, VT, Expand);
1411 setOperationAction(ISD::FEXP, VT, Expand);
1412 setOperationAction(ISD::FEXP2, VT, Expand);
1413 }
1414
1415 // But we do support custom-lowering for FCOPYSIGN.
1416 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
1417 ((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
1418 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1419
1420 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1421 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1422 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1423 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1424 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1425 setOperationAction(ISD::SRA, VT, Custom);
1426 setOperationAction(ISD::SRL, VT, Custom);
1427 setOperationAction(ISD::SHL, VT, Custom);
1428 setOperationAction(ISD::OR, VT, Custom);
1429 setOperationAction(ISD::SETCC, VT, Custom);
1430 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
1431
1432 setOperationAction(ISD::SELECT, VT, Expand);
1433 setOperationAction(ISD::SELECT_CC, VT, Expand);
1434 setOperationAction(ISD::VSELECT, VT, Expand);
1435 for (MVT InnerVT : MVT::all_valuetypes())
1436 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
1437
1438 // CNT supports only B element sizes; wider element types use UADDLP to widen.
1439 if (VT != MVT::v8i8 && VT != MVT::v16i8)
1440 setOperationAction(ISD::CTPOP, VT, Custom);
1441
1442 setOperationAction(ISD::UDIV, VT, Expand);
1443 setOperationAction(ISD::SDIV, VT, Expand);
1444 setOperationAction(ISD::UREM, VT, Expand);
1445 setOperationAction(ISD::SREM, VT, Expand);
1446 setOperationAction(ISD::FREM, VT, Expand);
1447
1448 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1449 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1450
1451 if (!VT.isFloatingPoint())
1452 setOperationAction(ISD::ABS, VT, Legal);
1453
1454 // [SU][MIN|MAX] are available for all NEON types apart from i64.
1455 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
1456 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
1457 setOperationAction(Opcode, VT, Legal);
1458
1459 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
1460 if (VT.isFloatingPoint() &&
1461 VT.getVectorElementType() != MVT::bf16 &&
1462 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
1463 for (unsigned Opcode :
1464 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
1465 setOperationAction(Opcode, VT, Legal);
1466
1467 if (Subtarget->isLittleEndian()) {
1468 for (unsigned im = (unsigned)ISD::PRE_INC;
1469 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1470 setIndexedLoadAction(im, VT, Legal);
1471 setIndexedStoreAction(im, VT, Legal);
1472 }
1473 }
1474}
1475
1476void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
1477 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
1478
1479 // By default everything must be expanded.
1480 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1481 setOperationAction(Op, VT, Expand);
1482
1483 // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
1484 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1485
1486 if (VT.isFloatingPoint()) {
1487 setCondCodeAction(ISD::SETO, VT, Expand);
1488 setCondCodeAction(ISD::SETOLT, VT, Expand);
1489 setCondCodeAction(ISD::SETLT, VT, Expand);
1490 setCondCodeAction(ISD::SETOLE, VT, Expand);
1491 setCondCodeAction(ISD::SETLE, VT, Expand);
1492 setCondCodeAction(ISD::SETULT, VT, Expand);
1493 setCondCodeAction(ISD::SETULE, VT, Expand);
1494 setCondCodeAction(ISD::SETUGE, VT, Expand);
1495 setCondCodeAction(ISD::SETUGT, VT, Expand);
1496 setCondCodeAction(ISD::SETUEQ, VT, Expand);
1497 setCondCodeAction(ISD::SETUNE, VT, Expand);
1498 }
1499
1500 // Mark integer truncating stores as having custom lowering
1501 if (VT.isInteger()) {
1502 MVT InnerVT = VT.changeVectorElementType(MVT::i8);
1503 while (InnerVT != VT) {
1504 setTruncStoreAction(VT, InnerVT, Custom);
1505 InnerVT = InnerVT.changeVectorElementType(
1506 MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
1507 }
1508 }
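  // Illustrative note (added; not in the original source): for VT == MVT::v8i32
  // the loop above starts at v8i8, marks the v8i32->v8i8 and v8i32->v8i16
  // truncating stores as Custom, and stops once InnerVT reaches v8i32 itself.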
1509
1510 // Lower fixed length vector operations to scalable equivalents.
1511 setOperationAction(ISD::ABS, VT, Custom);
1512 setOperationAction(ISD::ADD, VT, Custom);
1513 setOperationAction(ISD::AND, VT, Custom);
1514 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1515 setOperationAction(ISD::BITCAST, VT, Custom);
1516 setOperationAction(ISD::BITREVERSE, VT, Custom);
1517 setOperationAction(ISD::BSWAP, VT, Custom);
1518 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1519 setOperationAction(ISD::CTLZ, VT, Custom);
1520 setOperationAction(ISD::CTPOP, VT, Custom);
1521 setOperationAction(ISD::CTTZ, VT, Custom);
1522 setOperationAction(ISD::FABS, VT, Custom);
1523 setOperationAction(ISD::FADD, VT, Custom);
1524 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1525 setOperationAction(ISD::FCEIL, VT, Custom);
1526 setOperationAction(ISD::FDIV, VT, Custom);
1527 setOperationAction(ISD::FFLOOR, VT, Custom);
1528 setOperationAction(ISD::FMA, VT, Custom);
1529 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1530 setOperationAction(ISD::FMAXNUM, VT, Custom);
1531 setOperationAction(ISD::FMINIMUM, VT, Custom);
1532 setOperationAction(ISD::FMINNUM, VT, Custom);
1533 setOperationAction(ISD::FMUL, VT, Custom);
1534 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1535 setOperationAction(ISD::FNEG, VT, Custom);
1536 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1537 setOperationAction(ISD::FP_ROUND, VT, Custom);
1538 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1539 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1540 setOperationAction(ISD::FRINT, VT, Custom);
1541 setOperationAction(ISD::FROUND, VT, Custom);
1542 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1543 setOperationAction(ISD::FSQRT, VT, Custom);
1544 setOperationAction(ISD::FSUB, VT, Custom);
1545 setOperationAction(ISD::FTRUNC, VT, Custom);
1546 setOperationAction(ISD::LOAD, VT, Custom);
1547 setOperationAction(ISD::MGATHER, VT, Custom);
1548 setOperationAction(ISD::MLOAD, VT, Custom);
1549 setOperationAction(ISD::MSCATTER, VT, Custom);
1550 setOperationAction(ISD::MSTORE, VT, Custom);
1551 setOperationAction(ISD::MUL, VT, Custom);
1552 setOperationAction(ISD::MULHS, VT, Custom);
1553 setOperationAction(ISD::MULHU, VT, Custom);
1554 setOperationAction(ISD::OR, VT, Custom);
1555 setOperationAction(ISD::SDIV, VT, Custom);
1556 setOperationAction(ISD::SELECT, VT, Custom);
1557 setOperationAction(ISD::SETCC, VT, Custom);
1558 setOperationAction(ISD::SHL, VT, Custom);
1559 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1560 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1561 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1562 setOperationAction(ISD::SMAX, VT, Custom);
1563 setOperationAction(ISD::SMIN, VT, Custom);
1564 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1565 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1566 setOperationAction(ISD::SRA, VT, Custom);
1567 setOperationAction(ISD::SRL, VT, Custom);
1568 setOperationAction(ISD::STORE, VT, Custom);
1569 setOperationAction(ISD::SUB, VT, Custom);
1570 setOperationAction(ISD::TRUNCATE, VT, Custom);
1571 setOperationAction(ISD::UDIV, VT, Custom);
1572 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1573 setOperationAction(ISD::UMAX, VT, Custom);
1574 setOperationAction(ISD::UMIN, VT, Custom);
1575 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1576 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1577 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1578 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1579 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1580 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1581 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1582 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1583 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1584 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1585 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1586 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1587 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1588 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1589 setOperationAction(ISD::VSELECT, VT, Custom);
1590 setOperationAction(ISD::XOR, VT, Custom);
1591 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1592}
1593
1594void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
1595 addRegisterClass(VT, &AArch64::FPR64RegClass);
1596 addTypeForNEON(VT);
1597}
1598
1599void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
1600 addRegisterClass(VT, &AArch64::FPR128RegClass);
1601 addTypeForNEON(VT);
1602}
1603
1604EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
1605 LLVMContext &C, EVT VT) const {
1606 if (!VT.isVector())
1607 return MVT::i32;
1608 if (VT.isScalableVector())
1609 return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
1610 return VT.changeVectorElementTypeToInteger();
1611}
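// Illustrative examples for getSetCCResultType (added; not in the original
// source): a scalar f64 compare yields i32, a fixed-length v4f32 compare
// yields v4i32, and a scalable nxv2f64 compare yields the predicate type
// nxv2i1.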
1612
1613static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
1614 const APInt &Demanded,
1615 TargetLowering::TargetLoweringOpt &TLO,
1616 unsigned NewOpc) {
1617 uint64_t OldImm = Imm, NewImm, Enc;
1618 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
1619
1620 // Return if the immediate is already all zeros, all ones, a bimm32 or a
1621 // bimm64.
1622 if (Imm == 0 || Imm == Mask ||
1623 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
1624 return false;
1625
1626 unsigned EltSize = Size;
1627 uint64_t DemandedBits = Demanded.getZExtValue();
1628
1629 // Clear bits that are not demanded.
1630 Imm &= DemandedBits;
1631
1632 while (true) {
1633 // The goal here is to set the non-demanded bits in a way that minimizes
1634 // the number of transitions between 0 and 1. In order to achieve this goal,
1635 // we set the non-demanded bits to the value of the preceding demanded bits.
1636 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
1637 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
1638 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
1639 // The final result is 0b11000011.
1640 uint64_t NonDemandedBits = ~DemandedBits;
1641 uint64_t InvertedImm = ~Imm & DemandedBits;
1642 uint64_t RotatedImm =
1643 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
1644 NonDemandedBits;
1645 uint64_t Sum = RotatedImm + NonDemandedBits;
1646 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
1647 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
1648 NewImm = (Imm | Ones) & Mask;
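    // Worked trace of the 0bx10xx0x1 example above (added for illustration;
    // an 8-bit element is assumed purely for brevity, real elements are 32 or
    // 64 bits):
    //   DemandedBits    = 0b01100101   Imm         = 0b01000001
    //   NonDemandedBits = 0b10011010   InvertedImm = 0b00100100
    //   RotatedImm      = 0b00001000   Sum         = 0b10100010   Carry = 0
    //   Ones            = 0b10000010   NewImm      = 0b11000011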
1649
1650 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
1651 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
1652 // we halve the element size and continue the search.
1653 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
1654 break;
1655
1656 // We cannot shrink the element size any further if it is 2-bits.
1657 if (EltSize == 2)
1658 return false;
1659
1660 EltSize /= 2;
1661 Mask >>= EltSize;
1662 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
1663
1664 // Return if there is a mismatch in any of the demanded bits of Imm and Hi.
1665 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
1666 return false;
1667
1668 // Merge the upper and lower halves of Imm and DemandedBits.
1669 Imm |= Hi;
1670 DemandedBits |= DemandedBitsHi;
1671 }
1672
1673 ++NumOptimizedImms;
1674
1675 // Replicate the element across the register width.
1676 while (EltSize < Size) {
1677 NewImm |= NewImm << EltSize;
1678 EltSize *= 2;
1679 }
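  // For example (illustrative only): with Size == 64 and a final EltSize of
  // 16, a 16-bit pattern such as 0x00F0 is replicated to 0x00F000F000F000F0.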
1680
1681 (void)OldImm;
1682 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
1683        "demanded bits should never be altered");
1684 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
1685
1686 // Create the new constant immediate node.
1687 EVT VT = Op.getValueType();
1688 SDLoc DL(Op);
1689 SDValue New;
1690
1691 // If the new constant immediate is all-zeros or all-ones, let the target
1692 // independent DAG combine optimize this node.
1693 if (NewImm == 0 || NewImm == OrigMask) {
1694 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
1695 TLO.DAG.getConstant(NewImm, DL, VT));
1696 // Otherwise, create a machine node so that target independent DAG combine
1697 // doesn't undo this optimization.
1698 } else {
1699 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
1700 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
1701 New = SDValue(
1702 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
1703 }
1704
1705 return TLO.CombineTo(Op, New);
1706}
1707
1708bool AArch64TargetLowering::targetShrinkDemandedConstant(
1709 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1710 TargetLoweringOpt &TLO) const {
1711 // Delay this optimization to as late as possible.
1712 if (!TLO.LegalOps)
1713 return false;
1714
1715 if (!EnableOptimizeLogicalImm)
1716 return false;
1717
1718 EVT VT = Op.getValueType();
1719 if (VT.isVector())
1720 return false;
1721
1722 unsigned Size = VT.getSizeInBits();
1723 assert((Size == 32 || Size == 64) &&
1724        "i32 or i64 is expected after legalization.");
1725
1726 // Exit early if we demand all bits.
1727 if (DemandedBits.countPopulation() == Size)
1728 return false;
1729
1730 unsigned NewOpc;
1731 switch (Op.getOpcode()) {
1732 default:
1733 return false;
1734 case ISD::AND:
1735 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
1736 break;
1737 case ISD::OR:
1738 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1739 break;
1740 case ISD::XOR:
1741 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1742 break;
1743 }
1744 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1745 if (!C)
1746 return false;
1747 uint64_t Imm = C->getZExtValue();
1748 return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
1749}
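// Illustrative summary (added; not in the original source): the net effect is
// that an AND/OR/XOR whose i32 or i64 constant is not itself a valid logical
// immediate may be rewritten to use a nearby bitmask immediate that agrees on
// all demanded bits, avoiding a separate MOV to materialise the constant.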
1750
1751/// computeKnownBitsForTargetNode - Determine which of the bits specified in
1752/// Mask are known to be either zero or one and return them in Known.
1753void AArch64TargetLowering::computeKnownBitsForTargetNode(
1754 const SDValue Op, KnownBits &Known,
1755 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
1756 switch (Op.getOpcode()) {
1757 default:
1758 break;
1759 case AArch64ISD::CSEL: {
1760 KnownBits Known2;
1761 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1762 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1763 Known = KnownBits::commonBits(Known, Known2);
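    // Example (added for illustration): if both CSEL inputs are known to have
    // their top 32 bits clear, commonBits() reports those bits as zero for the
    // select result as well.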
1764 break;
1765 }
1766 case AArch64ISD::LOADgot:
1767 case AArch64ISD::ADDlow: {
1768 if (!Subtarget->isTargetILP32())
1769 break;
1770 // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
1771 Known.Zero = APInt::getHighBitsSet(64, 32);
1772 break;
1773 }
1774 case ISD::INTRINSIC_W_CHAIN: {
1775 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1776 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1777 switch (IntID) {
1778 default: return;
1779 case Intrinsic::aarch64_ldaxr:
1780 case Intrinsic::aarch64_ldxr: {
1781 unsigned BitWidth = Known.getBitWidth();
1782 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1783 unsigned MemBits = VT.getScalarSizeInBits();
1784 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1785 return;
1786 }
1787 }
1788 break;
1789 }
1790 case ISD::INTRINSIC_WO_CHAIN:
1791 case ISD::INTRINSIC_VOID: {
1792 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1793 switch (IntNo) {
1794 default:
1795 break;
1796 case Intrinsic::aarch64_neon_umaxv:
1797 case Intrinsic::aarch64_neon_uminv: {
1798 // Figure out the datatype of the vector operand. The UMINV instruction
1799 // will zero extend the result, so we can mark as known zero all the
1800 // bits larger than the element datatype. 32-bit or larger doesn't need
1801 // this as those are legal types and will be handled by isel directly.
1802 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1803 unsigned BitWidth = Known.getBitWidth();
1804 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1805 assert(BitWidth >= 8 && "Unexpected width!");
1806 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1807 Known.Zero |= Mask;
1808 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1809 assert(BitWidth >= 16 && "Unexpected width!");
1810 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1811 Known.Zero |= Mask;
1812 }
1813 break;
1814 } break;
1815 }
1816 }
1817 }
1818}
1819
1820MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1821 EVT) const {
1822 return MVT::i64;
1823}
1824
1825bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1826 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1827 bool *Fast) const {
1828 if (Subtarget->requiresStrictAlign())
1829 return false;
1830
1831 if (Fast) {
1832 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1833 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1834 // See comments in performSTORECombine() for more details about
1835 // these conditions.
1836
1837 // Code that uses clang vector extensions can mark that it
1838 // wants unaligned accesses to be treated as fast by
1839 // underspecifying alignment to be 1 or 2.
1840 Alignment <= 2 ||
1841
1842 // Disregard v2i64. Memcpy lowering produces those and splitting
1843 // them regresses performance on micro-benchmarks and olden/bh.
1844 VT == MVT::v2i64;
1845 }
1846 return true;
1847}
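// Example (added for illustration): on a core where isMisaligned128StoreSlow()
// returns true, a 16-byte store with alignment 8 reports *Fast = false, while
// the same store with alignment 1 or 2 (the clang vector-extension marker
// described above) or with type v2i64 still reports *Fast = true.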
1848
1849// Same as above but handling LLTs instead.
1850bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1851 LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1852 bool *Fast) const {
1853 if (Subtarget->requiresStrictAlign())
1854 return false;
1855
1856 if (Fast) {
1857 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1858 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
1859 Ty.getSizeInBytes() != 16 ||
1860 // See comments in performSTORECombine() for more details about
1861 // these conditions.
1862
1863 // Code that uses clang vector extensions can mark that it
1864 // wants unaligned accesses to be treated as fast by
1865 // underspecifying alignment to be 1 or 2.
1866 Alignment <= 2 ||
1867
1868 // Disregard v2i64. Memcpy lowering produces those and splitting
1869 // them regresses performance on micro-benchmarks and olden/bh.
1870 Ty == LLT::fixed_vector(2, 64);
1871 }
1872 return true;
1873}
1874
1875FastISel *
1876AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1877 const TargetLibraryInfo *libInfo) const {
1878 return AArch64::createFastISel(funcInfo, libInfo);
1879}
1880
1881const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1882#define MAKE_CASE(V) \
1883 case V: \
1884 return #V;
1885 switch ((AArch64ISD::NodeType)Opcode) {
1886 case AArch64ISD::FIRST_NUMBER:
1887 break;
1888 MAKE_CASE(AArch64ISD::CALL)
1889 MAKE_CASE(AArch64ISD::ADRP)
1890 MAKE_CASE(AArch64ISD::ADR)
1891 MAKE_CASE(AArch64ISD::ADDlow)
1892 MAKE_CASE(AArch64ISD::LOADgot)
1893 MAKE_CASE(AArch64ISD::RET_FLAG)
1894 MAKE_CASE(AArch64ISD::BRCOND)
1895 MAKE_CASE(AArch64ISD::CSEL)
1896 MAKE_CASE(AArch64ISD::CSINV)
1897 MAKE_CASE(AArch64ISD::CSNEG)
1898 MAKE_CASE(AArch64ISD::CSINC)
1899 MAKE_CASE(AArch64ISD::THREAD_POINTER)
1900 MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
1901 MAKE_CASE(AArch64ISD::ADD_PRED)
1902 MAKE_CASE(AArch64ISD::MUL_PRED)
1903 MAKE_CASE(AArch64ISD::MULHS_PRED)
1904 MAKE_CASE(AArch64ISD::MULHU_PRED)
1905 MAKE_CASE(AArch64ISD::SDIV_PRED)
1906 MAKE_CASE(AArch64ISD::SHL_PRED)
1907 MAKE_CASE(AArch64ISD::SMAX_PRED)
1908 MAKE_CASE(AArch64ISD::SMIN_PRED)
1909 MAKE_CASE(AArch64ISD::SRA_PRED)
1910 MAKE_CASE(AArch64ISD::SRL_PRED)
1911 MAKE_CASE(AArch64ISD::SUB_PRED)
1912 MAKE_CASE(AArch64ISD::UDIV_PRED)
1913 MAKE_CASE(AArch64ISD::UMAX_PRED)
1914 MAKE_CASE(AArch64ISD::UMIN_PRED)
1915 MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
1916 MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
1917 MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
1918 MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
1919 MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
1920 MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
1921 MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
1922 MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
1923 MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
1924 MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
1925 MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
1926 MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
1927 MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
1928 MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
1929 MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
1930 MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
1931 MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
1932 MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
1933 MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
1934 MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
1935 MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
1936 MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
1937 MAKE_CASE(AArch64ISD::ADC)
1938 MAKE_CASE(AArch64ISD::SBC)
1939 MAKE_CASE(AArch64ISD::ADDS)
1940 MAKE_CASE(AArch64ISD::SUBS)
1941 MAKE_CASE(AArch64ISD::ADCS)
1942 MAKE_CASE(AArch64ISD::SBCS)
1943 MAKE_CASE(AArch64ISD::ANDS)
1944 MAKE_CASE(AArch64ISD::CCMP)
1945 MAKE_CASE(AArch64ISD::CCMN)
1946 MAKE_CASE(AArch64ISD::FCCMP)
1947 MAKE_CASE(AArch64ISD::FCMP)
1948 MAKE_CASE(AArch64ISD::STRICT_FCMP)
1949 MAKE_CASE(AArch64ISD::STRICT_FCMPE)
1950 MAKE_CASE(AArch64ISD::DUP)
1951 MAKE_CASE(AArch64ISD::DUPLANE8)
1952 MAKE_CASE(AArch64ISD::DUPLANE16)
1953 MAKE_CASE(AArch64ISD::DUPLANE32)
1954 MAKE_CASE(AArch64ISD::DUPLANE64)
1955 MAKE_CASE(AArch64ISD::MOVI)
1956 MAKE_CASE(AArch64ISD::MOVIshift)
1957 MAKE_CASE(AArch64ISD::MOVIedit)
1958 MAKE_CASE(AArch64ISD::MOVImsl)
1959 MAKE_CASE(AArch64ISD::FMOV)
1960 MAKE_CASE(AArch64ISD::MVNIshift)
1961 MAKE_CASE(AArch64ISD::MVNImsl)
1962 MAKE_CASE(AArch64ISD::BICi)
1963 MAKE_CASE(AArch64ISD::ORRi)
1964 MAKE_CASE(AArch64ISD::BSP)
1965 MAKE_CASE(AArch64ISD::EXTR)
1966 MAKE_CASE(AArch64ISD::ZIP1)
1967 MAKE_CASE(AArch64ISD::ZIP2)
1968 MAKE_CASE(AArch64ISD::UZP1)
1969 MAKE_CASE(AArch64ISD::UZP2)
1970 MAKE_CASE(AArch64ISD::TRN1)
1971 MAKE_CASE(AArch64ISD::TRN2)
1972 MAKE_CASE(AArch64ISD::REV16)
1973 MAKE_CASE(AArch64ISD::REV32)
1974 MAKE_CASE(AArch64ISD::REV64)
1975 MAKE_CASE(AArch64ISD::EXT)
1976 MAKE_CASE(AArch64ISD::SPLICE)
1977 MAKE_CASE(AArch64ISD::VSHL)
1978 MAKE_CASE(AArch64ISD::VLSHR)
1979 MAKE_CASE(AArch64ISD::VASHR)
1980 MAKE_CASE(AArch64ISD::VSLI)
1981 MAKE_CASE(AArch64ISD::VSRI)
1982 MAKE_CASE(AArch64ISD::CMEQ)
1983 MAKE_CASE(AArch64ISD::CMGE)
1984 MAKE_CASE(AArch64ISD::CMGT)
1985 MAKE_CASE(AArch64ISD::CMHI)
1986 MAKE_CASE(AArch64ISD::CMHS)
1987 MAKE_CASE(AArch64ISD::FCMEQ)
1988 MAKE_CASE(AArch64ISD::FCMGE)
1989 MAKE_CASE(AArch64ISD::FCMGT)
1990 MAKE_CASE(AArch64ISD::CMEQz)
1991 MAKE_CASE(AArch64ISD::CMGEz)
1992 MAKE_CASE(AArch64ISD::CMGTz)
1993 MAKE_CASE(AArch64ISD::CMLEz)
1994 MAKE_CASE(AArch64ISD::CMLTz)
1995 MAKE_CASE(AArch64ISD::FCMEQz)
1996 MAKE_CASE(AArch64ISD::FCMGEz)
1997 MAKE_CASE(AArch64ISD::FCMGTz)
1998 MAKE_CASE(AArch64ISD::FCMLEz)
1999 MAKE_CASE(AArch64ISD::FCMLTz)
2000 MAKE_CASE(AArch64ISD::SADDV)
2001 MAKE_CASE(AArch64ISD::UADDV)
2002 MAKE_CASE(AArch64ISD::SRHADD)
2003 MAKE_CASE(AArch64ISD::URHADD)
2004 MAKE_CASE(AArch64ISD::SHADD)
2005 MAKE_CASE(AArch64ISD::UHADD)
2006 MAKE_CASE(AArch64ISD::SDOT)
2007 MAKE_CASE(AArch64ISD::UDOT)
2008 MAKE_CASE(AArch64ISD::SMINV)
2009 MAKE_CASE(AArch64ISD::UMINV)
2010 MAKE_CASE(AArch64ISD::SMAXV)
2011 MAKE_CASE(AArch64ISD::UMAXV)
2012 MAKE_CASE(AArch64ISD::SADDV_PRED)
2013 MAKE_CASE(AArch64ISD::UADDV_PRED)
2014 MAKE_CASE(AArch64ISD::SMAXV_PRED)
2015 MAKE_CASE(AArch64ISD::UMAXV_PRED)
2016 MAKE_CASE(AArch64ISD::SMINV_PRED)
2017 MAKE_CASE(AArch64ISD::UMINV_PRED)
2018 MAKE_CASE(AArch64ISD::ORV_PRED)
2019 MAKE_CASE(AArch64ISD::EORV_PRED)
2020 MAKE_CASE(AArch64ISD::ANDV_PRED)
2021 MAKE_CASE(AArch64ISD::CLASTA_N)
2022 MAKE_CASE(AArch64ISD::CLASTB_N)
2023 MAKE_CASE(AArch64ISD::LASTA)
2024 MAKE_CASE(AArch64ISD::LASTB)
2025 MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
2026 MAKE_CASE(AArch64ISD::TBL)
2027 MAKE_CASE(AArch64ISD::FADD_PRED)
2028 MAKE_CASE(AArch64ISD::FADDA_PRED)
2029 MAKE_CASE(AArch64ISD::FADDV_PRED)
2030 MAKE_CASE(AArch64ISD::FDIV_PRED)
2031 MAKE_CASE(AArch64ISD::FMA_PRED)
2032 MAKE_CASE(AArch64ISD::FMAX_PRED)
2033 MAKE_CASE(AArch64ISD::FMAXV_PRED)
2034 MAKE_CASE(AArch64ISD::FMAXNM_PRED)
2035 MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
2036 MAKE_CASE(AArch64ISD::FMIN_PRED)
2037 MAKE_CASE(AArch64ISD::FMINV_PRED)
2038 MAKE_CASE(AArch64ISD::FMINNM_PRED)
2039 MAKE_CASE(AArch64ISD::FMINNMV_PRED)
2040 MAKE_CASE(AArch64ISD::FMUL_PRED)
2041 MAKE_CASE(AArch64ISD::FSUB_PRED)
2042 MAKE_CASE(AArch64ISD::BIC)
2043 MAKE_CASE(AArch64ISD::BIT)
2044 MAKE_CASE(AArch64ISD::CBZ)
2045 MAKE_CASE(AArch64ISD::CBNZ)
2046 MAKE_CASE(AArch64ISD::TBZ)
2047 MAKE_CASE(AArch64ISD::TBNZ)
2048 MAKE_CASE(AArch64ISD::TC_RETURN)
2049 MAKE_CASE(AArch64ISD::PREFETCH)
2050 MAKE_CASE(AArch64ISD::SITOF)
2051 MAKE_CASE(AArch64ISD::UITOF)
2052 MAKE_CASE(AArch64ISD::NVCAST)
2053 MAKE_CASE(AArch64ISD::MRS)
2054 MAKE_CASE(AArch64ISD::SQSHL_I)
2055 MAKE_CASE(AArch64ISD::UQSHL_I)
2056 MAKE_CASE(AArch64ISD::SRSHR_I)
2057 MAKE_CASE(AArch64ISD::URSHR_I)
2058 MAKE_CASE(AArch64ISD::SQSHLU_I)
2059 MAKE_CASE(AArch64ISD::WrapperLarge)
2060 MAKE_CASE(AArch64ISD::LD2post)
2061 MAKE_CASE(AArch64ISD::LD3post)
2062 MAKE_CASE(AArch64ISD::LD4post)
2063 MAKE_CASE(AArch64ISD::ST2post)
2064 MAKE_CASE(AArch64ISD::ST3post)
2065 MAKE_CASE(AArch64ISD::ST4post)
2066 MAKE_CASE(AArch64ISD::LD1x2post)
2067 MAKE_CASE(AArch64ISD::LD1x3post)
2068 MAKE_CASE(AArch64ISD::LD1x4post)
2069 MAKE_CASE(AArch64ISD::ST1x2post)
2070 MAKE_CASE(AArch64ISD::ST1x3post)
2071 MAKE_CASE(AArch64ISD::ST1x4post)
2072 MAKE_CASE(AArch64ISD::LD1DUPpost)
2073 MAKE_CASE(AArch64ISD::LD2DUPpost)
2074 MAKE_CASE(AArch64ISD::LD3DUPpost)
2075 MAKE_CASE(AArch64ISD::LD4DUPpost)
2076 MAKE_CASE(AArch64ISD::LD1LANEpost)
2077 MAKE_CASE(AArch64ISD::LD2LANEpost)
2078 MAKE_CASE(AArch64ISD::LD3LANEpost)
2079 MAKE_CASE(AArch64ISD::LD4LANEpost)
2080 MAKE_CASE(AArch64ISD::ST2LANEpost)
2081 MAKE_CASE(AArch64ISD::ST3LANEpost)
2082 MAKE_CASE(AArch64ISD::ST4LANEpost)
2083 MAKE_CASE(AArch64ISD::SMULL)
2084 MAKE_CASE(AArch64ISD::UMULL)
2085 MAKE_CASE(AArch64ISD::FRECPE)
2086 MAKE_CASE(AArch64ISD::FRECPS)
2087 MAKE_CASE(AArch64ISD::FRSQRTE)
2088 MAKE_CASE(AArch64ISD::FRSQRTS)
2089 MAKE_CASE(AArch64ISD::STG)
2090 MAKE_CASE(AArch64ISD::STZG)
2091 MAKE_CASE(AArch64ISD::ST2G)
2092 MAKE_CASE(AArch64ISD::STZ2G)
2093 MAKE_CASE(AArch64ISD::SUNPKHI)
2094 MAKE_CASE(AArch64ISD::SUNPKLO)
2095 MAKE_CASE(AArch64ISD::UUNPKHI)
2096 MAKE_CASE(AArch64ISD::UUNPKLO)
2097 MAKE_CASE(AArch64ISD::INSR)
2098 MAKE_CASE(AArch64ISD::PTEST)
2099 MAKE_CASE(AArch64ISD::PTRUE)
2100 MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
2101 MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
2102 MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
2103 MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
2104 MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
2105 MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
2106 MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
2107 MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
2108 MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
2109 MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
2110 MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
2111 MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
2112 MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
2113 MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
2114 MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
2115 MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
2116 MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
2117 MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
2118 MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
2119 MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
2120 MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
2121 MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
2122 MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
2123 MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
2124 MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
2125 MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
2126 MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
2127 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
2128 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
2129 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
2130 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
2131 MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
2132 MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
2133 MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
2134 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
2135 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
2136 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
2137 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
2138 MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
2139 MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
2140 MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
2141 MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
2142 MAKE_CASE(AArch64ISD::ST1_PRED)
2143 MAKE_CASE(AArch64ISD::SST1_PRED)
2144 MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
2145 MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
2146 MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
2147 MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
2148 MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
2149 MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
2150 MAKE_CASE(AArch64ISD::SSTNT1_PRED)
2151 MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
2152 MAKE_CASE(AArch64ISD::LDP)
2153 MAKE_CASE(AArch64ISD::STP)
2154 MAKE_CASE(AArch64ISD::STNP)
2155 MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
2156 MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
2157 MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
2158 MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
2159 MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
2160 MAKE_CASE(AArch64ISD::INDEX_VECTOR)
2161 MAKE_CASE(AArch64ISD::UADDLP)
2162 MAKE_CASE(AArch64ISD::CALL_RVMARKER)
2163 }
2164#undef MAKE_CASE
2165 return nullptr;
2166}
2167
2168MachineBasicBlock *
2169AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
2170 MachineBasicBlock *MBB) const {
2171 // We materialise the F128CSEL pseudo-instruction as some control flow and a
2172 // phi node:
2173
2174 // OrigBB:
2175 // [... previous instrs leading to comparison ...]
2176 // b.ne TrueBB
2177 // b EndBB
2178 // TrueBB:
2179 // ; Fallthrough
2180 // EndBB:
2181 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
2182
2183 MachineFunction *MF = MBB->getParent();
2184 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2185 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2186 DebugLoc DL = MI.getDebugLoc();
2187 MachineFunction::iterator It = ++MBB->getIterator();
2188
2189 Register DestReg = MI.getOperand(0).getReg();
2190 Register IfTrueReg = MI.getOperand(1).getReg();
2191 Register IfFalseReg = MI.getOperand(2).getReg();
2192 unsigned CondCode = MI.getOperand(3).getImm();
2193 bool NZCVKilled = MI.getOperand(4).isKill();
2194
2195 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
2196 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
2197 MF->insert(It, TrueBB);
2198 MF->insert(It, EndBB);
2199
2200 // Transfer rest of current basic-block to EndBB
2201 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
2202 MBB->end());
2203 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
2204
2205 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
2206 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
2207 MBB->addSuccessor(TrueBB);
2208 MBB->addSuccessor(EndBB);
2209
2210 // TrueBB falls through to the end.
2211 TrueBB->addSuccessor(EndBB);
2212
2213 if (!NZCVKilled) {
2214 TrueBB->addLiveIn(AArch64::NZCV);
2215 EndBB->addLiveIn(AArch64::NZCV);
2216 }
2217
2218 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
2219 .addReg(IfTrueReg)
2220 .addMBB(TrueBB)
2221 .addReg(IfFalseReg)
2222 .addMBB(MBB);
2223
2224 MI.eraseFromParent();
2225 return EndBB;
2226}
2227
2228MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
2229 MachineInstr &MI, MachineBasicBlock *BB) const {
2230 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
2231            BB->getParent()->getFunction().getPersonalityFn())) &&
2232        "SEH does not use catchret!");
2233 return BB;
2234}
2235
2236MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
2237 MachineInstr &MI, MachineBasicBlock *BB) const {
2238 switch (MI.getOpcode()) {
2239 default:
2240#ifndef NDEBUG
2241 MI.dump();
2242#endif
2243 llvm_unreachable("Unexpected instruction for custom inserter!")::llvm::llvm_unreachable_internal("Unexpected instruction for custom inserter!"
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2243)
;
2244
2245 case AArch64::F128CSEL:
2246 return EmitF128CSEL(MI, BB);
2247
2248 case TargetOpcode::STACKMAP:
2249 case TargetOpcode::PATCHPOINT:
2250 case TargetOpcode::STATEPOINT:
2251 return emitPatchPoint(MI, BB);
2252
2253 case AArch64::CATCHRET:
2254 return EmitLoweredCatchRet(MI, BB);
2255 }
2256}
2257
2258//===----------------------------------------------------------------------===//
2259// AArch64 Lowering private implementation.
2260//===----------------------------------------------------------------------===//
2261
2262//===----------------------------------------------------------------------===//
2263// Lowering Code
2264//===----------------------------------------------------------------------===//
2265
2266// Forward declarations of SVE fixed length lowering helpers
2267static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT);
2268static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
2269static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
2270static SDValue convertFixedMaskToScalableVector(SDValue Mask,
2271 SelectionDAG &DAG);
2272
2273/// isZerosVector - Check whether SDNode N is a zero-filled vector.
2274static bool isZerosVector(const SDNode *N) {
2275 // Look through a bit convert.
2276 while (N->getOpcode() == ISD::BITCAST)
2277 N = N->getOperand(0).getNode();
2278
2279 if (ISD::isConstantSplatVectorAllZeros(N))
2280 return true;
2281
2282 if (N->getOpcode() != AArch64ISD::DUP)
2283 return false;
2284
2285 auto Opnd0 = N->getOperand(0);
2286 auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
2287 auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
2288 return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero());
2289}
2290
2291/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
2292/// CC
2293static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
2294 switch (CC) {
2295 default:
2296 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2296)
;
2297 case ISD::SETNE:
2298 return AArch64CC::NE;
2299 case ISD::SETEQ:
2300 return AArch64CC::EQ;
2301 case ISD::SETGT:
2302 return AArch64CC::GT;
2303 case ISD::SETGE:
2304 return AArch64CC::GE;
2305 case ISD::SETLT:
2306 return AArch64CC::LT;
2307 case ISD::SETLE:
2308 return AArch64CC::LE;
2309 case ISD::SETUGT:
2310 return AArch64CC::HI;
2311 case ISD::SETUGE:
2312 return AArch64CC::HS;
2313 case ISD::SETULT:
2314 return AArch64CC::LO;
2315 case ISD::SETULE:
2316 return AArch64CC::LS;
2317 }
2318}
2319
2320/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
2321static void changeFPCCToAArch64CC(ISD::CondCode CC,
2322 AArch64CC::CondCode &CondCode,
2323 AArch64CC::CondCode &CondCode2) {
2324 CondCode2 = AArch64CC::AL;
2325 switch (CC) {
2326 default:
2327 llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2327)
;
2328 case ISD::SETEQ:
2329 case ISD::SETOEQ:
2330 CondCode = AArch64CC::EQ;
2331 break;
2332 case ISD::SETGT:
2333 case ISD::SETOGT:
2334 CondCode = AArch64CC::GT;
2335 break;
2336 case ISD::SETGE:
2337 case ISD::SETOGE:
2338 CondCode = AArch64CC::GE;
2339 break;
2340 case ISD::SETOLT:
2341 CondCode = AArch64CC::MI;
2342 break;
2343 case ISD::SETOLE:
2344 CondCode = AArch64CC::LS;
2345 break;
2346 case ISD::SETONE:
2347 CondCode = AArch64CC::MI;
2348 CondCode2 = AArch64CC::GT;
2349 break;
2350 case ISD::SETO:
2351 CondCode = AArch64CC::VC;
2352 break;
2353 case ISD::SETUO:
2354 CondCode = AArch64CC::VS;
2355 break;
2356 case ISD::SETUEQ:
2357 CondCode = AArch64CC::EQ;
2358 CondCode2 = AArch64CC::VS;
2359 break;
2360 case ISD::SETUGT:
2361 CondCode = AArch64CC::HI;
2362 break;
2363 case ISD::SETUGE:
2364 CondCode = AArch64CC::PL;
2365 break;
2366 case ISD::SETLT:
2367 case ISD::SETULT:
2368 CondCode = AArch64CC::LT;
2369 break;
2370 case ISD::SETLE:
2371 case ISD::SETULE:
2372 CondCode = AArch64CC::LE;
2373 break;
2374 case ISD::SETNE:
2375 case ISD::SETUNE:
2376 CondCode = AArch64CC::NE;
2377 break;
2378 }
2379}
2380
2381/// Convert a DAG fp condition code to an AArch64 CC.
2382/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
2383/// should be AND'ed instead of OR'ed.
2384static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
2385 AArch64CC::CondCode &CondCode,
2386 AArch64CC::CondCode &CondCode2) {
2387 CondCode2 = AArch64CC::AL;
2388 switch (CC) {
2389 default:
2390 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2391 assert(CondCode2 == AArch64CC::AL);
2392 break;
2393 case ISD::SETONE:
2394 // (a one b)
2395 // == ((a olt b) || (a ogt b))
2396 // == ((a ord b) && (a une b))
2397 CondCode = AArch64CC::VC;
2398 CondCode2 = AArch64CC::NE;
2399 break;
2400 case ISD::SETUEQ:
2401 // (a ueq b)
2402 // == ((a uno b) || (a oeq b))
2403 // == ((a ule b) && (a uge b))
2404 CondCode = AArch64CC::PL;
2405 CondCode2 = AArch64CC::LE;
2406 break;
2407 }
2408}
2409
2410/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
2411/// CC usable with the vector instructions. Fewer operations are available
2412/// without a real NZCV register, so we have to use less efficient combinations
2413/// to get the same effect.
2414static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
2415 AArch64CC::CondCode &CondCode,
2416 AArch64CC::CondCode &CondCode2,
2417 bool &Invert) {
2418 Invert = false;
2419 switch (CC) {
2420 default:
2421 // Mostly the scalar mappings work fine.
2422 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2423 break;
2424 case ISD::SETUO:
2425 Invert = true;
2426 LLVM_FALLTHROUGH;
2427 case ISD::SETO:
2428 CondCode = AArch64CC::MI;
2429 CondCode2 = AArch64CC::GE;
2430 break;
2431 case ISD::SETUEQ:
2432 case ISD::SETULT:
2433 case ISD::SETULE:
2434 case ISD::SETUGT:
2435 case ISD::SETUGE:
2436 // All of the compare-mask comparisons are ordered, but we can switch
2437 // between the two by a double inversion. E.g. ULE == !OGT.
2438 Invert = true;
2439 changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
2440 CondCode, CondCode2);
2441 break;
2442 }
2443}
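// Worked example (added; not in the original source): for ISD::SETULT, Invert
// is set and the FP-inverse condition SETOGE is mapped, giving AArch64CC::GE;
// the unordered less-than mask is then produced as the bitwise NOT of the
// ordered greater-than-or-equal compare mask.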
2444
2445static bool isLegalArithImmed(uint64_t C) {
2446 // Matches AArch64DAGToDAGISel::SelectArithImmed().
2447 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
2448 LLVM_DEBUG(dbgs() << "Is imm " << C
2449                   << " legal: " << (IsLegal ? "yes\n" : "no\n"));
2450 return IsLegal;
2451}
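// A few worked examples of the check above (informal; the authoritative rule
// is the expression in isLegalArithImmed itself). A legal arithmetic
// immediate is either a plain 12-bit value or a 12-bit value shifted left by
// 12:
//   C = 0xFFF      -> C >> 12 == 0                     -> legal (plain 12-bit)
//   C = 0x1000     -> low 12 bits zero, C >> 24 == 0   -> legal (shifted form)
//   C = 0x1001     -> fails both tests                 -> not legal
//   C = 0xFFF000   -> low 12 bits zero, C >> 24 == 0   -> legal (shifted form)
//   C = 0x1000000  -> C >> 24 != 0                     -> not legal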
2452
2453 // Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
2454 // the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
2455// can be set differently by this operation. It comes down to whether
2456// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
2457// everything is fine. If not then the optimization is wrong. Thus general
2458// comparisons are only valid if op2 != 0.
2459//
2460// So, finally, the only LLVM-native comparisons that don't mention C and V
2461// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
2462// the absence of information about op2.
2463static bool isCMN(SDValue Op, ISD::CondCode CC) {
2464 return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
2465 (CC == ISD::SETEQ || CC == ISD::SETNE);
2466}
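// For illustration (assuming standard A64 flag semantics): with CC == SETEQ,
//   (CMP x, (sub 0, y))  tests  x - (0 - y) == 0,  i.e.  x + y == 0,
// which is exactly what (CMN x, y), an alias of ADDS with a zero destination,
// reports through the Z flag. A signed condition such as SETLT also reads N
// and V, which the SUBS and ADDS forms may set differently (e.g. when op2 is
// the most negative value), hence the restriction to SETEQ/SETNE above.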
2467
2468static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
2469 SelectionDAG &DAG, SDValue Chain,
2470 bool IsSignaling) {
2471 EVT VT = LHS.getValueType();
2472 assert(VT != MVT::f128);
2473 assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
2474 unsigned Opcode =
2475 IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
2476 return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
2477}
2478
2479static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2480 const SDLoc &dl, SelectionDAG &DAG) {
2481 EVT VT = LHS.getValueType();
2482 const bool FullFP16 =
2483 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2484
2485 if (VT.isFloatingPoint()) {
2486 assert(VT != MVT::f128);
2487 if (VT == MVT::f16 && !FullFP16) {
2488 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
2489 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
2490 VT = MVT::f32;
2491 }
2492 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
2493 }
2494
2495 // The CMP instruction is just an alias for SUBS, and representing it as
2496 // SUBS means that it's possible to get CSE with subtract operations.
2497 // A later phase can perform the optimization of setting the destination
2498 // register to WZR/XZR if it ends up being unused.
2499 unsigned Opcode = AArch64ISD::SUBS;
2500
2501 if (isCMN(RHS, CC)) {
2502 // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
2503 Opcode = AArch64ISD::ADDS;
2504 RHS = RHS.getOperand(1);
2505 } else if (isCMN(LHS, CC)) {
2506 // As we are looking for EQ/NE compares, the operands can be commuted; can
2507 // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
2508 Opcode = AArch64ISD::ADDS;
2509 LHS = LHS.getOperand(1);
2510 } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
2511 if (LHS.getOpcode() == ISD::AND) {
2512 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
2513 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
2514 // of the signed comparisons.
2515 const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
2516 DAG.getVTList(VT, MVT_CC),
2517 LHS.getOperand(0),
2518 LHS.getOperand(1));
2519 // Replace all users of (and X, Y) with newly generated (ands X, Y)
2520 DAG.ReplaceAllUsesWith(LHS, ANDSNode);
2521 return ANDSNode.getValue(1);
2522 } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
2523 // Use result of ANDS
2524 return LHS.getValue(1);
2525 }
2526 }
2527
2528 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
2529 .getValue(1);
2530}
2531
2532/// \defgroup AArch64CCMP CMP;CCMP matching
2533///
2534/// These functions deal with the formation of CMP;CCMP;... sequences.
2535/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
2536/// a comparison. They set the NZCV flags to a predefined value if their
2537/// predicate is false. This allows us to express arbitrary conjunctions, for
2538/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
2539/// expressed as:
2540/// cmp A
2541/// ccmp B, inv(CB), CA
2542/// check for CB flags
2543///
2544/// This naturally lets us implement chains of AND operations with SETCC
2545/// operands. And we can even implement some other situations by transforming
2546/// them:
2547/// - We can implement (NEG SETCC), i.e. negating a single comparison, by
2548/// negating the flags used in the CCMP/FCCMP operation.
2549/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
2550/// by negating the flags we test for afterwards. i.e.
2551/// NEG (CMP CCMP CCCMP ...) can be implemented.
2552/// - Note that we can only ever negate all previously processed results.
2553/// What we can not implement by flipping the flags to test is a negation
2554/// of two sub-trees (because the negation affects all sub-trees emitted so
2555/// far, so the 2nd sub-tree we emit would also affect the first).
2556/// With those tools we can implement some OR operations:
2557/// - (OR (SETCC A) (SETCC B)) can be implemented via:
2558/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
2559/// - After transforming OR to NEG/AND combinations we may be able to use NEG
2560/// elimination rules from earlier to implement the whole thing as a
2561/// CCMP/FCCMP chain.
2562///
2563/// As complete example:
2564/// or (or (setCA (cmp A)) (setCB (cmp B)))
2565/// (and (setCC (cmp C)) (setCD (cmp D)))"
2566/// can be reassociated to:
2567/// or (and (setCC (cmp C)) (setCD (cmp D)))
2568/// (or (setCA (cmp A)) (setCB (cmp B)))
2569/// can be transformed to:
2570/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
2571/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
2572/// which can be implemented as:
2573/// cmp C
2574/// ccmp D, inv(CD), CC
2575/// ccmp A, CA, inv(CD)
2576/// ccmp B, CB, inv(CA)
2577/// check for CB flags
2578///
2579/// A counterexample is "or (and A B) (and C D)" which translates to
2580/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), we
2581/// can only implement 1 of the inner (not) operations, but not both!
2582/// @{
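// A concrete sketch of the idea above (illustrative only, not literal output
// of this file): for the i1 value (and (setcc eq a, 0) (setcc eq b, 5)) the
// emitted sequence looks roughly like
//   cmp  w0, #0              // flags for a == 0
//   ccmp w1, #5, #<nzcv>, eq // if eq: flags for b == 5; otherwise NZCV is
//                            // set to <nzcv>, a value chosen to fail the
//                            // final EQ test (the NZCV that satisfies the
//                            // inverted condition)
//   cset w0, eq              // 1 iff both comparisons held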
2583
2584/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
2585static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
2586 ISD::CondCode CC, SDValue CCOp,
2587 AArch64CC::CondCode Predicate,
2588 AArch64CC::CondCode OutCC,
2589 const SDLoc &DL, SelectionDAG &DAG) {
2590 unsigned Opcode = 0;
2591 const bool FullFP16 =
2592 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2593
2594 if (LHS.getValueType().isFloatingPoint()) {
2595 assert(LHS.getValueType() != MVT::f128);
2596 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
2597 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
2598 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
2599 }
2600 Opcode = AArch64ISD::FCCMP;
2601 } else if (RHS.getOpcode() == ISD::SUB) {
2602 SDValue SubOp0 = RHS.getOperand(0);
2603 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2604 // See emitComparison() on why we can only do this for SETEQ and SETNE.
2605 Opcode = AArch64ISD::CCMN;
2606 RHS = RHS.getOperand(1);
2607 }
2608 }
2609 if (Opcode == 0)
2610 Opcode = AArch64ISD::CCMP;
2611
2612 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
2613 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
2614 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
2615 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
2616 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
2617}
2618
2619/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
2620/// expressed as a conjunction. See \ref AArch64CCMP.
2621/// \param CanNegate Set to true if we can negate the whole sub-tree just by
2622/// changing the conditions on the SETCC tests.
2623/// (this means we can call emitConjunctionRec() with
2624/// Negate==true on this sub-tree)
2625/// \param MustBeFirst Set to true if this subtree needs to be negated and we
2626/// cannot do the negation naturally. We are required to
2627/// emit the subtree first in this case.
2628/// \param WillNegate Is true if we are called when the result of this
2629/// subexpression must be negated. This happens when the
2630/// outer expression is an OR. We can use this fact to know
2631/// that we have a double negation (or (or ...) ...) that
2632/// can be implemented for free.
2633static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
2634 bool &MustBeFirst, bool WillNegate,
2635 unsigned Depth = 0) {
2636 if (!Val.hasOneUse())
2637 return false;
2638 unsigned Opcode = Val->getOpcode();
2639 if (Opcode == ISD::SETCC) {
2640 if (Val->getOperand(0).getValueType() == MVT::f128)
2641 return false;
2642 CanNegate = true;
2643 MustBeFirst = false;
2644 return true;
2645 }
2646 // Protect against exponential runtime and stack overflow.
2647 if (Depth > 6)
2648 return false;
2649 if (Opcode == ISD::AND || Opcode == ISD::OR) {
2650 bool IsOR = Opcode == ISD::OR;
2651 SDValue O0 = Val->getOperand(0);
2652 SDValue O1 = Val->getOperand(1);
2653 bool CanNegateL;
2654 bool MustBeFirstL;
2655 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
2656 return false;
2657 bool CanNegateR;
2658 bool MustBeFirstR;
2659 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
2660 return false;
2661
2662 if (MustBeFirstL && MustBeFirstR)
2663 return false;
2664
2665 if (IsOR) {
2666 // For an OR expression we need to be able to naturally negate at least
2667 // one side or we cannot do the transformation at all.
2668 if (!CanNegateL && !CanNegateR)
2669 return false;
2670 // If the result of the OR will be negated and we can naturally negate
2671 // the leaves, then this sub-tree as a whole negates naturally.
2672 CanNegate = WillNegate && CanNegateL && CanNegateR;
2673 // If we cannot naturally negate the whole sub-tree, then this must be
2674 // emitted first.
2675 MustBeFirst = !CanNegate;
2676 } else {
2677 assert(Opcode == ISD::AND && "Must be OR or AND");
2678 // We cannot naturally negate an AND operation.
2679 CanNegate = false;
2680 MustBeFirst = MustBeFirstL || MustBeFirstR;
2681 }
2682 return true;
2683 }
2684 return false;
2685}
2686
2687/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
2688/// of CCMP/FCCMP ops. See @ref AArch64CCMP.
2689/// Tries to transform the given i1 producing node @p Val to a series of compare
2690/// and conditional compare operations. @returns an NZCV flags producing node
2691/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
2692/// transformation was not possible.
2693/// \p Negate is true if we want this sub-tree to be negated just by changing
2694/// SETCC conditions.
2695static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
2696 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
2697 AArch64CC::CondCode Predicate) {
2698 // We're at a tree leaf, produce a conditional comparison operation.
2699 unsigned Opcode = Val->getOpcode();
2700 if (Opcode == ISD::SETCC) {
2701 SDValue LHS = Val->getOperand(0);
2702 SDValue RHS = Val->getOperand(1);
2703 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
2704 bool isInteger = LHS.getValueType().isInteger();
2705 if (Negate)
2706 CC = getSetCCInverse(CC, LHS.getValueType());
2707 SDLoc DL(Val);
2708 // Determine OutCC and handle FP special case.
2709 if (isInteger) {
2710 OutCC = changeIntCCToAArch64CC(CC);
2711 } else {
2712 assert(LHS.getValueType().isFloatingPoint());
2713 AArch64CC::CondCode ExtraCC;
2714 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
2715 // Some floating point conditions can't be tested with a single condition
2716 // code. Construct an additional comparison in this case.
2717 if (ExtraCC != AArch64CC::AL) {
2718 SDValue ExtraCmp;
2719 if (!CCOp.getNode())
2720 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
2721 else
2722 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
2723 ExtraCC, DL, DAG);
2724 CCOp = ExtraCmp;
2725 Predicate = ExtraCC;
2726 }
2727 }
2728
2729 // Produce a normal comparison if we are first in the chain
2730 if (!CCOp)
2731 return emitComparison(LHS, RHS, CC, DL, DAG);
2732 // Otherwise produce a ccmp.
2733 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
2734 DAG);
2735 }
2736 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
2737
2738 bool IsOR = Opcode == ISD::OR;
2739
2740 SDValue LHS = Val->getOperand(0);
2741 bool CanNegateL;
2742 bool MustBeFirstL;
2743 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
2744 assert(ValidL && "Valid conjunction/disjunction tree");
2745 (void)ValidL;
2746
2747 SDValue RHS = Val->getOperand(1);
2748 bool CanNegateR;
2749 bool MustBeFirstR;
2750 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
2751 assert(ValidR && "Valid conjunction/disjunction tree");
2752 (void)ValidR;
2753
2754 // Swap sub-tree that must come first to the right side.
2755 if (MustBeFirstL) {
2756 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
2757 std::swap(LHS, RHS);
2758 std::swap(CanNegateL, CanNegateR);
2759 std::swap(MustBeFirstL, MustBeFirstR);
2760 }
2761
2762 bool NegateR;
2763 bool NegateAfterR;
2764 bool NegateL;
2765 bool NegateAfterAll;
2766 if (Opcode == ISD::OR) {
2767 // Swap the sub-tree that we can negate naturally to the left.
2768 if (!CanNegateL) {
2769 assert(CanNegateR && "at least one side must be negatable");
2770 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
2771 assert(!Negate);
2772 std::swap(LHS, RHS);
2773 NegateR = false;
2774 NegateAfterR = true;
2775 } else {
2776 // Negate the left sub-tree if possible, otherwise negate the result.
2777 NegateR = CanNegateR;
2778 NegateAfterR = !CanNegateR;
2779 }
2780 NegateL = true;
2781 NegateAfterAll = !Negate;
2782 } else {
2783 assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
2784 assert(!Negate && "Valid conjunction/disjunction tree");
2785
2786 NegateL = false;
2787 NegateR = false;
2788 NegateAfterR = false;
2789 NegateAfterAll = false;
2790 }
2791
2792 // Emit sub-trees.
2793 AArch64CC::CondCode RHSCC;
2794 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
2795 if (NegateAfterR)
2796 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
2797 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
2798 if (NegateAfterAll)
2799 OutCC = AArch64CC::getInvertedCondCode(OutCC);
2800 return CmpL;
2801}
2802
2803/// Emit expression as a conjunction (a series of CCMP/FCCMP ops).
2804/// In some cases this is even possible with OR operations in the expression.
2805/// See \ref AArch64CCMP.
2806/// \see emitConjunctionRec().
2807static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
2808 AArch64CC::CondCode &OutCC) {
2809 bool DummyCanNegate;
2810 bool DummyMustBeFirst;
2811 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
2812 return SDValue();
2813
2814 return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
2815}
2816
2817/// @}
2818
2819/// Returns how profitable it is to fold a comparison's operand's shift and/or
2820/// extension operations.
2821static unsigned getCmpOperandFoldingProfit(SDValue Op) {
2822 auto isSupportedExtend = [&](SDValue V) {
2823 if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
2824 return true;
2825
2826 if (V.getOpcode() == ISD::AND)
2827 if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
2828 uint64_t Mask = MaskCst->getZExtValue();
2829 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
2830 }
2831
2832 return false;
2833 };
2834
2835 if (!Op.hasOneUse())
2836 return 0;
2837
2838 if (isSupportedExtend(Op))
2839 return 1;
2840
2841 unsigned Opc = Op.getOpcode();
2842 if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
2843 if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2844 uint64_t Shift = ShiftCst->getZExtValue();
2845 if (isSupportedExtend(Op.getOperand(0)))
2846 return (Shift <= 4) ? 2 : 1;
2847 EVT VT = Op.getValueType();
2848 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
2849 return 1;
2850 }
2851
2852 return 0;
2853}
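// Rough intuition for the weights above (informal reading of the code): 2 is
// returned for a supported extend followed by a constant shift of at most 4,
// 1 for a bare supported extend or any legal constant shift, and 0 when
// nothing useful would fold, presumably because the first shape maps onto the
// shifted/extended-register compare forms. The caller below only swaps the
// operands when the LHS scores strictly higher than the RHS.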
2854
2855static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2856 SDValue &AArch64cc, SelectionDAG &DAG,
2857 const SDLoc &dl) {
2858 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2859 EVT VT = RHS.getValueType();
2860 uint64_t C = RHSC->getZExtValue();
2861 if (!isLegalArithImmed(C)) {
2862 // Constant does not fit, try adjusting it by one?
2863 switch (CC) {
2864 default:
2865 break;
2866 case ISD::SETLT:
2867 case ISD::SETGE:
2868 if ((VT == MVT::i32 && C != 0x80000000 &&
2869 isLegalArithImmed((uint32_t)(C - 1))) ||
2870 (VT == MVT::i64 && C != 0x80000000ULL &&
2871 isLegalArithImmed(C - 1ULL))) {
2872 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2873 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2874 RHS = DAG.getConstant(C, dl, VT);
2875 }
2876 break;
2877 case ISD::SETULT:
2878 case ISD::SETUGE:
2879 if ((VT == MVT::i32 && C != 0 &&
2880 isLegalArithImmed((uint32_t)(C - 1))) ||
2881 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
2882 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2883 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2884 RHS = DAG.getConstant(C, dl, VT);
2885 }
2886 break;
2887 case ISD::SETLE:
2888 case ISD::SETGT:
2889 if ((VT == MVT::i32 && C != INT32_MAX &&
2890 isLegalArithImmed((uint32_t)(C + 1))) ||
2891 (VT == MVT::i64 && C != INT64_MAX &&
2892 isLegalArithImmed(C + 1ULL))) {
2893 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2894 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2895 RHS = DAG.getConstant(C, dl, VT);
2896 }
2897 break;
2898 case ISD::SETULE:
2899 case ISD::SETUGT:
2900 if ((VT == MVT::i32 && C != UINT32_MAX &&
2901 isLegalArithImmed((uint32_t)(C + 1))) ||
2902 (VT == MVT::i64 && C != UINT64_MAX &&
2903 isLegalArithImmed(C + 1ULL))) {
2904 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2905 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2906 RHS = DAG.getConstant(C, dl, VT);
2907 }
2908 break;
2909 }
2910 }
2911 }
2912
2913 // Comparisons are canonicalized so that the RHS operand is simpler than the
2914 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
2915 // can fold some shift+extend operations on the RHS operand, so swap the
2916 // operands if that can be done.
2917 //
2918 // For example:
2919 // lsl w13, w11, #1
2920 // cmp w13, w12
2921 // can be turned into:
2922 // cmp w12, w11, lsl #1
2923 if (!isa<ConstantSDNode>(RHS) ||
2924 !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
2925 SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
2926
2927 if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
2928 std::swap(LHS, RHS);
2929 CC = ISD::getSetCCSwappedOperands(CC);
2930 }
2931 }
2932
2933 SDValue Cmp;
2934 AArch64CC::CondCode AArch64CC;
2935 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
2936 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
2937
2938 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
2939 // For the i8 operand, the largest immediate is 255, so this can be easily
2940 // encoded in the compare instruction. For the i16 operand, however, the
2941 // largest immediate cannot be encoded in the compare.
2942 // Therefore, use a sign extending load and cmn to avoid materializing the
2943 // -1 constant. For example,
2944 // movz w1, #65535
2945 // ldrh w0, [x0, #0]
2946 // cmp w0, w1
2947 // >
2948 // ldrsh w0, [x0, #0]
2949 // cmn w0, #1
2950 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
2951 // if and only if (sext LHS) == (sext RHS). The checks are in place to
2952 // ensure both the LHS and RHS are truly zero extended and to make sure the
2953 // transformation is profitable.
2954 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
2955 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
2956 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
2957 LHS.getNode()->hasNUsesOfValue(1, 0)) {
2958 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
2959 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
2960 SDValue SExt =
2961 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
2962 DAG.getValueType(MVT::i16));
2963 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
2964 RHS.getValueType()),
2965 CC, dl, DAG);
2966 AArch64CC = changeIntCCToAArch64CC(CC);
2967 }
2968 }
2969
2970 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
2971 if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
2972 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
2973 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
2974 }
2975 }
2976 }
2977
2978 if (!Cmp) {
2979 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
2980 AArch64CC = changeIntCCToAArch64CC(CC);
2981 }
2982 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
2983 return Cmp;
2984}
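// Illustrative example of the constant adjustment performed above (a sketch,
// not an exhaustive description): for "x s< 4097", the constant 4097 is not a
// legal arithmetic immediate, but 4096 is (it is a 12-bit value shifted left
// by 12), so the comparison is rewritten as "x s<= 4096" and can be emitted
// as a single "cmp x, #1, lsl #12" with condition LE instead of first
// materializing 4097 in a register.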
2985
2986static std::pair<SDValue, SDValue>
2987getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
2988 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
2989        "Unsupported value type");
2990 SDValue Value, Overflow;
2991 SDLoc DL(Op);
2992 SDValue LHS = Op.getOperand(0);
2993 SDValue RHS = Op.getOperand(1);
2994 unsigned Opc = 0;
2995 switch (Op.getOpcode()) {
2996 default:
2997 llvm_unreachable("Unknown overflow instruction!");
2998 case ISD::SADDO:
2999 Opc = AArch64ISD::ADDS;
3000 CC = AArch64CC::VS;
3001 break;
3002 case ISD::UADDO:
3003 Opc = AArch64ISD::ADDS;
3004 CC = AArch64CC::HS;
3005 break;
3006 case ISD::SSUBO:
3007 Opc = AArch64ISD::SUBS;
3008 CC = AArch64CC::VS;
3009 break;
3010 case ISD::USUBO:
3011 Opc = AArch64ISD::SUBS;
3012 CC = AArch64CC::LO;
3013 break;
3014 // Multiply needs a little bit extra work.
3015 case ISD::SMULO:
3016 case ISD::UMULO: {
3017 CC = AArch64CC::NE;
3018 bool IsSigned = Op.getOpcode() == ISD::SMULO;
3019 if (Op.getValueType() == MVT::i32) {
3020 // Extend to 64-bits, then perform a 64-bit multiply.
3021 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3022 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
3023 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
3024 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3025 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3026
3027 // Check that the result fits into a 32-bit integer.
3028 SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
3029 if (IsSigned) {
3030 // cmp xreg, wreg, sxtw
3031 SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
3032 Overflow =
3033 DAG.getNode(AArch64ISD::SUBS, DL, VTs, Mul, SExtMul).getValue(1);
3034 } else {
3035 // tst xreg, #0xffffffff00000000
3036 SDValue UpperBits = DAG.getConstant(0xFFFFFFFF00000000, DL, MVT::i64);
3037 Overflow =
3038 DAG.getNode(AArch64ISD::ANDS, DL, VTs, Mul, UpperBits).getValue(1);
3039 }
3040 break;
3041 }
3042 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
3043 // For the 64 bit multiply
3044 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3045 if (IsSigned) {
3046 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
3047 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
3048 DAG.getConstant(63, DL, MVT::i64));
3049 // It is important that LowerBits is last, otherwise the arithmetic
3050 // shift will not be folded into the compare (SUBS).
3051 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3052 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
3053 .getValue(1);
3054 } else {
3055 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
3056 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3057 Overflow =
3058 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
3059 DAG.getConstant(0, DL, MVT::i64),
3060 UpperBits).getValue(1);
3061 }
3062 break;
3063 }
3064 } // switch (...)
3065
3066 if (Opc) {
3067 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
3068
3069 // Emit the AArch64 operation with overflow check.
3070 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
3071 Overflow = Value.getValue(1);
3072 }
3073 return std::make_pair(Value, Overflow);
3074}
3075
3076SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
3077 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
3078 return LowerToScalableOp(Op, DAG);
3079
3080 SDValue Sel = Op.getOperand(0);
3081 SDValue Other = Op.getOperand(1);
3082 SDLoc dl(Sel);
3083
3084 // If the operand is an overflow checking operation, invert the condition
3085 // code and kill the Not operation. I.e., transform:
3086 // (xor (overflow_op_bool, 1))
3087 // -->
3088 // (csel 1, 0, invert(cc), overflow_op_bool)
3089 // ... which later gets transformed to just a cset instruction with an
3090 // inverted condition code, rather than a cset + eor sequence.
3091 if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
3092 // Only lower legal XALUO ops.
3093 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
3094 return SDValue();
3095
3096 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3097 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3098 AArch64CC::CondCode CC;
3099 SDValue Value, Overflow;
3100 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
3101 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3102 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
3103 CCVal, Overflow);
3104 }
3105 // If neither operand is a SELECT_CC, give up.
3106 if (Sel.getOpcode() != ISD::SELECT_CC)
3107 std::swap(Sel, Other);
3108 if (Sel.getOpcode() != ISD::SELECT_CC)
3109 return Op;
3110
3111 // The folding we want to perform is:
3112 // (xor x, (select_cc a, b, cc, 0, -1) )
3113 // -->
3114 // (csel x, (xor x, -1), cc ...)
3115 //
3116 // The latter will get matched to a CSINV instruction.
3117
3118 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
3119 SDValue LHS = Sel.getOperand(0);
3120 SDValue RHS = Sel.getOperand(1);
3121 SDValue TVal = Sel.getOperand(2);
3122 SDValue FVal = Sel.getOperand(3);
3123
3124 // FIXME: This could be generalized to non-integer comparisons.
3125 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
3126 return Op;
3127
3128 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
3129 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
3130
3131 // The values aren't constants, this isn't the pattern we're looking for.
3132 if (!CFVal || !CTVal)
3133 return Op;
3134
3135 // We can commute the SELECT_CC by inverting the condition. This
3136 // might be needed to make this fit into a CSINV pattern.
3137 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
3138 std::swap(TVal, FVal);
3139 std::swap(CTVal, CFVal);
3140 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
3141 }
3142
3143 // If the constants line up, perform the transform!
3144 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
3145 SDValue CCVal;
3146 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
3147
3148 FVal = Other;
3149 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
3150 DAG.getConstant(-1ULL, dl, Other.getValueType()));
3151
3152 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
3153 CCVal, Cmp);
3154 }
3155
3156 return Op;
3157}
3158
3159static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
3160 EVT VT = Op.getValueType();
3161
3162 // Let legalize expand this if it isn't a legal type yet.
3163 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3164 return SDValue();
3165
3166 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3167
3168 unsigned Opc;
3169 bool ExtraOp = false;
3170 switch (Op.getOpcode()) {
3171 default:
3172 llvm_unreachable("Invalid code");
3173 case ISD::ADDC:
3174 Opc = AArch64ISD::ADDS;
3175 break;
3176 case ISD::SUBC:
3177 Opc = AArch64ISD::SUBS;
3178 break;
3179 case ISD::ADDE:
3180 Opc = AArch64ISD::ADCS;
3181 ExtraOp = true;
3182 break;
3183 case ISD::SUBE:
3184 Opc = AArch64ISD::SBCS;
3185 ExtraOp = true;
3186 break;
3187 }
3188
3189 if (!ExtraOp)
3190 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
3191 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
3192 Op.getOperand(2));
3193}
3194
3195static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
3196 // Let legalize expand this if it isn't a legal type yet.
3197 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3198 return SDValue();
3199
3200 SDLoc dl(Op);
3201 AArch64CC::CondCode CC;
3202 // The actual operation that sets the overflow or carry flag.
3203 SDValue Value, Overflow;
3204 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
3205
3206 // We use 0 and 1 as false and true values.
3207 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3208 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3209
3210 // We use an inverted condition, because the conditional select is inverted
3211 // too. This will allow it to be selected to a single instruction:
3212 // CSINC Wd, WZR, WZR, invert(cond).
3213 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3214 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
3215 CCVal, Overflow);
3216
3217 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3218 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3219}
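// End-to-end sketch for a typical case (illustrative, assuming standard A64
// assembly): an i32 llvm.sadd.with.overflow lowers via getAArch64XALUOOp to
// ADDS plus the VS condition, and the inverted-condition CSEL built above is
// selected to a CSINC/CSET, so the whole intrinsic becomes roughly
//   adds w0, w0, w1
//   cset w1, vs
// with w0 as the value result and w1 as the overflow bit.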
3220
3221// Prefetch operands are:
3222// 1: Address to prefetch
3223// 2: bool isWrite
3224// 3: int locality (0 = no locality ... 3 = extreme locality)
3225// 4: bool isDataCache
3226static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
3227 SDLoc DL(Op);
3228 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3229 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
3230 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3231
3232 bool IsStream = !Locality;
3233 // When the locality number is set
3234 if (Locality) {
3235 // The front-end should have filtered out the out-of-range values
3236 assert(Locality <= 3 && "Prefetch locality out-of-range");
3237 // The locality degree is the opposite of the cache speed.
3238 // Put the number the other way around.
3239 // The encoding starts at 0 for level 1
3240 Locality = 3 - Locality;
3241 }
3242
3243 // Build the mask value encoding the expected behavior.
3244 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
3245 (!IsData << 3) | // IsDataCache bit
3246 (Locality << 1) | // Cache level bits
3247 (unsigned)IsStream; // Stream bit
3248 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
3249 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
3250}
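// Worked example of the PrfOp encoding built above (bit 4 load/store, bit 3
// data/instruction, bits 2:1 cache level, bit 0 keep/stream, matching the
// usual PRFM <prfop> operand): a read data prefetch with locality 3 gives
// IsWrite = 0, IsData = 1, IsStream = false and Locality = 3 - 3 = 0, so
// PrfOp = 0b00000, i.e. PLDL1KEEP. A write data prefetch with locality 0
// gives PrfOp = (1 << 4) | 1 = 0b10001, i.e. PSTL1STRM.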
3251
3252SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
3253 SelectionDAG &DAG) const {
3254 EVT VT = Op.getValueType();
3255 if (VT.isScalableVector())
3256 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
3257
3258 if (useSVEForFixedLengthVectorVT(VT))
3259 return LowerFixedLengthFPExtendToSVE(Op, DAG);
3260
3261 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
3262 return SDValue();
3263}
3264
3265SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
3266 SelectionDAG &DAG) const {
3267 if (Op.getValueType().isScalableVector())
3268 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
3269
3270 bool IsStrict = Op->isStrictFPOpcode();
3271 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3272 EVT SrcVT = SrcVal.getValueType();
3273
3274 if (useSVEForFixedLengthVectorVT(SrcVT))
3275 return LowerFixedLengthFPRoundToSVE(Op, DAG);
3276
3277 if (SrcVT != MVT::f128) {
3278 // Expand cases where the input is a vector bigger than NEON.
3279 if (useSVEForFixedLengthVectorVT(SrcVT))
3280 return SDValue();
3281
3282 // It's legal except when f128 is involved
3283 return Op;
3284 }
3285
3286 return SDValue();
3287}
3288
3289SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
3290 SelectionDAG &DAG) const {
3291 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3292 // Any additional optimization in this function should be recorded
3293 // in the cost tables.
3294 EVT InVT = Op.getOperand(0).getValueType();
3295 EVT VT = Op.getValueType();
3296
3297 if (VT.isScalableVector()) {
3298 unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
3299 ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
3300 : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
3301 return LowerToPredicatedOp(Op, DAG, Opcode);
3302 }
3303
3304 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3305 return LowerFixedLengthFPToIntToSVE(Op, DAG);
3306
3307 unsigned NumElts = InVT.getVectorNumElements();
3308
3309 // f16 conversions are promoted to f32 when full fp16 is not supported.
3310 if (InVT.getVectorElementType() == MVT::f16 &&
3311 !Subtarget->hasFullFP16()) {
3312 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
3313 SDLoc dl(Op);
3314 return DAG.getNode(
3315 Op.getOpcode(), dl, Op.getValueType(),
3316 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
3317 }
3318
3319 uint64_t VTSize = VT.getFixedSizeInBits();
3320 uint64_t InVTSize = InVT.getFixedSizeInBits();
3321 if (VTSize < InVTSize) {
3322 SDLoc dl(Op);
3323 SDValue Cv =
3324 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
3325 Op.getOperand(0));
3326 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
3327 }
3328
3329 if (VTSize > InVTSize) {
3330 SDLoc dl(Op);
3331 MVT ExtVT =
3332 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
3333 VT.getVectorNumElements());
3334 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
3335 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
3336 }
3337
3338 // Type changing conversions are illegal.
3339 return Op;
3340}
3341
3342SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
3343 SelectionDAG &DAG) const {
3344 bool IsStrict = Op->isStrictFPOpcode();
3345 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3346
3347 if (SrcVal.getValueType().isVector())
3348 return LowerVectorFP_TO_INT(Op, DAG);
3349
3350 // f16 conversions are promoted to f32 when full fp16 is not supported.
3351 if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
3352 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3353 SDLoc dl(Op);
3354 return DAG.getNode(
3355 Op.getOpcode(), dl, Op.getValueType(),
3356 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
3357 }
3358
3359 if (SrcVal.getValueType() != MVT::f128) {
3360 // It's legal except when f128 is involved
3361 return Op;
3362 }
3363
3364 return SDValue();
3365}
3366
3367SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
3368 SelectionDAG &DAG) const {
3369 // AArch64 FP-to-int conversions saturate to the destination register size, so
3370 // we can lower common saturating conversions to simple instructions.
3371 SDValue SrcVal = Op.getOperand(0);
3372
3373 EVT SrcVT = SrcVal.getValueType();
3374 EVT DstVT = Op.getValueType();
3375
3376 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3377 uint64_t SatWidth = SatVT.getScalarSizeInBits();
3378 uint64_t DstWidth = DstVT.getScalarSizeInBits();
3379 assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
3380
3381 // TODO: Support lowering of NEON and SVE conversions.
3382 if (SrcVT.isVector())
3383 return SDValue();
3384
3385 // TODO: Saturate to SatWidth explicitly.
3386 if (SatWidth != DstWidth)
3387 return SDValue();
3388
3389 // In the absence of full FP16 support, promote f16 to f32, like LowerFP_TO_INT().
3390 if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16())
3391 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
3392 DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal),
3393 Op.getOperand(1));
3394
3395 // Cases that we can emit directly.
3396 if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
3397 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
3398 (DstVT == MVT::i64 || DstVT == MVT::i32))
3399 return Op;
3400
3401 // For all other cases, fall back on the expanded form.
3402 return SDValue();
3403}
3404
3405SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
3406 SelectionDAG &DAG) const {
3407 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3408 // Any additional optimization in this function should be recorded
3409 // in the cost tables.
3410 EVT VT = Op.getValueType();
3411 SDLoc dl(Op);
3412 SDValue In = Op.getOperand(0);
3413 EVT InVT = In.getValueType();
3414 unsigned Opc = Op.getOpcode();
3415 bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
3416
3417 if (VT.isScalableVector()) {
3418 if (InVT.getVectorElementType() == MVT::i1) {
3419 // We can't directly extend an SVE predicate; extend it first.
3420 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3421 EVT CastVT = getPromotedVTForPredicate(InVT);
3422 In = DAG.getNode(CastOpc, dl, CastVT, In);
3423 return DAG.getNode(Opc, dl, VT, In);
3424 }
3425
3426 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
3427 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
3428 return LowerToPredicatedOp(Op, DAG, Opcode);
3429 }
3430
3431 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3432 return LowerFixedLengthIntToFPToSVE(Op, DAG);
3433
3434 uint64_t VTSize = VT.getFixedSizeInBits();
3435 uint64_t InVTSize = InVT.getFixedSizeInBits();
3436 if (VTSize < InVTSize) {
3437 MVT CastVT =
3438 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
3439 InVT.getVectorNumElements());
3440 In = DAG.getNode(Opc, dl, CastVT, In);
3441 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
3442 }
3443
3444 if (VTSize > InVTSize) {
3445 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3446 EVT CastVT = VT.changeVectorElementTypeToInteger();
3447 In = DAG.getNode(CastOpc, dl, CastVT, In);
3448 return DAG.getNode(Opc, dl, VT, In);
3449 }
3450
3451 return Op;
3452}
3453
3454SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
3455 SelectionDAG &DAG) const {
3456 if (Op.getValueType().isVector())
3457 return LowerVectorINT_TO_FP(Op, DAG);
3458
3459 bool IsStrict = Op->isStrictFPOpcode();
3460 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3461
3462 // f16 conversions are promoted to f32 when full fp16 is not supported.
3463 if (Op.getValueType() == MVT::f16 &&
3464 !Subtarget->hasFullFP16()) {
3465 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3466 SDLoc dl(Op);
3467 return DAG.getNode(
3468 ISD::FP_ROUND, dl, MVT::f16,
3469 DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
3470 DAG.getIntPtrConstant(0, dl));
3471 }
3472
3473 // i128 conversions are libcalls.
3474 if (SrcVal.getValueType() == MVT::i128)
3475 return SDValue();
3476
3477 // Other conversions are legal, unless it's to the completely software-based
3478 // fp128.
3479 if (Op.getValueType() != MVT::f128)
3480 return Op;
3481 return SDValue();
3482}
3483
3484SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
3485 SelectionDAG &DAG) const {
3486 // For iOS, we want to call an alternative entry point: __sincos_stret,
3487 // which returns the values in two S / D registers.
3488 SDLoc dl(Op);
3489 SDValue Arg = Op.getOperand(0);
3490 EVT ArgVT = Arg.getValueType();
3491 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
3492
3493 ArgListTy Args;
3494 ArgListEntry Entry;
3495
3496 Entry.Node = Arg;
3497 Entry.Ty = ArgTy;
3498 Entry.IsSExt = false;
3499 Entry.IsZExt = false;
3500 Args.push_back(Entry);
3501
3502 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
3503 : RTLIB::SINCOS_STRET_F32;
3504 const char *LibcallName = getLibcallName(LC);
3505 SDValue Callee =
3506 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
3507
3508 StructType *RetTy = StructType::get(ArgTy, ArgTy);
3509 TargetLowering::CallLoweringInfo CLI(DAG);
3510 CLI.setDebugLoc(dl)
3511 .setChain(DAG.getEntryNode())
3512 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
3513
3514 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3515 return CallResult.first;
3516}
3517
3518static MVT getSVEContainerType(EVT ContentTy);
3519
3520SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
3521 SelectionDAG &DAG) const {
3522 EVT OpVT = Op.getValueType();
3523 EVT ArgVT = Op.getOperand(0).getValueType();
3524
3525 if (useSVEForFixedLengthVectorVT(OpVT))
3526 return LowerFixedLengthBitcastToSVE(Op, DAG);
3527
3528 if (OpVT.isScalableVector()) {
3529 if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
3530 assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
3531        "Expected int->fp bitcast!");
3532 SDValue ExtResult =
3533 DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
3534 Op.getOperand(0));
3535 return getSVESafeBitCast(OpVT, ExtResult, DAG);
3536 }
3537 return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
3538 }
3539
3540 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
3541 return SDValue();
3542
3543 assert(ArgVT == MVT::i16);
3544 SDLoc DL(Op);
3545
3546 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
3547 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
3548 return SDValue(
3549 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
3550 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
3551 0);
3552}
3553
3554static EVT getExtensionTo64Bits(const EVT &OrigVT) {
3555 if (OrigVT.getSizeInBits() >= 64)
3556 return OrigVT;
3557
3558 assert(OrigVT.isSimple() && "Expecting a simple value type");
3559
3560 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
3561 switch (OrigSimpleTy) {
3562 default: llvm_unreachable("Unexpected Vector Type");
3563 case MVT::v2i8:
3564 case MVT::v2i16:
3565 return MVT::v2i32;
3566 case MVT::v4i8:
3567 return MVT::v4i16;
3568 }
3569}
3570
3571static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
3572 const EVT &OrigTy,
3573 const EVT &ExtTy,
3574 unsigned ExtOpcode) {
3575 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
3576 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
3577 // 64-bits we need to insert a new extension so that it will be 64-bits.
3578 assert(ExtTy.is128BitVector() && "Unexpected extension size");
3579 if (OrigTy.getSizeInBits() >= 64)
3580 return N;
3581
3582 // Must extend size to at least 64 bits to be used as an operand for VMULL.
3583 EVT NewVT = getExtensionTo64Bits(OrigTy);
3584
3585 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
3586}
3587
3588static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
3589 bool isSigned) {
3590 EVT VT = N->getValueType(0);
3591
3592 if (N->getOpcode() != ISD::BUILD_VECTOR)
3593 return false;
3594
3595 for (const SDValue &Elt : N->op_values()) {
3596 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
3597 unsigned EltSize = VT.getScalarSizeInBits();
3598 unsigned HalfSize = EltSize / 2;
3599 if (isSigned) {
3600 if (!isIntN(HalfSize, C->getSExtValue()))
3601 return false;
3602 } else {
3603 if (!isUIntN(HalfSize, C->getZExtValue()))
3604 return false;
3605 }
3606 continue;
3607 }
3608 return false;
3609 }
3610
3611 return true;
3612}
3613
3614static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
3615 if (N->getOpcode() == ISD::SIGN_EXTEND ||
3616 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
3617 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
3618 N->getOperand(0)->getValueType(0),
3619 N->getValueType(0),
3620 N->getOpcode());
3621
3622 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
3623 EVT VT = N->getValueType(0);
3624 SDLoc dl(N);
3625 unsigned EltSize = VT.getScalarSizeInBits() / 2;
3626 unsigned NumElts = VT.getVectorNumElements();
3627 MVT TruncVT = MVT::getIntegerVT(EltSize);
3628 SmallVector<SDValue, 8> Ops;
3629 for (unsigned i = 0; i != NumElts; ++i) {
3630 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
3631 const APInt &CInt = C->getAPIntValue();
3632 // Element types smaller than 32 bits are not legal, so use i32 elements.
3633 // The values are implicitly truncated so sext vs. zext doesn't matter.
3634 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
3635 }
3636 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
3637}
3638
3639static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
3640 return N->getOpcode() == ISD::SIGN_EXTEND ||
3641 N->getOpcode() == ISD::ANY_EXTEND ||
3642 isExtendedBUILD_VECTOR(N, DAG, true);
3643}
3644
3645static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
3646 return N->getOpcode() == ISD::ZERO_EXTEND ||
3647 N->getOpcode() == ISD::ANY_EXTEND ||
3648 isExtendedBUILD_VECTOR(N, DAG, false);
3649}
3650
3651static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
3652 unsigned Opcode = N->getOpcode();
3653 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3654 SDNode *N0 = N->getOperand(0).getNode();
3655 SDNode *N1 = N->getOperand(1).getNode();
3656 return N0->hasOneUse() && N1->hasOneUse() &&
3657 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
3658 }
3659 return false;
3660}
3661
3662static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
3663 unsigned Opcode = N->getOpcode();
3664 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3665 SDNode *N0 = N->getOperand(0).getNode();
3666 SDNode *N1 = N->getOperand(1).getNode();
3667 return N0->hasOneUse() && N1->hasOneUse() &&
3668 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
3669 }
3670 return false;
3671}
3672
3673SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3674 SelectionDAG &DAG) const {
3675 // The rounding mode is in bits 23:22 of the FPCR.
3676 // The FPCR rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
3677 // The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3)
3678 // so that the shift + and get folded into a bitfield extract.
3679 SDLoc dl(Op);
3680
3681 SDValue Chain = Op.getOperand(0);
3682 SDValue FPCR_64 = DAG.getNode(
3683 ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
3684 {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
3685 Chain = FPCR_64.getValue(1);
3686 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
3687 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
3688 DAG.getConstant(1U << 22, dl, MVT::i32));
3689 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
3690 DAG.getConstant(22, dl, MVT::i32));
3691 SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
3692 DAG.getConstant(3, dl, MVT::i32));
3693 return DAG.getMergeValues({AND, Chain}, dl);
3694}
3695
3696SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
3697 SelectionDAG &DAG) const {
3698 SDLoc DL(Op);
3699 SDValue Chain = Op->getOperand(0);
3700 SDValue RMValue = Op->getOperand(1);
3701
3702 // The rounding mode is in bits 23:22 of the FPCR.
3703 // The llvm.set.rounding argument value to the rounding mode in FPCR mapping
3704 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
3705 // ((arg - 1) & 3) << 22.
3706 //
3707 // The argument of llvm.set.rounding must be in the range [0, 3], so
3708 // NearestTiesToAway (4) is not handled here. It is the responsibility of the
3709 // code that generated llvm.set.rounding to ensure this condition.
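  // For example, an argument of 1 (round to nearest) gives ((1 - 1) & 3) == 0,
  // i.e. FPCR.RMode == 0b00 (RN), and an argument of 0 (toward zero) wraps to
  // ((0 - 1) & 3) == 3, i.e. RMode == 0b11 (RZ), matching the mapping above.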
3710
3711 // Calculate new value of FPCR[23:22].
3712 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
3713 DAG.getConstant(1, DL, MVT::i32));
3714 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
3715 DAG.getConstant(0x3, DL, MVT::i32));
3716 RMValue =
3717 DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
3718 DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
3719 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
3720
3721 // Get current value of FPCR.
3722 SDValue Ops[] = {
3723 Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
3724 SDValue FPCR =
3725 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
3726 Chain = FPCR.getValue(1);
3727 FPCR = FPCR.getValue(0);
3728
3729 // Put the new rounding mode into FPCR[23:22].
3730 const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
3731 FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
3732 DAG.getConstant(RMMask, DL, MVT::i64));
3733 FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
3734 SDValue Ops2[] = {
3735 Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
3736 FPCR};
3737 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
3738}
3739
3740SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
3741 EVT VT = Op.getValueType();
3742
3743 // If SVE is available then i64 vector multiplications can also be made legal.
3744 bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
3745
3746 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
3747 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
3748
3749 // Multiplications are only custom-lowered for 128-bit vectors so that
3750 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
3751 assert(VT.is128BitVector() && VT.isInteger() &&
3752        "unexpected type for custom-lowering ISD::MUL");
3753 SDNode *N0 = Op.getOperand(0).getNode();
3754 SDNode *N1 = Op.getOperand(1).getNode();
3755 unsigned NewOpc = 0;
3756 bool isMLA = false;
3757 bool isN0SExt = isSignExtended(N0, DAG);
3758 bool isN1SExt = isSignExtended(N1, DAG);
3759 if (isN0SExt && isN1SExt)
3760 NewOpc = AArch64ISD::SMULL;
3761 else {
3762 bool isN0ZExt = isZeroExtended(N0, DAG);
3763 bool isN1ZExt = isZeroExtended(N1, DAG);
3764 if (isN0ZExt && isN1ZExt)
3765 NewOpc = AArch64ISD::UMULL;
3766 else if (isN1SExt || isN1ZExt) {
3767 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
3768 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
3769 if (isN1SExt && isAddSubSExt(N0, DAG)) {
3770 NewOpc = AArch64ISD::SMULL;
3771 isMLA = true;
3772 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
3773 NewOpc = AArch64ISD::UMULL;
3774 isMLA = true;
3775 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
3776 std::swap(N0, N1);
3777 NewOpc = AArch64ISD::UMULL;
3778 isMLA = true;
3779 }
3780 }
3781
3782 if (!NewOpc) {
3783 if (VT == MVT::v2i64)
3784 // Fall through to expand this. It is not legal.
3785 return SDValue();
3786 else
3787 // Other vector multiplications are legal.
3788 return Op;
3789 }
3790 }
3791
3792 // Legalize to a S/UMULL instruction
3793 SDLoc DL(Op);
3794 SDValue Op0;
3795 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
3796 if (!isMLA) {
3797 Op0 = skipExtensionForVectorMULL(N0, DAG);
3798   assert(Op0.getValueType().is64BitVector() &&
3799          Op1.getValueType().is64BitVector() &&
3800          "unexpected types for extended operands to VMULL");
3801 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
3802 }
3803 // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
3804 // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
3805 // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57.
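  // The expansion below relies on the distributive identity
  // (A + B) * C == A*C + B*C (and likewise for SUB): the two narrow MULL
  // results are recombined with N0's original ADD/SUB opcode.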
3806 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
3807 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
3808 EVT Op1VT = Op1.getValueType();
3809 return DAG.getNode(N0->getOpcode(), DL, VT,
3810 DAG.getNode(NewOpc, DL, VT,
3811 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
3812 DAG.getNode(NewOpc, DL, VT,
3813 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
3814}
3815
3816static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
3817 int Pattern) {
3818 return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
3819 DAG.getTargetConstant(Pattern, DL, MVT::i32));
3820}
3821
3822static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
3823 SDLoc DL(Op);
3824 EVT OutVT = Op.getValueType();
3825 SDValue InOp = Op.getOperand(1);
3826 EVT InVT = InOp.getValueType();
3827
3828 // Return the operand if the cast isn't changing type,
3829 // i.e. <n x 16 x i1> -> <n x 16 x i1>
3830 if (InVT == OutVT)
3831 return InOp;
3832
3833 SDValue Reinterpret =
3834 DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, InOp);
3835
3836 // If the argument converted to an svbool is a ptrue or a comparison, the
3837 // lanes introduced by the widening are zero by construction.
3838 switch (InOp.getOpcode()) {
3839 case AArch64ISD::SETCC_MERGE_ZERO:
3840 return Reinterpret;
3841 case ISD::INTRINSIC_WO_CHAIN:
3842 if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue)
3843 return Reinterpret;
3844 }
3845
3846 // Otherwise, zero the newly introduced lanes.
3847 SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
3848 SDValue MaskReinterpret =
3849 DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, Mask);
3850 return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
3851}
3852
3853SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3854 SelectionDAG &DAG) const {
3855 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3856 SDLoc dl(Op);
3857 switch (IntNo) {
3858 default: return SDValue(); // Don't custom lower most intrinsics.
3859 case Intrinsic::thread_pointer: {
3860 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3861 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
3862 }
3863 case Intrinsic::aarch64_neon_abs: {
3864 EVT Ty = Op.getValueType();
3865 if (Ty == MVT::i64) {
3866 SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
3867 Op.getOperand(1));
3868 Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
3869 return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
3870 } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
3871 return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
3872 } else {
3873      report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
3874 }
3875 }
3876 case Intrinsic::aarch64_neon_smax:
3877 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
3878 Op.getOperand(1), Op.getOperand(2));
3879 case Intrinsic::aarch64_neon_umax:
3880 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
3881 Op.getOperand(1), Op.getOperand(2));
3882 case Intrinsic::aarch64_neon_smin:
3883 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
3884 Op.getOperand(1), Op.getOperand(2));
3885 case Intrinsic::aarch64_neon_umin:
3886 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
3887 Op.getOperand(1), Op.getOperand(2));
3888
3889 case Intrinsic::aarch64_sve_sunpkhi:
3890 return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
3891 Op.getOperand(1));
3892 case Intrinsic::aarch64_sve_sunpklo:
3893 return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
3894 Op.getOperand(1));
3895 case Intrinsic::aarch64_sve_uunpkhi:
3896 return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
3897 Op.getOperand(1));
3898 case Intrinsic::aarch64_sve_uunpklo:
3899 return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
3900 Op.getOperand(1));
3901 case Intrinsic::aarch64_sve_clasta_n:
3902 return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
3903 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3904 case Intrinsic::aarch64_sve_clastb_n:
3905 return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
3906 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3907 case Intrinsic::aarch64_sve_lasta:
3908 return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
3909 Op.getOperand(1), Op.getOperand(2));
3910 case Intrinsic::aarch64_sve_lastb:
3911 return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
3912 Op.getOperand(1), Op.getOperand(2));
3913 case Intrinsic::aarch64_sve_rev:
3914 return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
3915 Op.getOperand(1));
3916 case Intrinsic::aarch64_sve_tbl:
3917 return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
3918 Op.getOperand(1), Op.getOperand(2));
3919 case Intrinsic::aarch64_sve_trn1:
3920 return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
3921 Op.getOperand(1), Op.getOperand(2));
3922 case Intrinsic::aarch64_sve_trn2:
3923 return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
3924 Op.getOperand(1), Op.getOperand(2));
3925 case Intrinsic::aarch64_sve_uzp1:
3926 return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
3927 Op.getOperand(1), Op.getOperand(2));
3928 case Intrinsic::aarch64_sve_uzp2:
3929 return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
3930 Op.getOperand(1), Op.getOperand(2));
3931 case Intrinsic::aarch64_sve_zip1:
3932 return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
3933 Op.getOperand(1), Op.getOperand(2));
3934 case Intrinsic::aarch64_sve_zip2:
3935 return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
3936 Op.getOperand(1), Op.getOperand(2));
3937 case Intrinsic::aarch64_sve_splice:
3938 return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
3939 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3940 case Intrinsic::aarch64_sve_ptrue:
3941 return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
3942 Op.getOperand(1));
3943 case Intrinsic::aarch64_sve_clz:
3944 return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
3945 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3946 case Intrinsic::aarch64_sve_cnt: {
3947 SDValue Data = Op.getOperand(3);
3948 // CTPOP only supports integer operands.
3949 if (Data.getValueType().isFloatingPoint())
3950 Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
3951 return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
3952 Op.getOperand(2), Data, Op.getOperand(1));
3953 }
3954 case Intrinsic::aarch64_sve_dupq_lane:
3955 return LowerDUPQLane(Op, DAG);
3956 case Intrinsic::aarch64_sve_convert_from_svbool:
3957 return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
3958 Op.getOperand(1));
3959 case Intrinsic::aarch64_sve_convert_to_svbool:
3960 return lowerConvertToSVBool(Op, DAG);
3961 case Intrinsic::aarch64_sve_fneg:
3962 return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
3963 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3964 case Intrinsic::aarch64_sve_frintp:
3965 return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
3966 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3967 case Intrinsic::aarch64_sve_frintm:
3968 return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
3969 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3970 case Intrinsic::aarch64_sve_frinti:
3971 return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
3972 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3973 case Intrinsic::aarch64_sve_frintx:
3974 return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
3975 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3976 case Intrinsic::aarch64_sve_frinta:
3977 return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
3978 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3979 case Intrinsic::aarch64_sve_frintn:
3980 return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
3981 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3982 case Intrinsic::aarch64_sve_frintz:
3983 return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
3984 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3985 case Intrinsic::aarch64_sve_ucvtf:
3986 return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
3987 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3988 Op.getOperand(1));
3989 case Intrinsic::aarch64_sve_scvtf:
3990 return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
3991 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3992 Op.getOperand(1));
3993 case Intrinsic::aarch64_sve_fcvtzu:
3994 return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
3995 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3996 Op.getOperand(1));
3997 case Intrinsic::aarch64_sve_fcvtzs:
3998 return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
3999 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4000 Op.getOperand(1));
4001 case Intrinsic::aarch64_sve_fsqrt:
4002 return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
4003 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4004 case Intrinsic::aarch64_sve_frecpx:
4005 return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
4006 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4007 case Intrinsic::aarch64_sve_fabs:
4008 return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4009 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4010 case Intrinsic::aarch64_sve_abs:
4011 return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4012 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4013 case Intrinsic::aarch64_sve_neg:
4014 return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
4015 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4016 case Intrinsic::aarch64_sve_insr: {
4017 SDValue Scalar = Op.getOperand(2);
4018 EVT ScalarTy = Scalar.getValueType();
4019 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
4020 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
4021
4022 return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
4023 Op.getOperand(1), Scalar);
4024 }
4025 case Intrinsic::aarch64_sve_rbit:
4026 return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
4027 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4028 Op.getOperand(1));
4029 case Intrinsic::aarch64_sve_revb:
4030 return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
4031 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4032 case Intrinsic::aarch64_sve_sxtb:
4033 return DAG.getNode(
4034 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4035 Op.getOperand(2), Op.getOperand(3),
4036 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4037 Op.getOperand(1));
4038 case Intrinsic::aarch64_sve_sxth:
4039 return DAG.getNode(
4040 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4041 Op.getOperand(2), Op.getOperand(3),
4042 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4043 Op.getOperand(1));
4044 case Intrinsic::aarch64_sve_sxtw:
4045 return DAG.getNode(
4046 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4047 Op.getOperand(2), Op.getOperand(3),
4048 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4049 Op.getOperand(1));
4050 case Intrinsic::aarch64_sve_uxtb:
4051 return DAG.getNode(
4052 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4053 Op.getOperand(2), Op.getOperand(3),
4054 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4055 Op.getOperand(1));
4056 case Intrinsic::aarch64_sve_uxth:
4057 return DAG.getNode(
4058 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4059 Op.getOperand(2), Op.getOperand(3),
4060 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4061 Op.getOperand(1));
4062 case Intrinsic::aarch64_sve_uxtw:
4063 return DAG.getNode(
4064 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4065 Op.getOperand(2), Op.getOperand(3),
4066 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4067 Op.getOperand(1));
4068
4069 case Intrinsic::localaddress: {
4070 const auto &MF = DAG.getMachineFunction();
4071 const auto *RegInfo = Subtarget->getRegisterInfo();
4072 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
4073 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
4074 Op.getSimpleValueType());
4075 }
4076
4077 case Intrinsic::eh_recoverfp: {
4078 // FIXME: This needs to be implemented to correctly handle highly aligned
4079 // stack objects. For now we simply return the incoming FP. Refer D53541
4080 // for more details.
4081 SDValue FnOp = Op.getOperand(1);
4082 SDValue IncomingFPOp = Op.getOperand(2);
4083 GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
4084 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
4085 if (!Fn)
4086 report_fatal_error(
4087 "llvm.eh.recoverfp must take a function as the first argument");
4088 return IncomingFPOp;
4089 }
4090
4091 case Intrinsic::aarch64_neon_vsri:
4092 case Intrinsic::aarch64_neon_vsli: {
4093 EVT Ty = Op.getValueType();
4094
4095 if (!Ty.isVector())
4096 report_fatal_error("Unexpected type for aarch64_neon_vsli");
4097
4098    assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
4099
4100 bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
4101 unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
4102 return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
4103 Op.getOperand(3));
4104 }
4105
4106 case Intrinsic::aarch64_neon_srhadd:
4107 case Intrinsic::aarch64_neon_urhadd:
4108 case Intrinsic::aarch64_neon_shadd:
4109 case Intrinsic::aarch64_neon_uhadd: {
4110 bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4111 IntNo == Intrinsic::aarch64_neon_shadd);
4112 bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4113 IntNo == Intrinsic::aarch64_neon_urhadd);
4114 unsigned Opcode =
4115 IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
4116 : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
4117 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4118 Op.getOperand(2));
4119 }
4120 case Intrinsic::aarch64_neon_sabd:
4121 case Intrinsic::aarch64_neon_uabd: {
4122 unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU
4123 : ISD::ABDS;
4124 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4125 Op.getOperand(2));
4126 }
4127 case Intrinsic::aarch64_neon_uaddlp: {
4128 unsigned Opcode = AArch64ISD::UADDLP;
4129 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
4130 }
4131 case Intrinsic::aarch64_neon_sdot:
4132 case Intrinsic::aarch64_neon_udot:
4133 case Intrinsic::aarch64_sve_sdot:
4134 case Intrinsic::aarch64_sve_udot: {
4135 unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
4136 IntNo == Intrinsic::aarch64_sve_udot)
4137 ? AArch64ISD::UDOT
4138 : AArch64ISD::SDOT;
4139 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4140 Op.getOperand(2), Op.getOperand(3));
4141 }
4142 }
4143}
4144
4145bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
4146 if (VT.getVectorElementType() == MVT::i8 ||
4147 VT.getVectorElementType() == MVT::i16) {
4148 EltTy = MVT::i32;
4149 return true;
4150 }
4151 return false;
4152}
4153
4154bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
4155 if (VT.getVectorElementType() == MVT::i32 &&
4156 VT.getVectorElementCount().getKnownMinValue() >= 4)
4157 return true;
4158
4159 return false;
4160}
4161
4162bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
4163 return ExtVal.getValueType().isScalableVector();
4164}
4165
4166unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
4167 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
4168 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
4169 AArch64ISD::GLD1_MERGE_ZERO},
4170 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
4171 AArch64ISD::GLD1_UXTW_MERGE_ZERO},
4172 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
4173 AArch64ISD::GLD1_MERGE_ZERO},
4174 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
4175 AArch64ISD::GLD1_SXTW_MERGE_ZERO},
4176 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
4177 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
4178 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
4179 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
4180 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4181 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
4182 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4183 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
4184 };
4185 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4186 return AddrModes.find(Key)->second;
4187}
4188
4189unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
4190 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
4191 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
4192 AArch64ISD::SST1_PRED},
4193 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
4194 AArch64ISD::SST1_UXTW_PRED},
4195 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
4196 AArch64ISD::SST1_PRED},
4197 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
4198 AArch64ISD::SST1_SXTW_PRED},
4199 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
4200 AArch64ISD::SST1_SCALED_PRED},
4201 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
4202 AArch64ISD::SST1_UXTW_SCALED_PRED},
4203 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4204 AArch64ISD::SST1_SCALED_PRED},
4205 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4206 AArch64ISD::SST1_SXTW_SCALED_PRED},
4207 };
4208 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4209 return AddrModes.find(Key)->second;
4210}
4211
4212unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
4213 switch (Opcode) {
4214 default:
4215    llvm_unreachable("unimplemented opcode");
4216 return Opcode;
4217 case AArch64ISD::GLD1_MERGE_ZERO:
4218 return AArch64ISD::GLD1S_MERGE_ZERO;
4219 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
4220 return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
4221 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
4222 return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
4223 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
4224 return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
4225 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
4226 return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
4227 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
4228 return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
4229 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
4230 return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
4231 }
4232}
4233
4234bool getGatherScatterIndexIsExtended(SDValue Index) {
4235 unsigned Opcode = Index.getOpcode();
4236 if (Opcode == ISD::SIGN_EXTEND_INREG)
4237 return true;
4238
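  // An AND with a splatted 0xFFFFFFFF mask only keeps the low 32 bits of each
  // lane, which behaves like a zero-extension of an i32 index, so treat it as
  // an extended index as well.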
4239 if (Opcode == ISD::AND) {
4240 SDValue Splat = Index.getOperand(1);
4241 if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
4242 return false;
4243 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
4244 if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
4245 return false;
4246 return true;
4247 }
4248
4249 return false;
4250}
4251
4252// If the base pointer of a masked gather or scatter is null, we
4253// may be able to swap BasePtr & Index and use the vector + register
4254// or vector + immediate addressing mode, e.g.
4255// VECTOR + REGISTER:
4256// getelementptr nullptr, <vscale x N x T> (splat(%offset)) + %indices)
4257// -> getelementptr %offset, <vscale x N x T> %indices
4258// VECTOR + IMMEDIATE:
4259// getelementptr nullptr, <vscale x N x T> (splat(#x)) + %indices)
4260// -> getelementptr #x, <vscale x N x T> %indices
4261void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
4262 unsigned &Opcode, bool IsGather,
4263 SelectionDAG &DAG) {
4264 if (!isNullConstant(BasePtr))
4265 return;
4266
4267 // FIXME: This will not match for fixed vector type codegen as the nodes in
4268 // question will have fixed<->scalable conversions around them. This should be
4269 // moved to a DAG combine or complex pattern so that it executes after all of
4270 // the fixed vector inserts and extracts have been removed. This deficiency
4271 // will result in a sub-optimal addressing mode being used, i.e. an ADD not
4272 // being folded into the scatter/gather.
4273 ConstantSDNode *Offset = nullptr;
4274 if (Index.getOpcode() == ISD::ADD)
4275 if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
4276 if (isa<ConstantSDNode>(SplatVal))
4277 Offset = cast<ConstantSDNode>(SplatVal);
4278 else {
4279 BasePtr = SplatVal;
4280 Index = Index->getOperand(0);
4281 return;
4282 }
4283 }
4284
4285 unsigned NewOp =
4286 IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
4287
4288 if (!Offset) {
4289 std::swap(BasePtr, Index);
4290 Opcode = NewOp;
4291 return;
4292 }
4293
4294 uint64_t OffsetVal = Offset->getZExtValue();
4295 unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
4296 auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
4297
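  // The vector-plus-immediate form only encodes offsets that are a multiple of
  // the element size and at most 31 elements; anything else keeps the splatted
  // constant as the scalar base pointer instead.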
4298 if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
4299 // Index is out of range for the immediate addressing mode
4300 BasePtr = ConstOffset;
4301 Index = Index->getOperand(0);
4302 return;
4303 }
4304
4305 // Immediate is in range
4306 Opcode = NewOp;
4307 BasePtr = Index->getOperand(0);
4308 Index = ConstOffset;
4309}
4310
4311SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
4312 SelectionDAG &DAG) const {
4313 SDLoc DL(Op);
4314 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
4315  assert(MGT && "Can only custom lower gather load nodes");
4316
4317 bool IsFixedLength = MGT->getMemoryVT().isFixedLengthVector();
4318
4319 SDValue Index = MGT->getIndex();
4320 SDValue Chain = MGT->getChain();
4321 SDValue PassThru = MGT->getPassThru();
4322 SDValue Mask = MGT->getMask();
4323 SDValue BasePtr = MGT->getBasePtr();
4324 ISD::LoadExtType ExtTy = MGT->getExtensionType();
4325
4326 ISD::MemIndexType IndexType = MGT->getIndexType();
4327 bool IsScaled =
4328 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4329 bool IsSigned =
4330 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4331 bool IdxNeedsExtend =
4332 getGatherScatterIndexIsExtended(Index) ||
4333 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4334 bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
4335
4336 EVT VT = PassThru.getSimpleValueType();
4337 EVT IndexVT = Index.getSimpleValueType();
4338 EVT MemVT = MGT->getMemoryVT();
4339 SDValue InputVT = DAG.getValueType(MemVT);
4340
4341 if (VT.getVectorElementType() == MVT::bf16 &&
4342 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4343 return SDValue();
4344
4345 if (IsFixedLength) {
4346    assert(Subtarget->useSVEForFixedLengthVectors() &&
4347           "Cannot lower when not using SVE for fixed vectors");
4348 IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
4349 MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
4350 InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
4351 }
4352
4353 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
4354 PassThru = SDValue();
4355
4356 if (VT.isFloatingPoint() && !IsFixedLength) {
4357 // Handle FP data by using an integer gather and casting the result.
4358 if (PassThru) {
4359 EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4360 PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
4361 }
4362 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4363 }
4364
4365 SDVTList VTs = DAG.getVTList(IndexVT, MVT::Other);
4366
4367 if (getGatherScatterIndexIsExtended(Index))
4368 Index = Index.getOperand(0);
4369
4370 unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
4371 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4372 /*isGather=*/true, DAG);
4373
4374 if (ResNeedsSignExtend)
4375 Opcode = getSignExtendedGatherOpcode(Opcode);
4376
4377 if (IsFixedLength) {
4378 if (Index.getSimpleValueType().isFixedLengthVector())
4379 Index = convertToScalableVector(DAG, IndexVT, Index);
4380 if (BasePtr.getSimpleValueType().isFixedLengthVector())
4381 BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
4382 Mask = convertFixedMaskToScalableVector(Mask, DAG);
4383 }
4384
4385 SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT};
4386 SDValue Result = DAG.getNode(Opcode, DL, VTs, Ops);
4387 Chain = Result.getValue(1);
4388
4389 if (IsFixedLength) {
4390 Result = convertFromScalableVector(
4391 DAG, VT.changeVectorElementType(IndexVT.getVectorElementType()),
4392 Result);
4393 Result = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Result);
4394 Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
4395
4396 if (PassThru)
4397 Result = DAG.getSelect(DL, VT, MGT->getMask(), Result, PassThru);
4398 } else {
4399 if (PassThru)
4400 Result = DAG.getSelect(DL, IndexVT, Mask, Result, PassThru);
4401
4402 if (VT.isFloatingPoint())
4403 Result = getSVESafeBitCast(VT, Result, DAG);
4404 }
4405
4406 return DAG.getMergeValues({Result, Chain}, DL);
4407}
4408
4409SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
4410 SelectionDAG &DAG) const {
4411 SDLoc DL(Op);
4412 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
4413  assert(MSC && "Can only custom lower scatter store nodes");
4414
4415 bool IsFixedLength = MSC->getMemoryVT().isFixedLengthVector();
4416
4417 SDValue Index = MSC->getIndex();
4418 SDValue Chain = MSC->getChain();
4419 SDValue StoreVal = MSC->getValue();
4420 SDValue Mask = MSC->getMask();
4421 SDValue BasePtr = MSC->getBasePtr();
4422
4423 ISD::MemIndexType IndexType = MSC->getIndexType();
4424 bool IsScaled =
4425 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4426 bool IsSigned =
4427 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4428 bool NeedsExtend =
4429 getGatherScatterIndexIsExtended(Index) ||
4430 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4431
4432 EVT VT = StoreVal.getSimpleValueType();
4433 EVT IndexVT = Index.getSimpleValueType();
4434 SDVTList VTs = DAG.getVTList(MVT::Other);
4435 EVT MemVT = MSC->getMemoryVT();
4436 SDValue InputVT = DAG.getValueType(MemVT);
4437
4438 if (VT.getVectorElementType() == MVT::bf16 &&
4439 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4440 return SDValue();
4441
4442 if (IsFixedLength) {
4443    assert(Subtarget->useSVEForFixedLengthVectors() &&
4444           "Cannot lower when not using SVE for fixed vectors");
4445 IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
4446 MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
4447 InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
4448
4449 StoreVal =
4450 DAG.getNode(ISD::BITCAST, DL, VT.changeTypeToInteger(), StoreVal);
4451 StoreVal = DAG.getNode(
4452 ISD::ANY_EXTEND, DL,
4453 VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
4454 StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
4455 } else if (VT.isFloatingPoint()) {
4456 // Handle FP data by casting the data so an integer scatter can be used.
4457 EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4458 StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
4459 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4460 }
4461
4462 if (getGatherScatterIndexIsExtended(Index))
4463 Index = Index.getOperand(0);
4464
4465 unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
4466 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4467 /*isGather=*/false, DAG);
4468
4469 if (IsFixedLength) {
4470 if (Index.getSimpleValueType().isFixedLengthVector())
4471 Index = convertToScalableVector(DAG, IndexVT, Index);
4472 if (BasePtr.getSimpleValueType().isFixedLengthVector())
4473 BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
4474 Mask = convertFixedMaskToScalableVector(Mask, DAG);
4475 }
4476
4477 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
4478 return DAG.getNode(Opcode, DL, VTs, Ops);
4479}
4480
4481SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
4482 SDLoc DL(Op);
4483 MaskedLoadSDNode *LoadNode = cast<MaskedLoadSDNode>(Op);
4484  assert(LoadNode && "Expected custom lowering of a masked load node");
4485 EVT VT = Op->getValueType(0);
4486
4487 if (useSVEForFixedLengthVectorVT(VT, true))
4488 return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
4489
4490 SDValue PassThru = LoadNode->getPassThru();
4491 SDValue Mask = LoadNode->getMask();
4492
4493 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
4494 return Op;
4495
4496 SDValue Load = DAG.getMaskedLoad(
4497 VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
4498 LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
4499 LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
4500 LoadNode->getExtensionType());
4501
4502 SDValue Result = DAG.getSelect(DL, VT, Mask, Load, PassThru);
4503
4504 return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
4505}
4506
4507// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
4508static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
4509 EVT VT, EVT MemVT,
4510 SelectionDAG &DAG) {
4511  assert(VT.isVector() && "VT should be a vector type");
4512  assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
4513
4514 SDValue Value = ST->getValue();
4515
4516  // It first extends the promoted v4i16 to v8i16, truncates to v8i8, and extracts
4517  // the word lane which represents the v4i8 subvector. It optimizes the store
4518 // to:
4519 //
4520 // xtn v0.8b, v0.8h
4521 // str s0, [x0]
4522
4523 SDValue Undef = DAG.getUNDEF(MVT::i16);
4524 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
4525 {Undef, Undef, Undef, Undef});
4526
4527 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
4528 Value, UndefVec);
4529 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
4530
4531 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
4532 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4533 Trunc, DAG.getConstant(0, DL, MVT::i64));
4534
4535 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
4536 ST->getBasePtr(), ST->getMemOperand());
4537}
4538
4539 // Custom lowering for any store, vector or scalar, normal or truncating.
4540 // Currently we only custom lower truncating stores from v4i16 to v4i8 and
4541 // volatile stores of i128.
4542SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
4543 SelectionDAG &DAG) const {
4544 SDLoc Dl(Op);
4545 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
4546  assert(StoreNode && "Can only custom lower store nodes");
4547
4548 SDValue Value = StoreNode->getValue();
4549
4550 EVT VT = Value.getValueType();
4551 EVT MemVT = StoreNode->getMemoryVT();
4552
4553 if (VT.isVector()) {
4554 if (useSVEForFixedLengthVectorVT(VT, true))
4555 return LowerFixedLengthVectorStoreToSVE(Op, DAG);
4556
4557 unsigned AS = StoreNode->getAddressSpace();
4558 Align Alignment = StoreNode->getAlign();
4559 if (Alignment < MemVT.getStoreSize() &&
4560 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
4561 StoreNode->getMemOperand()->getFlags(),
4562 nullptr)) {
4563 return scalarizeVectorStore(StoreNode, DAG);
4564 }
4565
4566 if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
4567 MemVT == MVT::v4i8) {
4568 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
4569 }
4570 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
4571 // the custom lowering, as there are no un-paired non-temporal stores and
4572 // legalization will break up 256 bit inputs.
4573 ElementCount EC = MemVT.getVectorElementCount();
4574 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
4575 EC.isKnownEven() &&
4576 ((MemVT.getScalarSizeInBits() == 8u ||
4577 MemVT.getScalarSizeInBits() == 16u ||
4578 MemVT.getScalarSizeInBits() == 32u ||
4579 MemVT.getScalarSizeInBits() == 64u))) {
4580 SDValue Lo =
4581 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4582 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4583 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
4584 SDValue Hi =
4585 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4586 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4587 StoreNode->getValue(),
4588 DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
4589 SDValue Result = DAG.getMemIntrinsicNode(
4590 AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
4591 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4592 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4593 return Result;
4594 }
4595 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
4596    assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
4597 SDValue Lo =
4598 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4599 DAG.getConstant(0, Dl, MVT::i64));
4600 SDValue Hi =
4601 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4602 DAG.getConstant(1, Dl, MVT::i64));
4603 SDValue Result = DAG.getMemIntrinsicNode(
4604 AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
4605 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4606 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4607 return Result;
4608 }
4609
4610 return SDValue();
4611}
4612
4613// Custom lowering for extending v4i8 vector loads.
4614SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
4615 SelectionDAG &DAG) const {
4616 SDLoc DL(Op);
4617 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
4618  assert(LoadNode && "Expected custom lowering of a load node");
4619 EVT VT = Op->getValueType(0);
4620  assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
4621
4622 if (LoadNode->getMemoryVT() != MVT::v4i8)
4623 return SDValue();
4624
4625 unsigned ExtType;
4626 if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
4627 ExtType = ISD::SIGN_EXTEND;
4628 else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
4629 LoadNode->getExtensionType() == ISD::EXTLOAD)
4630 ExtType = ISD::ZERO_EXTEND;
4631 else
4632 return SDValue();
4633
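  // Load the four i8 elements as a single f32, move them into a vector
  // register, then reinterpret as v8i8 so a full-width extend can be applied
  // before extracting the low v4i16 half.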
4634 SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
4635 LoadNode->getBasePtr(), MachinePointerInfo());
4636 SDValue Chain = Load.getValue(1);
4637 SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
4638 SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
4639 SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
4640 Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
4641 DAG.getConstant(0, DL, MVT::i64));
4642 if (VT == MVT::v4i32)
4643 Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
4644 return DAG.getMergeValues({Ext, Chain}, DL);
4645}
4646
4647// Generate SUBS and CSEL for integer abs.
4648SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
4649 MVT VT = Op.getSimpleValueType();
4650
4651 if (VT.isVector())
4652 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
4653
4654 SDLoc DL(Op);
4655 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4656 Op.getOperand(0));
4657 // Generate SUBS & CSEL.
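  // CSEL keeps the original value when the SUBS flags show it is non-negative
  // (PL) and otherwise selects the negated copy, yielding |x|.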
4658 SDValue Cmp =
4659 DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
4660 Op.getOperand(0), DAG.getConstant(0, DL, VT));
4661 return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
4662 DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
4663 Cmp.getValue(1));
4664}
4665
4666SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
4667 SelectionDAG &DAG) const {
4668  LLVM_DEBUG(dbgs() << "Custom lowering: ");
4669  LLVM_DEBUG(Op.dump());
4670
4671 switch (Op.getOpcode()) {
4672 default:
4673    llvm_unreachable("unimplemented operand");
4674 return SDValue();
4675 case ISD::BITCAST:
4676 return LowerBITCAST(Op, DAG);
4677 case ISD::GlobalAddress:
4678 return LowerGlobalAddress(Op, DAG);
4679 case ISD::GlobalTLSAddress:
4680 return LowerGlobalTLSAddress(Op, DAG);
4681 case ISD::SETCC:
4682 case ISD::STRICT_FSETCC:
4683 case ISD::STRICT_FSETCCS:
4684 return LowerSETCC(Op, DAG);
4685 case ISD::BR_CC:
4686 return LowerBR_CC(Op, DAG);
4687 case ISD::SELECT:
4688 return LowerSELECT(Op, DAG);
4689 case ISD::SELECT_CC:
4690 return LowerSELECT_CC(Op, DAG);
4691 case ISD::JumpTable:
4692 return LowerJumpTable(Op, DAG);
4693 case ISD::BR_JT:
4694 return LowerBR_JT(Op, DAG);
4695 case ISD::ConstantPool:
4696 return LowerConstantPool(Op, DAG);
4697 case ISD::BlockAddress:
4698 return LowerBlockAddress(Op, DAG);
4699 case ISD::VASTART:
4700 return LowerVASTART(Op, DAG);
4701 case ISD::VACOPY:
4702 return LowerVACOPY(Op, DAG);
4703 case ISD::VAARG:
4704 return LowerVAARG(Op, DAG);
4705 case ISD::ADDC:
4706 case ISD::ADDE:
4707 case ISD::SUBC:
4708 case ISD::SUBE:
4709 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
4710 case ISD::SADDO:
4711 case ISD::UADDO:
4712 case ISD::SSUBO:
4713 case ISD::USUBO:
4714 case ISD::SMULO:
4715 case ISD::UMULO:
4716 return LowerXALUO(Op, DAG);
4717 case ISD::FADD:
4718 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
4719 case ISD::FSUB:
4720 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
4721 case ISD::FMUL:
4722 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
4723 case ISD::FMA:
4724 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
4725 case ISD::FDIV:
4726 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
4727 case ISD::FNEG:
4728 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
4729 case ISD::FCEIL:
4730 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
4731 case ISD::FFLOOR:
4732 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
4733 case ISD::FNEARBYINT:
4734 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
4735 case ISD::FRINT:
4736 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
4737 case ISD::FROUND:
4738 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
4739 case ISD::FROUNDEVEN:
4740 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
4741 case ISD::FTRUNC:
4742 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
4743 case ISD::FSQRT:
4744 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
4745 case ISD::FABS:
4746 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
4747 case ISD::FP_ROUND:
4748 case ISD::STRICT_FP_ROUND:
4749 return LowerFP_ROUND(Op, DAG);
4750 case ISD::FP_EXTEND:
4751 return LowerFP_EXTEND(Op, DAG);
4752 case ISD::FRAMEADDR:
4753 return LowerFRAMEADDR(Op, DAG);
4754 case ISD::SPONENTRY:
4755 return LowerSPONENTRY(Op, DAG);
4756 case ISD::RETURNADDR:
4757 return LowerRETURNADDR(Op, DAG);
4758 case ISD::ADDROFRETURNADDR:
4759 return LowerADDROFRETURNADDR(Op, DAG);
4760 case ISD::CONCAT_VECTORS:
4761 return LowerCONCAT_VECTORS(Op, DAG);
4762 case ISD::INSERT_VECTOR_ELT:
4763 return LowerINSERT_VECTOR_ELT(Op, DAG);
4764 case ISD::EXTRACT_VECTOR_ELT:
4765 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4766 case ISD::BUILD_VECTOR:
4767 return LowerBUILD_VECTOR(Op, DAG);
4768 case ISD::VECTOR_SHUFFLE:
4769 return LowerVECTOR_SHUFFLE(Op, DAG);
4770 case ISD::SPLAT_VECTOR:
4771 return LowerSPLAT_VECTOR(Op, DAG);
4772 case ISD::EXTRACT_SUBVECTOR:
4773 return LowerEXTRACT_SUBVECTOR(Op, DAG);
4774 case ISD::INSERT_SUBVECTOR:
4775 return LowerINSERT_SUBVECTOR(Op, DAG);
4776 case ISD::SDIV:
4777 case ISD::UDIV:
4778 return LowerDIV(Op, DAG);
4779 case ISD::SMIN:
4780 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
4781 /*OverrideNEON=*/true);
4782 case ISD::UMIN:
4783 return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
4784 /*OverrideNEON=*/true);
4785 case ISD::SMAX:
4786 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
4787 /*OverrideNEON=*/true);
4788 case ISD::UMAX:
4789 return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
4790 /*OverrideNEON=*/true);
4791 case ISD::SRA:
4792 case ISD::SRL:
4793 case ISD::SHL:
4794 return LowerVectorSRA_SRL_SHL(Op, DAG);
4795 case ISD::SHL_PARTS:
4796 case ISD::SRL_PARTS:
4797 case ISD::SRA_PARTS:
4798 return LowerShiftParts(Op, DAG);
4799 case ISD::CTPOP:
4800 return LowerCTPOP(Op, DAG);
4801 case ISD::FCOPYSIGN:
4802 return LowerFCOPYSIGN(Op, DAG);
4803 case ISD::OR:
4804 return LowerVectorOR(Op, DAG);
4805 case ISD::XOR:
4806 return LowerXOR(Op, DAG);
4807 case ISD::PREFETCH:
4808 return LowerPREFETCH(Op, DAG);
4809 case ISD::SINT_TO_FP:
4810 case ISD::UINT_TO_FP:
4811 case ISD::STRICT_SINT_TO_FP:
4812 case ISD::STRICT_UINT_TO_FP:
4813 return LowerINT_TO_FP(Op, DAG);
4814 case ISD::FP_TO_SINT:
4815 case ISD::FP_TO_UINT:
4816 case ISD::STRICT_FP_TO_SINT:
4817 case ISD::STRICT_FP_TO_UINT:
4818 return LowerFP_TO_INT(Op, DAG);
4819 case ISD::FP_TO_SINT_SAT:
4820 case ISD::FP_TO_UINT_SAT:
4821 return LowerFP_TO_INT_SAT(Op, DAG);
4822 case ISD::FSINCOS:
4823 return LowerFSINCOS(Op, DAG);
4824 case ISD::FLT_ROUNDS_:
4825 return LowerFLT_ROUNDS_(Op, DAG);
4826 case ISD::SET_ROUNDING:
4827 return LowerSET_ROUNDING(Op, DAG);
4828 case ISD::MUL:
4829 return LowerMUL(Op, DAG);
4830 case ISD::MULHS:
4831 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED,
4832 /*OverrideNEON=*/true);
4833 case ISD::MULHU:
4834 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
4835 /*OverrideNEON=*/true);
4836 case ISD::INTRINSIC_WO_CHAIN:
4837 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4838 case ISD::STORE:
4839 return LowerSTORE(Op, DAG);
4840 case ISD::MSTORE:
4841 return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
4842 case ISD::MGATHER:
4843 return LowerMGATHER(Op, DAG);
4844 case ISD::MSCATTER:
4845 return LowerMSCATTER(Op, DAG);
4846 case ISD::VECREDUCE_SEQ_FADD:
4847 return LowerVECREDUCE_SEQ_FADD(Op, DAG);
4848 case ISD::VECREDUCE_ADD:
4849 case ISD::VECREDUCE_AND:
4850 case ISD::VECREDUCE_OR:
4851 case ISD::VECREDUCE_XOR:
4852 case ISD::VECREDUCE_SMAX:
4853 case ISD::VECREDUCE_SMIN:
4854 case ISD::VECREDUCE_UMAX:
4855 case ISD::VECREDUCE_UMIN:
4856 case ISD::VECREDUCE_FADD:
4857 case ISD::VECREDUCE_FMAX:
4858 case ISD::VECREDUCE_FMIN:
4859 return LowerVECREDUCE(Op, DAG);
4860 case ISD::ATOMIC_LOAD_SUB:
4861 return LowerATOMIC_LOAD_SUB(Op, DAG);
4862 case ISD::ATOMIC_LOAD_AND:
4863 return LowerATOMIC_LOAD_AND(Op, DAG);
4864 case ISD::DYNAMIC_STACKALLOC:
4865 return LowerDYNAMIC_STACKALLOC(Op, DAG);
4866 case ISD::VSCALE:
4867 return LowerVSCALE(Op, DAG);
4868 case ISD::ANY_EXTEND:
4869 case ISD::SIGN_EXTEND:
4870 case ISD::ZERO_EXTEND:
4871 return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
4872 case ISD::SIGN_EXTEND_INREG: {
4873 // Only custom lower when ExtraVT has a legal byte based element type.
4874 EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4875 EVT ExtraEltVT = ExtraVT.getVectorElementType();
4876 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
4877 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
4878 return SDValue();
4879
4880 return LowerToPredicatedOp(Op, DAG,
4881 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
4882 }
4883 case ISD::TRUNCATE:
4884 return LowerTRUNCATE(Op, DAG);
4885 case ISD::MLOAD:
4886 return LowerMLOAD(Op, DAG);
4887 case ISD::LOAD:
4888 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
4889 return LowerFixedLengthVectorLoadToSVE(Op, DAG);
4890 return LowerLOAD(Op, DAG);
4891 case ISD::ADD:
4892 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
4893 case ISD::AND:
4894 return LowerToScalableOp(Op, DAG);
4895 case ISD::SUB:
4896 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
4897 case ISD::FMAXIMUM:
4898 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
4899 case ISD::FMAXNUM:
4900 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
4901 case ISD::FMINIMUM:
4902 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
4903 case ISD::FMINNUM:
4904 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
4905 case ISD::VSELECT:
4906 return LowerFixedLengthVectorSelectToSVE(Op, DAG);
4907 case ISD::ABS:
4908 return LowerABS(Op, DAG);
4909 case ISD::BITREVERSE:
4910 return LowerBitreverse(Op, DAG);
4911 case ISD::BSWAP:
4912 return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
4913 case ISD::CTLZ:
4914 return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
4915 /*OverrideNEON=*/true);
4916 case ISD::CTTZ:
4917 return LowerCTTZ(Op, DAG);
4918 case ISD::VECTOR_SPLICE:
4919 return LowerVECTOR_SPLICE(Op, DAG);
4920 }
4921}
4922
4923bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
4924 return !Subtarget->useSVEForFixedLengthVectors();
4925}
4926
4927bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
4928 EVT VT, bool OverrideNEON) const {
4929 if (!Subtarget->useSVEForFixedLengthVectors())
4930 return false;
4931
4932 if (!VT.isFixedLengthVector())
4933 return false;
4934
4935 // Don't use SVE for vectors we cannot scalarize if required.
4936 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
4937 // Fixed length predicates should be promoted to i8.
4938 // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
4939 case MVT::i1:
4940 default:
4941 return false;
4942 case MVT::i8:
4943 case MVT::i16:
4944 case MVT::i32:
4945 case MVT::i64:
4946 case MVT::f16:
4947 case MVT::f32:
4948 case MVT::f64:
4949 break;
4950 }
4951
4952 // All SVE implementations support NEON sized vectors.
4953 if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
4954 return true;
4955
4956 // Ensure NEON MVTs only belong to a single register class.
4957 if (VT.getFixedSizeInBits() <= 128)
4958 return false;
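// For example, a 128-bit v4i32 is left to NEON at this point unless
// OverrideNEON already accepted it above.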
4959
4960 // Don't use SVE for types that don't fit.
4961 if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
4962 return false;
4963
4964 // TODO: Perhaps an artificial restriction, but worth having whilst getting
4965 // the base fixed length SVE support in place.
4966 if (!VT.isPow2VectorType())
4967 return false;
4968
4969 return true;
4970}
4971
4972//===----------------------------------------------------------------------===//
4973// Calling Convention Implementation
4974//===----------------------------------------------------------------------===//
4975
4976/// Selects the correct CCAssignFn for a given CallingConvention value.
4977CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
4978 bool IsVarArg) const {
4979 switch (CC) {
4980 default:
4981 report_fatal_error("Unsupported calling convention.");
4982 case CallingConv::WebKit_JS:
4983 return CC_AArch64_WebKit_JS;
4984 case CallingConv::GHC:
4985 return CC_AArch64_GHC;
4986 case CallingConv::C:
4987 case CallingConv::Fast:
4988 case CallingConv::PreserveMost:
4989 case CallingConv::CXX_FAST_TLS:
4990 case CallingConv::Swift:
4991 case CallingConv::SwiftTail:
4992 case CallingConv::Tail:
4993 if (Subtarget->isTargetWindows() && IsVarArg)
4994 return CC_AArch64_Win64_VarArg;
4995 if (!Subtarget->isTargetDarwin())
4996 return CC_AArch64_AAPCS;
4997 if (!IsVarArg)
4998 return CC_AArch64_DarwinPCS;
4999 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
5000 : CC_AArch64_DarwinPCS_VarArg;
5001 case CallingConv::Win64:
5002 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
5003 case CallingConv::CFGuard_Check:
5004 return CC_AArch64_Win64_CFGuard_Check;
5005 case CallingConv::AArch64_VectorCall:
5006 case CallingConv::AArch64_SVE_VectorCall:
5007 return CC_AArch64_AAPCS;
5008 }
5009}
5010
5011CCAssignFn *
5012AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
5013 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
5014 : RetCC_AArch64_AAPCS;
5015}
5016
5017SDValue AArch64TargetLowering::LowerFormalArguments(
5018 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
5019 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5020 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5021 MachineFunction &MF = DAG.getMachineFunction();
5022 MachineFrameInfo &MFI = MF.getFrameInfo();
5023 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
5024
5025 // Assign locations to all of the incoming arguments.
5026 SmallVector<CCValAssign, 16> ArgLocs;
5027 DenseMap<unsigned, SDValue> CopiedRegs;
5028 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
5029 *DAG.getContext());
5030
5031 // At this point, Ins[].VT may already be promoted to i32. To correctly
5032 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5033 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5034 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
5035 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
5036 // LocVT.
5037 unsigned NumArgs = Ins.size();
5038 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
5039 unsigned CurArgIdx = 0;
5040 for (unsigned i = 0; i != NumArgs; ++i) {
5041 MVT ValVT = Ins[i].VT;
5042 if (Ins[i].isOrigArg()) {
5043 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
5044 CurArgIdx = Ins[i].getOrigArgIndex();
5045
5046 // Get type of the original argument.
5047 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
5048 /*AllowUnknown*/ true);
5049 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
5050 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5051 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5052 ValVT = MVT::i8;
5053 else if (ActualMVT == MVT::i16)
5054 ValVT = MVT::i16;
5055 }
5056 bool UseVarArgCC = false;
5057 if (IsWin64)
5058 UseVarArgCC = isVarArg;
5059 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
5060 bool Res =
5061 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
5062 assert(!Res && "Call operand has unhandled type");
5063 (void)Res;
5064 }
5065 SmallVector<SDValue, 16> ArgValues;
5066 unsigned ExtraArgLocs = 0;
5067 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5068 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5069
5070 if (Ins[i].Flags.isByVal()) {
5071 // Byval is used for HFAs in the PCS, but the system should work in a
5072 // non-compliant manner for larger structs.
5073 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5074 int Size = Ins[i].Flags.getByValSize();
5075 unsigned NumRegs = (Size + 7) / 8;
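// e.g. a 24-byte byval struct rounds up to NumRegs = 3 eight-byte slots.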
5076
5077 // FIXME: This works on big-endian for composite byvals, which are the common
5078 // case. It should also work for fundamental types.
5079 unsigned FrameIdx =
5080 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
5081 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
5082 InVals.push_back(FrameIdxN);
5083
5084 continue;
5085 }
5086
5087 if (Ins[i].Flags.isSwiftAsync())
5088 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5089
5090 SDValue ArgValue;
5091 if (VA.isRegLoc()) {
5092 // Arguments stored in registers.
5093 EVT RegVT = VA.getLocVT();
5094 const TargetRegisterClass *RC;
5095
5096 if (RegVT == MVT::i32)
5097 RC = &AArch64::GPR32RegClass;
5098 else if (RegVT == MVT::i64)
5099 RC = &AArch64::GPR64RegClass;
5100 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
5101 RC = &AArch64::FPR16RegClass;
5102 else if (RegVT == MVT::f32)
5103 RC = &AArch64::FPR32RegClass;
5104 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
5105 RC = &AArch64::FPR64RegClass;
5106 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
5107 RC = &AArch64::FPR128RegClass;
5108 else if (RegVT.isScalableVector() &&
5109 RegVT.getVectorElementType() == MVT::i1)
5110 RC = &AArch64::PPRRegClass;
5111 else if (RegVT.isScalableVector())
5112 RC = &AArch64::ZPRRegClass;
5113 else
5114 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
5115
5116 // Transform the arguments in physical registers into virtual ones.
5117 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
5118 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
5119
5120 // If this is an 8, 16 or 32-bit value, it is really passed promoted
5121 // to 64 bits. Insert an assert[sz]ext to capture this, then
5122 // truncate to the right size.
5123 switch (VA.getLocInfo()) {
5124 default:
5125 llvm_unreachable("Unknown loc info!");
5126 case CCValAssign::Full:
5127 break;
5128 case CCValAssign::Indirect:
5129 assert(VA.getValVT().isScalableVector() &&
5130 "Only scalable vectors can be passed indirectly");
5131 break;
5132 case CCValAssign::BCvt:
5133 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
5134 break;
5135 case CCValAssign::AExt:
5136 case CCValAssign::SExt:
5137 case CCValAssign::ZExt:
5138 break;
5139 case CCValAssign::AExtUpper:
5140 ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
5141 DAG.getConstant(32, DL, RegVT));
5142 ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
5143 break;
5144 }
5145 } else { // VA.isRegLoc()
5146 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
5147 unsigned ArgOffset = VA.getLocMemOffset();
5148 unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
5149 ? VA.getLocVT().getSizeInBits()
5150 : VA.getValVT().getSizeInBits()) / 8;
5151
5152 uint32_t BEAlign = 0;
5153 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
5154 !Ins[i].Flags.isInConsecutiveRegs())
5155 BEAlign = 8 - ArgSize;
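// e.g. a 4-byte argument gets BEAlign = 4 so its bytes are read from the
// correct end of the 8-byte slot on big-endian.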
5156
5157 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
5158
5159 // Create load nodes to retrieve arguments from the stack.
5160 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5161
5162 // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
5163 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
5164 MVT MemVT = VA.getValVT();
5165
5166 switch (VA.getLocInfo()) {
5167 default:
5168 break;
5169 case CCValAssign::Trunc:
5170 case CCValAssign::BCvt:
5171 MemVT = VA.getLocVT();
5172 break;
5173 case CCValAssign::Indirect:
5174 assert(VA.getValVT().isScalableVector() &&
5175 "Only scalable vectors can be passed indirectly");
5176 MemVT = VA.getLocVT();
5177 break;
5178 case CCValAssign::SExt:
5179 ExtType = ISD::SEXTLOAD;
5180 break;
5181 case CCValAssign::ZExt:
5182 ExtType = ISD::ZEXTLOAD;
5183 break;
5184 case CCValAssign::AExt:
5185 ExtType = ISD::EXTLOAD;
5186 break;
5187 }
5188
5189 ArgValue = DAG.getExtLoad(
5190 ExtType, DL, VA.getLocVT(), Chain, FIN,
5191 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
5192 MemVT);
5193 }
5194
5195 if (VA.getLocInfo() == CCValAssign::Indirect) {
5196 assert(VA.getValVT().isScalableVector() &&
5197 "Only scalable vectors can be passed indirectly");
5198
5199 uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
5200 unsigned NumParts = 1;
5201 if (Ins[i].Flags.isInConsecutiveRegs()) {
5202 assert(!Ins[i].Flags.isInConsecutiveRegsLast());
5203 while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
5204 ++NumParts;
5205 }
5206
5207 MVT PartLoad = VA.getValVT();
5208 SDValue Ptr = ArgValue;
5209
5210 // Ensure we generate all loads for each tuple part, whilst updating the
5211 // pointer after each load correctly using vscale.
5212 while (NumParts > 0) {
5213 ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
5214 InVals.push_back(ArgValue);
5215 NumParts--;
5216 if (NumParts > 0) {
5217 SDValue BytesIncrement = DAG.getVScale(
5218 DL, Ptr.getValueType(),
5219 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
5220 SDNodeFlags Flags;
5221 Flags.setNoUnsignedWrap(true);
5222 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5223 BytesIncrement, Flags);
5224 ExtraArgLocs++;
5225 i++;
5226 }
5227 }
5228 } else {
5229 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
5230 ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
5231 ArgValue, DAG.getValueType(MVT::i32));
5232 InVals.push_back(ArgValue);
5233 }
5234 }
5235 assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
5236
5237 // varargs
5238 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5239 if (isVarArg) {
5240 if (!Subtarget->isTargetDarwin() || IsWin64) {
5241 // The AAPCS variadic function ABI is identical to the non-variadic
5242 // one. As a result there may be more arguments in registers and we should
5243 // save them for future reference.
5244 // Win64 variadic functions also pass arguments in registers, but all float
5245 // arguments are passed in integer registers.
5246 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
5247 }
5248
5249 // This will point to the next argument passed via stack.
5250 unsigned StackOffset = CCInfo.getNextStackOffset();
5251 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
5252 StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
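// e.g. a next stack offset of 12 is rounded up to 16 with 8-byte alignment,
// or left at 12 under ILP32.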
5253 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
5254
5255 if (MFI.hasMustTailInVarArgFunc()) {
5256 SmallVector<MVT, 2> RegParmTypes;
5257 RegParmTypes.push_back(MVT::i64);
5258 RegParmTypes.push_back(MVT::f128);
5259 // Compute the set of forwarded registers. The rest are scratch.
5260 SmallVectorImpl<ForwardedRegister> &Forwards =
5261 FuncInfo->getForwardedMustTailRegParms();
5262 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
5263 CC_AArch64_AAPCS);
5264
5265 // Conservatively forward X8, since it might be used for aggregate return.
5266 if (!CCInfo.isAllocated(AArch64::X8)) {
5267 unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
5268 Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
5269 }
5270 }
5271 }
5272
5273 // On Windows, InReg pointers must be returned, so record the pointer in a
5274 // virtual register at the start of the function so it can be returned in the
5275 // epilogue.
5276 if (IsWin64) {
5277 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
5278 if (Ins[I].Flags.isInReg()) {
5279 assert(!FuncInfo->getSRetReturnReg());
5280
5281 MVT PtrTy = getPointerTy(DAG.getDataLayout());
5282 Register Reg =
5283 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
5284 FuncInfo->setSRetReturnReg(Reg);
5285
5286 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
5287 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
5288 break;
5289 }
5290 }
5291 }
5292
5293 unsigned StackArgSize = CCInfo.getNextStackOffset();
5294 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5295 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
5296 // This is a non-standard ABI so by fiat I say we're allowed to make full
5297 // use of the stack area to be popped, which must be aligned to 16 bytes in
5298 // any case:
5299 StackArgSize = alignTo(StackArgSize, 16);
5300
5301 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
5302 // a multiple of 16.
5303 FuncInfo->setArgumentStackToRestore(StackArgSize);
5304
5305 // This realignment carries over to the available bytes below. Our own
5306 // callers will guarantee the space is free by giving an aligned value to
5307 // CALLSEQ_START.
5308 }
5309 // Even if we're not expected to free up the space, it's useful to know how
5310 // much is there while considering tail calls (because we can reuse it).
5311 FuncInfo->setBytesInStackArgArea(StackArgSize);
5312
5313 if (Subtarget->hasCustomCallingConv())
5314 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
5315
5316 return Chain;
5317}
5318
5319void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
5320 SelectionDAG &DAG,
5321 const SDLoc &DL,
5322 SDValue &Chain) const {
5323 MachineFunction &MF = DAG.getMachineFunction();
5324 MachineFrameInfo &MFI = MF.getFrameInfo();
5325 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5326 auto PtrVT = getPointerTy(DAG.getDataLayout());
5327 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
5328
5329 SmallVector<SDValue, 8> MemOps;
5330
5331 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
5332 AArch64::X3, AArch64::X4, AArch64::X5,
5333 AArch64::X6, AArch64::X7 };
5334 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
5335 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
5336
5337 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
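// e.g. if X0-X2 already hold named arguments, FirstVariadicGPR = 3 and
// GPRSaveSize = 40 bytes (X3-X7).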
5338 int GPRIdx = 0;
5339 if (GPRSaveSize != 0) {
5340 if (IsWin64) {
5341 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
5342 if (GPRSaveSize & 15)
5343 // The extra size here, if triggered, will always be 8.
5344 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
5345 } else
5346 GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
5347
5348 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
5349
5350 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
5351 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
5352 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
5353 SDValue Store = DAG.getStore(
5354 Val.getValue(1), DL, Val, FIN,
5355 IsWin64
5356 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
5357 GPRIdx,
5358 (i - FirstVariadicGPR) * 8)
5359 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
5360 MemOps.push_back(Store);
5361 FIN =
5362 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
5363 }
5364 }
5365 FuncInfo->setVarArgsGPRIndex(GPRIdx);
5366 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
5367
5368 if (Subtarget->hasFPARMv8() && !IsWin64) {
5369 static const MCPhysReg FPRArgRegs[] = {
5370 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
5371 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
5372 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
5373 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
5374
5375 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
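// e.g. if Q0 and Q1 are used by named arguments, FirstVariadicFPR = 2 and
// FPRSaveSize = 96 bytes (Q2-Q7).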
5376 int FPRIdx = 0;
5377 if (FPRSaveSize != 0) {
5378 FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
5379
5380 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
5381
5382 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
5383 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
5384 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
5385
5386 SDValue Store = DAG.getStore(
5387 Val.getValue(1), DL, Val, FIN,
5388 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
5389 MemOps.push_back(Store);
5390 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
5391 DAG.getConstant(16, DL, PtrVT));
5392 }
5393 }
5394 FuncInfo->setVarArgsFPRIndex(FPRIdx);
5395 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
5396 }
5397
5398 if (!MemOps.empty()) {
5399 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
5400 }
5401}
5402
5403/// LowerCallResult - Lower the result values of a call into the
5404/// appropriate copies out of appropriate physical registers.
5405SDValue AArch64TargetLowering::LowerCallResult(
5406 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5407 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5408 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
5409 SDValue ThisVal) const {
5410 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
5411 // Assign locations to each value returned by this call.
5412 SmallVector<CCValAssign, 16> RVLocs;
5413 DenseMap<unsigned, SDValue> CopiedRegs;
5414 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5415 *DAG.getContext());
5416 CCInfo.AnalyzeCallResult(Ins, RetCC);
5417
5418 // Copy all of the result registers out of their specified physreg.
5419 for (unsigned i = 0; i != RVLocs.size(); ++i) {
5420 CCValAssign VA = RVLocs[i];
5421
5422 // Pass 'this' value directly from the argument to return value, to avoid
5423 // reg unit interference
5424 if (i == 0 && isThisReturn) {
5425 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
5426 "unexpected return calling convention register assignment");
5427 InVals.push_back(ThisVal);
5428 continue;
5429 }
5430
5431 // Avoid copying a physreg twice since RegAllocFast is incompetent and only
5432 // allows one use of a physreg per block.
5433 SDValue Val = CopiedRegs.lookup(VA.getLocReg());
5434 if (!Val) {
5435 Val =
5436 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
5437 Chain = Val.getValue(1);
5438 InFlag = Val.getValue(2);
5439 CopiedRegs[VA.getLocReg()] = Val;
5440 }
5441
5442 switch (VA.getLocInfo()) {
5443 default:
5444 llvm_unreachable("Unknown loc info!");
5445 case CCValAssign::Full:
5446 break;
5447 case CCValAssign::BCvt:
5448 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5449 break;
5450 case CCValAssign::AExtUpper:
5451 Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
5452 DAG.getConstant(32, DL, VA.getLocVT()));
5453 LLVM_FALLTHROUGH;
5454 case CCValAssign::AExt:
5455 LLVM_FALLTHROUGH;
5456 case CCValAssign::ZExt:
5457 Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
5458 break;
5459 }
5460
5461 InVals.push_back(Val);
5462 }
5463
5464 return Chain;
5465}
5466
5467/// Return true if the calling convention is one that we can guarantee TCO for.
5468static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
5469 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
5470 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
5471}
5472
5473/// Return true if we might ever do TCO for calls with this calling convention.
5474static bool mayTailCallThisCC(CallingConv::ID CC) {
5475 switch (CC) {
5476 case CallingConv::C:
5477 case CallingConv::AArch64_SVE_VectorCall:
5478 case CallingConv::PreserveMost:
5479 case CallingConv::Swift:
5480 case CallingConv::SwiftTail:
5481 case CallingConv::Tail:
5482 case CallingConv::Fast:
5483 return true;
5484 default:
5485 return false;
5486 }
5487}
5488
5489bool AArch64TargetLowering::isEligibleForTailCallOptimization(
5490 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
5491 const SmallVectorImpl<ISD::OutputArg> &Outs,
5492 const SmallVectorImpl<SDValue> &OutVals,
5493 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
5494 if (!mayTailCallThisCC(CalleeCC))
5495 return false;
5496
5497 MachineFunction &MF = DAG.getMachineFunction();
5498 const Function &CallerF = MF.getFunction();
5499 CallingConv::ID CallerCC = CallerF.getCallingConv();
5500
5501 // Functions using the C or Fast calling convention that have an SVE signature
5502 // preserve more registers and should assume the SVE_VectorCall CC.
5503 // The check for matching callee-saved regs will determine whether it is
5504 // eligible for TCO.
5505 if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
5506 AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
5507 CallerCC = CallingConv::AArch64_SVE_VectorCall;
5508
5509 bool CCMatch = CallerCC == CalleeCC;
5510
5511 // When using the Windows calling convention on a non-Windows OS, we want
5512 // to back up and restore X18 in such functions; we can't do a tail call
5513 // from those functions.
5514 if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
5515 CalleeCC != CallingConv::Win64)
5516 return false;
5517
5518 // Byval parameters hand the function a pointer directly into the stack area
5519 // we want to reuse during a tail call. Working around this *is* possible (see
5520 // X86) but less efficient and uglier in LowerCall.
5521 for (Function::const_arg_iterator i = CallerF.arg_begin(),
5522 e = CallerF.arg_end();
5523 i != e; ++i) {
5524 if (i->hasByValAttr())
5525 return false;
5526
5527 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
5528 // In this case, it is necessary to save/restore X0 in the callee. Tail
5529 // call opt interferes with this. So we disable tail call opt when the
5530 // caller has an argument with "inreg" attribute.
5531
5532 // FIXME: Check whether the callee also has an "inreg" argument.
5533 if (i->hasInRegAttr())
5534 return false;
5535 }
5536
5537 if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
5538 return CCMatch;
5539
5540 // Externally-defined functions with weak linkage should not be
5541 // tail-called on AArch64 when the OS does not support dynamic
5542 // pre-emption of symbols, as the AAELF spec requires normal calls
5543 // to undefined weak functions to be replaced with a NOP or jump to the
5544 // next instruction. The behaviour of branch instructions in this
5545 // situation (as used for tail calls) is implementation-defined, so we
5546 // cannot rely on the linker replacing the tail call with a return.
5547 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5548 const GlobalValue *GV = G->getGlobal();
5549 const Triple &TT = getTargetMachine().getTargetTriple();
5550 if (GV->hasExternalWeakLinkage() &&
5551 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
5552 return false;
5553 }
5554
5555 // Now we search for cases where we can use a tail call without changing the
5556 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
5557 // concept.
5558
5559 // I want anyone implementing a new calling convention to think long and hard
5560 // about this assert.
5561 assert((!isVarArg || CalleeCC == CallingConv::C) &&
5562 "Unexpected variadic calling convention");
5563
5564 LLVMContext &C = *DAG.getContext();
5565 if (isVarArg && !Outs.empty()) {
5566 // At least two cases here: if caller is fastcc then we can't have any
5567 // memory arguments (we'd be expected to clean up the stack afterwards). If
5568 // caller is C then we could potentially use its argument area.
5569
5570 // FIXME: for now we take the most conservative of these in both cases:
5571 // disallow all variadic memory operands.
5572 SmallVector<CCValAssign, 16> ArgLocs;
5573 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5574
5575 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
5576 for (const CCValAssign &ArgLoc : ArgLocs)
5577 if (!ArgLoc.isRegLoc())
5578 return false;
5579 }
5580
5581 // Check that the call results are passed in the same way.
5582 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
5583 CCAssignFnForCall(CalleeCC, isVarArg),
5584 CCAssignFnForCall(CallerCC, isVarArg)))
5585 return false;
5586 // The callee has to preserve all registers the caller needs to preserve.
5587 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5588 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5589 if (!CCMatch) {
5590 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5591 if (Subtarget->hasCustomCallingConv()) {
5592 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
5593 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
5594 }
5595 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5596 return false;
5597 }
5598
5599 // Nothing more to check if the callee is taking no arguments
5600 if (Outs.empty())
5601 return true;
5602
5603 SmallVector<CCValAssign, 16> ArgLocs;
5604 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5605
5606 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
5607
5608 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5609
5610 // If any of the arguments is passed indirectly, it must be SVE, so the
5611 // 'getBytesInStackArgArea' is not sufficient to determine whether we need to
5612 // allocate space on the stack. That is why we determine explicitly here that
5613 // such a call cannot be a tailcall.
5614 if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
5615 assert((A.getLocInfo() != CCValAssign::Indirect ||
5616 A.getValVT().isScalableVector()) &&
5617 "Expected value to be scalable");
5618 return A.getLocInfo() == CCValAssign::Indirect;
5619 }))
5620 return false;
5621
5622 // If the stack arguments for this call do not fit into our own save area then
5623 // the call cannot be made tail.
5624 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
5625 return false;
5626
5627 const MachineRegisterInfo &MRI = MF.getRegInfo();
5628 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5629 return false;
5630
5631 return true;
5632}
5633
5634SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
5635 SelectionDAG &DAG,
5636 MachineFrameInfo &MFI,
5637 int ClobberedFI) const {
5638 SmallVector<SDValue, 8> ArgChains;
5639 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
5640 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
5641
5642 // Include the original chain at the beginning of the list. When this is
5643 // used by target LowerCall hooks, this helps legalize find the
5644 // CALLSEQ_BEGIN node.
5645 ArgChains.push_back(Chain);
5646
5647 // Add a chain value for each incoming stack-argument load that overlaps the
// clobbered object.
5648 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
5649 UE = DAG.getEntryNode().getNode()->use_end();
5650 U != UE; ++U)
5651 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
5652 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
5653 if (FI->getIndex() < 0) {
5654 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
5655 int64_t InLastByte = InFirstByte;
5656 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
5657
5658 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
5659 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
5660 ArgChains.push_back(SDValue(L, 1));
5661 }
5662
5663 // Build a tokenfactor for all the chains.
5664 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
5665}
5666
5667bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
5668 bool TailCallOpt) const {
5669 return (CallCC == CallingConv::Fast && TailCallOpt) ||
5670 CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
5671}
5672
5673/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
5674/// and add input and output parameter nodes.
5675SDValue
5676AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
5677 SmallVectorImpl<SDValue> &InVals) const {
5678 SelectionDAG &DAG = CLI.DAG;
5679 SDLoc &DL = CLI.DL;
5680 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
5681 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
5682 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
5683 SDValue Chain = CLI.Chain;
5684 SDValue Callee = CLI.Callee;
5685 bool &IsTailCall = CLI.IsTailCall;
5686 CallingConv::ID CallConv = CLI.CallConv;
5687 bool IsVarArg = CLI.IsVarArg;
5688
5689 MachineFunction &MF = DAG.getMachineFunction();
5690 MachineFunction::CallSiteInfo CSInfo;
5691 bool IsThisReturn = false;
5692
5693 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5694 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5695 bool IsSibCall = false;
5696 bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CallConv);
5697
5698 // Check callee args/returns for SVE registers and set calling convention
5699 // accordingly.
5700 if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
5701 bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
5702 return Out.VT.isScalableVector();
5703 });
5704 bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
5705 return In.VT.isScalableVector();
5706 });
5707
5708 if (CalleeInSVE || CalleeOutSVE)
5709 CallConv = CallingConv::AArch64_SVE_VectorCall;
5710 }
5711
5712 if (IsTailCall) {
5713 // Check if it's really possible to do a tail call.
5714 IsTailCall = isEligibleForTailCallOptimization(
5715 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
5716
5717 // A sibling call is one where we're under the usual C ABI and not planning
5718 // to change that but can still do a tail call:
5719 if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail &&
5720 CallConv != CallingConv::SwiftTail)
5721 IsSibCall = true;
5722
5723 if (IsTailCall)
5724 ++NumTailCalls;
5725 }
5726
5727 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
5728 report_fatal_error("failed to perform tail call elimination on a call "
5729 "site marked musttail");
5730
5731 // Analyze operands of the call, assigning locations to each operand.
5732 SmallVector<CCValAssign, 16> ArgLocs;
5733 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
5734 *DAG.getContext());
5735
5736 if (IsVarArg) {
5737 // Handle fixed and variable vector arguments differently.
5738 // Variable vector arguments always go into memory.
5739 unsigned NumArgs = Outs.size();
5740
5741 for (unsigned i = 0; i != NumArgs; ++i) {
5742 MVT ArgVT = Outs[i].VT;
5743 if (!Outs[i].IsFixed && ArgVT.isScalableVector())
5744 report_fatal_error("Passing SVE types to variadic functions is "
5745 "currently not supported");
5746
5747 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5748 bool UseVarArgCC = !Outs[i].IsFixed;
5749 // On Windows, the fixed arguments in a vararg call are passed in GPRs
5750 // too, so use the vararg CC to force them to integer registers.
5751 if (IsCalleeWin64)
5752 UseVarArgCC = true;
5753 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
5754 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
5755 assert(!Res && "Call operand has unhandled type");
5757 }
5758 } else {
5759 // At this point, Outs[].VT may already be promoted to i32. To correctly
5760 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5761 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5762 // Since AnalyzeCallOperands uses Outs[].VT for both ValVT and LocVT, here
5763 // we use a special version of AnalyzeCallOperands to pass in ValVT and
5764 // LocVT.
5765 unsigned NumArgs = Outs.size();
5766 for (unsigned i = 0; i != NumArgs; ++i) {
5767 MVT ValVT = Outs[i].VT;
5768 // Get type of the original argument.
5769 EVT ActualVT = getValueType(DAG.getDataLayout(),
5770 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
5771 /*AllowUnknown*/ true);
5772 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
5773 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5774 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5775 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5776 ValVT = MVT::i8;
5777 else if (ActualMVT == MVT::i16)
5778 ValVT = MVT::i16;
5779
5780 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
5781 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
5782 assert(!Res && "Call operand has unhandled type");
5784 }
5785 }
5786
5787 // Get a count of how many bytes are to be pushed on the stack.
5788 unsigned NumBytes = CCInfo.getNextStackOffset();
5789
5790 if (IsSibCall) {
5791 // Since we're not changing the ABI to make this a tail call, the memory
5792 // operands are already available in the caller's incoming argument space.
5793 NumBytes = 0;
5794 }
5795
5796 // FPDiff is the byte offset of the call's argument area from the callee's.
5797 // Stores to callee stack arguments will be placed in FixedStackSlots offset
5798 // by this amount for a tail call. In a sibling call it must be 0 because the
5799 // caller will deallocate the entire stack and the callee still expects its
5800 // arguments to begin at SP+0. Completely unused for non-tail calls.
5801 int FPDiff = 0;
5802
5803 if (IsTailCall && !IsSibCall) {
5804 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
5805
5806 // Since callee will pop argument stack as a tail call, we must keep the
5807 // popped size 16-byte aligned.
5808 NumBytes = alignTo(NumBytes, 16);
5809
5810 // FPDiff will be negative if this tail call requires more space than we
5811 // would automatically have in our incoming argument space. Positive if we
5812 // can actually shrink the stack.
5813 FPDiff = NumReusableBytes - NumBytes;
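// e.g. reusing a 32-byte incoming argument area for a tail call that needs
// 48 bytes gives FPDiff = -16.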
5814
5815 // Update the required reserved area if this is the tail call requiring the
5816 // most argument stack space.
5817 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
5818 FuncInfo->setTailCallReservedStack(-FPDiff);
5819
5820 // The stack pointer must be 16-byte aligned at all times it's used for a
5821 // memory operation, which in practice means at *all* times and in
5822 // particular across call boundaries. Therefore our own arguments started at
5823 // a 16-byte aligned SP and the delta applied for the tail call should
5824 // satisfy the same constraint.
5825 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
5826 }
5827
5828 // Adjust the stack pointer for the new arguments...
5829 // These operations are automatically eliminated by the prolog/epilog pass
5830 if (!IsSibCall)
5831 Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL);
5832
5833 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
5834 getPointerTy(DAG.getDataLayout()));
5835
5836 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5837 SmallSet<unsigned, 8> RegsUsed;
5838 SmallVector<SDValue, 8> MemOpChains;
5839 auto PtrVT = getPointerTy(DAG.getDataLayout());
5840
5841 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
5842 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
5843 for (const auto &F : Forwards) {
5844 SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
5845 RegsToPass.emplace_back(F.PReg, Val);
5846 }
5847 }
5848
5849 // Walk the register/memloc assignments, inserting copies/loads.
5850 unsigned ExtraArgLocs = 0;
5851 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5852 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5853 SDValue Arg = OutVals[i];
5854 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5855
5856 // Promote the value if needed.
5857 switch (VA.getLocInfo()) {
5858 default:
5859 llvm_unreachable("Unknown loc info!");
5860 case CCValAssign::Full:
5861 break;
5862 case CCValAssign::SExt:
5863 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
5864 break;
5865 case CCValAssign::ZExt:
5866 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
5867 break;
5868 case CCValAssign::AExt:
5869 if (Outs[i].ArgVT == MVT::i1) {
5870 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
5871 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
5872 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
5873 }
5874 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5875 break;
5876 case CCValAssign::AExtUpper:
5877 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
5878 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5879 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
5880 DAG.getConstant(32, DL, VA.getLocVT()));
5881 break;
5882 case CCValAssign::BCvt:
5883 Arg = DAG.getBitcast(VA.getLocVT(), Arg);
5884 break;
5885 case CCValAssign::Trunc:
5886 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
5887 break;
5888 case CCValAssign::FPExt:
5889 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
5890 break;
5891 case CCValAssign::Indirect:
5892 assert(VA.getValVT().isScalableVector() &&
5893 "Only scalable vectors can be passed indirectly");
5894
5895 uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
5896 uint64_t PartSize = StoreSize;
5897 unsigned NumParts = 1;
5898 if (Outs[i].Flags.isInConsecutiveRegs()) {
5899 assert(!Outs[i].Flags.isInConsecutiveRegsLast());
5900 while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
5901 ++NumParts;
5902 StoreSize *= NumParts;
5903 }
5904
5905 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5906 Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
5907 Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
5908 int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
5909 MFI.setStackID(FI, TargetStackID::ScalableVector);
5910
5911 MachinePointerInfo MPI =
5912 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
5913 SDValue Ptr = DAG.getFrameIndex(
5914 FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
5915 SDValue SpillSlot = Ptr;
5916
5917 // Ensure we generate all stores for each tuple part, whilst updating the
5918 // pointer after each store correctly using vscale.
5919 while (NumParts) {
5920 Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
5921 NumParts--;
5922 if (NumParts > 0) {
5923 SDValue BytesIncrement = DAG.getVScale(
5924 DL, Ptr.getValueType(),
5925 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
5926 SDNodeFlags Flags;
5927 Flags.setNoUnsignedWrap(true);
5928
5929 MPI = MachinePointerInfo(MPI.getAddrSpace());
5930 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5931 BytesIncrement, Flags);
5932 ExtraArgLocs++;
5933 i++;
5934 }
5935 }
5936
5937 Arg = SpillSlot;
5938 break;
5939 }
5940
5941 if (VA.isRegLoc()) {
5942 if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
5943 Outs[0].VT == MVT::i64) {
5944 assert(VA.getLocVT() == MVT::i64 &&
5945 "unexpected calling convention register assignment");
5946 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
5947 "unexpected use of 'returned'");
5948 IsThisReturn = true;
5949 }
5950 if (RegsUsed.count(VA.getLocReg())) {
5951 // If this register has already been used then we're trying to pack
5952 // parts of an [N x i32] into an X-register. The extension type will
5953 // take care of putting the two halves in the right place but we have to
5954 // combine them.
5955 SDValue &Bits =
5956 llvm::find_if(RegsToPass,
5957 [=](const std::pair<unsigned, SDValue> &Elt) {
5958 return Elt.first == VA.getLocReg();
5959 })
5960 ->second;
5961 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
5962 // Call site info is used for function's parameter entry value
5963 // tracking. For now we track only simple cases when parameter
5964 // is transferred through whole register.
5965 llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
5966 return ArgReg.Reg == VA.getLocReg();
5967 });
5968 } else {
5969 RegsToPass.emplace_back(VA.getLocReg(), Arg);
5970 RegsUsed.insert(VA.getLocReg());
5971 const TargetOptions &Options = DAG.getTarget().Options;
5972 if (Options.EmitCallSiteInfo)
5973 CSInfo.emplace_back(VA.getLocReg(), i);
5974 }
5975 } else {
5976 assert(VA.isMemLoc());
5977
5978 SDValue DstAddr;
5979 MachinePointerInfo DstInfo;
5980
5981 // FIXME: This works on big-endian for composite byvals, which are the
5982 // common case. It should also work for fundamental types.
5983 uint32_t BEAlign = 0;
5984 unsigned OpSize;
5985 if (VA.getLocInfo() == CCValAssign::Indirect ||
5986 VA.getLocInfo() == CCValAssign::Trunc)
5987 OpSize = VA.getLocVT().getFixedSizeInBits();
5988 else
5989 OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
5990 : VA.getValVT().getSizeInBits();
5991 OpSize = (OpSize + 7) / 8;
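// Round up to whole bytes, e.g. an i1 or i8 operand becomes OpSize = 1.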
5992 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
5993 !Flags.isInConsecutiveRegs()) {
5994 if (OpSize < 8)
5995 BEAlign = 8 - OpSize;
5996 }
5997 unsigned LocMemOffset = VA.getLocMemOffset();
5998 int32_t Offset = LocMemOffset + BEAlign;
5999 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
6000 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
6001
6002 if (IsTailCall) {
6003 Offset = Offset + FPDiff;
6004 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
6005
6006 DstAddr = DAG.getFrameIndex(FI, PtrVT);
6007 DstInfo =
6008 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
6009
6010 // Make sure any stack arguments overlapping with where we're storing
6011 // are loaded before this eventual operation. Otherwise they'll be
6012 // clobbered.
6013 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
6014 } else {
6015 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
6016
6017 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
6018 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
6019 LocMemOffset);
6020 }
6021
6022 if (Outs[i].Flags.isByVal()) {
6023 SDValue SizeNode =
6024 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
6025 SDValue Cpy = DAG.getMemcpy(
6026 Chain, DL, DstAddr, Arg, SizeNode,
6027 Outs[i].Flags.getNonZeroByValAlign(),
6028 /*isVol = */ false, /*AlwaysInline = */ false,
6029 /*isTailCall = */ false, DstInfo, MachinePointerInfo());
6030
6031 MemOpChains.push_back(Cpy);
6032 } else {
6033 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
6034 // promo