Bug Summary

File: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1114, column 10
Called C++ object pointer is null
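
The checker behind this warning fires when a member function is invoked through an object pointer that can be null on some feasible path. The sketch below is a minimal, self-contained illustration of that pattern and of the conventional guard; the types and names (sketch::Node, sketch::Value, valueTypeOrZero) are hypothetical stand-ins for an SDValue/SDNode-style wrapper, not the actual code at SelectionDAGNodes.h:1114.

// Minimal sketch (hypothetical types, not LLVM's) of the
// "Called C++ object pointer is null" pattern: a thin value wrapper
// forwards a call through a node pointer that may be null on some path.
#include <iostream>

namespace sketch {

struct Node {
  int VT = 0;
  int getValueType(unsigned /*ResNo*/) const { return VT; }
};

// Rough analogue of an SDValue-style (node, result-number) pair.
struct Value {
  Node *N = nullptr; // null for an "empty" value
  unsigned ResNo = 0;

  Node *getNode() const { return N; }

  // If N can be null when this is reached, the forwarding call below is
  // the dereference the analyzer reports at the header's call site.
  int getValueType() const { return N->getValueType(ResNo); }
};

// Guarded use: check the node pointer before calling through the wrapper.
int valueTypeOrZero(const Value &V) {
  if (!V.getNode())
    return 0;
  return V.getValueType();
}

} // namespace sketch

int main() {
  sketch::Node N{42};
  sketch::Value Good{&N, 0};
  sketch::Value Empty; // node pointer stays null
  std::cout << sketch::valueTypeOrZero(Good) << ' '
            << sketch::valueTypeOrZero(Empty) << '\n';
  return 0;
}

In LLVM itself the analogous guard is typically a check on SDValue::getNode() before the wrapped node is used; the annotated source below is the translation unit the analyzer traced to reach the flagged call in the header.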

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2021-01-26-035717-31997-1 -x c++ /build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64ISelLowering.h"
14#include "AArch64CallingConvention.h"
15#include "AArch64ExpandImm.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/ObjCARCRVAttr.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/CallingConvLower.h"
35#include "llvm/CodeGen/MachineBasicBlock.h"
36#include "llvm/CodeGen/MachineFrameInfo.h"
37#include "llvm/CodeGen/MachineFunction.h"
38#include "llvm/CodeGen/MachineInstr.h"
39#include "llvm/CodeGen/MachineInstrBuilder.h"
40#include "llvm/CodeGen/MachineMemOperand.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/RuntimeLibcalls.h"
43#include "llvm/CodeGen/SelectionDAG.h"
44#include "llvm/CodeGen/SelectionDAGNodes.h"
45#include "llvm/CodeGen/TargetCallingConv.h"
46#include "llvm/CodeGen/TargetInstrInfo.h"
47#include "llvm/CodeGen/ValueTypes.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DebugLoc.h"
52#include "llvm/IR/DerivedTypes.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/GetElementPtrTypeIterator.h"
55#include "llvm/IR/GlobalValue.h"
56#include "llvm/IR/IRBuilder.h"
57#include "llvm/IR/Instruction.h"
58#include "llvm/IR/Instructions.h"
59#include "llvm/IR/IntrinsicInst.h"
60#include "llvm/IR/Intrinsics.h"
61#include "llvm/IR/IntrinsicsAArch64.h"
62#include "llvm/IR/Module.h"
63#include "llvm/IR/OperandTraits.h"
64#include "llvm/IR/PatternMatch.h"
65#include "llvm/IR/Type.h"
66#include "llvm/IR/Use.h"
67#include "llvm/IR/Value.h"
68#include "llvm/MC/MCRegisterInfo.h"
69#include "llvm/Support/Casting.h"
70#include "llvm/Support/CodeGen.h"
71#include "llvm/Support/CommandLine.h"
72#include "llvm/Support/Compiler.h"
73#include "llvm/Support/Debug.h"
74#include "llvm/Support/ErrorHandling.h"
75#include "llvm/Support/KnownBits.h"
76#include "llvm/Support/MachineValueType.h"
77#include "llvm/Support/MathExtras.h"
78#include "llvm/Support/raw_ostream.h"
79#include "llvm/Target/TargetMachine.h"
80#include "llvm/Target/TargetOptions.h"
81#include <algorithm>
82#include <bitset>
83#include <cassert>
84#include <cctype>
85#include <cstdint>
86#include <cstdlib>
87#include <iterator>
88#include <limits>
89#include <tuple>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94using namespace llvm::PatternMatch;
95
96#define DEBUG_TYPE "aarch64-lower"
97
98STATISTIC(NumTailCalls, "Number of tail calls");
99STATISTIC(NumShiftInserts, "Number of vector shift inserts");
100STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
101
102// FIXME: The necessary dtprel relocations don't seem to be supported
103// well in the GNU bfd and gold linkers at the moment. Therefore, by
104// default, for now, fall back to GeneralDynamic code generation.
105cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
106 "aarch64-elf-ldtls-generation", cl::Hidden,
107 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
108 cl::init(false));
109
110static cl::opt<bool>
111EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
112 cl::desc("Enable AArch64 logical imm instruction "
113 "optimization"),
114 cl::init(true));
115
116// Temporary option added for the purpose of testing functionality added
117// to DAGCombiner.cpp in D92230. It is expected that this can be removed
118// in future when both implementations will be based off MGATHER rather
119// than the GLD1 nodes added for the SVE gather load intrinsics.
120static cl::opt<bool>
121EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
122 cl::desc("Combine extends of AArch64 masked "
123 "gather intrinsics"),
124 cl::init(true));
125
126/// Value type used for condition codes.
127static const MVT MVT_CC = MVT::i32;
128
129static inline EVT getPackedSVEVectorVT(EVT VT) {
130 switch (VT.getSimpleVT().SimpleTy) {
131 default:
132 llvm_unreachable("unexpected element type for vector");
133 case MVT::i8:
134 return MVT::nxv16i8;
135 case MVT::i16:
136 return MVT::nxv8i16;
137 case MVT::i32:
138 return MVT::nxv4i32;
139 case MVT::i64:
140 return MVT::nxv2i64;
141 case MVT::f16:
142 return MVT::nxv8f16;
143 case MVT::f32:
144 return MVT::nxv4f32;
145 case MVT::f64:
146 return MVT::nxv2f64;
147 case MVT::bf16:
148 return MVT::nxv8bf16;
149 }
150}
151
152// NOTE: Currently there's only a need to return integer vector types. If this
153// changes then just add an extra "type" parameter.
154static inline EVT getPackedSVEVectorVT(ElementCount EC) {
155 switch (EC.getKnownMinValue()) {
156 default:
157 llvm_unreachable("unexpected element count for vector");
158 case 16:
159 return MVT::nxv16i8;
160 case 8:
161 return MVT::nxv8i16;
162 case 4:
163 return MVT::nxv4i32;
164 case 2:
165 return MVT::nxv2i64;
166 }
167}
168
169static inline EVT getPromotedVTForPredicate(EVT VT) {
170 assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
171 "Expected scalable predicate vector type!");
172 switch (VT.getVectorMinNumElements()) {
173 default:
174 llvm_unreachable("unexpected element count for vector");
175 case 2:
176 return MVT::nxv2i64;
177 case 4:
178 return MVT::nxv4i32;
179 case 8:
180 return MVT::nxv8i16;
181 case 16:
182 return MVT::nxv16i8;
183 }
184}
185
186/// Returns true if VT's elements occupy the lowest bit positions of its
187/// associated register class without any intervening space.
188///
189/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
190/// same register class, but only nxv8f16 can be treated as a packed vector.
191static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
192 assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
193 "Expected legal vector type!");
194 return VT.isFixedLengthVector() ||
195 VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
196}
197
198// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
199// predicate and end with a passthru value matching the result type.
200static bool isMergePassthruOpcode(unsigned Opc) {
201 switch (Opc) {
202 default:
203 return false;
204 case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
205 case AArch64ISD::BSWAP_MERGE_PASSTHRU:
206 case AArch64ISD::CTLZ_MERGE_PASSTHRU:
207 case AArch64ISD::CTPOP_MERGE_PASSTHRU:
208 case AArch64ISD::DUP_MERGE_PASSTHRU:
209 case AArch64ISD::ABS_MERGE_PASSTHRU:
210 case AArch64ISD::NEG_MERGE_PASSTHRU:
211 case AArch64ISD::FNEG_MERGE_PASSTHRU:
212 case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
213 case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
214 case AArch64ISD::FCEIL_MERGE_PASSTHRU:
215 case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
216 case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
217 case AArch64ISD::FRINT_MERGE_PASSTHRU:
218 case AArch64ISD::FROUND_MERGE_PASSTHRU:
219 case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
220 case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
221 case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
222 case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
223 case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
224 case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
225 case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
226 case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
227 case AArch64ISD::FSQRT_MERGE_PASSTHRU:
228 case AArch64ISD::FRECPX_MERGE_PASSTHRU:
229 case AArch64ISD::FABS_MERGE_PASSTHRU:
230 return true;
231 }
232}
233
234AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
235 const AArch64Subtarget &STI)
236 : TargetLowering(TM), Subtarget(&STI) {
237 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
238 // we have to make something up. Arbitrarily, choose ZeroOrOne.
239 setBooleanContents(ZeroOrOneBooleanContent);
240 // When comparing vectors the result sets the different elements in the
241 // vector to all-one or all-zero.
242 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
243
244 // Set up the register classes.
245 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
246 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
247
248 if (Subtarget->hasFPARMv8()) {
249 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
250 addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
251 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
252 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
253 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
254 }
255
256 if (Subtarget->hasNEON()) {
257 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
258 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
259 // Someone set us up the NEON.
260 addDRTypeForNEON(MVT::v2f32);
261 addDRTypeForNEON(MVT::v8i8);
262 addDRTypeForNEON(MVT::v4i16);
263 addDRTypeForNEON(MVT::v2i32);
264 addDRTypeForNEON(MVT::v1i64);
265 addDRTypeForNEON(MVT::v1f64);
266 addDRTypeForNEON(MVT::v4f16);
267 if (Subtarget->hasBF16())
268 addDRTypeForNEON(MVT::v4bf16);
269
270 addQRTypeForNEON(MVT::v4f32);
271 addQRTypeForNEON(MVT::v2f64);
272 addQRTypeForNEON(MVT::v16i8);
273 addQRTypeForNEON(MVT::v8i16);
274 addQRTypeForNEON(MVT::v4i32);
275 addQRTypeForNEON(MVT::v2i64);
276 addQRTypeForNEON(MVT::v8f16);
277 if (Subtarget->hasBF16())
278 addQRTypeForNEON(MVT::v8bf16);
279 }
280
281 if (Subtarget->hasSVE()) {
282 // Add legal sve predicate types
283 addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
284 addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
285 addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
286 addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
287
288 // Add legal sve data types
289 addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
290 addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
291 addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
292 addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
293
294 addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
295 addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
296 addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
297 addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
298 addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
299 addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
300
301 if (Subtarget->hasBF16()) {
302 addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
303 addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
304 addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
305 }
306
307 if (Subtarget->useSVEForFixedLengthVectors()) {
308 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
309 if (useSVEForFixedLengthVectorVT(VT))
310 addRegisterClass(VT, &AArch64::ZPRRegClass);
311
312 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
313 if (useSVEForFixedLengthVectorVT(VT))
314 addRegisterClass(VT, &AArch64::ZPRRegClass);
315 }
316
317 for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
318 setOperationAction(ISD::SADDSAT, VT, Legal);
319 setOperationAction(ISD::UADDSAT, VT, Legal);
320 setOperationAction(ISD::SSUBSAT, VT, Legal);
321 setOperationAction(ISD::USUBSAT, VT, Legal);
322 setOperationAction(ISD::UREM, VT, Expand);
323 setOperationAction(ISD::SREM, VT, Expand);
324 setOperationAction(ISD::SDIVREM, VT, Expand);
325 setOperationAction(ISD::UDIVREM, VT, Expand);
326 }
327
328 for (auto VT :
329 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
330 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
331 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
332
333 for (auto VT :
334 { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
335 MVT::nxv2f64 }) {
336 setCondCodeAction(ISD::SETO, VT, Expand);
337 setCondCodeAction(ISD::SETOLT, VT, Expand);
338 setCondCodeAction(ISD::SETLT, VT, Expand);
339 setCondCodeAction(ISD::SETOLE, VT, Expand);
340 setCondCodeAction(ISD::SETLE, VT, Expand);
341 setCondCodeAction(ISD::SETULT, VT, Expand);
342 setCondCodeAction(ISD::SETULE, VT, Expand);
343 setCondCodeAction(ISD::SETUGE, VT, Expand);
344 setCondCodeAction(ISD::SETUGT, VT, Expand);
345 setCondCodeAction(ISD::SETUEQ, VT, Expand);
346 setCondCodeAction(ISD::SETUNE, VT, Expand);
347 }
348 }
349
350 // Compute derived properties from the register classes
351 computeRegisterProperties(Subtarget->getRegisterInfo());
352
353 // Provide all sorts of operation actions
354 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
355 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
356 setOperationAction(ISD::SETCC, MVT::i32, Custom);
357 setOperationAction(ISD::SETCC, MVT::i64, Custom);
358 setOperationAction(ISD::SETCC, MVT::f16, Custom);
359 setOperationAction(ISD::SETCC, MVT::f32, Custom);
360 setOperationAction(ISD::SETCC, MVT::f64, Custom);
361 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
362 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
363 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
364 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
365 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
366 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
367 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
368 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
369 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
370 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
371 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
372 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
373 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
374 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
375 setOperationAction(ISD::SELECT, MVT::i32, Custom);
376 setOperationAction(ISD::SELECT, MVT::i64, Custom);
377 setOperationAction(ISD::SELECT, MVT::f16, Custom);
378 setOperationAction(ISD::SELECT, MVT::f32, Custom);
379 setOperationAction(ISD::SELECT, MVT::f64, Custom);
380 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
381 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
382 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
383 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
384 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
385 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
386 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
387
388 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
389 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
390 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
391
392 setOperationAction(ISD::FREM, MVT::f32, Expand);
393 setOperationAction(ISD::FREM, MVT::f64, Expand);
394 setOperationAction(ISD::FREM, MVT::f80, Expand);
395
396 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
397
398 // Custom lowering hooks are needed for XOR
399 // to fold it into CSINC/CSINV.
400 setOperationAction(ISD::XOR, MVT::i32, Custom);
401 setOperationAction(ISD::XOR, MVT::i64, Custom);
402
403 // Virtually no operation on f128 is legal, but LLVM can't expand them when
404 // there's a valid register class, so we need custom operations in most cases.
405 setOperationAction(ISD::FABS, MVT::f128, Expand);
406 setOperationAction(ISD::FADD, MVT::f128, LibCall);
407 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
408 setOperationAction(ISD::FCOS, MVT::f128, Expand);
409 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
410 setOperationAction(ISD::FMA, MVT::f128, Expand);
411 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
412 setOperationAction(ISD::FNEG, MVT::f128, Expand);
413 setOperationAction(ISD::FPOW, MVT::f128, Expand);
414 setOperationAction(ISD::FREM, MVT::f128, Expand);
415 setOperationAction(ISD::FRINT, MVT::f128, Expand);
416 setOperationAction(ISD::FSIN, MVT::f128, Expand);
417 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
418 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
419 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
420 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
421 setOperationAction(ISD::SETCC, MVT::f128, Custom);
422 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
423 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
424 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
425 setOperationAction(ISD::SELECT, MVT::f128, Custom);
426 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
427 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
428
429 // Lowering for many of the conversions is actually specified by the non-f128
430 // type. The LowerXXX function will be trivial when f128 isn't involved.
431 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
432 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
433 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
434 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
435 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
436 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
437 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
438 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
439 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
440 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
441 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
442 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
443 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
444 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
445 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
446 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
447 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
448 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
449 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
450 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
451 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
452 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
453 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
454 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
455 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
456 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
457 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
458 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
459 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
460 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
461
462 // Variable arguments.
463 setOperationAction(ISD::VASTART, MVT::Other, Custom);
464 setOperationAction(ISD::VAARG, MVT::Other, Custom);
465 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
466 setOperationAction(ISD::VAEND, MVT::Other, Expand);
467
468 // Variable-sized objects.
469 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
470 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
471
472 if (Subtarget->isTargetWindows())
473 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
474 else
475 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
476
477 // Constant pool entries
478 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
479
480 // BlockAddress
481 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
482
483 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
484 setOperationAction(ISD::ADDC, MVT::i32, Custom);
485 setOperationAction(ISD::ADDE, MVT::i32, Custom);
486 setOperationAction(ISD::SUBC, MVT::i32, Custom);
487 setOperationAction(ISD::SUBE, MVT::i32, Custom);
488 setOperationAction(ISD::ADDC, MVT::i64, Custom);
489 setOperationAction(ISD::ADDE, MVT::i64, Custom);
490 setOperationAction(ISD::SUBC, MVT::i64, Custom);
491 setOperationAction(ISD::SUBE, MVT::i64, Custom);
492
493 // AArch64 lacks both left-rotate and popcount instructions.
494 setOperationAction(ISD::ROTL, MVT::i32, Expand);
495 setOperationAction(ISD::ROTL, MVT::i64, Expand);
496 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
497 setOperationAction(ISD::ROTL, VT, Expand);
498 setOperationAction(ISD::ROTR, VT, Expand);
499 }
500
501 // AArch64 doesn't have i32 MULH{S|U}.
502 setOperationAction(ISD::MULHU, MVT::i32, Expand);
503 setOperationAction(ISD::MULHS, MVT::i32, Expand);
504
505 // AArch64 doesn't have {U|S}MUL_LOHI.
506 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
507 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
508
509 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
510 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
511 setOperationAction(ISD::CTPOP, MVT::i128, Custom);
512
513 setOperationAction(ISD::ABS, MVT::i32, Custom);
514 setOperationAction(ISD::ABS, MVT::i64, Custom);
515
516 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
517 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
518 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
519 setOperationAction(ISD::SDIVREM, VT, Expand);
520 setOperationAction(ISD::UDIVREM, VT, Expand);
521 }
522 setOperationAction(ISD::SREM, MVT::i32, Expand);
523 setOperationAction(ISD::SREM, MVT::i64, Expand);
524 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
525 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
526 setOperationAction(ISD::UREM, MVT::i32, Expand);
527 setOperationAction(ISD::UREM, MVT::i64, Expand);
528
529 // Custom lower Add/Sub/Mul with overflow.
530 setOperationAction(ISD::SADDO, MVT::i32, Custom);
531 setOperationAction(ISD::SADDO, MVT::i64, Custom);
532 setOperationAction(ISD::UADDO, MVT::i32, Custom);
533 setOperationAction(ISD::UADDO, MVT::i64, Custom);
534 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
535 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
536 setOperationAction(ISD::USUBO, MVT::i32, Custom);
537 setOperationAction(ISD::USUBO, MVT::i64, Custom);
538 setOperationAction(ISD::SMULO, MVT::i32, Custom);
539 setOperationAction(ISD::SMULO, MVT::i64, Custom);
540 setOperationAction(ISD::UMULO, MVT::i32, Custom);
541 setOperationAction(ISD::UMULO, MVT::i64, Custom);
542
543 setOperationAction(ISD::FSIN, MVT::f32, Expand);
544 setOperationAction(ISD::FSIN, MVT::f64, Expand);
545 setOperationAction(ISD::FCOS, MVT::f32, Expand);
546 setOperationAction(ISD::FCOS, MVT::f64, Expand);
547 setOperationAction(ISD::FPOW, MVT::f32, Expand);
548 setOperationAction(ISD::FPOW, MVT::f64, Expand);
549 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
550 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
551 if (Subtarget->hasFullFP16())
552 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
553 else
554 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
555
556 setOperationAction(ISD::FREM, MVT::f16, Promote);
557 setOperationAction(ISD::FREM, MVT::v4f16, Expand);
558 setOperationAction(ISD::FREM, MVT::v8f16, Expand);
559 setOperationAction(ISD::FPOW, MVT::f16, Promote);
560 setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
561 setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
562 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
563 setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
564 setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
565 setOperationAction(ISD::FCOS, MVT::f16, Promote);
566 setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
567 setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
568 setOperationAction(ISD::FSIN, MVT::f16, Promote);
569 setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
570 setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
571 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
572 setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
573 setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
574 setOperationAction(ISD::FEXP, MVT::f16, Promote);
575 setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
576 setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
577 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
578 setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
579 setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
580 setOperationAction(ISD::FLOG, MVT::f16, Promote);
581 setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
582 setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
583 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
584 setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
585 setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
586 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
587 setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
588 setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
589
590 if (!Subtarget->hasFullFP16()) {
591 setOperationAction(ISD::SELECT, MVT::f16, Promote);
592 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
593 setOperationAction(ISD::SETCC, MVT::f16, Promote);
594 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
595 setOperationAction(ISD::FADD, MVT::f16, Promote);
596 setOperationAction(ISD::FSUB, MVT::f16, Promote);
597 setOperationAction(ISD::FMUL, MVT::f16, Promote);
598 setOperationAction(ISD::FDIV, MVT::f16, Promote);
599 setOperationAction(ISD::FMA, MVT::f16, Promote);
600 setOperationAction(ISD::FNEG, MVT::f16, Promote);
601 setOperationAction(ISD::FABS, MVT::f16, Promote);
602 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
603 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
604 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
605 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
606 setOperationAction(ISD::FRINT, MVT::f16, Promote);
607 setOperationAction(ISD::FROUND, MVT::f16, Promote);
608 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
609 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
610 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
611 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
612 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
613
614 // promote v4f16 to v4f32 when that is known to be safe.
615 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
616 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
617 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
618 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
619 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
620 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
621 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
622 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
623
624 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
625 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
626 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
627 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
628 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
629 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
630 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
631 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
632 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
633 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
634 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
635 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
636 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
637 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
638 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
639
640 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
641 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
642 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
643 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
644 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
645 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
646 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
647 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
648 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
649 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
650 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
651 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
652 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
653 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
654 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
655 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
656 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
657 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
658 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
659 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
660 }
661
662 // AArch64 has implementations of a lot of rounding-like FP operations.
663 for (MVT Ty : {MVT::f32, MVT::f64}) {
664 setOperationAction(ISD::FFLOOR, Ty, Legal);
665 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
666 setOperationAction(ISD::FCEIL, Ty, Legal);
667 setOperationAction(ISD::FRINT, Ty, Legal);
668 setOperationAction(ISD::FTRUNC, Ty, Legal);
669 setOperationAction(ISD::FROUND, Ty, Legal);
670 setOperationAction(ISD::FMINNUM, Ty, Legal);
671 setOperationAction(ISD::FMAXNUM, Ty, Legal);
672 setOperationAction(ISD::FMINIMUM, Ty, Legal);
673 setOperationAction(ISD::FMAXIMUM, Ty, Legal);
674 setOperationAction(ISD::LROUND, Ty, Legal);
675 setOperationAction(ISD::LLROUND, Ty, Legal);
676 setOperationAction(ISD::LRINT, Ty, Legal);
677 setOperationAction(ISD::LLRINT, Ty, Legal);
678 }
679
680 if (Subtarget->hasFullFP16()) {
681 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
682 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
683 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
684 setOperationAction(ISD::FRINT, MVT::f16, Legal);
685 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
686 setOperationAction(ISD::FROUND, MVT::f16, Legal);
687 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
688 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
689 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
690 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
691 }
692
693 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
694
695 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
696
697 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
698 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
699 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
700 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
701 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
702
703 // Generate outline atomics library calls only if LSE was not specified for
704 // subtarget
705 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
706 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
707 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
708 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
709 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
710 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
711 setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
712 setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
713 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
714 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
715 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
716 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
717 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
718 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
719 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
720 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
721 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
722 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
723 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
724 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
725 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
726 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
727 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
728 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
729 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
730 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
731#define LCALLNAMES(A, B, N) \
732 setLibcallName(A##N##_RELAX, #B #N "_relax"); \
733 setLibcallName(A##N##_ACQ, #B #N "_acq"); \
734 setLibcallName(A##N##_REL, #B #N "_rel"); \
735 setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
736#define LCALLNAME4(A, B) \
737 LCALLNAMES(A, B, 1) \
738 LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
739#define LCALLNAME5(A, B) \
740 LCALLNAMES(A, B, 1) \
741 LCALLNAMES(A, B, 2) \
742 LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
743 LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
744 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
745 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
746 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
747 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
748 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
749#undef LCALLNAMES
750#undef LCALLNAME4
751#undef LCALLNAME5
752 }
753
754 // 128-bit loads and stores can be done without expanding
755 setOperationAction(ISD::LOAD, MVT::i128, Custom);
756 setOperationAction(ISD::STORE, MVT::i128, Custom);
757
758 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
759 // custom lowering, as there are no un-paired non-temporal stores and
760 // legalization will break up 256 bit inputs.
761 setOperationAction(ISD::STORE, MVT::v32i8, Custom);
762 setOperationAction(ISD::STORE, MVT::v16i16, Custom);
763 setOperationAction(ISD::STORE, MVT::v16f16, Custom);
764 setOperationAction(ISD::STORE, MVT::v8i32, Custom);
765 setOperationAction(ISD::STORE, MVT::v8f32, Custom);
766 setOperationAction(ISD::STORE, MVT::v4f64, Custom);
767 setOperationAction(ISD::STORE, MVT::v4i64, Custom);
768
769 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
770 // This requires the Performance Monitors extension.
771 if (Subtarget->hasPerfMon())
772 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
773
774 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
775 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
776 // Issue __sincos_stret if available.
777 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
778 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
779 } else {
780 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
781 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
782 }
783
784 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
785 // MSVCRT doesn't have powi; fall back to pow
786 setLibcallName(RTLIB::POWI_F32, nullptr);
787 setLibcallName(RTLIB::POWI_F64, nullptr);
788 }
789
790 // Make floating-point constants legal for the large code model, so they don't
791 // become loads from the constant pool.
792 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
793 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
794 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
795 }
796
797 // AArch64 does not have floating-point extending loads, i1 sign-extending
798 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
799 for (MVT VT : MVT::fp_valuetypes()) {
800 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
801 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
802 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
803 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
804 }
805 for (MVT VT : MVT::integer_valuetypes())
806 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
807
808 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
809 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
810 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
811 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
812 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
813 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
814 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
815
816 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
817 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
818 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
819
820 // Indexed loads and stores are supported.
821 for (unsigned im = (unsigned)ISD::PRE_INC;
822 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
823 setIndexedLoadAction(im, MVT::i8, Legal);
824 setIndexedLoadAction(im, MVT::i16, Legal);
825 setIndexedLoadAction(im, MVT::i32, Legal);
826 setIndexedLoadAction(im, MVT::i64, Legal);
827 setIndexedLoadAction(im, MVT::f64, Legal);
828 setIndexedLoadAction(im, MVT::f32, Legal);
829 setIndexedLoadAction(im, MVT::f16, Legal);
830 setIndexedLoadAction(im, MVT::bf16, Legal);
831 setIndexedStoreAction(im, MVT::i8, Legal);
832 setIndexedStoreAction(im, MVT::i16, Legal);
833 setIndexedStoreAction(im, MVT::i32, Legal);
834 setIndexedStoreAction(im, MVT::i64, Legal);
835 setIndexedStoreAction(im, MVT::f64, Legal);
836 setIndexedStoreAction(im, MVT::f32, Legal);
837 setIndexedStoreAction(im, MVT::f16, Legal);
838 setIndexedStoreAction(im, MVT::bf16, Legal);
839 }
840
841 // Trap.
842 setOperationAction(ISD::TRAP, MVT::Other, Legal);
843 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
844 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
845
846 // We combine OR nodes for bitfield operations.
847 setTargetDAGCombine(ISD::OR);
848 // Try to create BICs for vector ANDs.
849 setTargetDAGCombine(ISD::AND);
850
851 // Vector add and sub nodes may conceal a high-half opportunity.
852 // Also, try to fold ADD into CSINC/CSINV..
853 setTargetDAGCombine(ISD::ADD);
854 setTargetDAGCombine(ISD::ABS);
855 setTargetDAGCombine(ISD::SUB);
856 setTargetDAGCombine(ISD::SRL);
857 setTargetDAGCombine(ISD::XOR);
858 setTargetDAGCombine(ISD::SINT_TO_FP);
859 setTargetDAGCombine(ISD::UINT_TO_FP);
860
861 setTargetDAGCombine(ISD::FP_TO_SINT);
862 setTargetDAGCombine(ISD::FP_TO_UINT);
863 setTargetDAGCombine(ISD::FDIV);
864
865 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
866
867 setTargetDAGCombine(ISD::ANY_EXTEND);
868 setTargetDAGCombine(ISD::ZERO_EXTEND);
869 setTargetDAGCombine(ISD::SIGN_EXTEND);
870 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
871 setTargetDAGCombine(ISD::TRUNCATE);
872 setTargetDAGCombine(ISD::CONCAT_VECTORS);
873 setTargetDAGCombine(ISD::STORE);
874 if (Subtarget->supportsAddressTopByteIgnored())
875 setTargetDAGCombine(ISD::LOAD);
876
877 setTargetDAGCombine(ISD::MGATHER);
878 setTargetDAGCombine(ISD::MSCATTER);
879
880 setTargetDAGCombine(ISD::MUL);
881
882 setTargetDAGCombine(ISD::SELECT);
883 setTargetDAGCombine(ISD::VSELECT);
884
885 setTargetDAGCombine(ISD::INTRINSIC_VOID);
886 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
887 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
888 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
889 setTargetDAGCombine(ISD::VECREDUCE_ADD);
890
891 setTargetDAGCombine(ISD::GlobalAddress);
892
893 // In case of strict alignment, avoid an excessive number of byte wide stores.
894 MaxStoresPerMemsetOptSize = 8;
895 MaxStoresPerMemset = Subtarget->requiresStrictAlign()
896 ? MaxStoresPerMemsetOptSize : 32;
897
898 MaxGluedStoresPerMemcpy = 4;
899 MaxStoresPerMemcpyOptSize = 4;
900 MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
901 ? MaxStoresPerMemcpyOptSize : 16;
902
903 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
904
905 MaxLoadsPerMemcmpOptSize = 4;
906 MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
907 ? MaxLoadsPerMemcmpOptSize : 8;
908
909 setStackPointerRegisterToSaveRestore(AArch64::SP);
910
911 setSchedulingPreference(Sched::Hybrid);
912
913 EnableExtLdPromotion = true;
914
915 // Set required alignment.
916 setMinFunctionAlignment(Align(4));
917 // Set preferred alignments.
918 setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
919 setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
920
921 // Only change the limit for entries in a jump table if specified by
922 // the sub target, but not at the command line.
923 unsigned MaxJT = STI.getMaximumJumpTableSize();
924 if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
925 setMaximumJumpTableSize(MaxJT);
926
927 setHasExtractBitsInsn(true);
928
929 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
930
931 if (Subtarget->hasNEON()) {
932 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
933 // silliness like this:
934 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
935 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
936 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
937 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
938 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
939 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
940 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
941 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
942 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
943 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
944 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
945 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
946 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
947 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
948 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
949 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
950 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
951 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
952 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
953 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
954 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
955 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
956 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
957 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
958 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
959
960 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
961 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
962 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
963 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
964 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
965
966 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
967
968 // AArch64 doesn't have a direct vector ->f32 conversion instructions for
969 // elements smaller than i32, so promote the input to i32 first.
970 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
971 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
972 // i8 vector elements also need promotion to i32 for v8i8
973 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
974 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
975 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
976 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
977 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
978 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
979 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
980 // Or, direct i32 -> f16 vector conversion. Set it so custom, so the
981 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
982 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
983 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
984
985 if (Subtarget->hasFullFP16()) {
986 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
987 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
988 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
989 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
990 } else {
991 // when AArch64 doesn't have fullfp16 support, promote the input
992 // to i32 first.
993 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
994 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
995 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
996 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
997 }
998
999 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
1000 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
1001
1002 // AArch64 doesn't have MUL.2d:
1003 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
1004 // Custom handling for some quad-vector types to detect MULL.
1005 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
1006 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1007 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1008
1009 // Saturates
1010 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1011 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1012 setOperationAction(ISD::SADDSAT, VT, Legal);
1013 setOperationAction(ISD::UADDSAT, VT, Legal);
1014 setOperationAction(ISD::SSUBSAT, VT, Legal);
1015 setOperationAction(ISD::USUBSAT, VT, Legal);
1016 }
1017
1018 // Vector reductions
1019 for (MVT VT : { MVT::v4f16, MVT::v2f32,
1020 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1021 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1022 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1023
1024 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16())
1025 setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
1026 }
1027 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1028 MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1029 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1030 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1031 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1032 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1033 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1034 }
1035 setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
1036
1037 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
1038 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
1039 // Likewise, narrowing and extending vector loads/stores aren't handled
1040 // directly.
1041 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1042 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
1043
1044 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
1045 setOperationAction(ISD::MULHS, VT, Legal);
1046 setOperationAction(ISD::MULHU, VT, Legal);
1047 } else {
1048 setOperationAction(ISD::MULHS, VT, Expand);
1049 setOperationAction(ISD::MULHU, VT, Expand);
1050 }
1051 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1052 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1053
1054 setOperationAction(ISD::BSWAP, VT, Expand);
1055 setOperationAction(ISD::CTTZ, VT, Expand);
1056
1057 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1058 setTruncStoreAction(VT, InnerVT, Expand);
1059 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1060 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1061 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1062 }
1063 }
1064
1065 // AArch64 has implementations of a lot of rounding-like FP operations.
1066 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
1067 setOperationAction(ISD::FFLOOR, Ty, Legal);
1068 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1069 setOperationAction(ISD::FCEIL, Ty, Legal);
1070 setOperationAction(ISD::FRINT, Ty, Legal);
1071 setOperationAction(ISD::FTRUNC, Ty, Legal);
1072 setOperationAction(ISD::FROUND, Ty, Legal);
1073 }
1074
1075 if (Subtarget->hasFullFP16()) {
1076 for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
1077 setOperationAction(ISD::FFLOOR, Ty, Legal);
1078 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1079 setOperationAction(ISD::FCEIL, Ty, Legal);
1080 setOperationAction(ISD::FRINT, Ty, Legal);
1081 setOperationAction(ISD::FTRUNC, Ty, Legal);
1082 setOperationAction(ISD::FROUND, Ty, Legal);
1083 }
1084 }
1085
1086 if (Subtarget->hasSVE())
1087 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
1088
1089 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
1090 }
1091
1092 if (Subtarget->hasSVE()) {
1093 // FIXME: Add custom lowering of MLOAD to handle different passthrus (not a
1094 // splat of 0 or undef) once vector selects supported in SVE codegen. See
1095 // D68877 for more details.
1096 for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
1097 setOperationAction(ISD::BITREVERSE, VT, Custom);
1098 setOperationAction(ISD::BSWAP, VT, Custom);
1099 setOperationAction(ISD::CTLZ, VT, Custom);
1100 setOperationAction(ISD::CTPOP, VT, Custom);
1101 setOperationAction(ISD::CTTZ, VT, Custom);
1102 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1103 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1104 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1105 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1106 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1107 setOperationAction(ISD::MGATHER, VT, Custom);
1108 setOperationAction(ISD::MSCATTER, VT, Custom);
1109 setOperationAction(ISD::MUL, VT, Custom);
1110 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1111 setOperationAction(ISD::SELECT, VT, Custom);
1112 setOperationAction(ISD::SDIV, VT, Custom);
1113 setOperationAction(ISD::UDIV, VT, Custom);
1114 setOperationAction(ISD::SMIN, VT, Custom);
1115 setOperationAction(ISD::UMIN, VT, Custom);
1116 setOperationAction(ISD::SMAX, VT, Custom);
1117 setOperationAction(ISD::UMAX, VT, Custom);
1118 setOperationAction(ISD::SHL, VT, Custom);
1119 setOperationAction(ISD::SRL, VT, Custom);
1120 setOperationAction(ISD::SRA, VT, Custom);
1121 setOperationAction(ISD::ABS, VT, Custom);
1122 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1123 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1124 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1125 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1126 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1127 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1128 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1129 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1130 }
1131
1132 // Illegal unpacked integer vector types.
1133 for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
1134 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1135 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1136 }
1137
1138 for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
1139 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1140 setOperationAction(ISD::SELECT, VT, Custom);
1141 setOperationAction(ISD::SETCC, VT, Custom);
1142 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1143 setOperationAction(ISD::TRUNCATE, VT, Custom);
1144 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1145 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1146 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1147
1148 // There are no legal MVT::nxv16f## based types.
1149 if (VT != MVT::nxv16i1) {
1150 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1151 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1152 }
1153 }
1154
1155 for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
1156 MVT::nxv4f32, MVT::nxv2f64}) {
1157 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1158 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1159 setOperationAction(ISD::MGATHER, VT, Custom);
1160 setOperationAction(ISD::MSCATTER, VT, Custom);
1161 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1162 setOperationAction(ISD::SELECT, VT, Custom);
1163 setOperationAction(ISD::FADD, VT, Custom);
1164 setOperationAction(ISD::FDIV, VT, Custom);
1165 setOperationAction(ISD::FMA, VT, Custom);
1166 setOperationAction(ISD::FMAXNUM, VT, Custom);
1167 setOperationAction(ISD::FMINNUM, VT, Custom);
1168 setOperationAction(ISD::FMUL, VT, Custom);
1169 setOperationAction(ISD::FNEG, VT, Custom);
1170 setOperationAction(ISD::FSUB, VT, Custom);
1171 setOperationAction(ISD::FCEIL, VT, Custom);
1172 setOperationAction(ISD::FFLOOR, VT, Custom);
1173 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1174 setOperationAction(ISD::FRINT, VT, Custom);
1175 setOperationAction(ISD::FROUND, VT, Custom);
1176 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1177 setOperationAction(ISD::FTRUNC, VT, Custom);
1178 setOperationAction(ISD::FSQRT, VT, Custom);
1179 setOperationAction(ISD::FABS, VT, Custom);
1180 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1181 setOperationAction(ISD::FP_ROUND, VT, Custom);
1182 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1183 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1184 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1185 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1186 }
1187
1188 for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1189 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1190 setOperationAction(ISD::MGATHER, VT, Custom);
1191 setOperationAction(ISD::MSCATTER, VT, Custom);
1192 }
1193
1194 setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
1195
1196 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
1197 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
1198
1199 // NOTE: Currently this has to happen after computeRegisterProperties rather
1200 // than the preferred option of combining it with the addRegisterClass call.
1201 if (Subtarget->useSVEForFixedLengthVectors()) {
1202 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
1203 if (useSVEForFixedLengthVectorVT(VT))
1204 addTypeForFixedLengthSVE(VT);
1205 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
1206 if (useSVEForFixedLengthVectorVT(VT))
1207 addTypeForFixedLengthSVE(VT);
1208
1209 // 64-bit results can come from an input that is wider than NEON supports.
1210 for (auto VT : {MVT::v8i8, MVT::v4i16})
1211 setOperationAction(ISD::TRUNCATE, VT, Custom);
1212 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
1213
1214 // 128-bit results imply an input that is wider than NEON supports.
1215 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
1216 setOperationAction(ISD::TRUNCATE, VT, Custom);
1217 for (auto VT : {MVT::v8f16, MVT::v4f32})
1218 setOperationAction(ISD::FP_ROUND, VT, Expand);
1219
1220 // These operations are not supported on NEON but SVE can do them.
1221 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1222 setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
1223 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1224 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
1225 setOperationAction(ISD::MUL, MVT::v1i64, Custom);
1226 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1227 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
1228 setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
1229 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
1230 setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
1231 setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
1232 setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
1233 setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
1234 setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
1235 setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
1236 setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
1237 setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
1238 setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
1239 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
1240 setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
1241 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
1242 setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
1243 setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
1244 setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
1245 setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
1246 setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
1247 setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
1248 setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
1249 setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
1250 setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
1251 setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
1252 setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
1253 setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
1254 setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
1255
1256 // Int operations with no NEON support.
1257 for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1258 MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
1259 setOperationAction(ISD::BITREVERSE, VT, Custom);
1260 setOperationAction(ISD::CTTZ, VT, Custom);
1261 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1262 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1263 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1264 }
1265
1266 // FP operations with no NEON support.
1267 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
1268 MVT::v1f64, MVT::v2f64})
1269 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1270
1271 // Use SVE for vectors with more than 2 elements.
1272 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
1273 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1274 }
1275 }
1276
1277 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
1278}
1279
1280void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
1281 assert(VT.isVector() && "VT should be a vector type");
1282
1283 if (VT.isFloatingPoint()) {
1284 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
1285 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
1286 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
1287 }
1288
1289 // Mark vector float intrinsics as expand.
1290 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
1291 setOperationAction(ISD::FSIN, VT, Expand);
1292 setOperationAction(ISD::FCOS, VT, Expand);
1293 setOperationAction(ISD::FPOW, VT, Expand);
1294 setOperationAction(ISD::FLOG, VT, Expand);
1295 setOperationAction(ISD::FLOG2, VT, Expand);
1296 setOperationAction(ISD::FLOG10, VT, Expand);
1297 setOperationAction(ISD::FEXP, VT, Expand);
1298 setOperationAction(ISD::FEXP2, VT, Expand);
1299
1300 // But we do support custom-lowering for FCOPYSIGN.
1301 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1302 }
1303
1304 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1305 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1306 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1307 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1308 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1309 setOperationAction(ISD::SRA, VT, Custom);
1310 setOperationAction(ISD::SRL, VT, Custom);
1311 setOperationAction(ISD::SHL, VT, Custom);
1312 setOperationAction(ISD::OR, VT, Custom);
1313 setOperationAction(ISD::SETCC, VT, Custom);
1314 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
1315
1316 setOperationAction(ISD::SELECT, VT, Expand);
1317 setOperationAction(ISD::SELECT_CC, VT, Expand);
1318 setOperationAction(ISD::VSELECT, VT, Expand);
1319 for (MVT InnerVT : MVT::all_valuetypes())
1320 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
1321
1322 // CNT supports only B element sizes; wider element types then use UADDLP to widen.
1323 if (VT != MVT::v8i8 && VT != MVT::v16i8)
1324 setOperationAction(ISD::CTPOP, VT, Custom);
1325
1326 setOperationAction(ISD::UDIV, VT, Expand);
1327 setOperationAction(ISD::SDIV, VT, Expand);
1328 setOperationAction(ISD::UREM, VT, Expand);
1329 setOperationAction(ISD::SREM, VT, Expand);
1330 setOperationAction(ISD::FREM, VT, Expand);
1331
1332 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1333 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1334
1335 if (!VT.isFloatingPoint())
1336 setOperationAction(ISD::ABS, VT, Legal);
1337
1338 // [SU][MIN|MAX] are available for all NEON types apart from i64.
1339 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
1340 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
1341 setOperationAction(Opcode, VT, Legal);
1342
1343 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
1344 if (VT.isFloatingPoint() &&
1345 VT.getVectorElementType() != MVT::bf16 &&
1346 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
1347 for (unsigned Opcode :
1348 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
1349 setOperationAction(Opcode, VT, Legal);
1350
1351 if (Subtarget->isLittleEndian()) {
1352 for (unsigned im = (unsigned)ISD::PRE_INC;
1353 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1354 setIndexedLoadAction(im, VT, Legal);
1355 setIndexedStoreAction(im, VT, Legal);
1356 }
1357 }
1358}
1359
1360void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
1361 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
1362
1363 // By default everything must be expanded.
1364 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1365 setOperationAction(Op, VT, Expand);
1366
1367 // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
1368 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1369
1370 // Lower fixed length vector operations to scalable equivalents.
1371 setOperationAction(ISD::ABS, VT, Custom);
1372 setOperationAction(ISD::ADD, VT, Custom);
1373 setOperationAction(ISD::AND, VT, Custom);
1374 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1375 setOperationAction(ISD::BITREVERSE, VT, Custom);
1376 setOperationAction(ISD::BSWAP, VT, Custom);
1377 setOperationAction(ISD::CTLZ, VT, Custom);
1378 setOperationAction(ISD::CTPOP, VT, Custom);
1379 setOperationAction(ISD::CTTZ, VT, Custom);
1380 setOperationAction(ISD::FADD, VT, Custom);
1381 setOperationAction(ISD::FCEIL, VT, Custom);
1382 setOperationAction(ISD::FDIV, VT, Custom);
1383 setOperationAction(ISD::FFLOOR, VT, Custom);
1384 setOperationAction(ISD::FMA, VT, Custom);
1385 setOperationAction(ISD::FMAXNUM, VT, Custom);
1386 setOperationAction(ISD::FMINNUM, VT, Custom);
1387 setOperationAction(ISD::FMUL, VT, Custom);
1388 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1389 setOperationAction(ISD::FNEG, VT, Custom);
1390 setOperationAction(ISD::FRINT, VT, Custom);
1391 setOperationAction(ISD::FROUND, VT, Custom);
1392 setOperationAction(ISD::FSQRT, VT, Custom);
1393 setOperationAction(ISD::FSUB, VT, Custom);
1394 setOperationAction(ISD::FTRUNC, VT, Custom);
1395 setOperationAction(ISD::LOAD, VT, Custom);
1396 setOperationAction(ISD::MUL, VT, Custom);
1397 setOperationAction(ISD::OR, VT, Custom);
1398 setOperationAction(ISD::SDIV, VT, Custom);
1399 setOperationAction(ISD::SETCC, VT, Custom);
1400 setOperationAction(ISD::SHL, VT, Custom);
1401 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1402 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1403 setOperationAction(ISD::SMAX, VT, Custom);
1404 setOperationAction(ISD::SMIN, VT, Custom);
1405 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1406 setOperationAction(ISD::SRA, VT, Custom);
1407 setOperationAction(ISD::SRL, VT, Custom);
1408 setOperationAction(ISD::STORE, VT, Custom);
1409 setOperationAction(ISD::SUB, VT, Custom);
1410 setOperationAction(ISD::TRUNCATE, VT, Custom);
1411 setOperationAction(ISD::UDIV, VT, Custom);
1412 setOperationAction(ISD::UMAX, VT, Custom);
1413 setOperationAction(ISD::UMIN, VT, Custom);
1414 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1415 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1416 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1417 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1418 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1419 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1420 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1421 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1422 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1423 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1424 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1425 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1426 setOperationAction(ISD::VSELECT, VT, Custom);
1427 setOperationAction(ISD::XOR, VT, Custom);
1428 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1429}
1430
1431void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
1432 addRegisterClass(VT, &AArch64::FPR64RegClass);
1433 addTypeForNEON(VT, MVT::v2i32);
1434}
1435
1436void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
1437 addRegisterClass(VT, &AArch64::FPR128RegClass);
1438 addTypeForNEON(VT, MVT::v4i32);
1439}
1440
1441EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
1442 LLVMContext &C, EVT VT) const {
1443 if (!VT.isVector())
1444 return MVT::i32;
1445 if (VT.isScalableVector())
1446 return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
1447 return VT.changeVectorElementTypeToInteger();
1448}
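// Illustrative note (added, not part of the original source): for a scalar
// compare this returns i32; a fixed-width vector compare such as v4f32 maps
// to v4i32 via changeVectorElementTypeToInteger(); a scalable compare such as
// nxv4f32 maps to the SVE predicate type nxv4i1.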
1449
1450static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
1451 const APInt &Demanded,
1452 TargetLowering::TargetLoweringOpt &TLO,
1453 unsigned NewOpc) {
1454 uint64_t OldImm = Imm, NewImm, Enc;
1455 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
1456
1457 // Return if the immediate is already all zeros, all ones, a bimm32 or a
1458 // bimm64.
1459 if (Imm == 0 || Imm == Mask ||
1460 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
1461 return false;
1462
1463 unsigned EltSize = Size;
1464 uint64_t DemandedBits = Demanded.getZExtValue();
1465
1466 // Clear bits that are not demanded.
1467 Imm &= DemandedBits;
1468
1469 while (true) {
1470 // The goal here is to set the non-demanded bits in a way that minimizes
1471 // the number of transitions between 0 and 1. To achieve this goal,
1472 // we set the non-demanded bits to the value of the preceding demanded bits.
1473 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
1474 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
1475 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
1476 // The final result is 0b11000011.
1477 uint64_t NonDemandedBits = ~DemandedBits;
1478 uint64_t InvertedImm = ~Imm & DemandedBits;
1479 uint64_t RotatedImm =
1480 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
1481 NonDemandedBits;
1482 uint64_t Sum = RotatedImm + NonDemandedBits;
1483 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
1484 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
1485 NewImm = (Imm | Ones) & Mask;
1486
1487 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
1488 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
1489 // we halve the element size and continue the search.
1490 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
1491 break;
1492
1493 // We cannot shrink the element size any further if it is already 2 bits.
1494 if (EltSize == 2)
1495 return false;
1496
1497 EltSize /= 2;
1498 Mask >>= EltSize;
1499 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
1500
1501 // Return if there is a mismatch in any of the demanded bits of Imm and Hi.
1502 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
1503 return false;
1504
1505 // Merge the upper and lower halves of Imm and DemandedBits.
1506 Imm |= Hi;
1507 DemandedBits |= DemandedBitsHi;
1508 }
1509
1510 ++NumOptimizedImms;
1511
1512 // Replicate the element across the register width.
1513 while (EltSize < Size) {
1514 NewImm |= NewImm << EltSize;
1515 EltSize *= 2;
1516 }
1517
1518 (void)OldImm;
1519 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
1520 "demanded bits should never be altered");
1521 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
1522
1523 // Create the new constant immediate node.
1524 EVT VT = Op.getValueType();
1525 SDLoc DL(Op);
1526 SDValue New;
1527
1528 // If the new constant immediate is all-zeros or all-ones, let the target
1529 // independent DAG combine optimize this node.
1530 if (NewImm == 0 || NewImm == OrigMask) {
1531 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
1532 TLO.DAG.getConstant(NewImm, DL, VT));
1533 // Otherwise, create a machine node so that target independent DAG combine
1534 // doesn't undo this optimization.
1535 } else {
1536 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
1537 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
1538 New = SDValue(
1539 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
1540 }
1541
1542 return TLO.CombineTo(Op, New);
1543}
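// Illustrative sketch (added, not part of the original source): a plain
// reference model of the bit-filling goal documented inside the loop above.
// Each non-demanded bit is copied from the nearest demanded bit below it,
// wrapping around; with Imm = 0b01000001, DemandedBits = 0b01100101 and
// Size = 8 this yields 0b11000011, matching the worked example in the
// comment. The helper name is hypothetical and it needs <cstdint> if
// compiled standalone.
static uint64_t fillNonDemandedBitsRef(uint64_t Imm, uint64_t DemandedBits,
                                       unsigned Size) {
  uint64_t Result = Imm & DemandedBits;
  for (unsigned I = 0; I < Size; ++I) {
    if (DemandedBits & (1ULL << I))
      continue; // Demanded bits keep their original values.
    // Walk downwards (with wrap-around) to the closest demanded bit and copy
    // its value into this non-demanded position.
    for (unsigned D = 1; D < Size; ++D) {
      unsigned J = (I + Size - D) % Size;
      if (DemandedBits & (1ULL << J)) {
        Result |= ((Imm >> J) & 1) << I;
        break;
      }
    }
  }
  return Result;
}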
1544
1545bool AArch64TargetLowering::targetShrinkDemandedConstant(
1546 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1547 TargetLoweringOpt &TLO) const {
1548 // Delay this optimization to as late as possible.
1549 if (!TLO.LegalOps)
1550 return false;
1551
1552 if (!EnableOptimizeLogicalImm)
1553 return false;
1554
1555 EVT VT = Op.getValueType();
1556 if (VT.isVector())
1557 return false;
1558
1559 unsigned Size = VT.getSizeInBits();
1560 assert((Size == 32 || Size == 64) &&
1561 "i32 or i64 is expected after legalization.");
1562
1563 // Exit early if we demand all bits.
1564 if (DemandedBits.countPopulation() == Size)
1565 return false;
1566
1567 unsigned NewOpc;
1568 switch (Op.getOpcode()) {
1569 default:
1570 return false;
1571 case ISD::AND:
1572 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
1573 break;
1574 case ISD::OR:
1575 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1576 break;
1577 case ISD::XOR:
1578 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1579 break;
1580 }
1581 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1582 if (!C)
1583 return false;
1584 uint64_t Imm = C->getZExtValue();
1585 return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
1586}
1587
1588/// computeKnownBitsForTargetNode - Determine which of the bits specified in
1589/// Mask are known to be either zero or one and return them Known.
1590void AArch64TargetLowering::computeKnownBitsForTargetNode(
1591 const SDValue Op, KnownBits &Known,
1592 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
1593 switch (Op.getOpcode()) {
1594 default:
1595 break;
1596 case AArch64ISD::CSEL: {
1597 KnownBits Known2;
1598 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1599 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1600 Known = KnownBits::commonBits(Known, Known2);
1601 break;
1602 }
1603 case AArch64ISD::LOADgot:
1604 case AArch64ISD::ADDlow: {
1605 if (!Subtarget->isTargetILP32())
1606 break;
1607 // In ILP32 mode all valid pointers are in the low 4GB of the address space.
1608 Known.Zero = APInt::getHighBitsSet(64, 32);
1609 break;
1610 }
1611 case ISD::INTRINSIC_W_CHAIN: {
1612 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1613 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1614 switch (IntID) {
1615 default: return;
1616 case Intrinsic::aarch64_ldaxr:
1617 case Intrinsic::aarch64_ldxr: {
1618 unsigned BitWidth = Known.getBitWidth();
1619 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1620 unsigned MemBits = VT.getScalarSizeInBits();
1621 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1622 return;
1623 }
1624 }
1625 break;
1626 }
1627 case ISD::INTRINSIC_WO_CHAIN:
1628 case ISD::INTRINSIC_VOID: {
1629 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1630 switch (IntNo) {
1631 default:
1632 break;
1633 case Intrinsic::aarch64_neon_umaxv:
1634 case Intrinsic::aarch64_neon_uminv: {
1635 // Figure out the datatype of the vector operand. The UMINV instruction
1636 // will zero extend the result, so we can mark as known zero all the
1637 // bits larger than the element datatype. 32-bit or larger doesn't need
1638 // this as those are legal types and will be handled by isel directly.
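// Illustrative example (added, not part of the original source): a umaxv or
// uminv reduction of v8i8/v16i8 marks all result bits above bit 7 as known
// zero below, and the v4i16/v8i16 case marks all bits above bit 15.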
1639 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1640 unsigned BitWidth = Known.getBitWidth();
1641 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1642 assert(BitWidth >= 8 && "Unexpected width!");
1643 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1644 Known.Zero |= Mask;
1645 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1646 assert(BitWidth >= 16 && "Unexpected width!");
1647 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1648 Known.Zero |= Mask;
1649 }
1650 break;
1651 } break;
1652 }
1653 }
1654 }
1655}
1656
1657MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1658 EVT) const {
1659 return MVT::i64;
1660}
1661
1662bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1663 EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
1664 bool *Fast) const {
1665 if (Subtarget->requiresStrictAlign())
1666 return false;
1667
1668 if (Fast) {
1669 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1670 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1671 // See comments in performSTORECombine() for more details about
1672 // these conditions.
1673
1674 // Code that uses clang vector extensions can mark that it
1675 // wants unaligned accesses to be treated as fast by
1676 // underspecifying alignment to be 1 or 2.
1677 Align <= 2 ||
1678
1679 // Disregard v2i64. Memcpy lowering produces those and splitting
1680 // them regresses performance on micro-benchmarks and olden/bh.
1681 VT == MVT::v2i64;
1682 }
1683 return true;
1684}
1685
1686// Same as above but handling LLTs instead.
1687bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1688 LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1689 bool *Fast) const {
1690 if (Subtarget->requiresStrictAlign())
1691 return false;
1692
1693 if (Fast) {
1694 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1695 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
1696 Ty.getSizeInBytes() != 16 ||
1697 // See comments in performSTORECombine() for more details about
1698 // these conditions.
1699
1700 // Code that uses clang vector extensions can mark that it
1701 // wants unaligned accesses to be treated as fast by
1702 // underspecifying alignment to be 1 or 2.
1703 Alignment <= 2 ||
1704
1705 // Disregard v2i64. Memcpy lowering produces those and splitting
1706 // them regresses performance on micro-benchmarks and olden/bh.
1707 Ty == LLT::vector(2, 64);
1708 }
1709 return true;
1710}
1711
1712FastISel *
1713AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1714 const TargetLibraryInfo *libInfo) const {
1715 return AArch64::createFastISel(funcInfo, libInfo);
1716}
1717
1718const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1719#define MAKE_CASE(V) \
1720 case V: \
1721 return #V;
1722 switch ((AArch64ISD::NodeType)Opcode) {
1723 case AArch64ISD::FIRST_NUMBER:
1724 break;
1725 MAKE_CASE(AArch64ISD::CALL)
1726 MAKE_CASE(AArch64ISD::ADRP)
1727 MAKE_CASE(AArch64ISD::ADR)
1728 MAKE_CASE(AArch64ISD::ADDlow)
1729 MAKE_CASE(AArch64ISD::LOADgot)
1730 MAKE_CASE(AArch64ISD::RET_FLAG)
1731 MAKE_CASE(AArch64ISD::BRCOND)
1732 MAKE_CASE(AArch64ISD::CSEL)
1733 MAKE_CASE(AArch64ISD::FCSEL)
1734 MAKE_CASE(AArch64ISD::CSINV)
1735 MAKE_CASE(AArch64ISD::CSNEG)
1736 MAKE_CASE(AArch64ISD::CSINC)
1737 MAKE_CASE(AArch64ISD::THREAD_POINTER)
1738 MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
1739 MAKE_CASE(AArch64ISD::ADD_PRED)
1740 MAKE_CASE(AArch64ISD::MUL_PRED)
1741 MAKE_CASE(AArch64ISD::SDIV_PRED)
1742 MAKE_CASE(AArch64ISD::SHL_PRED)
1743 MAKE_CASE(AArch64ISD::SMAX_PRED)
1744 MAKE_CASE(AArch64ISD::SMIN_PRED)
1745 MAKE_CASE(AArch64ISD::SRA_PRED)
1746 MAKE_CASE(AArch64ISD::SRL_PRED)
1747 MAKE_CASE(AArch64ISD::SUB_PRED)
1748 MAKE_CASE(AArch64ISD::UDIV_PRED)
1749 MAKE_CASE(AArch64ISD::UMAX_PRED)
1750 MAKE_CASE(AArch64ISD::UMIN_PRED)
1751 MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
1752 MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
1753 MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
1754 MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
1755 MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
1756 MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
1757 MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
1758 MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
1759 MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
1760 MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
1761 MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
1762 MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
1763 MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
1764 MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
1765 MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
1766 MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
1767 MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
1768 MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
1769 MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
1770 MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
1771 MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
1772 MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
1773 MAKE_CASE(AArch64ISD::ADC)
1774 MAKE_CASE(AArch64ISD::SBC)
1775 MAKE_CASE(AArch64ISD::ADDS)
1776 MAKE_CASE(AArch64ISD::SUBS)
1777 MAKE_CASE(AArch64ISD::ADCS)
1778 MAKE_CASE(AArch64ISD::SBCS)
1779 MAKE_CASE(AArch64ISD::ANDS)
1780 MAKE_CASE(AArch64ISD::CCMP)
1781 MAKE_CASE(AArch64ISD::CCMN)
1782 MAKE_CASE(AArch64ISD::FCCMP)
1783 MAKE_CASE(AArch64ISD::FCMP)
1784 MAKE_CASE(AArch64ISD::STRICT_FCMP)
1785 MAKE_CASE(AArch64ISD::STRICT_FCMPE)
1786 MAKE_CASE(AArch64ISD::DUP)
1787 MAKE_CASE(AArch64ISD::DUPLANE8)
1788 MAKE_CASE(AArch64ISD::DUPLANE16)
1789 MAKE_CASE(AArch64ISD::DUPLANE32)
1790 MAKE_CASE(AArch64ISD::DUPLANE64)
1791 MAKE_CASE(AArch64ISD::MOVI)
1792 MAKE_CASE(AArch64ISD::MOVIshift)
1793 MAKE_CASE(AArch64ISD::MOVIedit)
1794 MAKE_CASE(AArch64ISD::MOVImsl)
1795 MAKE_CASE(AArch64ISD::FMOV)
1796 MAKE_CASE(AArch64ISD::MVNIshift)
1797 MAKE_CASE(AArch64ISD::MVNImsl)
1798 MAKE_CASE(AArch64ISD::BICi)
1799 MAKE_CASE(AArch64ISD::ORRi)
1800 MAKE_CASE(AArch64ISD::BSP)
1801 MAKE_CASE(AArch64ISD::NEG)
1802 MAKE_CASE(AArch64ISD::EXTR)
1803 MAKE_CASE(AArch64ISD::ZIP1)
1804 MAKE_CASE(AArch64ISD::ZIP2)
1805 MAKE_CASE(AArch64ISD::UZP1)
1806 MAKE_CASE(AArch64ISD::UZP2)
1807 MAKE_CASE(AArch64ISD::TRN1)
1808 MAKE_CASE(AArch64ISD::TRN2)
1809 MAKE_CASE(AArch64ISD::REV16)
1810 MAKE_CASE(AArch64ISD::REV32)
1811 MAKE_CASE(AArch64ISD::REV64)
1812 MAKE_CASE(AArch64ISD::EXT)
1813 MAKE_CASE(AArch64ISD::VSHL)
1814 MAKE_CASE(AArch64ISD::VLSHR)
1815 MAKE_CASE(AArch64ISD::VASHR)
1816 MAKE_CASE(AArch64ISD::VSLI)
1817 MAKE_CASE(AArch64ISD::VSRI)
1818 MAKE_CASE(AArch64ISD::CMEQ)
1819 MAKE_CASE(AArch64ISD::CMGE)
1820 MAKE_CASE(AArch64ISD::CMGT)
1821 MAKE_CASE(AArch64ISD::CMHI)
1822 MAKE_CASE(AArch64ISD::CMHS)
1823 MAKE_CASE(AArch64ISD::FCMEQ)
1824 MAKE_CASE(AArch64ISD::FCMGE)
1825 MAKE_CASE(AArch64ISD::FCMGT)
1826 MAKE_CASE(AArch64ISD::CMEQz)
1827 MAKE_CASE(AArch64ISD::CMGEz)
1828 MAKE_CASE(AArch64ISD::CMGTz)
1829 MAKE_CASE(AArch64ISD::CMLEz)
1830 MAKE_CASE(AArch64ISD::CMLTz)
1831 MAKE_CASE(AArch64ISD::FCMEQz)
1832 MAKE_CASE(AArch64ISD::FCMGEz)
1833 MAKE_CASE(AArch64ISD::FCMGTz)
1834 MAKE_CASE(AArch64ISD::FCMLEz)
1835 MAKE_CASE(AArch64ISD::FCMLTz)
1836 MAKE_CASE(AArch64ISD::SADDV)
1837 MAKE_CASE(AArch64ISD::UADDV)
1838 MAKE_CASE(AArch64ISD::SRHADD)
1839 MAKE_CASE(AArch64ISD::URHADD)
1840 MAKE_CASE(AArch64ISD::SHADD)
1841 MAKE_CASE(AArch64ISD::UHADD)
1842 MAKE_CASE(AArch64ISD::SMINV)
1843 MAKE_CASE(AArch64ISD::UMINV)
1844 MAKE_CASE(AArch64ISD::SMAXV)
1845 MAKE_CASE(AArch64ISD::UMAXV)
1846 MAKE_CASE(AArch64ISD::SADDV_PRED)
1847 MAKE_CASE(AArch64ISD::UADDV_PRED)
1848 MAKE_CASE(AArch64ISD::SMAXV_PRED)
1849 MAKE_CASE(AArch64ISD::UMAXV_PRED)
1850 MAKE_CASE(AArch64ISD::SMINV_PRED)
1851 MAKE_CASE(AArch64ISD::UMINV_PRED)
1852 MAKE_CASE(AArch64ISD::ORV_PRED)
1853 MAKE_CASE(AArch64ISD::EORV_PRED)
1854 MAKE_CASE(AArch64ISD::ANDV_PRED)
1855 MAKE_CASE(AArch64ISD::CLASTA_N)
1856 MAKE_CASE(AArch64ISD::CLASTB_N)
1857 MAKE_CASE(AArch64ISD::LASTA)
1858 MAKE_CASE(AArch64ISD::LASTB)
1859 MAKE_CASE(AArch64ISD::REV)
1860 MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
1861 MAKE_CASE(AArch64ISD::TBL)
1862 MAKE_CASE(AArch64ISD::FADD_PRED)
1863 MAKE_CASE(AArch64ISD::FADDA_PRED)
1864 MAKE_CASE(AArch64ISD::FADDV_PRED)
1865 MAKE_CASE(AArch64ISD::FDIV_PRED)
1866 MAKE_CASE(AArch64ISD::FMA_PRED)
1867 MAKE_CASE(AArch64ISD::FMAXV_PRED)
1868 MAKE_CASE(AArch64ISD::FMAXNM_PRED)
1869 MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
1870 MAKE_CASE(AArch64ISD::FMINV_PRED)
1871 MAKE_CASE(AArch64ISD::FMINNM_PRED)
1872 MAKE_CASE(AArch64ISD::FMINNMV_PRED)
1873 MAKE_CASE(AArch64ISD::FMUL_PRED)
1874 MAKE_CASE(AArch64ISD::FSUB_PRED)
1875 MAKE_CASE(AArch64ISD::BIT)
1876 MAKE_CASE(AArch64ISD::CBZ)
1877 MAKE_CASE(AArch64ISD::CBNZ)
1878 MAKE_CASE(AArch64ISD::TBZ)
1879 MAKE_CASE(AArch64ISD::TBNZ)
1880 MAKE_CASE(AArch64ISD::TC_RETURN)
1881 MAKE_CASE(AArch64ISD::PREFETCH)
1882 MAKE_CASE(AArch64ISD::SITOF)
1883 MAKE_CASE(AArch64ISD::UITOF)
1884 MAKE_CASE(AArch64ISD::NVCAST)
1885 MAKE_CASE(AArch64ISD::SQSHL_I)
1886 MAKE_CASE(AArch64ISD::UQSHL_I)
1887 MAKE_CASE(AArch64ISD::SRSHR_I)
1888 MAKE_CASE(AArch64ISD::URSHR_I)
1889 MAKE_CASE(AArch64ISD::SQSHLU_I)
1890 MAKE_CASE(AArch64ISD::WrapperLarge)
1891 MAKE_CASE(AArch64ISD::LD2post)
1892 MAKE_CASE(AArch64ISD::LD3post)
1893 MAKE_CASE(AArch64ISD::LD4post)
1894 MAKE_CASE(AArch64ISD::ST2post)
1895 MAKE_CASE(AArch64ISD::ST3post)
1896 MAKE_CASE(AArch64ISD::ST4post)
1897 MAKE_CASE(AArch64ISD::LD1x2post)
1898 MAKE_CASE(AArch64ISD::LD1x3post)
1899 MAKE_CASE(AArch64ISD::LD1x4post)
1900 MAKE_CASE(AArch64ISD::ST1x2post)
1901 MAKE_CASE(AArch64ISD::ST1x3post)
1902 MAKE_CASE(AArch64ISD::ST1x4post)
1903 MAKE_CASE(AArch64ISD::LD1DUPpost)
1904 MAKE_CASE(AArch64ISD::LD2DUPpost)
1905 MAKE_CASE(AArch64ISD::LD3DUPpost)
1906 MAKE_CASE(AArch64ISD::LD4DUPpost)
1907 MAKE_CASE(AArch64ISD::LD1LANEpost)
1908 MAKE_CASE(AArch64ISD::LD2LANEpost)
1909 MAKE_CASE(AArch64ISD::LD3LANEpost)
1910 MAKE_CASE(AArch64ISD::LD4LANEpost)
1911 MAKE_CASE(AArch64ISD::ST2LANEpost)
1912 MAKE_CASE(AArch64ISD::ST3LANEpost)
1913 MAKE_CASE(AArch64ISD::ST4LANEpost)
1914 MAKE_CASE(AArch64ISD::SMULL)
1915 MAKE_CASE(AArch64ISD::UMULL)
1916 MAKE_CASE(AArch64ISD::FRECPE)
1917 MAKE_CASE(AArch64ISD::FRECPS)
1918 MAKE_CASE(AArch64ISD::FRSQRTE)
1919 MAKE_CASE(AArch64ISD::FRSQRTS)
1920 MAKE_CASE(AArch64ISD::STG)
1921 MAKE_CASE(AArch64ISD::STZG)
1922 MAKE_CASE(AArch64ISD::ST2G)
1923 MAKE_CASE(AArch64ISD::STZ2G)
1924 MAKE_CASE(AArch64ISD::SUNPKHI)
1925 MAKE_CASE(AArch64ISD::SUNPKLO)
1926 MAKE_CASE(AArch64ISD::UUNPKHI)
1927 MAKE_CASE(AArch64ISD::UUNPKLO)
1928 MAKE_CASE(AArch64ISD::INSR)
1929 MAKE_CASE(AArch64ISD::PTEST)
1930 MAKE_CASE(AArch64ISD::PTRUE)
1931 MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
1932 MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
1933 MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
1934 MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
1935 MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
1936 MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
1937 MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
1938 MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
1939 MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
1940 MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
1941 MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
1942 MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
1943 MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
1944 MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
1945 MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
1946 MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
1947 MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
1948 MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
1949 MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
1950 MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
1951 MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
1952 MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
1953 MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
1954 MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
1955 MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
1956 MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
1957 MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
1958 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
1959 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
1960 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
1961 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
1962 MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
1963 MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
1964 MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
1965 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
1966 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
1967 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
1968 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
1969 MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
1970 MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
1971 MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
1972 MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
1973 MAKE_CASE(AArch64ISD::ST1_PRED)
1974 MAKE_CASE(AArch64ISD::SST1_PRED)
1975 MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
1976 MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
1977 MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
1978 MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
1979 MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
1980 MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
1981 MAKE_CASE(AArch64ISD::SSTNT1_PRED)
1982 MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
1983 MAKE_CASE(AArch64ISD::LDP)
1984 MAKE_CASE(AArch64ISD::STP)
1985 MAKE_CASE(AArch64ISD::STNP)
1986 MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
1987 MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
1988 MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
1989 MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
1990 MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
1991 MAKE_CASE(AArch64ISD::INDEX_VECTOR)
1992 MAKE_CASE(AArch64ISD::UABD)
1993 MAKE_CASE(AArch64ISD::SABD)
1994 MAKE_CASE(AArch64ISD::CALL_RVMARKER)
1995 }
1996#undef MAKE_CASE
1997 return nullptr;
1998}
1999
2000MachineBasicBlock *
2001AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
2002 MachineBasicBlock *MBB) const {
2003 // We materialise the F128CSEL pseudo-instruction as some control flow and a
2004 // phi node:
2005
2006 // OrigBB:
2007 // [... previous instrs leading to comparison ...]
2008 // b.ne TrueBB
2009 // b EndBB
2010 // TrueBB:
2011 // ; Fallthrough
2012 // EndBB:
2013 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
2014
2015 MachineFunction *MF = MBB->getParent();
2016 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2017 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2018 DebugLoc DL = MI.getDebugLoc();
2019 MachineFunction::iterator It = ++MBB->getIterator();
2020
2021 Register DestReg = MI.getOperand(0).getReg();
2022 Register IfTrueReg = MI.getOperand(1).getReg();
2023 Register IfFalseReg = MI.getOperand(2).getReg();
2024 unsigned CondCode = MI.getOperand(3).getImm();
2025 bool NZCVKilled = MI.getOperand(4).isKill();
2026
2027 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
2028 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
2029 MF->insert(It, TrueBB);
2030 MF->insert(It, EndBB);
2031
2032 // Transfer the rest of the current basic block to EndBB.
2033 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
2034 MBB->end());
2035 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
2036
2037 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
2038 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
2039 MBB->addSuccessor(TrueBB);
2040 MBB->addSuccessor(EndBB);
2041
2042 // TrueBB falls through to the end.
2043 TrueBB->addSuccessor(EndBB);
2044
2045 if (!NZCVKilled) {
2046 TrueBB->addLiveIn(AArch64::NZCV);
2047 EndBB->addLiveIn(AArch64::NZCV);
2048 }
2049
2050 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
2051 .addReg(IfTrueReg)
2052 .addMBB(TrueBB)
2053 .addReg(IfFalseReg)
2054 .addMBB(MBB);
2055
2056 MI.eraseFromParent();
2057 return EndBB;
2058}
2059
2060MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
2061 MachineInstr &MI, MachineBasicBlock *BB) const {
2062 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
2063 BB->getParent()->getFunction().getPersonalityFn())) &&
2064 "SEH does not use catchret!");
2065 return BB;
2066}
2067
2068MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
2069 MachineInstr &MI, MachineBasicBlock *BB) const {
2070 switch (MI.getOpcode()) {
2071 default:
2072#ifndef NDEBUG
2073 MI.dump();
2074#endif
2075 llvm_unreachable("Unexpected instruction for custom inserter!");
2076
2077 case AArch64::F128CSEL:
2078 return EmitF128CSEL(MI, BB);
2079
2080 case TargetOpcode::STACKMAP:
2081 case TargetOpcode::PATCHPOINT:
2082 case TargetOpcode::STATEPOINT:
2083 return emitPatchPoint(MI, BB);
2084
2085 case AArch64::CATCHRET:
2086 return EmitLoweredCatchRet(MI, BB);
2087 }
2088}
2089
2090//===----------------------------------------------------------------------===//
2091// AArch64 Lowering private implementation.
2092//===----------------------------------------------------------------------===//
2093
2094//===----------------------------------------------------------------------===//
2095// Lowering Code
2096//===----------------------------------------------------------------------===//
2097
2098/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
2099/// CC
2100static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
2101 switch (CC) {
2102 default:
2103 llvm_unreachable("Unknown condition code!");
2104 case ISD::SETNE:
2105 return AArch64CC::NE;
2106 case ISD::SETEQ:
2107 return AArch64CC::EQ;
2108 case ISD::SETGT:
2109 return AArch64CC::GT;
2110 case ISD::SETGE:
2111 return AArch64CC::GE;
2112 case ISD::SETLT:
2113 return AArch64CC::LT;
2114 case ISD::SETLE:
2115 return AArch64CC::LE;
2116 case ISD::SETUGT:
2117 return AArch64CC::HI;
2118 case ISD::SETUGE:
2119 return AArch64CC::HS;
2120 case ISD::SETULT:
2121 return AArch64CC::LO;
2122 case ISD::SETULE:
2123 return AArch64CC::LS;
2124 }
2125}
2126
2127/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
2128static void changeFPCCToAArch64CC(ISD::CondCode CC,
2129 AArch64CC::CondCode &CondCode,
2130 AArch64CC::CondCode &CondCode2) {
2131 CondCode2 = AArch64CC::AL;
2132 switch (CC) {
2133 default:
2134 llvm_unreachable("Unknown FP condition!");
2135 case ISD::SETEQ:
2136 case ISD::SETOEQ:
2137 CondCode = AArch64CC::EQ;
2138 break;
2139 case ISD::SETGT:
2140 case ISD::SETOGT:
2141 CondCode = AArch64CC::GT;
2142 break;
2143 case ISD::SETGE:
2144 case ISD::SETOGE:
2145 CondCode = AArch64CC::GE;
2146 break;
2147 case ISD::SETOLT:
2148 CondCode = AArch64CC::MI;
2149 break;
2150 case ISD::SETOLE:
2151 CondCode = AArch64CC::LS;
2152 break;
2153 case ISD::SETONE:
2154 CondCode = AArch64CC::MI;
2155 CondCode2 = AArch64CC::GT;
2156 break;
2157 case ISD::SETO:
2158 CondCode = AArch64CC::VC;
2159 break;
2160 case ISD::SETUO:
2161 CondCode = AArch64CC::VS;
2162 break;
2163 case ISD::SETUEQ:
2164 CondCode = AArch64CC::EQ;
2165 CondCode2 = AArch64CC::VS;
2166 break;
2167 case ISD::SETUGT:
2168 CondCode = AArch64CC::HI;
2169 break;
2170 case ISD::SETUGE:
2171 CondCode = AArch64CC::PL;
2172 break;
2173 case ISD::SETLT:
2174 case ISD::SETULT:
2175 CondCode = AArch64CC::LT;
2176 break;
2177 case ISD::SETLE:
2178 case ISD::SETULE:
2179 CondCode = AArch64CC::LE;
2180 break;
2181 case ISD::SETNE:
2182 case ISD::SETUNE:
2183 CondCode = AArch64CC::NE;
2184 break;
2185 }
2186}
2187
2188/// Convert a DAG fp condition code to an AArch64 CC.
2189/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
2190/// should be AND'ed instead of OR'ed.
2191static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
2192 AArch64CC::CondCode &CondCode,
2193 AArch64CC::CondCode &CondCode2) {
2194 CondCode2 = AArch64CC::AL;
2195 switch (CC) {
2196 default:
2197 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2198 assert(CondCode2 == AArch64CC::AL);
2199 break;
2200 case ISD::SETONE:
2201 // (a one b)
2202 // == ((a olt b) || (a ogt b))
2203 // == ((a ord b) && (a une b))
2204 CondCode = AArch64CC::VC;
2205 CondCode2 = AArch64CC::NE;
2206 break;
2207 case ISD::SETUEQ:
2208 // (a ueq b)
2209 // == ((a uno b) || (a oeq b))
2210 // == ((a ule b) && (a uge b))
2211 CondCode = AArch64CC::PL;
2212 CondCode2 = AArch64CC::LE;
2213 break;
2214 }
2215}
2216
2217/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
2218/// CC usable with the vector instructions. Fewer operations are available
2219/// without a real NZCV register, so we have to use less efficient combinations
2220/// to get the same effect.
2221static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
2222 AArch64CC::CondCode &CondCode,
2223 AArch64CC::CondCode &CondCode2,
2224 bool &Invert) {
2225 Invert = false;
2226 switch (CC) {
2227 default:
2228 // Mostly the scalar mappings work fine.
2229 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2230 break;
2231 case ISD::SETUO:
2232 Invert = true;
2233 LLVM_FALLTHROUGH;
2234 case ISD::SETO:
2235 CondCode = AArch64CC::MI;
2236 CondCode2 = AArch64CC::GE;
2237 break;
2238 case ISD::SETUEQ:
2239 case ISD::SETULT:
2240 case ISD::SETULE:
2241 case ISD::SETUGT:
2242 case ISD::SETUGE:
2243 // All of the compare-mask comparisons are ordered, but we can switch
2244 // between the two by a double inversion. E.g. ULE == !OGT.
2245 Invert = true;
2246 changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
2247 CondCode, CondCode2);
2248 break;
2249 }
2250}
2251
2252static bool isLegalArithImmed(uint64_t C) {
2253 // Matches AArch64DAGToDAGISel::SelectArithImmed().
2254 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
2255 LLVM_DEBUG(dbgs() << "Is imm " << C
2256 << " legal: " << (IsLegal ? "yes\n" : "no\n"));
2257 return IsLegal;
2258}
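// Illustrative sketch (added, not part of the original source): the predicate
// above mirrors the AArch64 ADD/SUB immediate encoding, i.e. a 12-bit value
// optionally shifted left by 12 bits. The helper name is hypothetical and it
// needs <cstdint> if compiled standalone.
constexpr bool isLegalArithImmedRef(uint64_t C) {
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}
static_assert(isLegalArithImmedRef(0xFFF), "fits in 12 bits");
static_assert(isLegalArithImmedRef(0xFFF000), "12 bits shifted left by 12");
static_assert(!isLegalArithImmedRef(0x1001), "needs more than 12 bits");
static_assert(!isLegalArithImmedRef(0x1000000), "shifted value is too wide");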
2259
2260 // Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
2261 // the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
2262// can be set differently by this operation. It comes down to whether
2263// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
2264// everything is fine. If not then the optimization is wrong. Thus general
2265// comparisons are only valid if op2 != 0.
2266//
2267// So, finally, the only LLVM-native comparisons that don't mention C and V
2268// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
2269// the absence of information about op2.
2270static bool isCMN(SDValue Op, ISD::CondCode CC) {
2271 return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
2272 (CC == ISD::SETEQ || CC == ISD::SETNE);
2273}
2274
2275static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
2276 SelectionDAG &DAG, SDValue Chain,
2277 bool IsSignaling) {
2278 EVT VT = LHS.getValueType();
2279 assert(VT != MVT::f128);
2280 assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
2281 unsigned Opcode =
2282 IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
2283 return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
2284}
2285
2286static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2287 const SDLoc &dl, SelectionDAG &DAG) {
2288 EVT VT = LHS.getValueType();
2289 const bool FullFP16 =
2290 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2291
2292 if (VT.isFloatingPoint()) {
2293 assert(VT != MVT::f128);
2294 if (VT == MVT::f16 && !FullFP16) {
2295 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
2296 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
2297 VT = MVT::f32;
2298 }
2299 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
2300 }
2301
2302 // The CMP instruction is just an alias for SUBS, and representing it as
2303 // SUBS means that it's possible to get CSE with subtract operations.
2304 // A later phase can perform the optimization of setting the destination
2305 // register to WZR/XZR if it ends up being unused.
2306 unsigned Opcode = AArch64ISD::SUBS;
2307
2308 if (isCMN(RHS, CC)) {
2309 // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
2310 Opcode = AArch64ISD::ADDS;
2311 RHS = RHS.getOperand(1);
2312 } else if (isCMN(LHS, CC)) {
2313 // As we are looking for EQ/NE compares, the operands can be commuted; can
2314 // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
2315 Opcode = AArch64ISD::ADDS;
2316 LHS = LHS.getOperand(1);
2317 } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
2318 if (LHS.getOpcode() == ISD::AND) {
2319 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
2320 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
2321 // of the signed comparisons.
2322 const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
2323 DAG.getVTList(VT, MVT_CC),
2324 LHS.getOperand(0),
2325 LHS.getOperand(1));
2326 // Replace all users of (and X, Y) with newly generated (ands X, Y)
2327 DAG.ReplaceAllUsesWith(LHS, ANDSNode);
2328 return ANDSNode.getValue(1);
2329 } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
2330 // Use result of ANDS
2331 return LHS.getValue(1);
2332 }
2333 }
2334
2335 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
2336 .getValue(1);
2337}
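// Illustrative summary (added, not part of the original source): for EQ/NE,
// (CMP op1, (sub 0, op2)) is emitted as CMN op1, op2 (an ADDS whose result is
// discarded), and for EQ/NE and the signed conditions (CMP (and X, Y), 0) is
// emitted as TST X, Y (an ANDS whose result replaces the original AND).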
2338
2339/// \defgroup AArch64CCMP CMP;CCMP matching
2340///
2341/// These functions deal with the formation of CMP;CCMP;... sequences.
2342/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
2343/// a comparison. They set the NZCV flags to a predefined value if their
2344 /// predicate is false. This allows us to express arbitrary conjunctions, for
2345/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
2346/// expressed as:
2347/// cmp A
2348/// ccmp B, inv(CB), CA
2349/// check for CB flags
2350///
2351/// This naturally lets us implement chains of AND operations with SETCC
2352/// operands. And we can even implement some other situations by transforming
2353/// them:
2354 /// - We can implement (NEG SETCC), i.e. negating a single comparison, by
2355 /// negating the flags used in a CCMP/FCCMP operation.
2356/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
2357/// by negating the flags we test for afterwards. i.e.
2358/// NEG (CMP CCMP CCCMP ...) can be implemented.
2359/// - Note that we can only ever negate all previously processed results.
2360/// What we can not implement by flipping the flags to test is a negation
2361/// of two sub-trees (because the negation affects all sub-trees emitted so
2362/// far, so the 2nd sub-tree we emit would also affect the first).
2363/// With those tools we can implement some OR operations:
2364/// - (OR (SETCC A) (SETCC B)) can be implemented via:
2365/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
2366/// - After transforming OR to NEG/AND combinations we may be able to use NEG
2367/// elimination rules from earlier to implement the whole thing as a
2368/// CCMP/FCCMP chain.
2369///
2370 /// As a complete example:
2371 /// or (or (setCA (cmp A)) (setCB (cmp B)))
2372 /// (and (setCC (cmp C)) (setCD (cmp D)))
2373/// can be reassociated to:
2374 /// or (and (setCC (cmp C)) (setCD (cmp D)))
2375 /// (or (setCA (cmp A)) (setCB (cmp B)))
2376/// can be transformed to:
2377 /// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
2378 /// (and (not (setCA (cmp A))) (not (setCB (cmp B)))))
2379/// which can be implemented as:
2380/// cmp C
2381/// ccmp D, inv(CD), CC
2382/// ccmp A, CA, inv(CD)
2383/// ccmp B, CB, inv(CA)
2384/// check for CB flags
2385///
2386/// A counterexample is "or (and A B) (and C D)" which translates to
2387 /// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), but we
2388 /// can only implement one of the inner (not) operations, not both!
2389/// @{
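// Illustrative example (added, not part of the original source): for IR like
//   %c = icmp eq i32 %a, 0
//   %d = icmp eq i32 %b, 5
//   %r = and i1 %c, %d
// the expected lowering is roughly:
//   cmp  w0, #0
//   ccmp w1, #5, #0, eq  // only compares when the first test passed;
//                        // otherwise NZCV is forced to 0b0000, so "eq" fails
//   cset w0, eq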
2390
2391/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
2392static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
2393 ISD::CondCode CC, SDValue CCOp,
2394 AArch64CC::CondCode Predicate,
2395 AArch64CC::CondCode OutCC,
2396 const SDLoc &DL, SelectionDAG &DAG) {
2397 unsigned Opcode = 0;
2398 const bool FullFP16 =
2399 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2400
2401 if (LHS.getValueType().isFloatingPoint()) {
2402 assert(LHS.getValueType() != MVT::f128);
2403 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
2404 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
2405 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
2406 }
2407 Opcode = AArch64ISD::FCCMP;
2408 } else if (RHS.getOpcode() == ISD::SUB) {
2409 SDValue SubOp0 = RHS.getOperand(0);
2410 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2411 // See emitComparison() on why we can only do this for SETEQ and SETNE.
2412 Opcode = AArch64ISD::CCMN;
2413 RHS = RHS.getOperand(1);
2414 }
2415 }
2416 if (Opcode == 0)
2417 Opcode = AArch64ISD::CCMP;
2418
2419 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
2420 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
2421 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
2422 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
2423 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
2424}
2425
2426/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
2427/// expressed as a conjunction. See \ref AArch64CCMP.
2428/// \param CanNegate Set to true if we can negate the whole sub-tree just by
2429/// changing the conditions on the SETCC tests.
2430/// (this means we can call emitConjunctionRec() with
2431/// Negate==true on this sub-tree)
2432/// \param MustBeFirst Set to true if this subtree needs to be negated and we
2433/// cannot do the negation naturally. We are required to
2434/// emit the subtree first in this case.
2435 /// \param WillNegate Is true if we are called when the result of this
2436/// subexpression must be negated. This happens when the
2437/// outer expression is an OR. We can use this fact to know
2438/// that we have a double negation (or (or ...) ...) that
2439/// can be implemented for free.
2440static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
2441 bool &MustBeFirst, bool WillNegate,
2442 unsigned Depth = 0) {
2443 if (!Val.hasOneUse())
2444 return false;
2445 unsigned Opcode = Val->getOpcode();
2446 if (Opcode == ISD::SETCC) {
2447 if (Val->getOperand(0).getValueType() == MVT::f128)
2448 return false;
2449 CanNegate = true;
2450 MustBeFirst = false;
2451 return true;
2452 }
2453 // Protect against exponential runtime and stack overflow.
2454 if (Depth > 6)
2455 return false;
2456 if (Opcode == ISD::AND || Opcode == ISD::OR) {
2457 bool IsOR = Opcode == ISD::OR;
2458 SDValue O0 = Val->getOperand(0);
2459 SDValue O1 = Val->getOperand(1);
2460 bool CanNegateL;
2461 bool MustBeFirstL;
2462 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
2463 return false;
2464 bool CanNegateR;
2465 bool MustBeFirstR;
2466 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
2467 return false;
2468
2469 if (MustBeFirstL && MustBeFirstR)
2470 return false;
2471
2472 if (IsOR) {
2473 // For an OR expression we need to be able to naturally negate at least
2474 // one side or we cannot do the transformation at all.
2475 if (!CanNegateL && !CanNegateR)
2476 return false;
2477 // If the result of the OR will be negated and we can naturally negate
2478 // the leaves, then this sub-tree as a whole negates naturally.
2479 CanNegate = WillNegate && CanNegateL && CanNegateR;
2480 // If we cannot naturally negate the whole sub-tree, then this must be
2481 // emitted first.
2482 MustBeFirst = !CanNegate;
2483 } else {
2484 assert(Opcode == ISD::AND && "Must be OR or AND");
2485 // We cannot naturally negate an AND operation.
2486 CanNegate = false;
2487 MustBeFirst = MustBeFirstL || MustBeFirstR;
2488 }
2489 return true;
2490 }
2491 return false;
2492}
2493
2494/// Emit a conjunction or disjunction tree with the CMP/FCMP followed by a chain
2495/// of CCMP/FCCMP ops. See @ref AArch64CCMP.
2496/// Tries to transform the given i1 producing node @p Val to a series of compare
2497/// and conditional compare operations. @returns an NZCV flags producing node
2498/// and sets @p OutCC to the flags that should be tested, or returns SDValue() if
2499/// the transformation was not possible.
2500/// \p Negate is true if we want this sub-tree to be negated just by changing
2501/// SETCC conditions.
2502static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
2503 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
2504 AArch64CC::CondCode Predicate) {
2505 // We're at a tree leaf, produce a conditional comparison operation.
2506 unsigned Opcode = Val->getOpcode();
2507 if (Opcode == ISD::SETCC) {
2508 SDValue LHS = Val->getOperand(0);
2509 SDValue RHS = Val->getOperand(1);
2510 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
2511 bool isInteger = LHS.getValueType().isInteger();
2512 if (Negate)
2513 CC = getSetCCInverse(CC, LHS.getValueType());
2514 SDLoc DL(Val);
2515 // Determine OutCC and handle FP special case.
2516 if (isInteger) {
2517 OutCC = changeIntCCToAArch64CC(CC);
2518 } else {
2519 assert(LHS.getValueType().isFloatingPoint());
2520 AArch64CC::CondCode ExtraCC;
2521 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
2522 // Some floating point conditions can't be tested with a single condition
2523 // code. Construct an additional comparison in this case.
2524 if (ExtraCC != AArch64CC::AL) {
2525 SDValue ExtraCmp;
2526 if (!CCOp.getNode())
2527 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
2528 else
2529 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
2530 ExtraCC, DL, DAG);
2531 CCOp = ExtraCmp;
2532 Predicate = ExtraCC;
2533 }
2534 }
2535
2536 // Produce a normal comparison if we are first in the chain
2537 if (!CCOp)
2538 return emitComparison(LHS, RHS, CC, DL, DAG);
2539 // Otherwise produce a ccmp.
2540 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
2541 DAG);
2542 }
2543 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
2544
2545 bool IsOR = Opcode == ISD::OR;
2546
2547 SDValue LHS = Val->getOperand(0);
2548 bool CanNegateL;
2549 bool MustBeFirstL;
2550 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
2551 assert(ValidL && "Valid conjunction/disjunction tree");
2552 (void)ValidL;
2553
2554 SDValue RHS = Val->getOperand(1);
2555 bool CanNegateR;
2556 bool MustBeFirstR;
2557 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
2558 assert(ValidR && "Valid conjunction/disjunction tree");
2559 (void)ValidR;
2560
2561 // Swap sub-tree that must come first to the right side.
2562 if (MustBeFirstL) {
2563 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
2564 std::swap(LHS, RHS);
2565 std::swap(CanNegateL, CanNegateR);
2566 std::swap(MustBeFirstL, MustBeFirstR);
2567 }
2568
2569 bool NegateR;
2570 bool NegateAfterR;
2571 bool NegateL;
2572 bool NegateAfterAll;
2573 if (Opcode == ISD::OR) {
2574 // Swap the sub-tree that we can negate naturally to the left.
2575 if (!CanNegateL) {
2576 assert(CanNegateR && "at least one side must be negatable");
2577 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
2578 assert(!Negate);
2579 std::swap(LHS, RHS);
2580 NegateR = false;
2581 NegateAfterR = true;
2582 } else {
2583 // Negate the left sub-tree if possible, otherwise negate the result.
2584 NegateR = CanNegateR;
2585 NegateAfterR = !CanNegateR;
2586 }
2587 NegateL = true;
2588 NegateAfterAll = !Negate;
2589 } else {
2590 assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
2591 assert(!Negate && "Valid conjunction/disjunction tree");
2592
2593 NegateL = false;
2594 NegateR = false;
2595 NegateAfterR = false;
2596 NegateAfterAll = false;
2597 }
2598
2599 // Emit sub-trees.
2600 AArch64CC::CondCode RHSCC;
2601 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
2602 if (NegateAfterR)
2603 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
2604 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
2605 if (NegateAfterAll)
2606 OutCC = AArch64CC::getInvertedCondCode(OutCC);
2607 return CmpL;
2608}
2609
2610/// Emit an expression as a conjunction (a series of CCMP/FCCMP ops).
2611/// In some cases this is even possible with OR operations in the expression.
2612/// See \ref AArch64CCMP.
2613/// \see emitConjunctionRec().
2614static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
2615 AArch64CC::CondCode &OutCC) {
2616 bool DummyCanNegate;
2617 bool DummyMustBeFirst;
2618 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
2619 return SDValue();
2620
2621 return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
2622}
2623
2624/// @}
2625
2626/// Returns how profitable it is to fold a comparison's operand's shift and/or
2627/// extension operations.
2628static unsigned getCmpOperandFoldingProfit(SDValue Op) {
2629 auto isSupportedExtend = [&](SDValue V) {
2630 if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
2631 return true;
2632
2633 if (V.getOpcode() == ISD::AND)
2634 if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
2635 uint64_t Mask = MaskCst->getZExtValue();
2636 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
2637 }
2638
2639 return false;
2640 };
2641
2642 if (!Op.hasOneUse())
2643 return 0;
2644
2645 if (isSupportedExtend(Op))
2646 return 1;
2647
2648 unsigned Opc = Op.getOpcode();
2649 if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
2650 if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2651 uint64_t Shift = ShiftCst->getZExtValue();
2652 if (isSupportedExtend(Op.getOperand(0)))
2653 return (Shift <= 4) ? 2 : 1;
2654 EVT VT = Op.getValueType();
2655 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
2656 return 1;
2657 }
2658
2659 return 0;
2660}
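
// Illustrative aside, not part of AArch64ISelLowering.cpp: the masks accepted
// by isSupportedExtend() above (0xFF, 0xFFFF, 0xFFFFFFFF) are exactly the AND
// patterns that behave like a zero extension from 8/16/32 bits, which is what
// lets them use the extended-register form of CMP. A minimal self-check,
// assuming only standard C++:
#include <cassert>
#include <cstdint>

static void checkMaskActsAsZeroExtend(uint64_t X) {
  assert((X & 0xFFull) == static_cast<uint64_t>(static_cast<uint8_t>(X)));
  assert((X & 0xFFFFull) == static_cast<uint64_t>(static_cast<uint16_t>(X)));
  assert((X & 0xFFFFFFFFull) == static_cast<uint64_t>(static_cast<uint32_t>(X)));
}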
2661
2662static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2663 SDValue &AArch64cc, SelectionDAG &DAG,
2664 const SDLoc &dl) {
2665 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2666 EVT VT = RHS.getValueType();
2667 uint64_t C = RHSC->getZExtValue();
2668 if (!isLegalArithImmed(C)) {
2669 // Constant does not fit, try adjusting it by one?
2670 switch (CC) {
2671 default:
2672 break;
2673 case ISD::SETLT:
2674 case ISD::SETGE:
2675 if ((VT == MVT::i32 && C != 0x80000000 &&
2676 isLegalArithImmed((uint32_t)(C - 1))) ||
2677 (VT == MVT::i64 && C != 0x80000000ULL &&
2678 isLegalArithImmed(C - 1ULL))) {
2679 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2680 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2681 RHS = DAG.getConstant(C, dl, VT);
2682 }
2683 break;
2684 case ISD::SETULT:
2685 case ISD::SETUGE:
2686 if ((VT == MVT::i32 && C != 0 &&
2687 isLegalArithImmed((uint32_t)(C - 1))) ||
2688 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
2689 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2690 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2691 RHS = DAG.getConstant(C, dl, VT);
2692 }
2693 break;
2694 case ISD::SETLE:
2695 case ISD::SETGT:
2696 if ((VT == MVT::i32 && C != INT32_MAX &&
2697 isLegalArithImmed((uint32_t)(C + 1))) ||
2698 (VT == MVT::i64 && C != INT64_MAX &&
2699 isLegalArithImmed(C + 1ULL))) {
2700 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2701 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2702 RHS = DAG.getConstant(C, dl, VT);
2703 }
2704 break;
2705 case ISD::SETULE:
2706 case ISD::SETUGT:
2707 if ((VT == MVT::i32 && C != UINT32_MAX &&
2708 isLegalArithImmed((uint32_t)(C + 1))) ||
2709 (VT == MVT::i64 && C != UINT64_MAX &&
2710 isLegalArithImmed(C + 1ULL))) {
2711 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2712 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2713 RHS = DAG.getConstant(C, dl, VT);
2714 }
2715 break;
2716 }
2717 }
2718 }
2719
2720 // Comparisons are canonicalized so that the RHS operand is simpler than the
2721 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
2722 // can fold some shift+extend operations on the RHS operand, so swap the
2723 // operands if that can be done.
2724 //
2725 // For example:
2726 // lsl w13, w11, #1
2727 // cmp w13, w12
2728 // can be turned into:
2729 // cmp w12, w11, lsl #1
2730 if (!isa<ConstantSDNode>(RHS) ||
2731 !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
2732 SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
2733
2734 if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
2735 std::swap(LHS, RHS);
2736 CC = ISD::getSetCCSwappedOperands(CC);
2737 }
2738 }
2739
2740 SDValue Cmp;
2741 AArch64CC::CondCode AArch64CC;
2742 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
2743 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
2744
2745 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
2746 // For the i8 operand, the largest immediate is 255, so this can be easily
2747 // encoded in the compare instruction. For the i16 operand, however, the
2748 // largest immediate cannot be encoded in the compare.
2749 // Therefore, use a sign extending load and cmn to avoid materializing the
2750 // -1 constant. For example,
2751 // movz w1, #65535
2752 // ldrh w0, [x0, #0]
2753 // cmp w0, w1
2754 // >
2755 // ldrsh w0, [x0, #0]
2756 // cmn w0, #1
2757 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
2758 // if and only if (sext LHS) == (sext RHS). The checks are in place to
2759 // ensure both the LHS and RHS are truly zero extended and to make sure the
2760 // transformation is profitable.
2761 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
2762 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
2763 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
2764 LHS.getNode()->hasNUsesOfValue(1, 0)) {
2765 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
2766 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
2767 SDValue SExt =
2768 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
2769 DAG.getValueType(MVT::i16));
2770 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
2771 RHS.getValueType()),
2772 CC, dl, DAG);
2773 AArch64CC = changeIntCCToAArch64CC(CC);
2774 }
2775 }
2776
2777 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
2778 if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
2779 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
2780 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
2781 }
2782 }
2783 }
2784
2785 if (!Cmp) {
2786 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
2787 AArch64CC = changeIntCCToAArch64CC(CC);
2788 }
2789 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
2790 return Cmp;
2791}
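
// Illustrative aside, not part of AArch64ISelLowering.cpp: the constant
// adjustments performed in getAArch64Cmp() above rest on simple ordering
// identities. A self-check for the i32 cases, assuming two's complement
// arithmetic; the guards mirror the C != MIN/MAX checks in the code, and the
// helper name is invented for this sketch.
#include <cassert>
#include <cstdint>

static void checkImmediateAdjustment(int32_t X, int32_t C) {
  if (C != INT32_MIN)   // SETLT -> SETLE / SETGE -> SETGT with C - 1
    assert((X < C) == (X <= C - 1));
  if (C != INT32_MAX)   // SETLE -> SETLT / SETGT -> SETGE with C + 1
    assert((X <= C) == (X < C + 1));
  uint32_t UX = static_cast<uint32_t>(X), UC = static_cast<uint32_t>(C);
  if (UC != 0)          // SETULT -> SETULE with C - 1
    assert((UX < UC) == (UX <= UC - 1));
  if (UC != UINT32_MAX) // SETULE -> SETULT with C + 1
    assert((UX <= UC) == (UX < UC + 1));
}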
2792
2793static std::pair<SDValue, SDValue>
2794getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
2795 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
2796 "Unsupported value type");
2797 SDValue Value, Overflow;
2798 SDLoc DL(Op);
2799 SDValue LHS = Op.getOperand(0);
2800 SDValue RHS = Op.getOperand(1);
2801 unsigned Opc = 0;
2802 switch (Op.getOpcode()) {
2803 default:
2804 llvm_unreachable("Unknown overflow instruction!")::llvm::llvm_unreachable_internal("Unknown overflow instruction!"
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2804)
;
2805 case ISD::SADDO:
2806 Opc = AArch64ISD::ADDS;
2807 CC = AArch64CC::VS;
2808 break;
2809 case ISD::UADDO:
2810 Opc = AArch64ISD::ADDS;
2811 CC = AArch64CC::HS;
2812 break;
2813 case ISD::SSUBO:
2814 Opc = AArch64ISD::SUBS;
2815 CC = AArch64CC::VS;
2816 break;
2817 case ISD::USUBO:
2818 Opc = AArch64ISD::SUBS;
2819 CC = AArch64CC::LO;
2820 break;
2821 // Multiply needs a little bit of extra work.
2822 case ISD::SMULO:
2823 case ISD::UMULO: {
2824 CC = AArch64CC::NE;
2825 bool IsSigned = Op.getOpcode() == ISD::SMULO;
2826 if (Op.getValueType() == MVT::i32) {
2827 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2828 // For a 32 bit multiply with overflow check we want the instruction
2829 // selector to generate a widening multiply (SMADDL/UMADDL). For that we
2830 // need to generate the following pattern:
2831 // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
2832 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
2833 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
2834 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2835 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
2836 DAG.getConstant(0, DL, MVT::i64));
2837 // On AArch64 the upper 32 bits are always zero extended for a 32 bit
2838 // operation. We need to clear out the upper 32 bits, because we used a
2839 // widening multiply that wrote all 64 bits. In the end this should be a
2840 // noop.
2841 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
2842 if (IsSigned) {
2843 // The signed overflow check requires more than just a simple check for
2844 // any bit set in the upper 32 bits of the result. These bits could be
2845 // just the sign bits of a negative number. To perform the overflow
2846 // check we arithmetic-shift-right the lower 32 bits of the result by 31
2847 // (replicating the sign bit) and compare that to the upper 32 bits.
2848 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
2849 DAG.getConstant(32, DL, MVT::i64));
2850 UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
2851 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
2852 DAG.getConstant(31, DL, MVT::i64));
2853 // It is important that LowerBits is last, otherwise the arithmetic
2854 // shift will not be folded into the compare (SUBS).
2855 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
2856 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2857 .getValue(1);
2858 } else {
2859 // The overflow check for unsigned multiply is easy. We only need to
2860 // check if any of the upper 32 bits are set. This can be done with a
2861 // CMP (shifted register). For that we need to generate the following
2862 // pattern:
2863 // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
2864 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2865 DAG.getConstant(32, DL, MVT::i64));
2866 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2867 Overflow =
2868 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2869 DAG.getConstant(0, DL, MVT::i64),
2870 UpperBits).getValue(1);
2871 }
2872 break;
2873 }
2874 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
2875 // For the 64 bit multiply
2876 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2877 if (IsSigned) {
2878 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
2879 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
2880 DAG.getConstant(63, DL, MVT::i64));
2881 // It is important that LowerBits is last, otherwise the arithmetic
2882 // shift will not be folded into the compare (SUBS).
2883 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2884 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2885 .getValue(1);
2886 } else {
2887 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
2888 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2889 Overflow =
2890 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2891 DAG.getConstant(0, DL, MVT::i64),
2892 UpperBits).getValue(1);
2893 }
2894 break;
2895 }
2896 } // switch (...)
2897
2898 if (Opc) {
2899 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
2900
2901 // Emit the AArch64 operation with overflow check.
2902 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
2903 Overflow = Value.getValue(1);
2904 }
2905 return std::make_pair(Value, Overflow);
2906}
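
// Illustrative aside, not part of AArch64ISelLowering.cpp: the 32-bit
// SMULO/UMULO lowering above performs a widening multiply and then inspects
// the upper half of the result. The same checks in plain C++, with invented
// helper names; the comparisons mirror the UpperBits/LowerBits SUBS nodes
// built above (a wrapping int conversion is assumed for the truncation).
#include <cstdint>

static bool smulo32(int32_t A, int32_t B, int32_t &Res) {
  int64_t Wide = static_cast<int64_t>(A) * static_cast<int64_t>(B);
  Res = static_cast<int32_t>(Wide); // low 32 bits of the widening multiply
  // Signed overflow iff the full product is not the sign extension of its low
  // 32 bits; this is what the UpperBits vs. LowerBits comparison tests.
  return Wide != static_cast<int64_t>(Res);
}

static bool umulo32(uint32_t A, uint32_t B, uint32_t &Res) {
  uint64_t Wide = static_cast<uint64_t>(A) * static_cast<uint64_t>(B);
  Res = static_cast<uint32_t>(Wide);
  // Unsigned overflow iff any of the upper 32 bits are set.
  return (Wide >> 32) != 0;
}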
2907
2908SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
2909 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
2910 return LowerToScalableOp(Op, DAG);
2911
2912 SDValue Sel = Op.getOperand(0);
2913 SDValue Other = Op.getOperand(1);
2914 SDLoc dl(Sel);
2915
2916 // If the operand is an overflow checking operation, invert the condition
2917 // code and kill the Not operation. I.e., transform:
2918 // (xor (overflow_op_bool, 1))
2919 // -->
2920 // (csel 1, 0, invert(cc), overflow_op_bool)
2921 // ... which later gets transformed to just a cset instruction with an
2922 // inverted condition code, rather than a cset + eor sequence.
2923 if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
2924 // Only lower legal XALUO ops.
2925 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
2926 return SDValue();
2927
2928 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2929 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2930 AArch64CC::CondCode CC;
2931 SDValue Value, Overflow;
2932 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
2933 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2934 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
2935 CCVal, Overflow);
2936 }
2937 // If neither operand is a SELECT_CC, give up.
2938 if (Sel.getOpcode() != ISD::SELECT_CC)
2939 std::swap(Sel, Other);
2940 if (Sel.getOpcode() != ISD::SELECT_CC)
2941 return Op;
2942
2943 // The folding we want to perform is:
2944 // (xor x, (select_cc a, b, cc, 0, -1) )
2945 // -->
2946 // (csel x, (xor x, -1), cc ...)
2947 //
2948 // The latter will get matched to a CSINV instruction.
2949
2950 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
2951 SDValue LHS = Sel.getOperand(0);
2952 SDValue RHS = Sel.getOperand(1);
2953 SDValue TVal = Sel.getOperand(2);
2954 SDValue FVal = Sel.getOperand(3);
2955
2956 // FIXME: This could be generalized to non-integer comparisons.
2957 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
2958 return Op;
2959
2960 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
2961 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
2962
2963 // The values aren't constants, this isn't the pattern we're looking for.
2964 if (!CFVal || !CTVal)
2965 return Op;
2966
2967 // We can commute the SELECT_CC by inverting the condition. This
2968 // might be needed to make this fit into a CSINV pattern.
2969 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
2970 std::swap(TVal, FVal);
2971 std::swap(CTVal, CFVal);
2972 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
2973 }
2974
2975 // If the constants line up, perform the transform!
2976 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
2977 SDValue CCVal;
2978 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
2979
2980 FVal = Other;
2981 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
2982 DAG.getConstant(-1ULL, dl, Other.getValueType()));
2983
2984 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
2985 CCVal, Cmp);
2986 }
2987
2988 return Op;
2989}
2990
2991static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
2992 EVT VT = Op.getValueType();
2993
2994 // Let legalize expand this if it isn't a legal type yet.
2995 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
2996 return SDValue();
2997
2998 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
2999
3000 unsigned Opc;
3001 bool ExtraOp = false;
3002 switch (Op.getOpcode()) {
3003 default:
3004 llvm_unreachable("Invalid code")::llvm::llvm_unreachable_internal("Invalid code", "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3004)
;
3005 case ISD::ADDC:
3006 Opc = AArch64ISD::ADDS;
3007 break;
3008 case ISD::SUBC:
3009 Opc = AArch64ISD::SUBS;
3010 break;
3011 case ISD::ADDE:
3012 Opc = AArch64ISD::ADCS;
3013 ExtraOp = true;
3014 break;
3015 case ISD::SUBE:
3016 Opc = AArch64ISD::SBCS;
3017 ExtraOp = true;
3018 break;
3019 }
3020
3021 if (!ExtraOp)
3022 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
3023 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
3024 Op.getOperand(2));
3025}
3026
3027static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
3028 // Let legalize expand this if it isn't a legal type yet.
3029 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3030 return SDValue();
3031
3032 SDLoc dl(Op);
3033 AArch64CC::CondCode CC;
3034 // The actual operation that sets the overflow or carry flag.
3035 SDValue Value, Overflow;
3036 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
3037
3038 // We use 0 and 1 as false and true values.
3039 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3040 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3041
3042 // We use an inverted condition, because the conditional select is inverted
3043 // too. This will allow it to be selected to a single instruction:
3044 // CSINC Wd, WZR, WZR, invert(cond).
3045 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3046 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
3047 CCVal, Overflow);
3048
3049 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3050 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3051}
3052
3053// Prefetch operands are:
3054// 1: Address to prefetch
3055// 2: bool isWrite
3056// 3: int locality (0 = no locality ... 3 = extreme locality)
3057// 4: bool isDataCache
3058static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
3059 SDLoc DL(Op);
3060 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3061 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
3062 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3063
3064 bool IsStream = !Locality;
3065 // When the locality number is set
3066 if (Locality) {
3067 // The front-end should have filtered out the out-of-range values
3068 assert(Locality <= 3 && "Prefetch locality out-of-range");
3069 // The locality degree is the opposite of the cache speed.
3070 // Put the number the other way around.
3071 // The encoding starts at 0 for level 1
3072 Locality = 3 - Locality;
3073 }
3074
3075 // Build the mask value encoding the expected behavior.
3076 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
3077 (!IsData << 3) | // IsDataCache bit
3078 (Locality << 1) | // Cache level bits
3079 (unsigned)IsStream; // Stream bit
3080 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
3081 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
3082}
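
// Illustrative aside, not part of AArch64ISelLowering.cpp: how the PrfOp
// immediate assembled above packs its fields. The helper name is invented for
// this sketch; the bit layout simply restates the expression in LowerPREFETCH.
static unsigned encodePrfOp(bool IsWrite, bool IsData, unsigned Locality) {
  bool IsStream = (Locality == 0);
  unsigned Level = Locality ? 3 - Locality : 0; // L1 is encoded as 0
  return (unsigned(IsWrite) << 4) |   // Load/Store bit
         (unsigned(!IsData) << 3) |   // IsDataCache bit
         (Level << 1) |               // Cache level bits
         unsigned(IsStream);          // Stream bit
}
// For example, encodePrfOp(/*IsWrite=*/false, /*IsData=*/true, /*Locality=*/3)
// yields 0, i.e. a PLDL1KEEP-style hint.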
3083
3084SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
3085 SelectionDAG &DAG) const {
3086 if (Op.getValueType().isScalableVector())
3087 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
3088
3089 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
3090 return SDValue();
3091}
3092
3093SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
3094 SelectionDAG &DAG) const {
3095 if (Op.getValueType().isScalableVector())
3096 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
3097
3098 bool IsStrict = Op->isStrictFPOpcode();
3099 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3100 EVT SrcVT = SrcVal.getValueType();
3101
3102 if (SrcVT != MVT::f128) {
3103 // Expand cases where the input is a vector bigger than NEON.
3104 if (useSVEForFixedLengthVectorVT(SrcVT))
3105 return SDValue();
3106
3107 // It's legal except when f128 is involved
3108 return Op;
3109 }
3110
3111 return SDValue();
3112}
3113
3114SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
3115 SelectionDAG &DAG) const {
3116 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3117 // Any additional optimization in this function should be recorded
3118 // in the cost tables.
3119 EVT InVT = Op.getOperand(0).getValueType();
3120 EVT VT = Op.getValueType();
3121
3122 if (VT.isScalableVector()) {
3123 unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
3124 ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
3125 : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
3126 return LowerToPredicatedOp(Op, DAG, Opcode);
3127 }
3128
3129 unsigned NumElts = InVT.getVectorNumElements();
3130
3131 // f16 conversions are promoted to f32 when full fp16 is not supported.
3132 if (InVT.getVectorElementType() == MVT::f16 &&
3133 !Subtarget->hasFullFP16()) {
3134 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
3135 SDLoc dl(Op);
3136 return DAG.getNode(
3137 Op.getOpcode(), dl, Op.getValueType(),
3138 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
3139 }
3140
3141 uint64_t VTSize = VT.getFixedSizeInBits();
3142 uint64_t InVTSize = InVT.getFixedSizeInBits();
3143 if (VTSize < InVTSize) {
3144 SDLoc dl(Op);
3145 SDValue Cv =
3146 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
3147 Op.getOperand(0));
3148 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
3149 }
3150
3151 if (VTSize > InVTSize) {
3152 SDLoc dl(Op);
3153 MVT ExtVT =
3154 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
3155 VT.getVectorNumElements());
3156 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
3157 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
3158 }
3159
3160 // Type changing conversions are illegal.
3161 return Op;
3162}
3163
3164SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
3165 SelectionDAG &DAG) const {
3166 bool IsStrict = Op->isStrictFPOpcode();
3167 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3168
3169 if (SrcVal.getValueType().isVector())
3170 return LowerVectorFP_TO_INT(Op, DAG);
3171
3172 // f16 conversions are promoted to f32 when full fp16 is not supported.
3173 if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
3174 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3175 SDLoc dl(Op);
3176 return DAG.getNode(
3177 Op.getOpcode(), dl, Op.getValueType(),
3178 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
3179 }
3180
3181 if (SrcVal.getValueType() != MVT::f128) {
3182 // It's legal except when f128 is involved
3183 return Op;
3184 }
3185
3186 return SDValue();
3187}
3188
3189SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
3190 SelectionDAG &DAG) const {
3191 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3192 // Any additional optimization in this function should be recorded
3193 // in the cost tables.
3194 EVT VT = Op.getValueType();
3195 SDLoc dl(Op);
3196 SDValue In = Op.getOperand(0);
3197 EVT InVT = In.getValueType();
3198 unsigned Opc = Op.getOpcode();
3199 bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
3200
3201 if (VT.isScalableVector()) {
3202 if (InVT.getVectorElementType() == MVT::i1) {
3203 // We can't directly extend an SVE predicate; extend it first.
3204 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3205 EVT CastVT = getPromotedVTForPredicate(InVT);
3206 In = DAG.getNode(CastOpc, dl, CastVT, In);
3207 return DAG.getNode(Opc, dl, VT, In);
3208 }
3209
3210 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
3211 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
3212 return LowerToPredicatedOp(Op, DAG, Opcode);
3213 }
3214
3215 uint64_t VTSize = VT.getFixedSizeInBits();
3216 uint64_t InVTSize = InVT.getFixedSizeInBits();
3217 if (VTSize < InVTSize) {
3218 MVT CastVT =
3219 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
3220 InVT.getVectorNumElements());
3221 In = DAG.getNode(Opc, dl, CastVT, In);
3222 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
3223 }
3224
3225 if (VTSize > InVTSize) {
3226 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3227 EVT CastVT = VT.changeVectorElementTypeToInteger();
3228 In = DAG.getNode(CastOpc, dl, CastVT, In);
3229 return DAG.getNode(Opc, dl, VT, In);
3230 }
3231
3232 return Op;
3233}
3234
3235SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
3236 SelectionDAG &DAG) const {
3237 if (Op.getValueType().isVector())
3238 return LowerVectorINT_TO_FP(Op, DAG);
3239
3240 bool IsStrict = Op->isStrictFPOpcode();
3241 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3242
3243 // f16 conversions are promoted to f32 when full fp16 is not supported.
3244 if (Op.getValueType() == MVT::f16 &&
3245 !Subtarget->hasFullFP16()) {
3246 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3247 SDLoc dl(Op);
3248 return DAG.getNode(
3249 ISD::FP_ROUND, dl, MVT::f16,
3250 DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
3251 DAG.getIntPtrConstant(0, dl));
3252 }
3253
3254 // i128 conversions are libcalls.
3255 if (SrcVal.getValueType() == MVT::i128)
3256 return SDValue();
3257
3258 // Other conversions are legal, unless it's to the completely software-based
3259 // fp128.
3260 if (Op.getValueType() != MVT::f128)
3261 return Op;
3262 return SDValue();
3263}
3264
3265SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
3266 SelectionDAG &DAG) const {
3267 // For iOS, we want to call an alternative entry point: __sincos_stret,
3268 // which returns the values in two S / D registers.
3269 SDLoc dl(Op);
3270 SDValue Arg = Op.getOperand(0);
3271 EVT ArgVT = Arg.getValueType();
3272 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
3273
3274 ArgListTy Args;
3275 ArgListEntry Entry;
3276
3277 Entry.Node = Arg;
3278 Entry.Ty = ArgTy;
3279 Entry.IsSExt = false;
3280 Entry.IsZExt = false;
3281 Args.push_back(Entry);
3282
3283 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
3284 : RTLIB::SINCOS_STRET_F32;
3285 const char *LibcallName = getLibcallName(LC);
3286 SDValue Callee =
3287 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
3288
3289 StructType *RetTy = StructType::get(ArgTy, ArgTy);
3290 TargetLowering::CallLoweringInfo CLI(DAG);
3291 CLI.setDebugLoc(dl)
3292 .setChain(DAG.getEntryNode())
3293 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
3294
3295 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3296 return CallResult.first;
3297}
3298
3299static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
3300 EVT OpVT = Op.getValueType();
3301 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
3302 return SDValue();
3303
3304 assert(Op.getOperand(0).getValueType() == MVT::i16);
3305 SDLoc DL(Op);
3306
3307 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
3308 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
3309 return SDValue(
3310 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
3311 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
3312 0);
3313}
3314
3315static EVT getExtensionTo64Bits(const EVT &OrigVT) {
3316 if (OrigVT.getSizeInBits() >= 64)
3317 return OrigVT;
3318
3319 assert(OrigVT.isSimple() && "Expecting a simple value type");
3320
3321 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
3322 switch (OrigSimpleTy) {
3323 default: llvm_unreachable("Unexpected Vector Type");
3324 case MVT::v2i8:
3325 case MVT::v2i16:
3326 return MVT::v2i32;
3327 case MVT::v4i8:
3328 return MVT::v4i16;
3329 }
3330}
3331
3332static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
3333 const EVT &OrigTy,
3334 const EVT &ExtTy,
3335 unsigned ExtOpcode) {
3336 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
3337 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
3338 // 64-bits we need to insert a new extension so that it will be 64-bits.
3339 assert(ExtTy.is128BitVector() && "Unexpected extension size");
3340 if (OrigTy.getSizeInBits() >= 64)
3341 return N;
3342
3343 // Must extend size to at least 64 bits to be used as an operand for VMULL.
3344 EVT NewVT = getExtensionTo64Bits(OrigTy);
3345
3346 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
3347}
3348
3349static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
3350 bool isSigned) {
3351 EVT VT = N->getValueType(0);
3352
3353 if (N->getOpcode() != ISD::BUILD_VECTOR)
3354 return false;
3355
3356 for (const SDValue &Elt : N->op_values()) {
3357 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
3358 unsigned EltSize = VT.getScalarSizeInBits();
3359 unsigned HalfSize = EltSize / 2;
3360 if (isSigned) {
3361 if (!isIntN(HalfSize, C->getSExtValue()))
3362 return false;
3363 } else {
3364 if (!isUIntN(HalfSize, C->getZExtValue()))
3365 return false;
3366 }
3367 continue;
3368 }
3369 return false;
3370 }
3371
3372 return true;
3373}
3374
3375static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
3376 if (N->getOpcode() == ISD::SIGN_EXTEND ||
3377 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
3378 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
3379 N->getOperand(0)->getValueType(0),
3380 N->getValueType(0),
3381 N->getOpcode());
3382
3383 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
3384 EVT VT = N->getValueType(0);
3385 SDLoc dl(N);
3386 unsigned EltSize = VT.getScalarSizeInBits() / 2;
3387 unsigned NumElts = VT.getVectorNumElements();
3388 MVT TruncVT = MVT::getIntegerVT(EltSize);
3389 SmallVector<SDValue, 8> Ops;
3390 for (unsigned i = 0; i != NumElts; ++i) {
3391 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
3392 const APInt &CInt = C->getAPIntValue();
3393 // Element types smaller than 32 bits are not legal, so use i32 elements.
3394 // The values are implicitly truncated so sext vs. zext doesn't matter.
3395 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
3396 }
3397 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
3398}
3399
3400static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
3401 return N->getOpcode() == ISD::SIGN_EXTEND ||
3402 N->getOpcode() == ISD::ANY_EXTEND ||
3403 isExtendedBUILD_VECTOR(N, DAG, true);
3404}
3405
3406static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
3407 return N->getOpcode() == ISD::ZERO_EXTEND ||
3408 N->getOpcode() == ISD::ANY_EXTEND ||
3409 isExtendedBUILD_VECTOR(N, DAG, false);
3410}
3411
3412static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
3413 unsigned Opcode = N->getOpcode();
3414 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3415 SDNode *N0 = N->getOperand(0).getNode();
3416 SDNode *N1 = N->getOperand(1).getNode();
3417 return N0->hasOneUse() && N1->hasOneUse() &&
3418 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
3419 }
3420 return false;
3421}
3422
3423static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
3424 unsigned Opcode = N->getOpcode();
3425 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3426 SDNode *N0 = N->getOperand(0).getNode();
3427 SDNode *N1 = N->getOperand(1).getNode();
3428 return N0->hasOneUse() && N1->hasOneUse() &&
3429 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
3430 }
3431 return false;
3432}
3433
3434SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3435 SelectionDAG &DAG) const {
3436 // The rounding mode is in bits 23:22 of the FPCR.
3437 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
3438 // The formula we use to implement this is ((FPCR + (1 << 22)) >> 22) & 3
3439 // so that the shift + and get folded into a bitfield extract.
3440 SDLoc dl(Op);
3441
3442 SDValue Chain = Op.getOperand(0);
3443 SDValue FPCR_64 = DAG.getNode(
3444 ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
3445 {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
3446 Chain = FPCR_64.getValue(1);
3447 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
3448 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
3449 DAG.getConstant(1U << 22, dl, MVT::i32));
3450 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
3451 DAG.getConstant(22, dl, MVT::i32));
3452 SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
3453 DAG.getConstant(3, dl, MVT::i32));
3454 return DAG.getMergeValues({AND, Chain}, dl);
3455}
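
// Illustrative aside, not part of AArch64ISelLowering.cpp: the arithmetic used
// above, extracted into a tiny helper (name invented for this sketch). Adding
// 1 << 22 before shifting realizes the 0->1, 1->2, 2->3, 3->0 mapping while
// keeping the whole sequence a single add plus bitfield extract.
#include <cassert>
#include <cstdint>

static unsigned fltRoundsFromFPCR(uint32_t FPCR) {
  return ((FPCR + (1u << 22)) >> 22) & 3;
}

static void checkRoundingModeMapping() {
  for (uint32_t RMode = 0; RMode < 4; ++RMode)
    assert(fltRoundsFromFPCR(RMode << 22) == ((RMode + 1) & 3));
}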
3456
3457SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
3458 EVT VT = Op.getValueType();
3459
3460 // If SVE is available then i64 vector multiplications can also be made legal.
3461 bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
3462
3463 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
3464 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
3465
3466 // Multiplications are only custom-lowered for 128-bit vectors so that
3467 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
3468 assert(VT.is128BitVector() && VT.isInteger() &&
3469 "unexpected type for custom-lowering ISD::MUL");
3470 SDNode *N0 = Op.getOperand(0).getNode();
3471 SDNode *N1 = Op.getOperand(1).getNode();
3472 unsigned NewOpc = 0;
3473 bool isMLA = false;
3474 bool isN0SExt = isSignExtended(N0, DAG);
3475 bool isN1SExt = isSignExtended(N1, DAG);
3476 if (isN0SExt && isN1SExt)
3477 NewOpc = AArch64ISD::SMULL;
3478 else {
3479 bool isN0ZExt = isZeroExtended(N0, DAG);
3480 bool isN1ZExt = isZeroExtended(N1, DAG);
3481 if (isN0ZExt && isN1ZExt)
3482 NewOpc = AArch64ISD::UMULL;
3483 else if (isN1SExt || isN1ZExt) {
3484 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
3485 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
3486 if (isN1SExt && isAddSubSExt(N0, DAG)) {
3487 NewOpc = AArch64ISD::SMULL;
3488 isMLA = true;
3489 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
3490 NewOpc = AArch64ISD::UMULL;
3491 isMLA = true;
3492 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
3493 std::swap(N0, N1);
3494 NewOpc = AArch64ISD::UMULL;
3495 isMLA = true;
3496 }
3497 }
3498
3499 if (!NewOpc) {
3500 if (VT == MVT::v2i64)
3501 // Fall through to expand this. It is not legal.
3502 return SDValue();
3503 else
3504 // Other vector multiplications are legal.
3505 return Op;
3506 }
3507 }
3508
3509 // Legalize to a S/UMULL instruction
3510 SDLoc DL(Op);
3511 SDValue Op0;
3512 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
3513 if (!isMLA) {
3514 Op0 = skipExtensionForVectorMULL(N0, DAG);
3515 assert(Op0.getValueType().is64BitVector() &&
3516 Op1.getValueType().is64BitVector() &&
3517 "unexpected types for extended operands to VMULL");
3518 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
3519 }
3520 // Optimizing (zext A + zext B) * C, to (S/UMULL A, C) + (S/UMULL B, C) during
3521 // isel lowering to take advantage of no-stall back to back s/umul + s/umla.
3522 // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
3523 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
3524 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
3525 EVT Op1VT = Op1.getValueType();
3526 return DAG.getNode(N0->getOpcode(), DL, VT,
3527 DAG.getNode(NewOpc, DL, VT,
3528 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
3529 DAG.getNode(NewOpc, DL, VT,
3530 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
3531}
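
// Illustrative aside, not part of AArch64ISelLowering.cpp: the isMLA path
// above rewrites (s/zext A + s/zext B) * (s/zext C) into two widening
// multiplies plus an add, which is valid because multiplication distributes
// over addition (including modulo 2^64). A trivial self-check with an
// invented helper name:
#include <cassert>
#include <cstdint>

static void checkMulDistributesOverAdd(uint64_t A, uint64_t B, uint64_t C) {
  assert((A + B) * C == A * C + B * C);
}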
3532
3533static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
3534 int Pattern) {
3535 return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
3536 DAG.getTargetConstant(Pattern, DL, MVT::i32));
3537}
3538
3539SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3540 SelectionDAG &DAG) const {
3541 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3542 SDLoc dl(Op);
3543 switch (IntNo) {
3544 default: return SDValue(); // Don't custom lower most intrinsics.
3545 case Intrinsic::thread_pointer: {
3546 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3547 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
3548 }
3549 case Intrinsic::aarch64_neon_abs: {
3550 EVT Ty = Op.getValueType();
3551 if (Ty == MVT::i64) {
3552 SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
3553 Op.getOperand(1));
3554 Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
3555 return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
3556 } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
3557 return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
3558 } else {
3559 report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
3560 }
3561 }
3562 case Intrinsic::aarch64_neon_smax:
3563 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
3564 Op.getOperand(1), Op.getOperand(2));
3565 case Intrinsic::aarch64_neon_umax:
3566 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
3567 Op.getOperand(1), Op.getOperand(2));
3568 case Intrinsic::aarch64_neon_smin:
3569 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
3570 Op.getOperand(1), Op.getOperand(2));
3571 case Intrinsic::aarch64_neon_umin:
3572 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
3573 Op.getOperand(1), Op.getOperand(2));
3574
3575 case Intrinsic::aarch64_sve_sunpkhi:
3576 return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
3577 Op.getOperand(1));
3578 case Intrinsic::aarch64_sve_sunpklo:
3579 return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
3580 Op.getOperand(1));
3581 case Intrinsic::aarch64_sve_uunpkhi:
3582 return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
3583 Op.getOperand(1));
3584 case Intrinsic::aarch64_sve_uunpklo:
3585 return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
3586 Op.getOperand(1));
3587 case Intrinsic::aarch64_sve_clasta_n:
3588 return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
3589 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3590 case Intrinsic::aarch64_sve_clastb_n:
3591 return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
3592 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3593 case Intrinsic::aarch64_sve_lasta:
3594 return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
3595 Op.getOperand(1), Op.getOperand(2));
3596 case Intrinsic::aarch64_sve_lastb:
3597 return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
3598 Op.getOperand(1), Op.getOperand(2));
3599 case Intrinsic::aarch64_sve_rev:
3600 return DAG.getNode(AArch64ISD::REV, dl, Op.getValueType(),
3601 Op.getOperand(1));
3602 case Intrinsic::aarch64_sve_tbl:
3603 return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
3604 Op.getOperand(1), Op.getOperand(2));
3605 case Intrinsic::aarch64_sve_trn1:
3606 return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
3607 Op.getOperand(1), Op.getOperand(2));
3608 case Intrinsic::aarch64_sve_trn2:
3609 return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
3610 Op.getOperand(1), Op.getOperand(2));
3611 case Intrinsic::aarch64_sve_uzp1:
3612 return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
3613 Op.getOperand(1), Op.getOperand(2));
3614 case Intrinsic::aarch64_sve_uzp2:
3615 return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
3616 Op.getOperand(1), Op.getOperand(2));
3617 case Intrinsic::aarch64_sve_zip1:
3618 return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
3619 Op.getOperand(1), Op.getOperand(2));
3620 case Intrinsic::aarch64_sve_zip2:
3621 return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
3622 Op.getOperand(1), Op.getOperand(2));
3623 case Intrinsic::aarch64_sve_ptrue:
3624 return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
3625 Op.getOperand(1));
3626 case Intrinsic::aarch64_sve_clz:
3627 return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
3628 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3629 case Intrinsic::aarch64_sve_cnt: {
3630 SDValue Data = Op.getOperand(3);
3631 // CTPOP only supports integer operands.
3632 if (Data.getValueType().isFloatingPoint())
3633 Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
3634 return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
3635 Op.getOperand(2), Data, Op.getOperand(1));
3636 }
3637 case Intrinsic::aarch64_sve_dupq_lane:
3638 return LowerDUPQLane(Op, DAG);
3639 case Intrinsic::aarch64_sve_convert_from_svbool:
3640 return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
3641 Op.getOperand(1));
3642 case Intrinsic::aarch64_sve_fneg:
3643 return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
3644 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3645 case Intrinsic::aarch64_sve_frintp:
3646 return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
3647 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3648 case Intrinsic::aarch64_sve_frintm:
3649 return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
3650 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3651 case Intrinsic::aarch64_sve_frinti:
3652 return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
3653 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3654 case Intrinsic::aarch64_sve_frintx:
3655 return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
3656 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3657 case Intrinsic::aarch64_sve_frinta:
3658 return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
3659 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3660 case Intrinsic::aarch64_sve_frintn:
3661 return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
3662 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3663 case Intrinsic::aarch64_sve_frintz:
3664 return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
3665 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3666 case Intrinsic::aarch64_sve_ucvtf:
3667 return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
3668 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3669 Op.getOperand(1));
3670 case Intrinsic::aarch64_sve_scvtf:
3671 return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
3672 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3673 Op.getOperand(1));
3674 case Intrinsic::aarch64_sve_fcvtzu:
3675 return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
3676 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3677 Op.getOperand(1));
3678 case Intrinsic::aarch64_sve_fcvtzs:
3679 return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
3680 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3681 Op.getOperand(1));
3682 case Intrinsic::aarch64_sve_fsqrt:
3683 return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
3684 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3685 case Intrinsic::aarch64_sve_frecpx:
3686 return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
3687 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3688 case Intrinsic::aarch64_sve_fabs:
3689 return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
3690 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3691 case Intrinsic::aarch64_sve_abs:
3692 return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
3693 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3694 case Intrinsic::aarch64_sve_neg:
3695 return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
3696 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3697 case Intrinsic::aarch64_sve_convert_to_svbool: {
3698 EVT OutVT = Op.getValueType();
3699 EVT InVT = Op.getOperand(1).getValueType();
3700 // Return the operand if the cast isn't changing type,
3701 // i.e. <n x 16 x i1> -> <n x 16 x i1>
3702 if (InVT == OutVT)
3703 return Op.getOperand(1);
3704 // Otherwise, zero the newly introduced lanes.
3705 SDValue Reinterpret =
3706 DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Op.getOperand(1));
3707 SDValue Mask = getPTrue(DAG, dl, InVT, AArch64SVEPredPattern::all);
3708 SDValue MaskReinterpret =
3709 DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Mask);
3710 return DAG.getNode(ISD::AND, dl, OutVT, Reinterpret, MaskReinterpret);
3711 }
3712
3713 case Intrinsic::aarch64_sve_insr: {
3714 SDValue Scalar = Op.getOperand(2);
3715 EVT ScalarTy = Scalar.getValueType();
3716 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
3717 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
3718
3719 return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
3720 Op.getOperand(1), Scalar);
3721 }
3722 case Intrinsic::aarch64_sve_rbit:
3723 return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
3724 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3725 Op.getOperand(1));
3726 case Intrinsic::aarch64_sve_revb:
3727 return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
3728 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3729 case Intrinsic::aarch64_sve_sxtb:
3730 return DAG.getNode(
3731 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
3732 Op.getOperand(2), Op.getOperand(3),
3733 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
3734 Op.getOperand(1));
3735 case Intrinsic::aarch64_sve_sxth:
3736 return DAG.getNode(
3737 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
3738 Op.getOperand(2), Op.getOperand(3),
3739 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
3740 Op.getOperand(1));
3741 case Intrinsic::aarch64_sve_sxtw:
3742 return DAG.getNode(
3743 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
3744 Op.getOperand(2), Op.getOperand(3),
3745 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
3746 Op.getOperand(1));
3747 case Intrinsic::aarch64_sve_uxtb:
3748 return DAG.getNode(
3749 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
3750 Op.getOperand(2), Op.getOperand(3),
3751 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
3752 Op.getOperand(1));
3753 case Intrinsic::aarch64_sve_uxth:
3754 return DAG.getNode(
3755 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
3756 Op.getOperand(2), Op.getOperand(3),
3757 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
3758 Op.getOperand(1));
3759 case Intrinsic::aarch64_sve_uxtw:
3760 return DAG.getNode(
3761 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
3762 Op.getOperand(2), Op.getOperand(3),
3763 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
3764 Op.getOperand(1));
3765
3766 case Intrinsic::localaddress: {
3767 const auto &MF = DAG.getMachineFunction();
3768 const auto *RegInfo = Subtarget->getRegisterInfo();
3769 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
3770 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
3771 Op.getSimpleValueType());
3772 }
3773
3774 case Intrinsic::eh_recoverfp: {
3775 // FIXME: This needs to be implemented to correctly handle highly aligned
3776    // stack objects. For now we simply return the incoming FP. Refer to D53541
3777 // for more details.
3778 SDValue FnOp = Op.getOperand(1);
3779 SDValue IncomingFPOp = Op.getOperand(2);
3780 GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
3781 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
3782 if (!Fn)
3783 report_fatal_error(
3784 "llvm.eh.recoverfp must take a function as the first argument");
3785 return IncomingFPOp;
3786 }
3787
3788 case Intrinsic::aarch64_neon_vsri:
3789 case Intrinsic::aarch64_neon_vsli: {
3790 EVT Ty = Op.getValueType();
3791
3792 if (!Ty.isVector())
3793 report_fatal_error("Unexpected type for aarch64_neon_vsli");
3794
3795    assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
3796
3797 bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
3798 unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
3799 return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
3800 Op.getOperand(3));
3801 }
3802
3803 case Intrinsic::aarch64_neon_srhadd:
3804 case Intrinsic::aarch64_neon_urhadd:
3805 case Intrinsic::aarch64_neon_shadd:
3806 case Intrinsic::aarch64_neon_uhadd: {
3807 bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
3808 IntNo == Intrinsic::aarch64_neon_shadd);
3809 bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
3810 IntNo == Intrinsic::aarch64_neon_urhadd);
3811 unsigned Opcode =
3812 IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
3813 : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
3814 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
3815 Op.getOperand(2));
3816 }
3817
3818 case Intrinsic::aarch64_neon_uabd: {
3819 return DAG.getNode(AArch64ISD::UABD, dl, Op.getValueType(),
3820 Op.getOperand(1), Op.getOperand(2));
3821 }
3822 case Intrinsic::aarch64_neon_sabd: {
3823 return DAG.getNode(AArch64ISD::SABD, dl, Op.getValueType(),
3824 Op.getOperand(1), Op.getOperand(2));
3825 }
3826 }
3827}
3828
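// Presumably, gather/scatter indices of this shape can be consumed by the
// 32-bit (sxtw/uxtw) addressing forms directly, making an explicit extend of
// the index redundant.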
3829bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
3830 if (VT.getVectorElementType() == MVT::i32 &&
3831 VT.getVectorElementCount().getKnownMinValue() >= 4)
3832 return true;
3833
3834 return false;
3835}
3836
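// SVE has native extending loads (e.g. ld1b/ld1sb into wider elements), so
// keeping an extend folded into a scalable-vector load is generally
// profitable; hence the scalable-vector check below.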
3837bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
3838 return ExtVal.getValueType().isScalableVector();
3839}
3840
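// Picks the SVE gather opcode matching the addressing mode: scaled vs.
// unscaled offsets, and (for 32-bit indices that need widening) signed vs.
// unsigned extension. Unextended forms ignore the signedness, which is why
// both Signed=false and Signed=true rows map to the plain GLD1 variants below.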
3841unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
3842 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
3843 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
3844 AArch64ISD::GLD1_MERGE_ZERO},
3845 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
3846 AArch64ISD::GLD1_UXTW_MERGE_ZERO},
3847 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
3848 AArch64ISD::GLD1_MERGE_ZERO},
3849 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
3850 AArch64ISD::GLD1_SXTW_MERGE_ZERO},
3851 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
3852 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
3853 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
3854 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
3855 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
3856 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
3857 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
3858 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
3859 };
3860 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
3861 return AddrModes.find(Key)->second;
3862}
3863
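// Scatter-store counterpart of getGatherVecOpcode: the same three
// addressing-mode flags, mapped onto the SST1* node variants.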
3864unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
3865 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
3866 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
3867 AArch64ISD::SST1_PRED},
3868 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
3869 AArch64ISD::SST1_UXTW_PRED},
3870 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
3871 AArch64ISD::SST1_PRED},
3872 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
3873 AArch64ISD::SST1_SXTW_PRED},
3874 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
3875 AArch64ISD::SST1_SCALED_PRED},
3876 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
3877 AArch64ISD::SST1_UXTW_SCALED_PRED},
3878 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
3879 AArch64ISD::SST1_SCALED_PRED},
3880 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
3881 AArch64ISD::SST1_SXTW_SCALED_PRED},
3882 };
3883 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
3884 return AddrModes.find(Key)->second;
3885}
3886
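// Rewrites a zero/any-extending GLD1* gather opcode to its sign-extending
// GLD1S* twin; used by LowerMGATHER when the load's extension type is
// EXTLOAD or SEXTLOAD.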
3887unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
3888 switch (Opcode) {
3889 default:
3890    llvm_unreachable("unimplemented opcode");
3891 return Opcode;
3892 case AArch64ISD::GLD1_MERGE_ZERO:
3893 return AArch64ISD::GLD1S_MERGE_ZERO;
3894 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
3895 return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
3896 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
3897 return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
3898 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
3899 return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
3900 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
3901 return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
3902 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
3903 return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
3904 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
3905 return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
3906 }
3907}
3908
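// Returns true when the index operand is really an i32 value widened to i64:
// either an explicit sign_extend_inreg, or an AND with a splat of 0xFFFFFFFF
// (i.e. a zero-extend-in-register).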
3909bool getGatherScatterIndexIsExtended(SDValue Index) {
3910 unsigned Opcode = Index.getOpcode();
3911 if (Opcode == ISD::SIGN_EXTEND_INREG)
3912 return true;
3913
3914 if (Opcode == ISD::AND) {
3915 SDValue Splat = Index.getOperand(1);
3916 if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
3917 return false;
3918 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
3919 if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
3920 return false;
3921 return true;
3922 }
3923
3924 return false;
3925}
3926
3927// If the base pointer of a masked gather or scatter is null, we
3928// may be able to swap BasePtr & Index and use the vector + register
3929// or vector + immediate addressing mode, e.g.
3930// VECTOR + REGISTER:
3931 //   getelementptr nullptr, <vscale x N x T> (splat(%offset) + %indices)
3932// -> getelementptr %offset, <vscale x N x T> %indices
3933// VECTOR + IMMEDIATE:
3934 //   getelementptr nullptr, <vscale x N x T> (splat(#x) + %indices)
3935// -> getelementptr #x, <vscale x N x T> %indices
3936void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
3937 unsigned &Opcode, bool IsGather,
3938 SelectionDAG &DAG) {
3939 if (!isNullConstant(BasePtr))
3940 return;
3941
3942 ConstantSDNode *Offset = nullptr;
3943 if (Index.getOpcode() == ISD::ADD)
3944 if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
3945 if (isa<ConstantSDNode>(SplatVal))
3946 Offset = cast<ConstantSDNode>(SplatVal);
3947 else {
3948 BasePtr = SplatVal;
3949 Index = Index->getOperand(0);
3950 return;
3951 }
3952 }
3953
3954 unsigned NewOp =
3955 IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
3956
3957 if (!Offset) {
3958 std::swap(BasePtr, Index);
3959 Opcode = NewOp;
3960 return;
3961 }
3962
3963 uint64_t OffsetVal = Offset->getZExtValue();
3964 unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
3965 auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
3966
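  // The vector-plus-immediate form encodes the offset as an element-sized
  // multiple in the range [0, 31]; e.g. for 32-bit elements the byte offset
  // must be one of 0, 4, ..., 124.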
3967 if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
3968 // Index is out of range for the immediate addressing mode
3969 BasePtr = ConstOffset;
3970 Index = Index->getOperand(0);
3971 return;
3972 }
3973
3974 // Immediate is in range
3975 Opcode = NewOp;
3976 BasePtr = Index->getOperand(0);
3977 Index = ConstOffset;
3978}
3979
3980SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
3981 SelectionDAG &DAG) const {
3982 SDLoc DL(Op);
3983 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
3984  assert(MGT && "Can only custom lower gather load nodes");
3985
3986 SDValue Index = MGT->getIndex();
3987 SDValue Chain = MGT->getChain();
3988 SDValue PassThru = MGT->getPassThru();
3989 SDValue Mask = MGT->getMask();
3990 SDValue BasePtr = MGT->getBasePtr();
3991 ISD::LoadExtType ExtTy = MGT->getExtensionType();
3992
3993 ISD::MemIndexType IndexType = MGT->getIndexType();
3994 bool IsScaled =
3995 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
3996 bool IsSigned =
3997 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
3998 bool IdxNeedsExtend =
3999 getGatherScatterIndexIsExtended(Index) ||
4000 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4001 bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
4002
4003 EVT VT = PassThru.getSimpleValueType();
4004 EVT MemVT = MGT->getMemoryVT();
4005 SDValue InputVT = DAG.getValueType(MemVT);
4006
4007 if (VT.getVectorElementType() == MVT::bf16 &&
4008 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4009 return SDValue();
4010
4011 // Handle FP data by using an integer gather and casting the result.
4012 if (VT.isFloatingPoint()) {
4013 EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4014 PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
4015 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4016 }
4017
4018 SDVTList VTs = DAG.getVTList(PassThru.getSimpleValueType(), MVT::Other);
4019
4020 if (getGatherScatterIndexIsExtended(Index))
4021 Index = Index.getOperand(0);
4022
4023 unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
4024 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4025 /*isGather=*/true, DAG);
4026
4027 if (ResNeedsSignExtend)
4028 Opcode = getSignExtendedGatherOpcode(Opcode);
4029
4030 SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT, PassThru};
4031 SDValue Gather = DAG.getNode(Opcode, DL, VTs, Ops);
4032
4033 if (VT.isFloatingPoint()) {
4034 SDValue Cast = getSVESafeBitCast(VT, Gather, DAG);
4035 return DAG.getMergeValues({Cast, Gather}, DL);
4036 }
4037
4038 return Gather;
4039}
4040
4041SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
4042 SelectionDAG &DAG) const {
4043 SDLoc DL(Op);
4044 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
4045  assert(MSC && "Can only custom lower scatter store nodes");
4046
4047 SDValue Index = MSC->getIndex();
4048 SDValue Chain = MSC->getChain();
4049 SDValue StoreVal = MSC->getValue();
4050 SDValue Mask = MSC->getMask();
4051 SDValue BasePtr = MSC->getBasePtr();
4052
4053 ISD::MemIndexType IndexType = MSC->getIndexType();
4054 bool IsScaled =
4055 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4056 bool IsSigned =
4057 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4058 bool NeedsExtend =
4059 getGatherScatterIndexIsExtended(Index) ||
4060 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4061
4062 EVT VT = StoreVal.getSimpleValueType();
4063 SDVTList VTs = DAG.getVTList(MVT::Other);
4064 EVT MemVT = MSC->getMemoryVT();
4065 SDValue InputVT = DAG.getValueType(MemVT);
4066
4067 if (VT.getVectorElementType() == MVT::bf16 &&
4068 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4069 return SDValue();
4070
4071 // Handle FP data by casting the data so an integer scatter can be used.
4072 if (VT.isFloatingPoint()) {
4073 EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4074 StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
4075 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4076 }
4077
4078 if (getGatherScatterIndexIsExtended(Index))
4079 Index = Index.getOperand(0);
4080
4081 unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
4082 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4083 /*isGather=*/false, DAG);
4084
4085 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
4086 return DAG.getNode(Opcode, DL, VTs, Ops);
4087}
4088
4089// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
4090static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
4091 EVT VT, EVT MemVT,
4092 SelectionDAG &DAG) {
4093  assert(VT.isVector() && "VT should be a vector type");
4094  assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
4095
4096 SDValue Value = ST->getValue();
4097
4098  // We first extend the promoted v4i16 to v8i16, truncate it to v8i8, and
4099  // extract the word lane that represents the v4i8 subvector. This optimizes
4100  // the store to:
4101 //
4102 // xtn v0.8b, v0.8h
4103 // str s0, [x0]
4104
4105 SDValue Undef = DAG.getUNDEF(MVT::i16);
4106 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
4107 {Undef, Undef, Undef, Undef});
4108
4109 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
4110 Value, UndefVec);
4111 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
4112
4113 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
4114 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4115 Trunc, DAG.getConstant(0, DL, MVT::i64));
4116
4117 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
4118 ST->getBasePtr(), ST->getMemOperand());
4119}
4120
4121 // Custom lowering for any store, vector or scalar, with or without a truncate
4122 // operation. Currently we only custom lower truncating stores from v4i16 to
4123 // v4i8 and volatile stores of i128.
4124SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
4125 SelectionDAG &DAG) const {
4126 SDLoc Dl(Op);
4127 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
4128  assert(StoreNode && "Can only custom lower store nodes");
4129
4130 SDValue Value = StoreNode->getValue();
4131
4132 EVT VT = Value.getValueType();
4133 EVT MemVT = StoreNode->getMemoryVT();
4134
4135 if (VT.isVector()) {
4136 if (useSVEForFixedLengthVectorVT(VT))
4137 return LowerFixedLengthVectorStoreToSVE(Op, DAG);
4138
4139 unsigned AS = StoreNode->getAddressSpace();
4140 Align Alignment = StoreNode->getAlign();
4141 if (Alignment < MemVT.getStoreSize() &&
4142 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment.value(),
4143 StoreNode->getMemOperand()->getFlags(),
4144 nullptr)) {
4145 return scalarizeVectorStore(StoreNode, DAG);
4146 }
4147
4148 if (StoreNode->isTruncatingStore()) {
4149 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
4150 }
4151 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
4152 // the custom lowering, as there are no un-paired non-temporal stores and
4153 // legalization will break up 256 bit inputs.
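    // For example, a non-temporal store of v8i32 is split into two v4i32
    // halves here and emitted as a single paired STNP.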
4154 ElementCount EC = MemVT.getVectorElementCount();
4155 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
4156 EC.isKnownEven() &&
4157 ((MemVT.getScalarSizeInBits() == 8u ||
4158 MemVT.getScalarSizeInBits() == 16u ||
4159 MemVT.getScalarSizeInBits() == 32u ||
4160 MemVT.getScalarSizeInBits() == 64u))) {
4161 SDValue Lo =
4162 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4163 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4164 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
4165 SDValue Hi =
4166 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4167 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4168 StoreNode->getValue(),
4169 DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
4170 SDValue Result = DAG.getMemIntrinsicNode(
4171 AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
4172 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4173 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4174 return Result;
4175 }
4176 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
4177    assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
4178 SDValue Lo =
4179 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4180 DAG.getConstant(0, Dl, MVT::i64));
4181 SDValue Hi =
4182 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4183 DAG.getConstant(1, Dl, MVT::i64));
4184 SDValue Result = DAG.getMemIntrinsicNode(
4185 AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
4186 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4187 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4188 return Result;
4189 }
4190
4191 return SDValue();
4192}
4193
4194// Generate SUBS and CSEL for integer abs.
4195SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
4196 MVT VT = Op.getSimpleValueType();
4197
4198 if (VT.isVector())
4199 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
4200
4201 SDLoc DL(Op);
4202 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4203 Op.getOperand(0));
4204 // Generate SUBS & CSEL.
4205 SDValue Cmp =
4206 DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
4207 Op.getOperand(0), DAG.getConstant(0, DL, VT));
4208 return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
4209 DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
4210 Cmp.getValue(1));
4211}
4212
4213SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
4214 SelectionDAG &DAG) const {
4215  LLVM_DEBUG(dbgs() << "Custom lowering: ");
4216  LLVM_DEBUG(Op.dump());
4217
4218 switch (Op.getOpcode()) {
4219 default:
4220    llvm_unreachable("unimplemented operand");
4221 return SDValue();
4222 case ISD::BITCAST:
4223 return LowerBITCAST(Op, DAG);
4224 case ISD::GlobalAddress:
4225 return LowerGlobalAddress(Op, DAG);
4226 case ISD::GlobalTLSAddress:
4227 return LowerGlobalTLSAddress(Op, DAG);
4228 case ISD::SETCC:
4229 case ISD::STRICT_FSETCC:
4230 case ISD::STRICT_FSETCCS:
4231 return LowerSETCC(Op, DAG);
4232 case ISD::BR_CC:
4233 return LowerBR_CC(Op, DAG);
4234 case ISD::SELECT:
4235 return LowerSELECT(Op, DAG);
4236 case ISD::SELECT_CC:
4237 return LowerSELECT_CC(Op, DAG);
4238 case ISD::JumpTable:
4239 return LowerJumpTable(Op, DAG);
4240 case ISD::BR_JT:
4241 return LowerBR_JT(Op, DAG);
4242 case ISD::ConstantPool:
4243 return LowerConstantPool(Op, DAG);
4244 case ISD::BlockAddress:
4245 return LowerBlockAddress(Op, DAG);
4246 case ISD::VASTART:
4247 return LowerVASTART(Op, DAG);
4248 case ISD::VACOPY:
4249 return LowerVACOPY(Op, DAG);
4250 case ISD::VAARG:
4251 return LowerVAARG(Op, DAG);
4252 case ISD::ADDC:
4253 case ISD::ADDE:
4254 case ISD::SUBC:
4255 case ISD::SUBE:
4256 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
4257 case ISD::SADDO:
4258 case ISD::UADDO:
4259 case ISD::SSUBO:
4260 case ISD::USUBO:
4261 case ISD::SMULO:
4262 case ISD::UMULO:
4263 return LowerXALUO(Op, DAG);
4264 case ISD::FADD:
4265 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
4266 case ISD::FSUB:
4267 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
4268 case ISD::FMUL:
4269 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
4270 case ISD::FMA:
4271 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
4272 case ISD::FDIV:
4273 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
4274 case ISD::FNEG:
4275 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
4276 case ISD::FCEIL:
4277 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
4278 case ISD::FFLOOR:
4279 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
4280 case ISD::FNEARBYINT:
4281 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
4282 case ISD::FRINT:
4283 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
4284 case ISD::FROUND:
4285 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
4286 case ISD::FROUNDEVEN:
4287 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
4288 case ISD::FTRUNC:
4289 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
4290 case ISD::FSQRT:
4291 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
4292 case ISD::FABS:
4293 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
4294 case ISD::FP_ROUND:
4295 case ISD::STRICT_FP_ROUND:
4296 return LowerFP_ROUND(Op, DAG);
4297 case ISD::FP_EXTEND:
4298 return LowerFP_EXTEND(Op, DAG);
4299 case ISD::FRAMEADDR:
4300 return LowerFRAMEADDR(Op, DAG);
4301 case ISD::SPONENTRY:
4302 return LowerSPONENTRY(Op, DAG);
4303 case ISD::RETURNADDR:
4304 return LowerRETURNADDR(Op, DAG);
4305 case ISD::ADDROFRETURNADDR:
4306 return LowerADDROFRETURNADDR(Op, DAG);
4307 case ISD::CONCAT_VECTORS:
4308 return LowerCONCAT_VECTORS(Op, DAG);
4309 case ISD::INSERT_VECTOR_ELT:
4310 return LowerINSERT_VECTOR_ELT(Op, DAG);
4311 case ISD::EXTRACT_VECTOR_ELT:
4312 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4313 case ISD::BUILD_VECTOR:
4314 return LowerBUILD_VECTOR(Op, DAG);
4315 case ISD::VECTOR_SHUFFLE:
4316 return LowerVECTOR_SHUFFLE(Op, DAG);
4317 case ISD::SPLAT_VECTOR:
4318 return LowerSPLAT_VECTOR(Op, DAG);
4319 case ISD::EXTRACT_SUBVECTOR:
4320 return LowerEXTRACT_SUBVECTOR(Op, DAG);
4321 case ISD::INSERT_SUBVECTOR:
4322 return LowerINSERT_SUBVECTOR(Op, DAG);
4323 case ISD::SDIV:
4324 case ISD::UDIV:
4325 return LowerDIV(Op, DAG);
4326 case ISD::SMIN:
4327 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
4328 /*OverrideNEON=*/true);
4329 case ISD::UMIN:
4330 return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
4331 /*OverrideNEON=*/true);
4332 case ISD::SMAX:
4333 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
4334 /*OverrideNEON=*/true);
4335 case ISD::UMAX:
4336 return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
4337 /*OverrideNEON=*/true);
4338 case ISD::SRA:
4339 case ISD::SRL:
4340 case ISD::SHL:
4341 return LowerVectorSRA_SRL_SHL(Op, DAG);
4342 case ISD::SHL_PARTS:
4343 return LowerShiftLeftParts(Op, DAG);
4344 case ISD::SRL_PARTS:
4345 case ISD::SRA_PARTS:
4346 return LowerShiftRightParts(Op, DAG);
4347 case ISD::CTPOP:
4348 return LowerCTPOP(Op, DAG);
4349 case ISD::FCOPYSIGN:
4350 return LowerFCOPYSIGN(Op, DAG);
4351 case ISD::OR:
4352 return LowerVectorOR(Op, DAG);
4353 case ISD::XOR:
4354 return LowerXOR(Op, DAG);
4355 case ISD::PREFETCH:
4356 return LowerPREFETCH(Op, DAG);
4357 case ISD::SINT_TO_FP:
4358 case ISD::UINT_TO_FP:
4359 case ISD::STRICT_SINT_TO_FP:
4360 case ISD::STRICT_UINT_TO_FP:
4361 return LowerINT_TO_FP(Op, DAG);
4362 case ISD::FP_TO_SINT:
4363 case ISD::FP_TO_UINT:
4364 case ISD::STRICT_FP_TO_SINT:
4365 case ISD::STRICT_FP_TO_UINT:
4366 return LowerFP_TO_INT(Op, DAG);
4367 case ISD::FSINCOS:
4368 return LowerFSINCOS(Op, DAG);
4369 case ISD::FLT_ROUNDS_:
4370 return LowerFLT_ROUNDS_(Op, DAG);
4371 case ISD::MUL:
4372 return LowerMUL(Op, DAG);
4373 case ISD::INTRINSIC_WO_CHAIN:
4374 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4375 case ISD::STORE:
4376 return LowerSTORE(Op, DAG);
4377 case ISD::MGATHER:
4378 return LowerMGATHER(Op, DAG);
4379 case ISD::MSCATTER:
4380 return LowerMSCATTER(Op, DAG);
4381 case ISD::VECREDUCE_SEQ_FADD:
4382 return LowerVECREDUCE_SEQ_FADD(Op, DAG);
4383 case ISD::VECREDUCE_ADD:
4384 case ISD::VECREDUCE_AND:
4385 case ISD::VECREDUCE_OR:
4386 case ISD::VECREDUCE_XOR:
4387 case ISD::VECREDUCE_SMAX:
4388 case ISD::VECREDUCE_SMIN:
4389 case ISD::VECREDUCE_UMAX:
4390 case ISD::VECREDUCE_UMIN:
4391 case ISD::VECREDUCE_FADD:
4392 case ISD::VECREDUCE_FMAX:
4393 case ISD::VECREDUCE_FMIN:
4394 return LowerVECREDUCE(Op, DAG);
4395 case ISD::ATOMIC_LOAD_SUB:
4396 return LowerATOMIC_LOAD_SUB(Op, DAG);
4397 case ISD::ATOMIC_LOAD_AND:
4398 return LowerATOMIC_LOAD_AND(Op, DAG);
4399 case ISD::DYNAMIC_STACKALLOC:
4400 return LowerDYNAMIC_STACKALLOC(Op, DAG);
4401 case ISD::VSCALE:
4402 return LowerVSCALE(Op, DAG);
4403 case ISD::ANY_EXTEND:
4404 case ISD::SIGN_EXTEND:
4405 case ISD::ZERO_EXTEND:
4406 return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
4407 case ISD::SIGN_EXTEND_INREG: {
4408 // Only custom lower when ExtraVT has a legal byte based element type.
4409 EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4410 EVT ExtraEltVT = ExtraVT.getVectorElementType();
4411 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
4412 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
4413 return SDValue();
4414
4415 return LowerToPredicatedOp(Op, DAG,
4416 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
4417 }
4418 case ISD::TRUNCATE:
4419 return LowerTRUNCATE(Op, DAG);
4420 case ISD::LOAD:
4421 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
4422 return LowerFixedLengthVectorLoadToSVE(Op, DAG);
4423    llvm_unreachable("Unexpected request to lower ISD::LOAD");
4424 case ISD::ADD:
4425 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
4426 case ISD::AND:
4427 return LowerToScalableOp(Op, DAG);
4428 case ISD::SUB:
4429 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
4430 case ISD::FMAXNUM:
4431 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
4432 case ISD::FMINNUM:
4433 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
4434 case ISD::VSELECT:
4435 return LowerFixedLengthVectorSelectToSVE(Op, DAG);
4436 case ISD::ABS:
4437 return LowerABS(Op, DAG);
4438 case ISD::BITREVERSE:
4439 return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
4440 /*OverrideNEON=*/true);
4441 case ISD::BSWAP:
4442 return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
4443 case ISD::CTLZ:
4444 return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
4445 /*OverrideNEON=*/true);
4446 case ISD::CTTZ:
4447 return LowerCTTZ(Op, DAG);
4448 }
4449}
4450
4451bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
4452 return !Subtarget->useSVEForFixedLengthVectors();
4453}
4454
4455bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
4456 EVT VT, bool OverrideNEON) const {
4457 if (!Subtarget->useSVEForFixedLengthVectors())
4458 return false;
4459
4460 if (!VT.isFixedLengthVector())
4461 return false;
4462
4463 // Don't use SVE for vectors we cannot scalarize if required.
4464 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
4465 // Fixed length predicates should be promoted to i8.
4466 // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
4467 case MVT::i1:
4468 default:
4469 return false;
4470 case MVT::i8:
4471 case MVT::i16:
4472 case MVT::i32:
4473 case MVT::i64:
4474 case MVT::f16:
4475 case MVT::f32:
4476 case MVT::f64:
4477 break;
4478 }
4479
4480 // All SVE implementations support NEON sized vectors.
4481 if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
4482 return true;
4483
4484 // Ensure NEON MVTs only belong to a single register class.
4485 if (VT.getFixedSizeInBits() <= 128)
4486 return false;
4487
4488 // Don't use SVE for types that don't fit.
4489 if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
4490 return false;
4491
4492 // TODO: Perhaps an artificial restriction, but worth having whilst getting
4493 // the base fixed length SVE support in place.
4494 if (!VT.isPow2VectorType())
4495 return false;
4496
4497 return true;
4498}
4499
4500//===----------------------------------------------------------------------===//
4501// Calling Convention Implementation
4502//===----------------------------------------------------------------------===//
4503
4504/// Selects the correct CCAssignFn for a given CallingConvention value.
4505CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
4506 bool IsVarArg) const {
4507 switch (CC) {
4508 default:
4509 report_fatal_error("Unsupported calling convention.");
4510 case CallingConv::WebKit_JS:
4511 return CC_AArch64_WebKit_JS;
4512 case CallingConv::GHC:
4513 return CC_AArch64_GHC;
4514 case CallingConv::C:
4515 case CallingConv::Fast:
4516 case CallingConv::PreserveMost:
4517 case CallingConv::CXX_FAST_TLS:
4518 case CallingConv::Swift:
4519 if (Subtarget->isTargetWindows() && IsVarArg)
4520 return CC_AArch64_Win64_VarArg;
4521 if (!Subtarget->isTargetDarwin())
4522 return CC_AArch64_AAPCS;
4523 if (!IsVarArg)
4524 return CC_AArch64_DarwinPCS;
4525 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
4526 : CC_AArch64_DarwinPCS_VarArg;
4527 case CallingConv::Win64:
4528 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
4529 case CallingConv::CFGuard_Check:
4530 return CC_AArch64_Win64_CFGuard_Check;
4531 case CallingConv::AArch64_VectorCall:
4532 case CallingConv::AArch64_SVE_VectorCall:
4533 return CC_AArch64_AAPCS;
4534 }
4535}
4536
4537CCAssignFn *
4538AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
4539 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
4540 : RetCC_AArch64_AAPCS;
4541}
4542
4543SDValue AArch64TargetLowering::LowerFormalArguments(
4544 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4545 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
4546 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4547 MachineFunction &MF = DAG.getMachineFunction();
4548 MachineFrameInfo &MFI = MF.getFrameInfo();
4549 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
4550
4551 // Assign locations to all of the incoming arguments.
4552 SmallVector<CCValAssign, 16> ArgLocs;
4553 DenseMap<unsigned, SDValue> CopiedRegs;
4554 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4555 *DAG.getContext());
4556
4557 // At this point, Ins[].VT may already be promoted to i32. To correctly
4558 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
4559 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
4560 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
4561 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
4562 // LocVT.
4563 unsigned NumArgs = Ins.size();
4564 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
4565 unsigned CurArgIdx = 0;
4566 for (unsigned i = 0; i != NumArgs; ++i) {
4567 MVT ValVT = Ins[i].VT;
4568 if (Ins[i].isOrigArg()) {
4569 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
4570 CurArgIdx = Ins[i].getOrigArgIndex();
4571
4572 // Get type of the original argument.
4573 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
4574 /*AllowUnknown*/ true);
4575 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
4576 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
4577 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
4578 ValVT = MVT::i8;
4579 else if (ActualMVT == MVT::i16)
4580 ValVT = MVT::i16;
4581 }
4582 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
4583 bool Res =
4584 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
4585    assert(!Res && "Call operand has unhandled type");
4586 (void)Res;
4587 }
4588 SmallVector<SDValue, 16> ArgValues;
4589 unsigned ExtraArgLocs = 0;
4590 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4591 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
4592
4593 if (Ins[i].Flags.isByVal()) {
4594 // Byval is used for HFAs in the PCS, but the system should work in a
4595 // non-compliant manner for larger structs.
4596 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4597 int Size = Ins[i].Flags.getByValSize();
4598 unsigned NumRegs = (Size + 7) / 8;
4599
4600 // FIXME: This works on big-endian for composite byvals, which are the common
4601      // case. It should also work for fundamental types.
4602 unsigned FrameIdx =
4603 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
4604 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
4605 InVals.push_back(FrameIdxN);
4606
4607 continue;
4608 }
4609
4610 SDValue ArgValue;
4611 if (VA.isRegLoc()) {
4612 // Arguments stored in registers.
4613 EVT RegVT = VA.getLocVT();
4614 const TargetRegisterClass *RC;
4615
4616 if (RegVT == MVT::i32)
4617 RC = &AArch64::GPR32RegClass;
4618 else if (RegVT == MVT::i64)
4619 RC = &AArch64::GPR64RegClass;
4620 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4621 RC = &AArch64::FPR16RegClass;
4622 else if (RegVT == MVT::f32)
4623 RC = &AArch64::FPR32RegClass;
4624 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
4625 RC = &AArch64::FPR64RegClass;
4626 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
4627 RC = &AArch64::FPR128RegClass;
4628 else if (RegVT.isScalableVector() &&
4629 RegVT.getVectorElementType() == MVT::i1)
4630 RC = &AArch64::PPRRegClass;
4631 else if (RegVT.isScalableVector())
4632 RC = &AArch64::ZPRRegClass;
4633 else
4634      llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4635
4636 // Transform the arguments in physical registers into virtual ones.
4637 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
4638 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
4639
4640 // If this is an 8, 16 or 32-bit value, it is really passed promoted
4641 // to 64 bits. Insert an assert[sz]ext to capture this, then
4642 // truncate to the right size.
4643 switch (VA.getLocInfo()) {
4644 default:
4645        llvm_unreachable("Unknown loc info!");
4646 case CCValAssign::Full:
4647 break;
4648 case CCValAssign::Indirect:
4649      assert(VA.getValVT().isScalableVector() &&
4650             "Only scalable vectors can be passed indirectly");
4651 break;
4652 case CCValAssign::BCvt:
4653 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
4654 break;
4655 case CCValAssign::AExt:
4656 case CCValAssign::SExt:
4657 case CCValAssign::ZExt:
4658 break;
4659 case CCValAssign::AExtUpper:
4660 ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
4661 DAG.getConstant(32, DL, RegVT));
4662 ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
4663 break;
4664 }
4665 } else { // VA.isRegLoc()
4666      assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
4667 unsigned ArgOffset = VA.getLocMemOffset();
4668 unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
4669 ? VA.getLocVT().getSizeInBits()
4670 : VA.getValVT().getSizeInBits()) / 8;
4671
4672 uint32_t BEAlign = 0;
4673 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
4674 !Ins[i].Flags.isInConsecutiveRegs())
4675 BEAlign = 8 - ArgSize;
4676
4677 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
4678
4679 // Create load nodes to retrieve arguments from the stack.
4680 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4681
4682      // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
4683 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
4684 MVT MemVT = VA.getValVT();
4685
4686 switch (VA.getLocInfo()) {
4687 default:
4688 break;
4689 case CCValAssign::Trunc:
4690 case CCValAssign::BCvt:
4691 MemVT = VA.getLocVT();
4692 break;
4693 case CCValAssign::Indirect:
4694        assert(VA.getValVT().isScalableVector() &&
4695               "Only scalable vectors can be passed indirectly");
4696 MemVT = VA.getLocVT();
4697 break;
4698 case CCValAssign::SExt:
4699 ExtType = ISD::SEXTLOAD;
4700 break;
4701 case CCValAssign::ZExt:
4702 ExtType = ISD::ZEXTLOAD;
4703 break;
4704 case CCValAssign::AExt:
4705 ExtType = ISD::EXTLOAD;
4706 break;
4707 }
4708
4709 ArgValue = DAG.getExtLoad(
4710 ExtType, DL, VA.getLocVT(), Chain, FIN,
4711 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
4712 MemVT);
4713
4714 }
4715
4716 if (VA.getLocInfo() == CCValAssign::Indirect) {
4717      assert(VA.getValVT().isScalableVector() &&
4718             "Only scalable vectors can be passed indirectly");
4719
4720 uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
4721 unsigned NumParts = 1;
4722 if (Ins[i].Flags.isInConsecutiveRegs()) {
4723        assert(!Ins[i].Flags.isInConsecutiveRegsLast());
4724 while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
4725 ++NumParts;
4726 }
4727
4728 MVT PartLoad = VA.getValVT();
4729 SDValue Ptr = ArgValue;
4730
4731 // Ensure we generate all loads for each tuple part, whilst updating the
4732 // pointer after each load correctly using vscale.
4733 while (NumParts > 0) {
4734 ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
4735 InVals.push_back(ArgValue);
4736 NumParts--;
4737 if (NumParts > 0) {
4738 SDValue BytesIncrement = DAG.getVScale(
4739 DL, Ptr.getValueType(),
4740 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
4741 SDNodeFlags Flags;
4742 Flags.setNoUnsignedWrap(true);
4743 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
4744 BytesIncrement, Flags);
4745 ExtraArgLocs++;
4746 i++;
4747 }
4748 }
4749 } else {
4750 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
4751 ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
4752 ArgValue, DAG.getValueType(MVT::i32));
4753 InVals.push_back(ArgValue);
4754 }
4755 }
4756  assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
4757
4758 // varargs
4759 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4760 if (isVarArg) {
4761 if (!Subtarget->isTargetDarwin() || IsWin64) {
4762 // The AAPCS variadic function ABI is identical to the non-variadic
4763 // one. As a result there may be more arguments in registers and we should
4764 // save them for future reference.
4765 // Win64 variadic functions also pass arguments in registers, but all float
4766 // arguments are passed in integer registers.
4767 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
4768 }
4769
4770 // This will point to the next argument passed via stack.
4771 unsigned StackOffset = CCInfo.getNextStackOffset();
4772 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
4773 StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
4774 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
4775
4776 if (MFI.hasMustTailInVarArgFunc()) {
4777 SmallVector<MVT, 2> RegParmTypes;
4778 RegParmTypes.push_back(MVT::i64);
4779 RegParmTypes.push_back(MVT::f128);
4780 // Compute the set of forwarded registers. The rest are scratch.
4781 SmallVectorImpl<ForwardedRegister> &Forwards =
4782 FuncInfo->getForwardedMustTailRegParms();
4783 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
4784 CC_AArch64_AAPCS);
4785
4786 // Conservatively forward X8, since it might be used for aggregate return.
4787 if (!CCInfo.isAllocated(AArch64::X8)) {
4788 unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
4789 Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
4790 }
4791 }
4792 }
4793
4794 // On Windows, InReg pointers must be returned, so record the pointer in a
4795 // virtual register at the start of the function so it can be returned in the
4796 // epilogue.
4797 if (IsWin64) {
4798 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
4799 if (Ins[I].Flags.isInReg()) {
4800        assert(!FuncInfo->getSRetReturnReg());
4801
4802 MVT PtrTy = getPointerTy(DAG.getDataLayout());
4803 Register Reg =
4804 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
4805 FuncInfo->setSRetReturnReg(Reg);
4806
4807 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
4808 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
4809 break;
4810 }
4811 }
4812 }
4813
4814 unsigned StackArgSize = CCInfo.getNextStackOffset();
4815 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4816 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
4817 // This is a non-standard ABI so by fiat I say we're allowed to make full
4818 // use of the stack area to be popped, which must be aligned to 16 bytes in
4819 // any case:
4820 StackArgSize = alignTo(StackArgSize, 16);
4821
4822 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
4823 // a multiple of 16.
4824 FuncInfo->setArgumentStackToRestore(StackArgSize);
4825
4826 // This realignment carries over to the available bytes below. Our own
4827 // callers will guarantee the space is free by giving an aligned value to
4828 // CALLSEQ_START.
4829 }
4830 // Even if we're not expected to free up the space, it's useful to know how
4831 // much is there while considering tail calls (because we can reuse it).
4832 FuncInfo->setBytesInStackArgArea(StackArgSize);
4833
4834 if (Subtarget->hasCustomCallingConv())
4835 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
4836
4837 return Chain;
4838}
4839
4840void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
4841 SelectionDAG &DAG,
4842 const SDLoc &DL,
4843 SDValue &Chain) const {
4844 MachineFunction &MF = DAG.getMachineFunction();
4845 MachineFrameInfo &MFI = MF.getFrameInfo();
4846 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4847 auto PtrVT = getPointerTy(DAG.getDataLayout());
4848 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
4849
4850 SmallVector<SDValue, 8> MemOps;
4851
4852 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
4853 AArch64::X3, AArch64::X4, AArch64::X5,
4854 AArch64::X6, AArch64::X7 };
4855 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
4856 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
4857
4858 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
4859 int GPRIdx = 0;
4860 if (GPRSaveSize != 0) {
4861 if (IsWin64) {
4862 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
4863 if (GPRSaveSize & 15)
4864 // The extra size here, if triggered, will always be 8.
4865 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
4866 } else
4867 GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
4868
4869 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
4870
4871 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
4872 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
4873 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
4874 SDValue Store = DAG.getStore(
4875 Val.getValue(1), DL, Val, FIN,
4876 IsWin64
4877 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
4878 GPRIdx,
4879 (i - FirstVariadicGPR) * 8)
4880 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
4881 MemOps.push_back(Store);
4882 FIN =
4883 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
4884 }
4885 }
4886 FuncInfo->setVarArgsGPRIndex(GPRIdx);
4887 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
4888
4889 if (Subtarget->hasFPARMv8() && !IsWin64) {
4890 static const MCPhysReg FPRArgRegs[] = {
4891 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
4892 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
4893 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
4894 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
4895
4896 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
4897 int FPRIdx = 0;
4898 if (FPRSaveSize != 0) {
4899 FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
4900
4901 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
4902
4903 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
4904 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
4905 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
4906
4907 SDValue Store = DAG.getStore(
4908 Val.getValue(1), DL, Val, FIN,
4909 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
4910 MemOps.push_back(Store);
4911 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
4912 DAG.getConstant(16, DL, PtrVT));
4913 }
4914 }
4915 FuncInfo->setVarArgsFPRIndex(FPRIdx);
4916 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
4917 }
4918
4919 if (!MemOps.empty()) {
4920 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4921 }
4922}
4923
4924/// LowerCallResult - Lower the result values of a call into the
4925/// appropriate copies out of appropriate physical registers.
4926SDValue AArch64TargetLowering::LowerCallResult(
4927 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
4928 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
4929 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
4930 SDValue ThisVal) const {
4931 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
4932 // Assign locations to each value returned by this call.
4933 SmallVector<CCValAssign, 16> RVLocs;
4934 DenseMap<unsigned, SDValue> CopiedRegs;
4935 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4936 *DAG.getContext());
4937 CCInfo.AnalyzeCallResult(Ins, RetCC);
4938
4939 // Copy all of the result registers out of their specified physreg.
4940 for (unsigned i = 0; i != RVLocs.size(); ++i) {
4941 CCValAssign VA = RVLocs[i];
4942
4943 // Pass 'this' value directly from the argument to return value, to avoid
4944 // reg unit interference
4945 if (i == 0 && isThisReturn) {
4946      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
4947             "unexpected return calling convention register assignment");
4948 InVals.push_back(ThisVal);
4949 continue;
4950 }
4951
4952 // Avoid copying a physreg twice since RegAllocFast is incompetent and only
4953 // allows one use of a physreg per block.
4954 SDValue Val = CopiedRegs.lookup(VA.getLocReg());
4955 if (!Val) {
4956 Val =
4957 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
4958 Chain = Val.getValue(1);
4959 InFlag = Val.getValue(2);
4960 CopiedRegs[VA.getLocReg()] = Val;
4961 }
4962
4963 switch (VA.getLocInfo()) {
4964 default:
4965 llvm_unreachable("Unknown loc info!");
4966 case CCValAssign::Full:
4967 break;
4968 case CCValAssign::BCvt:
4969 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
4970 break;
4971 case CCValAssign::AExtUpper:
4972 Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
4973 DAG.getConstant(32, DL, VA.getLocVT()));
4974 LLVM_FALLTHROUGH;
4975 case CCValAssign::AExt:
4976 LLVM_FALLTHROUGH;
4977 case CCValAssign::ZExt:
4978 Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
4979 break;
4980 }
4981
4982 InVals.push_back(Val);
4983 }
4984
4985 return Chain;
4986}
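
As an aside on the AExtUpper case in LowerCallResult above: a 32-bit value assigned to the upper half of a 64-bit location is recovered with a logical shift right by 32 followed by a truncation. A minimal standalone sketch of that arithmetic follows; the helper name is hypothetical and not part of the analyzed source.

#include <cstdint>

// Mirrors the AExtUpper handling in LowerCallResult above: the packed 64-bit
// location is shifted right by 32, then truncated to the 32-bit value type.
static uint32_t extractUpperHalf(uint64_t PackedLoc) {
  return static_cast<uint32_t>(PackedLoc >> 32);
}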
4987
4988/// Return true if the calling convention is one that we can guarantee TCO for.
4989static bool canGuaranteeTCO(CallingConv::ID CC) {
4990 return CC == CallingConv::Fast;
4991}
4992
4993/// Return true if we might ever do TCO for calls with this calling convention.
4994static bool mayTailCallThisCC(CallingConv::ID CC) {
4995 switch (CC) {
4996 case CallingConv::C:
4997 case CallingConv::AArch64_SVE_VectorCall:
4998 case CallingConv::PreserveMost:
4999 case CallingConv::Swift:
5000 return true;
5001 default:
5002 return canGuaranteeTCO(CC);
5003 }
5004}
5005
5006bool AArch64TargetLowering::isEligibleForTailCallOptimization(
5007 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
5008 const SmallVectorImpl<ISD::OutputArg> &Outs,
5009 const SmallVectorImpl<SDValue> &OutVals,
5010 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
5011 if (!mayTailCallThisCC(CalleeCC))
5012 return false;
5013
5014 MachineFunction &MF = DAG.getMachineFunction();
5015 const Function &CallerF = MF.getFunction();
5016 CallingConv::ID CallerCC = CallerF.getCallingConv();
5017
5018 // If this function uses the C calling convention but has an SVE signature,
5019 // then it preserves more registers and should assume the SVE_VectorCall CC.
5020 // The check for matching callee-saved regs will determine whether it is
5021 // eligible for TCO.
5022 if (CallerCC == CallingConv::C &&
5023 AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
5024 CallerCC = CallingConv::AArch64_SVE_VectorCall;
5025
5026 bool CCMatch = CallerCC == CalleeCC;
5027
5028 // When using the Windows calling convention on a non-windows OS, we want
5029 // to back up and restore X18 in such functions; we can't do a tail call
5030 // from those functions.
5031 if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
5032 CalleeCC != CallingConv::Win64)
5033 return false;
5034
5035 // Byval parameters hand the function a pointer directly into the stack area
5036 // we want to reuse during a tail call. Working around this *is* possible (see
5037 // X86) but less efficient and uglier in LowerCall.
5038 for (Function::const_arg_iterator i = CallerF.arg_begin(),
5039 e = CallerF.arg_end();
5040 i != e; ++i) {
5041 if (i->hasByValAttr())
5042 return false;
5043
5044 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
5045 // In this case, it is necessary to save/restore X0 in the callee. Tail
5046 // call opt interferes with this. So we disable tail call opt when the
5047 // caller has an argument with "inreg" attribute.
5048
5049 // FIXME: Check whether the callee also has an "inreg" argument.
5050 if (i->hasInRegAttr())
5051 return false;
5052 }
5053
5054 if (getTargetMachine().Options.GuaranteedTailCallOpt)
5055 return canGuaranteeTCO(CalleeCC) && CCMatch;
5056
5057 // Externally-defined functions with weak linkage should not be
5058 // tail-called on AArch64 when the OS does not support dynamic
5059 // pre-emption of symbols, as the AAELF spec requires normal calls
5060 // to undefined weak functions to be replaced with a NOP or jump to the
5061 // next instruction. The behaviour of branch instructions in this
5062 // situation (as used for tail calls) is implementation-defined, so we
5063 // cannot rely on the linker replacing the tail call with a return.
5064 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5065 const GlobalValue *GV = G->getGlobal();
5066 const Triple &TT = getTargetMachine().getTargetTriple();
5067 if (GV->hasExternalWeakLinkage() &&
5068 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
5069 return false;
5070 }
5071
5072 // Now we search for cases where we can use a tail call without changing the
5073 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
5074 // concept.
5075
5076 // I want anyone implementing a new calling convention to think long and hard
5077 // about this assert.
5078 assert((!isVarArg || CalleeCC == CallingConv::C) &&
5079 "Unexpected variadic calling convention");
5080
5081 LLVMContext &C = *DAG.getContext();
5082 if (isVarArg && !Outs.empty()) {
5083 // At least two cases here: if caller is fastcc then we can't have any
5084 // memory arguments (we'd be expected to clean up the stack afterwards). If
5085 // caller is C then we could potentially use its argument area.
5086
5087 // FIXME: for now we take the most conservative of these in both cases:
5088 // disallow all variadic memory operands.
5089 SmallVector<CCValAssign, 16> ArgLocs;
5090 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5091
5092 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
5093 for (const CCValAssign &ArgLoc : ArgLocs)
5094 if (!ArgLoc.isRegLoc())
5095 return false;
5096 }
5097
5098 // Check that the call results are passed in the same way.
5099 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
5100 CCAssignFnForCall(CalleeCC, isVarArg),
5101 CCAssignFnForCall(CallerCC, isVarArg)))
5102 return false;
5103 // The callee has to preserve all registers the caller needs to preserve.
5104 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5105 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5106 if (!CCMatch) {
5107 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5108 if (Subtarget->hasCustomCallingConv()) {
5109 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
5110 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
5111 }
5112 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5113 return false;
5114 }
5115
5116 // Nothing more to check if the callee is taking no arguments
5117 if (Outs.empty())
5118 return true;
5119
5120 SmallVector<CCValAssign, 16> ArgLocs;
5121 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5122
5123 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
5124
5125 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5126
5127 // If any of the arguments is passed indirectly, it must be SVE, so the
5128 // 'getBytesInStackArgArea' is not sufficient to determine whether we need to
5129 // allocate space on the stack. That is why we check this explicitly here
5130 // and treat such a call as ineligible for a tail call.
5131 if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
5132 assert((A.getLocInfo() != CCValAssign::Indirect ||
5133 A.getValVT().isScalableVector()) &&
5134 "Expected value to be scalable");
5135 return A.getLocInfo() == CCValAssign::Indirect;
5136 }))
5137 return false;
5138
5139 // If the stack arguments for this call do not fit into our own save area then
5140 // the call cannot be made tail.
5141 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
5142 return false;
5143
5144 const MachineRegisterInfo &MRI = MF.getRegInfo();
5145 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5146 return false;
5147
5148 return true;
5149}
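
The callee-saved check above (regmaskSubsetEqual) requires that a callee with a different calling convention preserve every register the caller's convention preserves. A minimal sketch of that subset test, assuming the masks are modeled here as plain 64-bit bitsets rather than LLVM's packed regmask arrays; the helper name is hypothetical and not part of the analyzed source.

#include <cstdint>

// True if every register preserved for the caller is also preserved by the
// callee, i.e. the caller's preserved set is a subset of the callee's.
static bool calleePreservesCallerRegs(uint64_t CallerPreserved,
                                      uint64_t CalleePreserved) {
  return (CallerPreserved & ~CalleePreserved) == 0;
}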
5150
5151SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
5152 SelectionDAG &DAG,
5153 MachineFrameInfo &MFI,
5154 int ClobberedFI) const {
5155 SmallVector<SDValue, 8> ArgChains;
5156 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
5157 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
5158
5159 // Include the original chain at the beginning of the list. When this is
5160 // used by target LowerCall hooks, this helps legalize find the
5161 // CALLSEQ_BEGIN node.
5162 ArgChains.push_back(Chain);
5163
5164 // Add a chain value for each stack-argument load that overlaps the clobbered slot.
5165 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
5166 UE = DAG.getEntryNode().getNode()->use_end();
5167 U != UE; ++U)
5168 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
5169 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
5170 if (FI->getIndex() < 0) {
5171 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
5172 int64_t InLastByte = InFirstByte;
5173 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
5174
5175 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
5176 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
5177 ArgChains.push_back(SDValue(L, 1));
5178 }
5179
5180 // Build a tokenfactor for all the chains.
5181 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
5182}
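
The nested condition in addTokenForArgument above is an inclusive byte-range overlap test between the clobbered slot and each incoming stack-argument load. Expressed as a standalone predicate (a hypothetical helper, not part of the analyzed source), two ranges overlap iff the first byte of one lies inside the other:

#include <cstdint>

// Inclusive ranges [FirstA, LastA] and [FirstB, LastB] overlap iff the start
// of one lies within the other; this is the same test addTokenForArgument
// applies to decide which argument loads must be chained before the store.
static bool byteRangesOverlap(int64_t FirstA, int64_t LastA,
                              int64_t FirstB, int64_t LastB) {
  return (FirstB <= FirstA && FirstA <= LastB) ||
         (FirstA <= FirstB && FirstB <= LastA);
}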
5183
5184bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
5185 bool TailCallOpt) const {
5186 return CallCC == CallingConv::Fast && TailCallOpt;
5187}
5188
5189/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
5190/// and add input and output parameter nodes.
5191SDValue
5192AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
5193 SmallVectorImpl<SDValue> &InVals) const {
5194 SelectionDAG &DAG = CLI.DAG;
5195 SDLoc &DL = CLI.DL;
5196 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
5197 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
5198 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
5199 SDValue Chain = CLI.Chain;
5200 SDValue Callee = CLI.Callee;
5201 bool &IsTailCall = CLI.IsTailCall;
5202 CallingConv::ID CallConv = CLI.CallConv;
5203 bool IsVarArg = CLI.IsVarArg;
5204
5205 MachineFunction &MF = DAG.getMachineFunction();
5206 MachineFunction::CallSiteInfo CSInfo;
5207 bool IsThisReturn = false;
5208
5209 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5210 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5211 bool IsSibCall = false;
5212
5213 // Check callee args/returns for SVE registers and set calling convention
5214 // accordingly.
5215 if (CallConv == CallingConv::C) {
5216 bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
5217 return Out.VT.isScalableVector();
5218 });
5219 bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
5220 return In.VT.isScalableVector();
5221 });
5222
5223 if (CalleeInSVE || CalleeOutSVE)
5224 CallConv = CallingConv::AArch64_SVE_VectorCall;
5225 }
5226
5227 if (IsTailCall) {
5228 // Check if it's really possible to do a tail call.
5229 IsTailCall = isEligibleForTailCallOptimization(
5230 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
5231 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
5232 report_fatal_error("failed to perform tail call elimination on a call "
5233 "site marked musttail");
5234
5235 // A sibling call is one where we're under the usual C ABI and not planning
5236 // to change that but can still do a tail call:
5237 if (!TailCallOpt && IsTailCall)
5238 IsSibCall = true;
5239
5240 if (IsTailCall)
5241 ++NumTailCalls;
5242 }
5243
5244 // Analyze operands of the call, assigning locations to each operand.
5245 SmallVector<CCValAssign, 16> ArgLocs;
5246 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
5247 *DAG.getContext());
5248
5249 if (IsVarArg) {
5250 // Handle fixed and variable vector arguments differently.
5251 // Variable vector arguments always go into memory.
5252 unsigned NumArgs = Outs.size();
5253
5254 for (unsigned i = 0; i != NumArgs; ++i) {
5255 MVT ArgVT = Outs[i].VT;
5256 if (!Outs[i].IsFixed && ArgVT.isScalableVector())
5257 report_fatal_error("Passing SVE types to variadic functions is "
5258 "currently not supported");
5259
5260 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5261 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
5262 /*IsVarArg=*/ !Outs[i].IsFixed);
5263 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
5264 assert(!Res && "Call operand has unhandled type");
5265 (void)Res;
5266 }
5267 } else {
5268 // At this point, Outs[].VT may already be promoted to i32. To correctly
5269 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5270 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5271 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
5272 // we use a special version of AnalyzeCallOperands to pass in ValVT and
5273 // LocVT.
5274 unsigned NumArgs = Outs.size();
5275 for (unsigned i = 0; i != NumArgs; ++i) {
5276 MVT ValVT = Outs[i].VT;
5277 // Get type of the original argument.
5278 EVT ActualVT = getValueType(DAG.getDataLayout(),
5279 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
5280 /*AllowUnknown*/ true);
5281 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
5282 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5283 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5284 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5285 ValVT = MVT::i8;
5286 else if (ActualMVT == MVT::i16)
5287 ValVT = MVT::i16;
5288
5289 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
5290 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
5291 assert(!Res && "Call operand has unhandled type");
5292 (void)Res;
5293 }
5294 }
5295
5296 // Get a count of how many bytes are to be pushed on the stack.
5297 unsigned NumBytes = CCInfo.getNextStackOffset();
5298
5299 if (IsSibCall) {
5300 // Since we're not changing the ABI to make this a tail call, the memory
5301 // operands are already available in the caller's incoming argument space.
5302 NumBytes = 0;
5303 }
5304
5305 // FPDiff is the byte offset of the call's argument area from the callee's.
5306 // Stores to callee stack arguments will be placed in FixedStackSlots offset
5307 // by this amount for a tail call. In a sibling call it must be 0 because the
5308 // caller will deallocate the entire stack and the callee still expects its
5309 // arguments to begin at SP+0. Completely unused for non-tail calls.
5310 int FPDiff = 0;
5311
5312 if (IsTailCall && !IsSibCall) {
5313 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
5314
5315 // Since callee will pop argument stack as a tail call, we must keep the
5316 // popped size 16-byte aligned.
5317 NumBytes = alignTo(NumBytes, 16);
5318
5319 // FPDiff will be negative if this tail call requires more space than we
5320 // would automatically have in our incoming argument space. Positive if we
5321 // can actually shrink the stack.
5322 FPDiff = NumReusableBytes - NumBytes;
5323
5324 // The stack pointer must be 16-byte aligned at all times it's used for a
5325 // memory operation, which in practice means at *all* times and in
5326 // particular across call boundaries. Therefore our own arguments started at
5327 // a 16-byte aligned SP and the delta applied for the tail call should
5328 // satisfy the same constraint.
5329 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
5330 }
5331
5332 // Adjust the stack pointer for the new arguments...
5333 // These operations are automatically eliminated by the prolog/epilog pass
5334 if (!IsSibCall)
5335 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
5336
5337 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
5338 getPointerTy(DAG.getDataLayout()));
5339
5340 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5341 SmallSet<unsigned, 8> RegsUsed;
5342 SmallVector<SDValue, 8> MemOpChains;
5343 auto PtrVT = getPointerTy(DAG.getDataLayout());
5344
5345 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
5346 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
5347 for (const auto &F : Forwards) {
5348 SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
5349 RegsToPass.emplace_back(F.PReg, Val);
5350 }
5351 }
5352
5353 // Walk the register/memloc assignments, inserting copies/loads.
5354 unsigned ExtraArgLocs = 0;
5355 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5356 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5357 SDValue Arg = OutVals[i];
5358 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5359
5360 // Promote the value if needed.
5361 switch (VA.getLocInfo()) {
5362 default:
5363 llvm_unreachable("Unknown loc info!");
5364 case CCValAssign::Full:
5365 break;
5366 case CCValAssign::SExt:
5367 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
5368 break;
5369 case CCValAssign::ZExt:
5370 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
5371 break;
5372 case CCValAssign::AExt:
5373 if (Outs[i].ArgVT == MVT::i1) {
5374 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
5375 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
5376 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
5377 }
5378 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5379 break;
5380 case CCValAssign::AExtUpper:
5381 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
5382 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5383 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
5384 DAG.getConstant(32, DL, VA.getLocVT()));
5385 break;
5386 case CCValAssign::BCvt:
5387 Arg = DAG.getBitcast(VA.getLocVT(), Arg);
5388 break;
5389 case CCValAssign::Trunc:
5390 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
5391 break;
5392 case CCValAssign::FPExt:
5393 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
5394 break;
5395 case CCValAssign::Indirect:
5396 assert(VA.getValVT().isScalableVector() &&
5397 "Only scalable vectors can be passed indirectly");
5398
5399 uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
5400 uint64_t PartSize = StoreSize;
5401 unsigned NumParts = 1;
5402 if (Outs[i].Flags.isInConsecutiveRegs()) {
5403 assert(!Outs[i].Flags.isInConsecutiveRegsLast());
5404 while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
5405 ++NumParts;
5406 StoreSize *= NumParts;
5407 }
5408
5409 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5410 Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
5411 Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
5412 int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
5413 MFI.setStackID(FI, TargetStackID::ScalableVector);
5414
5415 MachinePointerInfo MPI =
5416 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
5417 SDValue Ptr = DAG.getFrameIndex(
5418 FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
5419 SDValue SpillSlot = Ptr;
5420
5421 // Ensure we generate all stores for each tuple part, whilst updating the
5422 // pointer after each store correctly using vscale.
5423 while (NumParts) {
5424 Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
5425 NumParts--;
5426 if (NumParts > 0) {
5427 SDValue BytesIncrement = DAG.getVScale(
5428 DL, Ptr.getValueType(),
5429 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
5430 SDNodeFlags Flags;
5431 Flags.setNoUnsignedWrap(true);
5432
5433 MPI = MachinePointerInfo(MPI.getAddrSpace());
5434 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5435 BytesIncrement, Flags);
5436 ExtraArgLocs++;
5437 i++;
5438 }
5439 }
5440
5441 Arg = SpillSlot;
5442 break;
5443 }
5444
5445 if (VA.isRegLoc()) {
5446 if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
5447 Outs[0].VT == MVT::i64) {
5448 assert(VA.getLocVT() == MVT::i64 &&
5449 "unexpected calling convention register assignment");
5450 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
5451 "unexpected use of 'returned'");
5452 IsThisReturn = true;
5453 }
5454 if (RegsUsed.count(VA.getLocReg())) {
5455 // If this register has already been used then we're trying to pack
5456 // parts of an [N x i32] into an X-register. The extension type will
5457 // take care of putting the two halves in the right place but we have to
5458 // combine them.
5459 SDValue &Bits =
5460 llvm::find_if(RegsToPass,
5461 [=](const std::pair<unsigned, SDValue> &Elt) {
5462 return Elt.first == VA.getLocReg();
5463 })
5464 ->second;
5465 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
5466 // Call site info is used for a function's parameter entry-value
5467 // tracking. For now we track only the simple case where the parameter
5468 // is transferred through a whole register.
5469 llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
5470 return ArgReg.Reg == VA.getLocReg();
5471 });
5472 } else {
5473 RegsToPass.emplace_back(VA.getLocReg(), Arg);
5474 RegsUsed.insert(VA.getLocReg());
5475 const TargetOptions &Options = DAG.getTarget().Options;
5476 if (Options.EmitCallSiteInfo)
5477 CSInfo.emplace_back(VA.getLocReg(), i);
5478 }
5479 } else {
5480 assert(VA.isMemLoc());
5481
5482 SDValue DstAddr;
5483 MachinePointerInfo DstInfo;
5484
5485 // FIXME: This works on big-endian for composite byvals, which are the
5486 // common case. It should also work for fundamental types.
5487 uint32_t BEAlign = 0;
5488 unsigned OpSize;
5489 if (VA.getLocInfo() == CCValAssign::Indirect)
5490 OpSize = VA.getLocVT().getFixedSizeInBits();
5491 else
5492 OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
5493 : VA.getValVT().getSizeInBits();
5494 OpSize = (OpSize + 7) / 8;
5495 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
5496 !Flags.isInConsecutiveRegs()) {
5497 if (OpSize < 8)
5498 BEAlign = 8 - OpSize;
5499 }
5500 unsigned LocMemOffset = VA.getLocMemOffset();
5501 int32_t Offset = LocMemOffset + BEAlign;
5502 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
5503 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
5504
5505 if (IsTailCall) {
5506 Offset = Offset + FPDiff;
5507 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5508
5509 DstAddr = DAG.getFrameIndex(FI, PtrVT);
5510 DstInfo =
5511 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
5512
5513 // Make sure any stack arguments overlapping with where we're storing
5514 // are loaded before this eventual operation. Otherwise they'll be
5515 // clobbered.
5516 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
5517 } else {
5518 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
5519
5520 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
5521 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
5522 LocMemOffset);
5523 }
5524
5525 if (Outs[i].Flags.isByVal()) {
5526 SDValue SizeNode =
5527 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
5528 SDValue Cpy = DAG.getMemcpy(
5529 Chain, DL, DstAddr, Arg, SizeNode,
5530 Outs[i].Flags.getNonZeroByValAlign(),
5531 /*isVol = */ false, /*AlwaysInline = */ false,
5532 /*isTailCall = */ false, DstInfo, MachinePointerInfo());
5533
5534 MemOpChains.push_back(Cpy);
5535 } else {
5536 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
5537 // promoted to a legal register type i32, we should truncate Arg back to
5538 // i1/i8/i16.
5539 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
5540 VA.getValVT() == MVT::i16)
5541 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
5542
5543 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
5544 MemOpChains.push_back(Store);
5545 }
5546 }
5547 }
5548
5549 if (!MemOpChains.empty())
5550 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5551
5552 // Build a sequence of copy-to-reg nodes chained together with token chain
5553 // and flag operands which copy the outgoing args into the appropriate regs.
5554 SDValue InFlag;
5555 for (auto &RegToPass : RegsToPass) {
5556 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
5557 RegToPass.second, InFlag);
5558 InFlag = Chain.getValue(1);
5559 }
5560
5561 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
5562 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
5563 // node so that legalize doesn't hack it.
5564 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5565 auto GV = G->getGlobal();
5566 unsigned OpFlags =
5567 Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
5568 if (OpFlags & AArch64II::MO_GOT) {
5569 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
5570 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
5571 } else {
5572 const GlobalValue *GV = G->getGlobal();
5573 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
5574 }
5575 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5576 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
5577 Subtarget->isTargetMachO()) {
5578 const char *Sym = S->getSymbol();
5579 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
5580 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
5581 } else {
5582 const char *Sym = S->getSymbol();
5583 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
5584 }
5585 }
5586
5587 // We don't usually want to end the call-sequence here because we would tidy
5588 // the frame up *after* the call. However, in the ABI-changing tail-call case
5589 // we've carefully laid out the parameters so that when sp is reset they'll be
5590 // in the correct location.
5591 if (IsTailCall && !IsSibCall) {
5592 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
5593 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
5594 InFlag = Chain.getValue(1);
5595 }
5596
5597 std::vector<SDValue> Ops;
5598 Ops.push_back(Chain);
5599 Ops.push_back(Callee);
5600
5601 if (IsTailCall) {
5602 // Each tail call may have to adjust the stack by a different amount, so
5603 // this information must travel along with the operation for eventual
5604 // consumption by emitEpilogue.
5605 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
5606 }
5607
5608 // Add argument registers to the end of the list so that they are known live
5609 // into the call.
5610 for (auto &RegToPass : RegsToPass)
5611 Ops.push_back(DAG.getRegister(RegToPass.first,
5612 RegToPass.second.getValueType()));
5613
5614 // Add a register mask operand representing the call-preserved registers.
5615 const uint32_t *Mask;
5616 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5617 if (IsThisReturn) {
5618 // For 'this' returns, use the X0-preserving mask if applicable
5619 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
5620 if (!Mask) {
5621 IsThisReturn = false;
5622 Mask = TRI->getCallPreservedMask(MF, CallConv);
5623 }
5624 } else
5625 Mask = TRI->getCallPreservedMask(MF, CallConv);
5626
5627 if (Subtarget->hasCustomCallingConv())
5628 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
5629
5630 if (TRI->isAnyArgRegReserved(MF))
5631 TRI->emitReservedArgRegCallError(MF);
5632
5633 assert(Mask && "Missing call preserved mask for calling convention");
5634 Ops.push_back(DAG.getRegisterMask(Mask));
5635
5636 if (InFlag.getNode())
5637 Ops.push_back(InFlag);
5638
5639 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5640
5641 // If we're doing a tail call, use a TC_RETURN here rather than an
5642 // actual call instruction.
5643 if (IsTailCall) {
5644 MF.getFrameInfo().setHasTailCall();
5645 SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
5646 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
5647 return Ret;
5648 }
5649
5650 unsigned CallOpc = AArch64ISD::CALL;
5651 // Calls marked with "clang.arc.rv" are special. They should be expanded into
5652 // the call, directly followed by a special marker sequence. Use the
5653 // CALL_RVMARKER node to do that.
5654 if (CLI.CB && CLI.CB->hasRetAttr(objcarc::getRVAttrKeyStr())) {
5655 assert(!IsTailCall && "tail calls cannot be marked with clang.arc.rv");
5656 CallOpc = AArch64ISD::CALL_RVMARKER;
5657 }
5658
5659 // Returns a chain and a flag for retval copy to use.
5660 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
5661 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5662 InFlag = Chain.getValue(1);
5663 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
5664
5665 uint64_t CalleePopBytes =
5666 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
5667
5668 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
5669 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
5670 InFlag, DL);
5671 if (!Ins.empty())
5672 InFlag = Chain.getValue(1);
5673
5674 // Handle result values, copying them out of physregs into vregs that we
5675 // return.
5676 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
5677 InVals, IsThisReturn,
5678 IsThisReturn ? OutVals[0] : SDValue());
5679}
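
For the tail-call path in LowerCall above, FPDiff is the signed difference between the caller's reusable incoming argument area and the callee's 16-byte-aligned outgoing argument size. A standalone sketch of that arithmetic (a hypothetical helper, not part of the analyzed source); for example, with 8 reusable bytes and 24 outgoing bytes, NumBytes aligns up to 32 and FPDiff is 8 - 32 = -24.

#include <cstdint>

// Mirrors the FPDiff computation in LowerCall: align the outgoing argument
// size to 16 bytes, then take the signed difference from the caller's
// reusable incoming argument area. Negative means more stack is needed.
static int64_t computeFPDiff(uint64_t NumReusableBytes, uint64_t NumBytes) {
  uint64_t AlignedBytes = (NumBytes + 15) & ~uint64_t(15); // alignTo(_, 16)
  return static_cast<int64_t>(NumReusableBytes) -
         static_cast<int64_t>(AlignedBytes);
}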
5680
5681bool AArch64TargetLowering::CanLowerReturn(
5682 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
5683 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
5684 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
5685 SmallVector<CCValAssign, 16> RVLocs;
5686 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
5687 return CCInfo.CheckReturn(Outs, RetCC);
5688}
5689
5690SDValue
5691AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
5692 bool isVarArg,
5693 const SmallVectorImpl<ISD::OutputArg> &Outs,
5694 const SmallVectorImpl<SDValue> &OutVals,
5695 const SDLoc &DL, SelectionDAG &DAG) const {
5696 auto &MF = DAG.getMachineFunction();
5697 auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5698
5699 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
5700 SmallVector<CCValAssign, 16> RVLocs;
5701 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5702 *DAG.getContext());
5703 CCInfo.AnalyzeReturn(Outs, RetCC);
5704
5705 // Copy the result values into the output registers.
5706 SDValue Flag;
5707 SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
5708 SmallSet<unsigned, 4> RegsUsed;
5709 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
5710 ++i, ++realRVLocIdx) {
5711 CCValAssign &VA = RVLocs[i];
5712 assert(VA.isRegLoc() && "Can only return in registers!");
5713 SDValue Arg = OutVals[realRVLocIdx];
5714
5715 switch (VA.getLocInfo()) {
5716 default:
5717 llvm_unreachable("Unknown loc info!");
5718 case CCValAssign::Full:
5719 if (Outs[i].ArgVT == MVT::i1) {
5720 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
5721 // value. This is strictly redundant on Darwin (which uses "zeroext
5722 // i1"), but will be optimised out before ISel.
5723 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
5724 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
5725 }
5726 break;
5727 case CCValAssign::BCvt:
5728 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
5729 break;
5730 case CCValAssign::AExt:
5731 case CCValAssign::ZExt:
5732 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
5733 break;
5734 case CCValAssign::AExtUpper:
5735 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
5736 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
5737 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
5738 DAG.getConstant(32, DL, VA.getLocVT()));
5739 break;
5740 }
5741
5742 if (RegsUsed.count(VA.getLocReg())) {
5743 SDValue &Bits =
5744 llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
5745 return Elt.first == VA.getLocReg();
5746 })->second;
5747 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
5748 } else {
5749 RetVals.emplace_back(VA.getLocReg(), Arg);
5750 RegsUsed.insert(VA.getLocReg());
5751 }
5752 }
5753
5754 SmallVector<SDValue, 4> RetOps(1, Chain);
5755 for (auto &RetVal : RetVals) {
5756 Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
5757 Flag = Chain.getValue(1);
5758 RetOps.push_back(
5759 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
5760 }
5761
5762 // Windows AArch64 ABIs require that for returning structs by value we copy
5763 // the sret argument into X0 for the return.
5764 // We saved the argument into a virtual register in the entry block,
5765 // so now we copy the value out and into X0.
5766 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
5767 SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
5768 getPointerTy(MF.getDataLayout()));
5769
5770 unsigned RetValReg = AArch64::X0;
5771 Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
5772 Flag = Chain.getValue(1);
5773
5774 RetOps.push_back(
5775 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
5776 }
5777
5778 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5779 const MCPhysReg *I =
5780 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
5781 if (I) {
5782 for (; *I; ++I) {
5783 if (AArch64::GPR64RegClass.contains(*I))
5784 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
5785 else if (AArch64::FPR64RegClass.contains(*I))
5786 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
5787 else
5788 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 5788)
;
5789 }
5790 }
5791
5792 RetOps[0] = Chain; // Update chain.
5793
5794 // Add the flag if we have it.
5795 if (Flag.getNode())
5796 RetOps.push_back(Flag);
5797
5798 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
5799}
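
The AExtUpper return case in LowerReturn above packs a 32-bit value into the upper half of a 64-bit register location and ORs it with whatever already occupies the lower half. That packing, sketched standalone (a hypothetical helper, not part of the analyzed source):

#include <cstdint>

// Mirrors the AExtUpper + OR combining in LowerReturn: the high value is
// shifted into bits [63:32] and merged with the value in bits [31:0].
static uint64_t packRegisterHalves(uint32_t Lower, uint32_t Upper) {
  return (static_cast<uint64_t>(Upper) << 32) | Lower;
}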
5800
5801//===----------------------------------------------------------------------===//
5802// Other Lowering Code
5803//===----------------------------------------------------------------------===//
5804
5805SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
5806 SelectionDAG &DAG,
5807 unsigned Flag) const {
5808 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
5809 N->getOffset(), Flag);
5810}
5811
5812SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
5813 SelectionDAG &DAG,
5814 unsigned Flag) const {
5815 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
5816}
5817
5818SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
5819 SelectionDAG &DAG,
5820 unsigned Flag) const {
5821 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
5822 N->getOffset(), Flag);
5823}
5824
5825SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
5826 SelectionDAG &DAG,
5827 unsigned Flag) const {
5828 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
5829}
5830
5831// (loadGOT sym)
5832template <class NodeTy>
5833SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
5834 unsigned Flags) const {
5835 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
5836 SDLoc DL(N);
5837 EVT Ty = getPointerTy(DAG.getDataLayout());
5838 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
5839 // FIXME: Once remat is capable of dealing with instructions with register
5840 // operands, expand this into two nodes instead of using a wrapper node.
5841 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
5842}
5843
5844// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
5845template <class NodeTy>
5846SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
5847 unsigned Flags) const {
5848 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
5849 SDLoc DL(N);
5850 EVT Ty = getPointerTy(DAG.getDataLayout());
5851 const unsigned char MO_NC = AArch64II::MO_NC;
5852 return DAG.getNode(
5853 AArch64ISD::WrapperLarge, DL, Ty,
5854 getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
5855 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
5856 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
5857 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
5858}
5859
5860// (addlow (adrp %hi(sym)) %lo(sym))
5861template <class NodeTy>
5862SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
5863 unsigned Flags) const {
5864 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
5865 SDLoc DL(N);
5866 EVT Ty = getPointerTy(DAG.getDataLayout());
5867 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
5868 SDValue Lo = getTargetNode(N, Ty, DAG,
5869 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
5870 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
5871 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
5872}
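
The ADRP/ADDlow pair built by getAddr above splits an address into its 4 KiB page base (materialized by ADRP) and the low 12 bits (added by ADDlow). The split itself, as plain arithmetic (hypothetical helpers, not part of the analyzed source):

#include <cstdint>

// ADRP resolves the 4 KiB-aligned page of the symbol; ADDlow then adds the
// page offset, i.e. the low 12 bits of the address.
static uint64_t pageBase(uint64_t Addr) { return Addr & ~uint64_t(0xFFF); }
static uint64_t pageOffset(uint64_t Addr) { return Addr & uint64_t(0xFFF); }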
5873
5874// (adr sym)
5875template <class NodeTy>
5876SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
5877 unsigned Flags) const {
5878 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
5879 SDLoc DL(N);
5880 EVT Ty = getPointerTy(DAG.getDataLayout());
5881 SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
5882 return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
5883}
5884
5885SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
5886 SelectionDAG &DAG) const {
5887 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
5888 const GlobalValue *GV = GN->getGlobal();
5889 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
5890
5891 if (OpFlags != AArch64II::MO_NO_FLAG)
5892 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
5893 "unexpected offset in global node");
5894
5895 // This also catches the large code model case for Darwin, and tiny code
5896 // model with got relocations.
5897 if ((OpFlags & AArch64II::MO_GOT) != 0) {
5898 return getGOT(GN, DAG, OpFlags);
5899 }
5900
5901 SDValue Result;
5902 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
5903 Result = getAddrLarge(GN, DAG, OpFlags);
5904 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
5905 Result = getAddrTiny(GN, DAG, OpFlags);
5906 } else {
5907 Result = getAddr(GN, DAG, OpFlags);
5908 }
5909 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5910 SDLoc DL(GN);
5911 if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
5912 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
5913 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
5914 return Result;
5915}
5916
5917/// Convert a TLS address reference into the correct sequence of loads
5918/// and calls to compute the variable's address (for Darwin, currently) and
5919/// return an SDValue containing the final node.
5920
5921/// Darwin only has one TLS scheme which must be capable of dealing with the
5922/// fully general situation, in the worst case. This means:
5923/// + "extern __thread" declaration.
5924/// + Defined in a possibly unknown dynamic library.
5925///
5926/// The general system is that each __thread variable has a [3 x i64] descriptor
5927/// which contains information used by the runtime to calculate the address. The
5928/// only part of this the compiler needs to know about is the first xword, which
5929/// contains a function pointer that must be called with the address of the
5930/// entire descriptor in "x0".
5931///
5932/// Since this descriptor may be in a different unit, in general even the
5933/// descriptor must be accessed via an indirect load. The "ideal" code sequence
5934/// is:
5935/// adrp x0, _var@TLVPPAGE
5936/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
5937/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
5938/// ; the function pointer
5939/// blr x1 ; Uses descriptor address in x0
5940/// ; Address of _var is now in x0.
5941///
5942/// If the address of _var's descriptor *is* known to the linker, then it can
5943/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
5944/// a slight efficiency gain.
5945SDValue
5946AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
5947 SelectionDAG &DAG) const {
5948 assert(Subtarget->isTargetDarwin() &&
5949 "This function expects a Darwin target");
5950
5951 SDLoc DL(Op);
5952 MVT PtrVT = getPointerTy(DAG.getDataLayout());
5953 MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
5954 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
5955
5956 SDValue TLVPAddr =
5957 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
5958 SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
5959
5960 // The first entry in the descriptor is a function pointer that we must call
5961 // to obtain the address of the variable.
5962 SDValue Chain = DAG.getEntryNode();
5963 SDValue FuncTLVGet = DAG.getLoad(
5964 PtrMemVT, DL, Chain, DescAddr,
5965 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
5966 Align(PtrMemVT.getSizeInBits() / 8),
5967 MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
5968 Chain = FuncTLVGet.getValue(1);
5969
5970 // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
5971 FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
5972
5973 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5974 MFI.setAdjustsStack(true);
5975
5976 // TLS calls preserve all registers except those that absolutely must be
5977 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
5978 // silly).
5979 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5980 const uint32_t *Mask = TRI->getTLSCallPreservedMask();
5981 if (Subtarget->hasCustomCallingConv())
5982 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
5983
5984 // Finally, we can make the call. This is just a degenerate version of a
5985 // normal AArch64 call node: x0 takes the address of the descriptor, and
5986 // returns the address of the variable in this thread.
5987 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
5988 Chain =
5989 DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
5990 Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
5991 DAG.getRegisterMask(Mask), Chain.getValue(1));
5992 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
5993}
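
The doc comment above describes the Darwin TLV descriptor as a [3 x i64] whose first word is the resolver function called with the descriptor's own address in x0. At the C level that corresponds roughly to the layout sketched below; the field names are assumptions for illustration only and are not taken from the analyzed source.

#include <cstdint>

// Approximate shape of a Darwin thread-local-variable descriptor: the first
// entry is the resolver the compiler calls with the descriptor's address in
// x0; the remaining words are runtime bookkeeping the compiler never touches.
struct TLVDescriptor {
  void *(*Thunk)(TLVDescriptor *); // called with the descriptor in x0
  uint64_t Key;                    // runtime-internal
  uint64_t Offset;                 // runtime-internal
};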
5994
5995/// Convert a thread-local variable reference into a sequence of instructions to
5996/// compute the variable's address for the local exec TLS model of ELF targets.
5997/// The sequence depends on the maximum TLS area size.
5998SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
5999 SDValue ThreadBase,
6000 const SDLoc &DL,
6001 SelectionDAG &DAG) const {
6002 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6003 SDValue TPOff, Addr;
6004
6005 switch (DAG.getTarget().Options.TLSSize) {
6006 default:
6007 llvm_unreachable("Unexpected TLS size")::llvm::llvm_unreachable_internal("Unexpected TLS size", "/build/llvm-toolchain-snapshot-12~++20210125100614+2cdb34efdac5/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 6007)
;
6008
6009 case 12: {
6010 // mrs x0, TPIDR_EL0
6011 // add x0, x0, :tprel_lo12:a
6012 SDValue Var = DAG.getTargetGlobalAddress(
6013 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
6014 return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
6015 Var,
6016 DAG.getTargetConstant(0, DL, MVT::i32)),
6017 0);
6018 }
6019
6020 case 24: {
6021 // mrs x0, TPIDR_EL0
6022 // add x0, x0, :tprel_hi12:a
6023 // add x0, x0, :tprel_lo12_nc:a
6024 SDValue HiVar = DAG.getTargetGlobalAddress(
6025 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6026 SDValue LoVar = DAG.getTargetGlobalAddress(
6027 GV, DL,