Bug Summary

File: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1114, column 10
Called C++ object pointer is null
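
This diagnostic comes from the core call-and-message check: on some path through the code, a member function is invoked through a pointer the analyzer has proven to be null. The snippet below is a minimal illustrative sketch of that pattern using made-up names; it is not the code at SelectionDAGNodes.h:1114.

// Illustrative sketch only (hypothetical types and functions), not the LLVM
// code the report points at. The analyzer tracks that lookup() can return
// nullptr and flags the unguarded member call on that path.
struct Node {
  unsigned getOpcode() const { return Opcode; }
  unsigned Opcode = 0;
};

Node *lookup(Node *N, bool Found) { return Found ? N : nullptr; }

unsigned opcodeOf(Node *N, bool Found) {
  Node *Result = lookup(N, Found);
  // Missing "if (!Result)" guard: when Found is false, Result is null and the
  // call below is what gets reported as "Called C++ object pointer is null".
  return Result->getOpcode();
}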

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/Target/AArch64 -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-04-14-063029-18377-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
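
The command above is the exact -cc1 invocation recorded by the analyzer. To regenerate a report like this one, the usual entry point is scan-build wrapping the normal build rather than invoking -cc1 directly; in the sketch below the output directory and build command are placeholders, not values taken from this report.

# Sketch only: substitute the real build directory and build target.
scan-build -o /tmp/scan-results make -C build-llvm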

/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64ISelLowering.h"
14#include "AArch64CallingConvention.h"
15#include "AArch64ExpandImm.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/ObjCARCUtil.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/CallingConvLower.h"
35#include "llvm/CodeGen/MachineBasicBlock.h"
36#include "llvm/CodeGen/MachineFrameInfo.h"
37#include "llvm/CodeGen/MachineFunction.h"
38#include "llvm/CodeGen/MachineInstr.h"
39#include "llvm/CodeGen/MachineInstrBuilder.h"
40#include "llvm/CodeGen/MachineMemOperand.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/RuntimeLibcalls.h"
43#include "llvm/CodeGen/SelectionDAG.h"
44#include "llvm/CodeGen/SelectionDAGNodes.h"
45#include "llvm/CodeGen/TargetCallingConv.h"
46#include "llvm/CodeGen/TargetInstrInfo.h"
47#include "llvm/CodeGen/ValueTypes.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DebugLoc.h"
52#include "llvm/IR/DerivedTypes.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/GetElementPtrTypeIterator.h"
55#include "llvm/IR/GlobalValue.h"
56#include "llvm/IR/IRBuilder.h"
57#include "llvm/IR/Instruction.h"
58#include "llvm/IR/Instructions.h"
59#include "llvm/IR/IntrinsicInst.h"
60#include "llvm/IR/Intrinsics.h"
61#include "llvm/IR/IntrinsicsAArch64.h"
62#include "llvm/IR/Module.h"
63#include "llvm/IR/OperandTraits.h"
64#include "llvm/IR/PatternMatch.h"
65#include "llvm/IR/Type.h"
66#include "llvm/IR/Use.h"
67#include "llvm/IR/Value.h"
68#include "llvm/MC/MCRegisterInfo.h"
69#include "llvm/Support/Casting.h"
70#include "llvm/Support/CodeGen.h"
71#include "llvm/Support/CommandLine.h"
72#include "llvm/Support/Compiler.h"
73#include "llvm/Support/Debug.h"
74#include "llvm/Support/ErrorHandling.h"
75#include "llvm/Support/KnownBits.h"
76#include "llvm/Support/MachineValueType.h"
77#include "llvm/Support/MathExtras.h"
78#include "llvm/Support/raw_ostream.h"
79#include "llvm/Target/TargetMachine.h"
80#include "llvm/Target/TargetOptions.h"
81#include <algorithm>
82#include <bitset>
83#include <cassert>
84#include <cctype>
85#include <cstdint>
86#include <cstdlib>
87#include <iterator>
88#include <limits>
89#include <tuple>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94using namespace llvm::PatternMatch;
95
96#define DEBUG_TYPE "aarch64-lower"
97
98STATISTIC(NumTailCalls, "Number of tail calls");
99STATISTIC(NumShiftInserts, "Number of vector shift inserts");
100STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
101
102// FIXME: The necessary dtprel relocations don't seem to be supported
103// well in the GNU bfd and gold linkers at the moment. Therefore, by
104// default, for now, fall back to GeneralDynamic code generation.
105cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
106 "aarch64-elf-ldtls-generation", cl::Hidden,
107 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
108 cl::init(false));
109
110static cl::opt<bool>
111EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
112 cl::desc("Enable AArch64 logical imm instruction "
113 "optimization"),
114 cl::init(true));
115
116// Temporary option added for the purpose of testing functionality added
117// to DAGCombiner.cpp in D92230. It is expected that this can be removed
118// in future when both implementations will be based off MGATHER rather
119// than the GLD1 nodes added for the SVE gather load intrinsics.
120static cl::opt<bool>
121EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
122 cl::desc("Combine extends of AArch64 masked "
123 "gather intrinsics"),
124 cl::init(true));
125
126/// Value type used for condition codes.
127static const MVT MVT_CC = MVT::i32;
128
129static inline EVT getPackedSVEVectorVT(EVT VT) {
130 switch (VT.getSimpleVT().SimpleTy) {
131 default:
132    llvm_unreachable("unexpected element type for vector");
133 case MVT::i8:
134 return MVT::nxv16i8;
135 case MVT::i16:
136 return MVT::nxv8i16;
137 case MVT::i32:
138 return MVT::nxv4i32;
139 case MVT::i64:
140 return MVT::nxv2i64;
141 case MVT::f16:
142 return MVT::nxv8f16;
143 case MVT::f32:
144 return MVT::nxv4f32;
145 case MVT::f64:
146 return MVT::nxv2f64;
147 case MVT::bf16:
148 return MVT::nxv8bf16;
149 }
150}
151
152// NOTE: Currently there's only a need to return integer vector types. If this
153// changes then just add an extra "type" parameter.
154static inline EVT getPackedSVEVectorVT(ElementCount EC) {
155 switch (EC.getKnownMinValue()) {
156 default:
157    llvm_unreachable("unexpected element count for vector");
158 case 16:
159 return MVT::nxv16i8;
160 case 8:
161 return MVT::nxv8i16;
162 case 4:
163 return MVT::nxv4i32;
164 case 2:
165 return MVT::nxv2i64;
166 }
167}
168
169static inline EVT getPromotedVTForPredicate(EVT VT) {
170  assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
171         "Expected scalable predicate vector type!");
172 switch (VT.getVectorMinNumElements()) {
173 default:
174    llvm_unreachable("unexpected element count for vector");
175 case 2:
176 return MVT::nxv2i64;
177 case 4:
178 return MVT::nxv4i32;
179 case 8:
180 return MVT::nxv8i16;
181 case 16:
182 return MVT::nxv16i8;
183 }
184}
185
186/// Returns true if VT's elements occupy the lowest bit positions of its
187/// associated register class without any intervening space.
188///
189/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
190/// same register class, but only nxv8f16 can be treated as a packed vector.
191static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
192  assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
193         "Expected legal vector type!");
194 return VT.isFixedLengthVector() ||
195 VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
196}
197
198// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
199// predicate and end with a passthru value matching the result type.
200static bool isMergePassthruOpcode(unsigned Opc) {
201 switch (Opc) {
202 default:
203 return false;
204 case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
205 case AArch64ISD::BSWAP_MERGE_PASSTHRU:
206 case AArch64ISD::CTLZ_MERGE_PASSTHRU:
207 case AArch64ISD::CTPOP_MERGE_PASSTHRU:
208 case AArch64ISD::DUP_MERGE_PASSTHRU:
209 case AArch64ISD::ABS_MERGE_PASSTHRU:
210 case AArch64ISD::NEG_MERGE_PASSTHRU:
211 case AArch64ISD::FNEG_MERGE_PASSTHRU:
212 case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
213 case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
214 case AArch64ISD::FCEIL_MERGE_PASSTHRU:
215 case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
216 case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
217 case AArch64ISD::FRINT_MERGE_PASSTHRU:
218 case AArch64ISD::FROUND_MERGE_PASSTHRU:
219 case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
220 case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
221 case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
222 case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
223 case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
224 case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
225 case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
226 case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
227 case AArch64ISD::FSQRT_MERGE_PASSTHRU:
228 case AArch64ISD::FRECPX_MERGE_PASSTHRU:
229 case AArch64ISD::FABS_MERGE_PASSTHRU:
230 return true;
231 }
232}
233
234AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
235 const AArch64Subtarget &STI)
236 : TargetLowering(TM), Subtarget(&STI) {
237 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
238 // we have to make something up. Arbitrarily, choose ZeroOrOne.
239 setBooleanContents(ZeroOrOneBooleanContent);
240 // When comparing vectors the result sets the different elements in the
241 // vector to all-one or all-zero.
242 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
243
244 // Set up the register classes.
245 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
246 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
247
248 if (Subtarget->hasFPARMv8()) {
249 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
250 addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
251 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
252 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
253 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
254 }
255
256 if (Subtarget->hasNEON()) {
257 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
258 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
259 // Someone set us up the NEON.
260 addDRTypeForNEON(MVT::v2f32);
261 addDRTypeForNEON(MVT::v8i8);
262 addDRTypeForNEON(MVT::v4i16);
263 addDRTypeForNEON(MVT::v2i32);
264 addDRTypeForNEON(MVT::v1i64);
265 addDRTypeForNEON(MVT::v1f64);
266 addDRTypeForNEON(MVT::v4f16);
267 if (Subtarget->hasBF16())
268 addDRTypeForNEON(MVT::v4bf16);
269
270 addQRTypeForNEON(MVT::v4f32);
271 addQRTypeForNEON(MVT::v2f64);
272 addQRTypeForNEON(MVT::v16i8);
273 addQRTypeForNEON(MVT::v8i16);
274 addQRTypeForNEON(MVT::v4i32);
275 addQRTypeForNEON(MVT::v2i64);
276 addQRTypeForNEON(MVT::v8f16);
277 if (Subtarget->hasBF16())
278 addQRTypeForNEON(MVT::v8bf16);
279 }
280
281 if (Subtarget->hasSVE()) {
282 // Add legal sve predicate types
283 addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
284 addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
285 addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
286 addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
287
288 // Add legal sve data types
289 addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
290 addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
291 addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
292 addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
293
294 addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
295 addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
296 addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
297 addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
298 addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
299 addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
300
301 if (Subtarget->hasBF16()) {
302 addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
303 addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
304 addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
305 }
306
307 if (Subtarget->useSVEForFixedLengthVectors()) {
308 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
309 if (useSVEForFixedLengthVectorVT(VT))
310 addRegisterClass(VT, &AArch64::ZPRRegClass);
311
312 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
313 if (useSVEForFixedLengthVectorVT(VT))
314 addRegisterClass(VT, &AArch64::ZPRRegClass);
315 }
316
317 for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
318 setOperationAction(ISD::SADDSAT, VT, Legal);
319 setOperationAction(ISD::UADDSAT, VT, Legal);
320 setOperationAction(ISD::SSUBSAT, VT, Legal);
321 setOperationAction(ISD::USUBSAT, VT, Legal);
322 setOperationAction(ISD::UREM, VT, Expand);
323 setOperationAction(ISD::SREM, VT, Expand);
324 setOperationAction(ISD::SDIVREM, VT, Expand);
325 setOperationAction(ISD::UDIVREM, VT, Expand);
326 }
327
328 for (auto VT :
329 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
330 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
331 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
332
333 for (auto VT :
334 { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
335 MVT::nxv2f64 }) {
336 setCondCodeAction(ISD::SETO, VT, Expand);
337 setCondCodeAction(ISD::SETOLT, VT, Expand);
338 setCondCodeAction(ISD::SETLT, VT, Expand);
339 setCondCodeAction(ISD::SETOLE, VT, Expand);
340 setCondCodeAction(ISD::SETLE, VT, Expand);
341 setCondCodeAction(ISD::SETULT, VT, Expand);
342 setCondCodeAction(ISD::SETULE, VT, Expand);
343 setCondCodeAction(ISD::SETUGE, VT, Expand);
344 setCondCodeAction(ISD::SETUGT, VT, Expand);
345 setCondCodeAction(ISD::SETUEQ, VT, Expand);
346 setCondCodeAction(ISD::SETUNE, VT, Expand);
347
348 setOperationAction(ISD::FREM, VT, Expand);
349 setOperationAction(ISD::FPOW, VT, Expand);
350 setOperationAction(ISD::FPOWI, VT, Expand);
351 setOperationAction(ISD::FCOS, VT, Expand);
352 setOperationAction(ISD::FSIN, VT, Expand);
353 setOperationAction(ISD::FSINCOS, VT, Expand);
354 setOperationAction(ISD::FEXP, VT, Expand);
355 setOperationAction(ISD::FEXP2, VT, Expand);
356 setOperationAction(ISD::FLOG, VT, Expand);
357 setOperationAction(ISD::FLOG2, VT, Expand);
358 setOperationAction(ISD::FLOG10, VT, Expand);
359 }
360 }
361
362 // Compute derived properties from the register classes
363 computeRegisterProperties(Subtarget->getRegisterInfo());
364
365 // Provide all sorts of operation actions
366 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
367 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
368 setOperationAction(ISD::SETCC, MVT::i32, Custom);
369 setOperationAction(ISD::SETCC, MVT::i64, Custom);
370 setOperationAction(ISD::SETCC, MVT::f16, Custom);
371 setOperationAction(ISD::SETCC, MVT::f32, Custom);
372 setOperationAction(ISD::SETCC, MVT::f64, Custom);
373 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
374 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
375 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
376 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
377 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
378 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
379 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
380 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
381 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
382 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
383 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
384 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
385 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
386 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
387 setOperationAction(ISD::SELECT, MVT::i32, Custom);
388 setOperationAction(ISD::SELECT, MVT::i64, Custom);
389 setOperationAction(ISD::SELECT, MVT::f16, Custom);
390 setOperationAction(ISD::SELECT, MVT::f32, Custom);
391 setOperationAction(ISD::SELECT, MVT::f64, Custom);
392 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
393 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
394 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
395 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
396 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
397 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
398 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
399
400 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
401 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
402 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
403
404 setOperationAction(ISD::FREM, MVT::f32, Expand);
405 setOperationAction(ISD::FREM, MVT::f64, Expand);
406 setOperationAction(ISD::FREM, MVT::f80, Expand);
407
408 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
409
410 // Custom lowering hooks are needed for XOR
411 // to fold it into CSINC/CSINV.
412 setOperationAction(ISD::XOR, MVT::i32, Custom);
413 setOperationAction(ISD::XOR, MVT::i64, Custom);
414
415 // Virtually no operation on f128 is legal, but LLVM can't expand them when
416 // there's a valid register class, so we need custom operations in most cases.
417 setOperationAction(ISD::FABS, MVT::f128, Expand);
418 setOperationAction(ISD::FADD, MVT::f128, LibCall);
419 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
420 setOperationAction(ISD::FCOS, MVT::f128, Expand);
421 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
422 setOperationAction(ISD::FMA, MVT::f128, Expand);
423 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
424 setOperationAction(ISD::FNEG, MVT::f128, Expand);
425 setOperationAction(ISD::FPOW, MVT::f128, Expand);
426 setOperationAction(ISD::FREM, MVT::f128, Expand);
427 setOperationAction(ISD::FRINT, MVT::f128, Expand);
428 setOperationAction(ISD::FSIN, MVT::f128, Expand);
429 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
430 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
431 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
432 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
433 setOperationAction(ISD::SETCC, MVT::f128, Custom);
434 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
435 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
436 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
437 setOperationAction(ISD::SELECT, MVT::f128, Custom);
438 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
439 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
440
441 // Lowering for many of the conversions is actually specified by the non-f128
442 // type. The LowerXXX function will be trivial when f128 isn't involved.
443 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
444 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
445 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
446 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
447 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
448 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
449 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
450 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
451 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
452 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
453 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
454 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
455 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
456 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
457 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
458 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
459 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
460 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
461 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
462 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
463 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
464 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
465 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
466 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
467 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
468 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
469 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
470 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
471 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
472 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
473
474 // Variable arguments.
475 setOperationAction(ISD::VASTART, MVT::Other, Custom);
476 setOperationAction(ISD::VAARG, MVT::Other, Custom);
477 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
478 setOperationAction(ISD::VAEND, MVT::Other, Expand);
479
480 // Variable-sized objects.
481 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
482 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
483
484 if (Subtarget->isTargetWindows())
485 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
486 else
487 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
488
489 // Constant pool entries
490 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
491
492 // BlockAddress
493 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
494
495 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
496 setOperationAction(ISD::ADDC, MVT::i32, Custom);
497 setOperationAction(ISD::ADDE, MVT::i32, Custom);
498 setOperationAction(ISD::SUBC, MVT::i32, Custom);
499 setOperationAction(ISD::SUBE, MVT::i32, Custom);
500 setOperationAction(ISD::ADDC, MVT::i64, Custom);
501 setOperationAction(ISD::ADDE, MVT::i64, Custom);
502 setOperationAction(ISD::SUBC, MVT::i64, Custom);
503 setOperationAction(ISD::SUBE, MVT::i64, Custom);
504
505 // AArch64 lacks both left-rotate and popcount instructions.
506 setOperationAction(ISD::ROTL, MVT::i32, Expand);
507 setOperationAction(ISD::ROTL, MVT::i64, Expand);
508 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
509 setOperationAction(ISD::ROTL, VT, Expand);
510 setOperationAction(ISD::ROTR, VT, Expand);
511 }
512
513 // AArch64 doesn't have i32 MULH{S|U}.
514 setOperationAction(ISD::MULHU, MVT::i32, Expand);
515 setOperationAction(ISD::MULHS, MVT::i32, Expand);
516
517 // AArch64 doesn't have {U|S}MUL_LOHI.
518 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
519 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
520
521 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
522 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
523 setOperationAction(ISD::CTPOP, MVT::i128, Custom);
524
525 setOperationAction(ISD::ABS, MVT::i32, Custom);
526 setOperationAction(ISD::ABS, MVT::i64, Custom);
527
528 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
529 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
530 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
531 setOperationAction(ISD::SDIVREM, VT, Expand);
532 setOperationAction(ISD::UDIVREM, VT, Expand);
533 }
534 setOperationAction(ISD::SREM, MVT::i32, Expand);
535 setOperationAction(ISD::SREM, MVT::i64, Expand);
536 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
537 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
538 setOperationAction(ISD::UREM, MVT::i32, Expand);
539 setOperationAction(ISD::UREM, MVT::i64, Expand);
540
541 // Custom lower Add/Sub/Mul with overflow.
542 setOperationAction(ISD::SADDO, MVT::i32, Custom);
543 setOperationAction(ISD::SADDO, MVT::i64, Custom);
544 setOperationAction(ISD::UADDO, MVT::i32, Custom);
545 setOperationAction(ISD::UADDO, MVT::i64, Custom);
546 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
547 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
548 setOperationAction(ISD::USUBO, MVT::i32, Custom);
549 setOperationAction(ISD::USUBO, MVT::i64, Custom);
550 setOperationAction(ISD::SMULO, MVT::i32, Custom);
551 setOperationAction(ISD::SMULO, MVT::i64, Custom);
552 setOperationAction(ISD::UMULO, MVT::i32, Custom);
553 setOperationAction(ISD::UMULO, MVT::i64, Custom);
554
555 setOperationAction(ISD::FSIN, MVT::f32, Expand);
556 setOperationAction(ISD::FSIN, MVT::f64, Expand);
557 setOperationAction(ISD::FCOS, MVT::f32, Expand);
558 setOperationAction(ISD::FCOS, MVT::f64, Expand);
559 setOperationAction(ISD::FPOW, MVT::f32, Expand);
560 setOperationAction(ISD::FPOW, MVT::f64, Expand);
561 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
562 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
563 if (Subtarget->hasFullFP16())
564 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
565 else
566 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
567
568 setOperationAction(ISD::FREM, MVT::f16, Promote);
569 setOperationAction(ISD::FREM, MVT::v4f16, Expand);
570 setOperationAction(ISD::FREM, MVT::v8f16, Expand);
571 setOperationAction(ISD::FPOW, MVT::f16, Promote);
572 setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
573 setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
574 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
575 setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
576 setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
577 setOperationAction(ISD::FCOS, MVT::f16, Promote);
578 setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
579 setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
580 setOperationAction(ISD::FSIN, MVT::f16, Promote);
581 setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
582 setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
583 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
584 setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
585 setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
586 setOperationAction(ISD::FEXP, MVT::f16, Promote);
587 setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
588 setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
589 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
590 setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
591 setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
592 setOperationAction(ISD::FLOG, MVT::f16, Promote);
593 setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
594 setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
595 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
596 setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
597 setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
598 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
599 setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
600 setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
601
602 if (!Subtarget->hasFullFP16()) {
603 setOperationAction(ISD::SELECT, MVT::f16, Promote);
604 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
605 setOperationAction(ISD::SETCC, MVT::f16, Promote);
606 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
607 setOperationAction(ISD::FADD, MVT::f16, Promote);
608 setOperationAction(ISD::FSUB, MVT::f16, Promote);
609 setOperationAction(ISD::FMUL, MVT::f16, Promote);
610 setOperationAction(ISD::FDIV, MVT::f16, Promote);
611 setOperationAction(ISD::FMA, MVT::f16, Promote);
612 setOperationAction(ISD::FNEG, MVT::f16, Promote);
613 setOperationAction(ISD::FABS, MVT::f16, Promote);
614 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
615 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
616 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
617 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
618 setOperationAction(ISD::FRINT, MVT::f16, Promote);
619 setOperationAction(ISD::FROUND, MVT::f16, Promote);
620 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
621 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
622 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
623 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
624 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
625 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
626
627 // promote v4f16 to v4f32 when that is known to be safe.
628 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
629 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
630 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
631 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
632 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
633 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
634 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
635 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
636
637 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
638 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
639 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
640 setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
641 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
642 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
643 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
644 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
645 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
646 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
647 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
648 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
649 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
650 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
651 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
652 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
653
654 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
655 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
656 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
657 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
658 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
659 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
660 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
661 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
662 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
663 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
664 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
665 setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
666 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
667 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
668 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
669 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
670 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
671 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
672 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
673 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
674 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
675 }
676
677 // AArch64 has implementations of a lot of rounding-like FP operations.
678 for (MVT Ty : {MVT::f32, MVT::f64}) {
679 setOperationAction(ISD::FFLOOR, Ty, Legal);
680 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
681 setOperationAction(ISD::FCEIL, Ty, Legal);
682 setOperationAction(ISD::FRINT, Ty, Legal);
683 setOperationAction(ISD::FTRUNC, Ty, Legal);
684 setOperationAction(ISD::FROUND, Ty, Legal);
685 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
686 setOperationAction(ISD::FMINNUM, Ty, Legal);
687 setOperationAction(ISD::FMAXNUM, Ty, Legal);
688 setOperationAction(ISD::FMINIMUM, Ty, Legal);
689 setOperationAction(ISD::FMAXIMUM, Ty, Legal);
690 setOperationAction(ISD::LROUND, Ty, Legal);
691 setOperationAction(ISD::LLROUND, Ty, Legal);
692 setOperationAction(ISD::LRINT, Ty, Legal);
693 setOperationAction(ISD::LLRINT, Ty, Legal);
694 }
695
696 if (Subtarget->hasFullFP16()) {
697 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
698 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
699 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
700 setOperationAction(ISD::FRINT, MVT::f16, Legal);
701 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
702 setOperationAction(ISD::FROUND, MVT::f16, Legal);
703 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
704 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
705 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
706 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
707 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
708 }
709
710 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
711
712 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
713 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
714
715 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
716 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
717 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
718 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
719 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
720
721 // Generate outline atomics library calls only if LSE was not specified for
722 // subtarget
723 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
724 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
725 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
726 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
727 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
728 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
729 setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
730 setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
731 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
732 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
733 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
734 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
735 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
736 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
737 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
738 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
739 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
740 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
741 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
742 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
743 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
744 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
745 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
746 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
747 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
748 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
749#define LCALLNAMES(A, B, N) \
750 setLibcallName(A##N##_RELAX, #B #N "_relax"); \
751 setLibcallName(A##N##_ACQ, #B #N "_acq"); \
752 setLibcallName(A##N##_REL, #B #N "_rel"); \
753 setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
754#define LCALLNAME4(A, B) \
755 LCALLNAMES(A, B, 1) \
756 LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
757#define LCALLNAME5(A, B) \
758 LCALLNAMES(A, B, 1) \
759 LCALLNAMES(A, B, 2) \
760 LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
761 LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
762 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
763 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
764 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
765 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
766 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
767#undef LCALLNAMES
768#undef LCALLNAME4
769#undef LCALLNAME5
770 }
771
772 // 128-bit loads and stores can be done without expanding
773 setOperationAction(ISD::LOAD, MVT::i128, Custom);
774 setOperationAction(ISD::STORE, MVT::i128, Custom);
775
776 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
777 // custom lowering, as there are no un-paired non-temporal stores and
778 // legalization will break up 256 bit inputs.
779 setOperationAction(ISD::STORE, MVT::v32i8, Custom);
780 setOperationAction(ISD::STORE, MVT::v16i16, Custom);
781 setOperationAction(ISD::STORE, MVT::v16f16, Custom);
782 setOperationAction(ISD::STORE, MVT::v8i32, Custom);
783 setOperationAction(ISD::STORE, MVT::v8f32, Custom);
784 setOperationAction(ISD::STORE, MVT::v4f64, Custom);
785 setOperationAction(ISD::STORE, MVT::v4i64, Custom);
786
787 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
788 // This requires the Performance Monitors extension.
789 if (Subtarget->hasPerfMon())
790 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
791
792 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
793 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
794 // Issue __sincos_stret if available.
795 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
796 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
797 } else {
798 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
799 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
800 }
801
802 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
803 // MSVCRT doesn't have powi; fall back to pow
804 setLibcallName(RTLIB::POWI_F32, nullptr);
805 setLibcallName(RTLIB::POWI_F64, nullptr);
806 }
807
808 // Make floating-point constants legal for the large code model, so they don't
809 // become loads from the constant pool.
810 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
811 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
812 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
813 }
814
815 // AArch64 does not have floating-point extending loads, i1 sign-extending
816 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
817 for (MVT VT : MVT::fp_valuetypes()) {
818 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
819 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
820 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
821 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
822 }
823 for (MVT VT : MVT::integer_valuetypes())
824 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
825
826 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
827 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
828 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
829 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
830 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
831 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
832 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
833
834 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
835 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
836 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
837
838 // Indexed loads and stores are supported.
839 for (unsigned im = (unsigned)ISD::PRE_INC;
840 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
841 setIndexedLoadAction(im, MVT::i8, Legal);
842 setIndexedLoadAction(im, MVT::i16, Legal);
843 setIndexedLoadAction(im, MVT::i32, Legal);
844 setIndexedLoadAction(im, MVT::i64, Legal);
845 setIndexedLoadAction(im, MVT::f64, Legal);
846 setIndexedLoadAction(im, MVT::f32, Legal);
847 setIndexedLoadAction(im, MVT::f16, Legal);
848 setIndexedLoadAction(im, MVT::bf16, Legal);
849 setIndexedStoreAction(im, MVT::i8, Legal);
850 setIndexedStoreAction(im, MVT::i16, Legal);
851 setIndexedStoreAction(im, MVT::i32, Legal);
852 setIndexedStoreAction(im, MVT::i64, Legal);
853 setIndexedStoreAction(im, MVT::f64, Legal);
854 setIndexedStoreAction(im, MVT::f32, Legal);
855 setIndexedStoreAction(im, MVT::f16, Legal);
856 setIndexedStoreAction(im, MVT::bf16, Legal);
857 }
858
859 // Trap.
860 setOperationAction(ISD::TRAP, MVT::Other, Legal);
861 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
862 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
863
864 // We combine OR nodes for bitfield operations.
865 setTargetDAGCombine(ISD::OR);
866 // Try to create BICs for vector ANDs.
867 setTargetDAGCombine(ISD::AND);
868
869 // Vector add and sub nodes may conceal a high-half opportunity.
870 // Also, try to fold ADD into CSINC/CSINV..
871 setTargetDAGCombine(ISD::ADD);
872 setTargetDAGCombine(ISD::ABS);
873 setTargetDAGCombine(ISD::SUB);
874 setTargetDAGCombine(ISD::SRL);
875 setTargetDAGCombine(ISD::XOR);
876 setTargetDAGCombine(ISD::SINT_TO_FP);
877 setTargetDAGCombine(ISD::UINT_TO_FP);
878
879 setTargetDAGCombine(ISD::FP_TO_SINT);
880 setTargetDAGCombine(ISD::FP_TO_UINT);
881 setTargetDAGCombine(ISD::FDIV);
882
883 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
884
885 setTargetDAGCombine(ISD::ANY_EXTEND);
886 setTargetDAGCombine(ISD::ZERO_EXTEND);
887 setTargetDAGCombine(ISD::SIGN_EXTEND);
888 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
889 setTargetDAGCombine(ISD::TRUNCATE);
890 setTargetDAGCombine(ISD::CONCAT_VECTORS);
891 setTargetDAGCombine(ISD::STORE);
892 if (Subtarget->supportsAddressTopByteIgnored())
893 setTargetDAGCombine(ISD::LOAD);
894
895 setTargetDAGCombine(ISD::MUL);
896
897 setTargetDAGCombine(ISD::SELECT);
898 setTargetDAGCombine(ISD::VSELECT);
899
900 setTargetDAGCombine(ISD::INTRINSIC_VOID);
901 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
902 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
903 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
904 setTargetDAGCombine(ISD::VECREDUCE_ADD);
905
906 setTargetDAGCombine(ISD::GlobalAddress);
907
908 // In case of strict alignment, avoid an excessive number of byte wide stores.
909 MaxStoresPerMemsetOptSize = 8;
910 MaxStoresPerMemset = Subtarget->requiresStrictAlign()
911 ? MaxStoresPerMemsetOptSize : 32;
912
913 MaxGluedStoresPerMemcpy = 4;
914 MaxStoresPerMemcpyOptSize = 4;
915 MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
916 ? MaxStoresPerMemcpyOptSize : 16;
917
918 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
919
920 MaxLoadsPerMemcmpOptSize = 4;
921 MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
922 ? MaxLoadsPerMemcmpOptSize : 8;
923
924 setStackPointerRegisterToSaveRestore(AArch64::SP);
925
926 setSchedulingPreference(Sched::Hybrid);
927
928 EnableExtLdPromotion = true;
929
930 // Set required alignment.
931 setMinFunctionAlignment(Align(4));
932 // Set preferred alignments.
933 setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
934 setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
935
936 // Only change the limit for entries in a jump table if specified by
937 // the sub target, but not at the command line.
938 unsigned MaxJT = STI.getMaximumJumpTableSize();
939  if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
940 setMaximumJumpTableSize(MaxJT);
941
942 setHasExtractBitsInsn(true);
943
944 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
945
946 if (Subtarget->hasNEON()) {
947 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
948 // silliness like this:
949 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
950 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
951 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
952 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
953 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
954 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
955 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
956 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
957 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
958 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
959 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
960 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
961 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
962 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
963 setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand);
964 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
965 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
966 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
967 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
968 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
969 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
970 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
971 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
972 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
973 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
974 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
975
976 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
977 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
978 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
979 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
980 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
981
982 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
983
984    // AArch64 doesn't have direct vector->f32 conversion instructions for
985 // elements smaller than i32, so promote the input to i32 first.
986 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
987 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
988 // i8 vector elements also need promotion to i32 for v8i8
989 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
990 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
991 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
992 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
993 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
994 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
995 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
996    // Or, direct i32 -> f16 vector conversion. Set it to Custom, so the
997 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
998 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
999 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1000
1001 if (Subtarget->hasFullFP16()) {
1002 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1003 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1004 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
1005 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1006 } else {
1007 // when AArch64 doesn't have fullfp16 support, promote the input
1008 // to i32 first.
1009 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
1010 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
1011 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
1012 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
1013 }
1014
1015 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
1016 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
1017
1018 // AArch64 doesn't have MUL.2d:
1019 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
1020 // Custom handling for some quad-vector types to detect MULL.
1021 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
1022 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1023 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1024
1025 // Saturates
1026 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1027 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1028 setOperationAction(ISD::SADDSAT, VT, Legal);
1029 setOperationAction(ISD::UADDSAT, VT, Legal);
1030 setOperationAction(ISD::SSUBSAT, VT, Legal);
1031 setOperationAction(ISD::USUBSAT, VT, Legal);
1032 }
1033
1034 // Vector reductions
1035 for (MVT VT : { MVT::v4f16, MVT::v2f32,
1036 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1037 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1038 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1039 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1040
1041 setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
1042 }
1043 }
1044 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1045 MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1046 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1047 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1048 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1049 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1050 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1051 }
1052 setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
1053
1054 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
1055 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
1056 // Likewise, narrowing and extending vector loads/stores aren't handled
1057 // directly.
1058 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1059 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
1060
1061 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
1062 setOperationAction(ISD::MULHS, VT, Legal);
1063 setOperationAction(ISD::MULHU, VT, Legal);
1064 } else {
1065 setOperationAction(ISD::MULHS, VT, Expand);
1066 setOperationAction(ISD::MULHU, VT, Expand);
1067 }
1068 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1069 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1070
1071 setOperationAction(ISD::BSWAP, VT, Expand);
1072 setOperationAction(ISD::CTTZ, VT, Expand);
1073
1074 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1075 setTruncStoreAction(VT, InnerVT, Expand);
1076 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1077 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1078 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1079 }
1080 }
1081
1082 // AArch64 has implementations of a lot of rounding-like FP operations.
1083 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
1084 setOperationAction(ISD::FFLOOR, Ty, Legal);
1085 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1086 setOperationAction(ISD::FCEIL, Ty, Legal);
1087 setOperationAction(ISD::FRINT, Ty, Legal);
1088 setOperationAction(ISD::FTRUNC, Ty, Legal);
1089 setOperationAction(ISD::FROUND, Ty, Legal);
1090 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
1091 }
1092
1093 if (Subtarget->hasFullFP16()) {
1094 for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
1095 setOperationAction(ISD::FFLOOR, Ty, Legal);
1096 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1097 setOperationAction(ISD::FCEIL, Ty, Legal);
1098 setOperationAction(ISD::FRINT, Ty, Legal);
1099 setOperationAction(ISD::FTRUNC, Ty, Legal);
1100 setOperationAction(ISD::FROUND, Ty, Legal);
1101 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
1102 }
1103 }
1104
1105 if (Subtarget->hasSVE())
1106 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
1107
1108 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
1109 }
1110
1111 if (Subtarget->hasSVE()) {
1112 // FIXME: Add custom lowering of MLOAD to handle different passthrus (not a
1113 // splat of 0 or undef) once vector selects supported in SVE codegen. See
1114 // D68877 for more details.
1115 for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
1116 setOperationAction(ISD::BITREVERSE, VT, Custom);
1117 setOperationAction(ISD::BSWAP, VT, Custom);
1118 setOperationAction(ISD::CTLZ, VT, Custom);
1119 setOperationAction(ISD::CTPOP, VT, Custom);
1120 setOperationAction(ISD::CTTZ, VT, Custom);
1121 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1122 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1123 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1124 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1125 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1126 setOperationAction(ISD::MGATHER, VT, Custom);
1127 setOperationAction(ISD::MSCATTER, VT, Custom);
1128 setOperationAction(ISD::MUL, VT, Custom);
1129 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1130 setOperationAction(ISD::SELECT, VT, Custom);
1131 setOperationAction(ISD::SETCC, VT, Custom);
1132 setOperationAction(ISD::SDIV, VT, Custom);
1133 setOperationAction(ISD::UDIV, VT, Custom);
1134 setOperationAction(ISD::SMIN, VT, Custom);
1135 setOperationAction(ISD::UMIN, VT, Custom);
1136 setOperationAction(ISD::SMAX, VT, Custom);
1137 setOperationAction(ISD::UMAX, VT, Custom);
1138 setOperationAction(ISD::SHL, VT, Custom);
1139 setOperationAction(ISD::SRL, VT, Custom);
1140 setOperationAction(ISD::SRA, VT, Custom);
1141 setOperationAction(ISD::ABS, VT, Custom);
1142 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1143 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1144 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1145 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1146 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1147 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1148 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1149 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1150 setOperationAction(ISD::STEP_VECTOR, VT, Custom);
1151
1152 setOperationAction(ISD::MULHU, VT, Expand);
1153 setOperationAction(ISD::MULHS, VT, Expand);
1154 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1155 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1156 }
1157
1158 // Illegal unpacked integer vector types.
1159 for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
1160 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1161 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1162 }
1163
1164 for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
1165 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1166 setOperationAction(ISD::SELECT, VT, Custom);
1167 setOperationAction(ISD::SETCC, VT, Custom);
1168 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1169 setOperationAction(ISD::TRUNCATE, VT, Custom);
1170 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1171 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1172 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1173
1174 // There are no legal MVT::nxv16f## based types.
1175 if (VT != MVT::nxv16i1) {
1176 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1177 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1178 }
1179 }
1180
1181 for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
1182 MVT::nxv4f32, MVT::nxv2f64}) {
1183 for (auto InnerVT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16,
1184 MVT::nxv2f32, MVT::nxv4f32, MVT::nxv2f64}) {
1185 // Avoid marking truncating FP stores as legal to prevent the
1186 // DAGCombiner from creating unsupported truncating stores.
1187 setTruncStoreAction(VT, InnerVT, Expand);
1188 }
1189
1190 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1191 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1192 setOperationAction(ISD::MGATHER, VT, Custom);
1193 setOperationAction(ISD::MSCATTER, VT, Custom);
1194 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1195 setOperationAction(ISD::SELECT, VT, Custom);
1196 setOperationAction(ISD::FADD, VT, Custom);
1197 setOperationAction(ISD::FDIV, VT, Custom);
1198 setOperationAction(ISD::FMA, VT, Custom);
1199 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1200 setOperationAction(ISD::FMAXNUM, VT, Custom);
1201 setOperationAction(ISD::FMINIMUM, VT, Custom);
1202 setOperationAction(ISD::FMINNUM, VT, Custom);
1203 setOperationAction(ISD::FMUL, VT, Custom);
1204 setOperationAction(ISD::FNEG, VT, Custom);
1205 setOperationAction(ISD::FSUB, VT, Custom);
1206 setOperationAction(ISD::FCEIL, VT, Custom);
1207 setOperationAction(ISD::FFLOOR, VT, Custom);
1208 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1209 setOperationAction(ISD::FRINT, VT, Custom);
1210 setOperationAction(ISD::FROUND, VT, Custom);
1211 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1212 setOperationAction(ISD::FTRUNC, VT, Custom);
1213 setOperationAction(ISD::FSQRT, VT, Custom);
1214 setOperationAction(ISD::FABS, VT, Custom);
1215 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1216 setOperationAction(ISD::FP_ROUND, VT, Custom);
1217 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1218 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1219 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1220 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1221 }
1222
1223 for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1224 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1225 setOperationAction(ISD::MGATHER, VT, Custom);
1226 setOperationAction(ISD::MSCATTER, VT, Custom);
1227 }
1228
1229 setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
1230
1231 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
1232 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
1233
1234 // NOTE: Currently this has to happen after computeRegisterProperties rather
1235 // than the preferred option of combining it with the addRegisterClass call.
1236 if (Subtarget->useSVEForFixedLengthVectors()) {
1237 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
1238 if (useSVEForFixedLengthVectorVT(VT))
1239 addTypeForFixedLengthSVE(VT);
1240 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
1241 if (useSVEForFixedLengthVectorVT(VT))
1242 addTypeForFixedLengthSVE(VT);
1243
1244 // 64-bit results can mean a bigger-than-NEON input.
1245 for (auto VT : {MVT::v8i8, MVT::v4i16})
1246 setOperationAction(ISD::TRUNCATE, VT, Custom);
1247 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
1248
1249 // 128-bit results imply a bigger-than-NEON input.
1250 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
1251 setOperationAction(ISD::TRUNCATE, VT, Custom);
1252 for (auto VT : {MVT::v8f16, MVT::v4f32})
1253 setOperationAction(ISD::FP_ROUND, VT, Expand);
1254
1255 // These operations are not supported on NEON but SVE can do them.
1256 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1257 setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
1258 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1259 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
1260 setOperationAction(ISD::MUL, MVT::v1i64, Custom);
1261 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1262 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
1263 setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
1264 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
1265 setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
1266 setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
1267 setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
1268 setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
1269 setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
1270 setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
1271 setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
1272 setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
1273 setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
1274 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
1275 setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
1276 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
1277 setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
1278 setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
1279 setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
1280 setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
1281 setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
1282 setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
1283 setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
1284 setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
1285 setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
1286 setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
1287 setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
1288 setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
1289 setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
1290
1291 // Int operations with no NEON support.
1292 for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1293 MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
1294 setOperationAction(ISD::BITREVERSE, VT, Custom);
1295 setOperationAction(ISD::CTTZ, VT, Custom);
1296 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1297 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1298 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1299 }
1300
1301 // FP operations with no NEON support.
1302 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
1303 MVT::v1f64, MVT::v2f64})
1304 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1305
1306 // Use SVE for vectors with more than 2 elements.
1307 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
1308 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1309 }
1310
1311 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64);
1312 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32);
1313 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16);
1314 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
1315 }
1316
1317 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
1318}
1319
1320void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
1321 assert(VT.isVector() && "VT should be a vector type");
1322
1323 if (VT.isFloatingPoint()) {
1324 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
1325 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
1326 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
1327 }
1328
1329 // Mark vector float intrinsics as expand.
1330 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
1331 setOperationAction(ISD::FSIN, VT, Expand);
1332 setOperationAction(ISD::FCOS, VT, Expand);
1333 setOperationAction(ISD::FPOW, VT, Expand);
1334 setOperationAction(ISD::FLOG, VT, Expand);
1335 setOperationAction(ISD::FLOG2, VT, Expand);
1336 setOperationAction(ISD::FLOG10, VT, Expand);
1337 setOperationAction(ISD::FEXP, VT, Expand);
1338 setOperationAction(ISD::FEXP2, VT, Expand);
1339
1340 // But we do support custom-lowering for FCOPYSIGN.
1341 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1342 }
1343
1344 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1345 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1346 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1347 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1348 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1349 setOperationAction(ISD::SRA, VT, Custom);
1350 setOperationAction(ISD::SRL, VT, Custom);
1351 setOperationAction(ISD::SHL, VT, Custom);
1352 setOperationAction(ISD::OR, VT, Custom);
1353 setOperationAction(ISD::SETCC, VT, Custom);
1354 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
1355
1356 setOperationAction(ISD::SELECT, VT, Expand);
1357 setOperationAction(ISD::SELECT_CC, VT, Expand);
1358 setOperationAction(ISD::VSELECT, VT, Expand);
1359 for (MVT InnerVT : MVT::all_valuetypes())
1360 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
1361
1362 // CNT supports only B element sizes; use UADDLP afterwards to widen.
1363 if (VT != MVT::v8i8 && VT != MVT::v16i8)
1364 setOperationAction(ISD::CTPOP, VT, Custom);
1365
1366 setOperationAction(ISD::UDIV, VT, Expand);
1367 setOperationAction(ISD::SDIV, VT, Expand);
1368 setOperationAction(ISD::UREM, VT, Expand);
1369 setOperationAction(ISD::SREM, VT, Expand);
1370 setOperationAction(ISD::FREM, VT, Expand);
1371
1372 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1373 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1374
1375 if (!VT.isFloatingPoint())
1376 setOperationAction(ISD::ABS, VT, Legal);
1377
1378 // [SU][MIN|MAX] are available for all NEON types apart from i64.
1379 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
1380 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
1381 setOperationAction(Opcode, VT, Legal);
1382
1383 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
1384 if (VT.isFloatingPoint() &&
1385 VT.getVectorElementType() != MVT::bf16 &&
1386 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
1387 for (unsigned Opcode :
1388 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
1389 setOperationAction(Opcode, VT, Legal);
1390
1391 if (Subtarget->isLittleEndian()) {
1392 for (unsigned im = (unsigned)ISD::PRE_INC;
1393 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1394 setIndexedLoadAction(im, VT, Legal);
1395 setIndexedStoreAction(im, VT, Legal);
1396 }
1397 }
1398}
1399
1400void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
1401 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
1402
1403 // By default everything must be expanded.
1404 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1405 setOperationAction(Op, VT, Expand);
1406
1407 // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
1408 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1409
1410 if (VT.isFloatingPoint()) {
1411 setCondCodeAction(ISD::SETO, VT, Expand);
1412 setCondCodeAction(ISD::SETOLT, VT, Expand);
1413 setCondCodeAction(ISD::SETLT, VT, Expand);
1414 setCondCodeAction(ISD::SETOLE, VT, Expand);
1415 setCondCodeAction(ISD::SETLE, VT, Expand);
1416 setCondCodeAction(ISD::SETULT, VT, Expand);
1417 setCondCodeAction(ISD::SETULE, VT, Expand);
1418 setCondCodeAction(ISD::SETUGE, VT, Expand);
1419 setCondCodeAction(ISD::SETUGT, VT, Expand);
1420 setCondCodeAction(ISD::SETUEQ, VT, Expand);
1421 setCondCodeAction(ISD::SETUNE, VT, Expand);
1422 }
1423
1424 // Lower fixed length vector operations to scalable equivalents.
1425 setOperationAction(ISD::ABS, VT, Custom);
1426 setOperationAction(ISD::ADD, VT, Custom);
1427 setOperationAction(ISD::AND, VT, Custom);
1428 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1429 setOperationAction(ISD::BITREVERSE, VT, Custom);
1430 setOperationAction(ISD::BSWAP, VT, Custom);
1431 setOperationAction(ISD::CTLZ, VT, Custom);
1432 setOperationAction(ISD::CTPOP, VT, Custom);
1433 setOperationAction(ISD::CTTZ, VT, Custom);
1434 setOperationAction(ISD::FABS, VT, Custom);
1435 setOperationAction(ISD::FADD, VT, Custom);
1436 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1437 setOperationAction(ISD::FCEIL, VT, Custom);
1438 setOperationAction(ISD::FDIV, VT, Custom);
1439 setOperationAction(ISD::FFLOOR, VT, Custom);
1440 setOperationAction(ISD::FMA, VT, Custom);
1441 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1442 setOperationAction(ISD::FMAXNUM, VT, Custom);
1443 setOperationAction(ISD::FMINIMUM, VT, Custom);
1444 setOperationAction(ISD::FMINNUM, VT, Custom);
1445 setOperationAction(ISD::FMUL, VT, Custom);
1446 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1447 setOperationAction(ISD::FNEG, VT, Custom);
1448 setOperationAction(ISD::FRINT, VT, Custom);
1449 setOperationAction(ISD::FROUND, VT, Custom);
1450 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1451 setOperationAction(ISD::FSQRT, VT, Custom);
1452 setOperationAction(ISD::FSUB, VT, Custom);
1453 setOperationAction(ISD::FTRUNC, VT, Custom);
1454 setOperationAction(ISD::LOAD, VT, Custom);
1455 setOperationAction(ISD::MUL, VT, Custom);
1456 setOperationAction(ISD::OR, VT, Custom);
1457 setOperationAction(ISD::SDIV, VT, Custom);
1458 setOperationAction(ISD::SELECT, VT, Custom);
1459 setOperationAction(ISD::SETCC, VT, Custom);
1460 setOperationAction(ISD::SHL, VT, Custom);
1461 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1462 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1463 setOperationAction(ISD::SMAX, VT, Custom);
1464 setOperationAction(ISD::SMIN, VT, Custom);
1465 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1466 setOperationAction(ISD::SRA, VT, Custom);
1467 setOperationAction(ISD::SRL, VT, Custom);
1468 setOperationAction(ISD::STORE, VT, Custom);
1469 setOperationAction(ISD::SUB, VT, Custom);
1470 setOperationAction(ISD::TRUNCATE, VT, Custom);
1471 setOperationAction(ISD::UDIV, VT, Custom);
1472 setOperationAction(ISD::UMAX, VT, Custom);
1473 setOperationAction(ISD::UMIN, VT, Custom);
1474 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1475 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1476 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1477 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1478 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1479 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1480 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1481 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1482 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1483 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1484 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1485 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1486 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1487 setOperationAction(ISD::VSELECT, VT, Custom);
1488 setOperationAction(ISD::XOR, VT, Custom);
1489 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1490}
1491
1492void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
1493 addRegisterClass(VT, &AArch64::FPR64RegClass);
1494 addTypeForNEON(VT, MVT::v2i32);
1495}
1496
1497void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
1498 addRegisterClass(VT, &AArch64::FPR128RegClass);
1499 addTypeForNEON(VT, MVT::v4i32);
1500}
1501
1502EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
1503 LLVMContext &C, EVT VT) const {
1504 if (!VT.isVector())
1505 return MVT::i32;
1506 if (VT.isScalableVector())
1507 return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
1508 return VT.changeVectorElementTypeToInteger();
1509}
1510
1511static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
1512 const APInt &Demanded,
1513 TargetLowering::TargetLoweringOpt &TLO,
1514 unsigned NewOpc) {
1515 uint64_t OldImm = Imm, NewImm, Enc;
1516 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
1517
1518 // Return if the immediate is already all zeros, all ones, a bimm32 or a
1519 // bimm64.
1520 if (Imm == 0 || Imm == Mask ||
1521 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
1522 return false;
1523
1524 unsigned EltSize = Size;
1525 uint64_t DemandedBits = Demanded.getZExtValue();
1526
1527 // Clear bits that are not demanded.
1528 Imm &= DemandedBits;
1529
1530 while (true) {
1531 // The goal here is to set the non-demanded bits in a way that minimizes
1532 // the number of switching between 0 and 1. In order to achieve this goal,
1533 // we set the non-demanded bits to the value of the preceding demanded bits.
1534 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
1535 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
1536 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
1537 // The final result is 0b11000011.
1538 uint64_t NonDemandedBits = ~DemandedBits;
1539 uint64_t InvertedImm = ~Imm & DemandedBits;
1540 uint64_t RotatedImm =
1541 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
1542 NonDemandedBits;
1543 uint64_t Sum = RotatedImm + NonDemandedBits;
1544 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
1545 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
1546 NewImm = (Imm | Ones) & Mask;
1547
1548 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
1549 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
1550 // we halve the element size and continue the search.
1551 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
1552 break;
1553
1554 // We cannot shrink the element size any further if it is 2-bits.
1555 if (EltSize == 2)
1556 return false;
1557
1558 EltSize /= 2;
1559 Mask >>= EltSize;
1560 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
1561
1562 // Return if there is a mismatch in any of the demanded bits of Imm and Hi.
1563 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
1564 return false;
1565
1566 // Merge the upper and lower halves of Imm and DemandedBits.
1567 Imm |= Hi;
1568 DemandedBits |= DemandedBitsHi;
1569 }
1570
1571 ++NumOptimizedImms;
1572
1573 // Replicate the element across the register width.
1574 while (EltSize < Size) {
1575 NewImm |= NewImm << EltSize;
1576 EltSize *= 2;
1577 }
1578
1579 (void)OldImm;
1580 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
1581        "demanded bits should never be altered");
1582 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
1583
1584 // Create the new constant immediate node.
1585 EVT VT = Op.getValueType();
1586 SDLoc DL(Op);
1587 SDValue New;
1588
1589 // If the new constant immediate is all-zeros or all-ones, let the target
1590 // independent DAG combine optimize this node.
1591 if (NewImm == 0 || NewImm == OrigMask) {
1592 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
1593 TLO.DAG.getConstant(NewImm, DL, VT));
1594 // Otherwise, create a machine node so that target independent DAG combine
1595 // doesn't undo this optimization.
1596 } else {
1597 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
1598 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
1599 New = SDValue(
1600 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
1601 }
1602
1603 return TLO.CombineTo(Op, New);
1604}
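// A minimal standalone sketch (not part of AArch64ISelLowering.cpp; all names
// here are hypothetical) that replays the "copy the preceding demanded bit
// into the non-demanded positions" trick from the loop above on the in-source
// comment's own 8-bit example, 0bx10xx0x1 -> 0b11000011.
#include <cassert>
#include <cstdint>

static uint64_t fillNonDemandedBits8(uint64_t Imm, uint64_t DemandedBits) {
  const unsigned EltSize = 8;
  const uint64_t Mask = 0xFF;
  uint64_t NonDemandedBits = ~DemandedBits & Mask;
  uint64_t InvertedImm = ~Imm & DemandedBits;
  uint64_t RotatedImm =
      ((InvertedImm << 1) | ((InvertedImm >> (EltSize - 1)) & 1)) &
      NonDemandedBits;
  uint64_t Sum = (RotatedImm + NonDemandedBits) & Mask;
  bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
  uint64_t Ones = (Sum + Carry) & NonDemandedBits;
  return (Imm | Ones) & Mask;
}

static void checkBitFillExample() {
  // Demanded bits are 0, 2, 5 and 6 (values 1, 0, 0, 1), i.e. Imm = 0x41 and
  // DemandedBits = 0x65; the filled immediate is 0b11000011 = 0xC3.
  assert(fillNonDemandedBits8(0x41, 0x65) == 0xC3);
}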
1605
1606bool AArch64TargetLowering::targetShrinkDemandedConstant(
1607 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1608 TargetLoweringOpt &TLO) const {
1609 // Delay this optimization to as late as possible.
1610 if (!TLO.LegalOps)
1611 return false;
1612
1613 if (!EnableOptimizeLogicalImm)
1614 return false;
1615
1616 EVT VT = Op.getValueType();
1617 if (VT.isVector())
1618 return false;
1619
1620 unsigned Size = VT.getSizeInBits();
1621 assert((Size == 32 || Size == 64) &&
1622        "i32 or i64 is expected after legalization.");
1623
1624 // Exit early if we demand all bits.
1625 if (DemandedBits.countPopulation() == Size)
1626 return false;
1627
1628 unsigned NewOpc;
1629 switch (Op.getOpcode()) {
1630 default:
1631 return false;
1632 case ISD::AND:
1633 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
1634 break;
1635 case ISD::OR:
1636 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1637 break;
1638 case ISD::XOR:
1639 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1640 break;
1641 }
1642 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1643 if (!C)
1644 return false;
1645 uint64_t Imm = C->getZExtValue();
1646 return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
1647}
1648
1649/// computeKnownBitsForTargetNode - Determine which of the bits specified in
1650 /// Mask are known to be either zero or one and return them in Known.
1651void AArch64TargetLowering::computeKnownBitsForTargetNode(
1652 const SDValue Op, KnownBits &Known,
1653 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
1654 switch (Op.getOpcode()) {
1655 default:
1656 break;
1657 case AArch64ISD::CSEL: {
1658 KnownBits Known2;
1659 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1660 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1661 Known = KnownBits::commonBits(Known, Known2);
1662 break;
1663 }
1664 case AArch64ISD::LOADgot:
1665 case AArch64ISD::ADDlow: {
1666 if (!Subtarget->isTargetILP32())
1667 break;
1668 // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
1669 Known.Zero = APInt::getHighBitsSet(64, 32);
1670 break;
1671 }
1672 case ISD::INTRINSIC_W_CHAIN: {
1673 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1674 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1675 switch (IntID) {
1676 default: return;
1677 case Intrinsic::aarch64_ldaxr:
1678 case Intrinsic::aarch64_ldxr: {
1679 unsigned BitWidth = Known.getBitWidth();
1680 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1681 unsigned MemBits = VT.getScalarSizeInBits();
1682 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1683 return;
1684 }
1685 }
1686 break;
1687 }
1688 case ISD::INTRINSIC_WO_CHAIN:
1689 case ISD::INTRINSIC_VOID: {
1690 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1691 switch (IntNo) {
1692 default:
1693 break;
1694 case Intrinsic::aarch64_neon_umaxv:
1695 case Intrinsic::aarch64_neon_uminv: {
1696 // Figure out the datatype of the vector operand. The UMINV instruction
1697 // will zero extend the result, so we can mark as known zero all the
1698 // bits larger than the element datatype. 32-bit or larger doesn't need
1699 // this as those are legal types and will be handled by isel directly.
1700 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1701 unsigned BitWidth = Known.getBitWidth();
1702 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1703 assert(BitWidth >= 8 && "Unexpected width!");
1704 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1705 Known.Zero |= Mask;
1706 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1707 assert(BitWidth >= 16 && "Unexpected width!");
1708 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1709 Known.Zero |= Mask;
1710 }
1711 break;
1712 } break;
1713 }
1714 }
1715 }
1716}
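// A small illustrative fragment (assumed, not from this file): the known-zero
// mask computed above for a v8i8 umaxv/uminv whose scalar result is 32 bits
// wide. Only the low 8 bits of the result can ever be set, so the mask marks
// bits [31:8] as known zero.
#include "llvm/ADT/APInt.h"

static llvm::APInt knownZeroForByteReduction() {
  // Equivalent to the v8i8/v16i8 case above: 0xFFFFFF00.
  return llvm::APInt::getHighBitsSet(/*numBits=*/32, /*hiBitsSet=*/32 - 8);
}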
1717
1718MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1719 EVT) const {
1720 return MVT::i64;
1721}
1722
1723bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1724 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1725 bool *Fast) const {
1726 if (Subtarget->requiresStrictAlign())
1727 return false;
1728
1729 if (Fast) {
1730 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1731 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1732 // See comments in performSTORECombine() for more details about
1733 // these conditions.
1734
1735 // Code that uses clang vector extensions can mark that it
1736 // wants unaligned accesses to be treated as fast by
1737 // underspecifying alignment to be 1 or 2.
1738 Alignment <= 2 ||
1739
1740 // Disregard v2i64. Memcpy lowering produces those and splitting
1741 // them regresses performance on micro-benchmarks and olden/bh.
1742 VT == MVT::v2i64;
1743 }
1744 return true;
1745}
1746
1747// Same as above but handling LLTs instead.
1748bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1749 LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1750 bool *Fast) const {
1751 if (Subtarget->requiresStrictAlign())
1752 return false;
1753
1754 if (Fast) {
1755 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1756 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
1757 Ty.getSizeInBytes() != 16 ||
1758 // See comments in performSTORECombine() for more details about
1759 // these conditions.
1760
1761 // Code that uses clang vector extensions can mark that it
1762 // wants unaligned accesses to be treated as fast by
1763 // underspecifying alignment to be 1 or 2.
1764 Alignment <= 2 ||
1765
1766 // Disregard v2i64. Memcpy lowering produces those and splitting
1767 // them regresses performance on micro-benchmarks and olden/bh.
1768 Ty == LLT::vector(2, 64);
1769 }
1770 return true;
1771}
1772
1773FastISel *
1774AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1775 const TargetLibraryInfo *libInfo) const {
1776 return AArch64::createFastISel(funcInfo, libInfo);
1777}
1778
1779const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1780#define MAKE_CASE(V) \
1781 case V: \
1782 return #V;
1783 switch ((AArch64ISD::NodeType)Opcode) {
1784 case AArch64ISD::FIRST_NUMBER:
1785 break;
1786 MAKE_CASE(AArch64ISD::CALL)
1787 MAKE_CASE(AArch64ISD::ADRP)
1788 MAKE_CASE(AArch64ISD::ADR)
1789 MAKE_CASE(AArch64ISD::ADDlow)
1790 MAKE_CASE(AArch64ISD::LOADgot)
1791 MAKE_CASE(AArch64ISD::RET_FLAG)
1792 MAKE_CASE(AArch64ISD::BRCOND)
1793 MAKE_CASE(AArch64ISD::CSEL)
1794 MAKE_CASE(AArch64ISD::FCSEL)
1795 MAKE_CASE(AArch64ISD::CSINV)
1796 MAKE_CASE(AArch64ISD::CSNEG)
1797 MAKE_CASE(AArch64ISD::CSINC)
1798 MAKE_CASE(AArch64ISD::THREAD_POINTER)
1799 MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
1800 MAKE_CASE(AArch64ISD::ADD_PRED)
1801 MAKE_CASE(AArch64ISD::MUL_PRED)
1802 MAKE_CASE(AArch64ISD::SDIV_PRED)
1803 MAKE_CASE(AArch64ISD::SHL_PRED)
1804 MAKE_CASE(AArch64ISD::SMAX_PRED)
1805 MAKE_CASE(AArch64ISD::SMIN_PRED)
1806 MAKE_CASE(AArch64ISD::SRA_PRED)
1807 MAKE_CASE(AArch64ISD::SRL_PRED)
1808 MAKE_CASE(AArch64ISD::SUB_PRED)
1809 MAKE_CASE(AArch64ISD::UDIV_PRED)
1810 MAKE_CASE(AArch64ISD::UMAX_PRED)
1811 MAKE_CASE(AArch64ISD::UMIN_PRED)
1812 MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
1813 MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
1814 MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
1815 MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
1816 MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
1817 MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
1818 MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
1819 MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
1820 MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
1821 MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
1822 MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
1823 MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
1824 MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
1825 MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
1826 MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
1827 MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
1828 MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
1829 MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
1830 MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
1831 MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
1832 MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
1833 MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
1834 MAKE_CASE(AArch64ISD::ADC)
1835 MAKE_CASE(AArch64ISD::SBC)
1836 MAKE_CASE(AArch64ISD::ADDS)
1837 MAKE_CASE(AArch64ISD::SUBS)
1838 MAKE_CASE(AArch64ISD::ADCS)
1839 MAKE_CASE(AArch64ISD::SBCS)
1840 MAKE_CASE(AArch64ISD::ANDS)
1841 MAKE_CASE(AArch64ISD::CCMP)
1842 MAKE_CASE(AArch64ISD::CCMN)
1843 MAKE_CASE(AArch64ISD::FCCMP)
1844 MAKE_CASE(AArch64ISD::FCMP)
1845 MAKE_CASE(AArch64ISD::STRICT_FCMP)
1846 MAKE_CASE(AArch64ISD::STRICT_FCMPE)
1847 MAKE_CASE(AArch64ISD::DUP)
1848 MAKE_CASE(AArch64ISD::DUPLANE8)
1849 MAKE_CASE(AArch64ISD::DUPLANE16)
1850 MAKE_CASE(AArch64ISD::DUPLANE32)
1851 MAKE_CASE(AArch64ISD::DUPLANE64)
1852 MAKE_CASE(AArch64ISD::MOVI)
1853 MAKE_CASE(AArch64ISD::MOVIshift)
1854 MAKE_CASE(AArch64ISD::MOVIedit)
1855 MAKE_CASE(AArch64ISD::MOVImsl)
1856 MAKE_CASE(AArch64ISD::FMOV)
1857 MAKE_CASE(AArch64ISD::MVNIshift)
1858 MAKE_CASE(AArch64ISD::MVNImsl)
1859 MAKE_CASE(AArch64ISD::BICi)
1860 MAKE_CASE(AArch64ISD::ORRi)
1861 MAKE_CASE(AArch64ISD::BSP)
1862 MAKE_CASE(AArch64ISD::NEG)
1863 MAKE_CASE(AArch64ISD::EXTR)
1864 MAKE_CASE(AArch64ISD::ZIP1)
1865 MAKE_CASE(AArch64ISD::ZIP2)
1866 MAKE_CASE(AArch64ISD::UZP1)
1867 MAKE_CASE(AArch64ISD::UZP2)
1868 MAKE_CASE(AArch64ISD::TRN1)
1869 MAKE_CASE(AArch64ISD::TRN2)
1870 MAKE_CASE(AArch64ISD::REV16)
1871 MAKE_CASE(AArch64ISD::REV32)
1872 MAKE_CASE(AArch64ISD::REV64)
1873 MAKE_CASE(AArch64ISD::EXT)
1874 MAKE_CASE(AArch64ISD::VSHL)
1875 MAKE_CASE(AArch64ISD::VLSHR)
1876 MAKE_CASE(AArch64ISD::VASHR)
1877 MAKE_CASE(AArch64ISD::VSLI)
1878 MAKE_CASE(AArch64ISD::VSRI)
1879 MAKE_CASE(AArch64ISD::CMEQ)
1880 MAKE_CASE(AArch64ISD::CMGE)
1881 MAKE_CASE(AArch64ISD::CMGT)
1882 MAKE_CASE(AArch64ISD::CMHI)
1883 MAKE_CASE(AArch64ISD::CMHS)
1884 MAKE_CASE(AArch64ISD::FCMEQ)
1885 MAKE_CASE(AArch64ISD::FCMGE)
1886 MAKE_CASE(AArch64ISD::FCMGT)
1887 MAKE_CASE(AArch64ISD::CMEQz)
1888 MAKE_CASE(AArch64ISD::CMGEz)
1889 MAKE_CASE(AArch64ISD::CMGTz)
1890 MAKE_CASE(AArch64ISD::CMLEz)
1891 MAKE_CASE(AArch64ISD::CMLTz)
1892 MAKE_CASE(AArch64ISD::FCMEQz)
1893 MAKE_CASE(AArch64ISD::FCMGEz)
1894 MAKE_CASE(AArch64ISD::FCMGTz)
1895 MAKE_CASE(AArch64ISD::FCMLEz)
1896 MAKE_CASE(AArch64ISD::FCMLTz)
1897 MAKE_CASE(AArch64ISD::SADDV)
1898 MAKE_CASE(AArch64ISD::UADDV)
1899 MAKE_CASE(AArch64ISD::SRHADD)
1900 MAKE_CASE(AArch64ISD::URHADD)
1901 MAKE_CASE(AArch64ISD::SHADD)
1902 MAKE_CASE(AArch64ISD::UHADD)
1903 MAKE_CASE(AArch64ISD::SDOT)
1904 MAKE_CASE(AArch64ISD::UDOT)
1905 MAKE_CASE(AArch64ISD::SMINV)
1906 MAKE_CASE(AArch64ISD::UMINV)
1907 MAKE_CASE(AArch64ISD::SMAXV)
1908 MAKE_CASE(AArch64ISD::UMAXV)
1909 MAKE_CASE(AArch64ISD::SADDV_PRED)
1910 MAKE_CASE(AArch64ISD::UADDV_PRED)
1911 MAKE_CASE(AArch64ISD::SMAXV_PRED)
1912 MAKE_CASE(AArch64ISD::UMAXV_PRED)
1913 MAKE_CASE(AArch64ISD::SMINV_PRED)
1914 MAKE_CASE(AArch64ISD::UMINV_PRED)
1915 MAKE_CASE(AArch64ISD::ORV_PRED)
1916 MAKE_CASE(AArch64ISD::EORV_PRED)
1917 MAKE_CASE(AArch64ISD::ANDV_PRED)
1918 MAKE_CASE(AArch64ISD::CLASTA_N)
1919 MAKE_CASE(AArch64ISD::CLASTB_N)
1920 MAKE_CASE(AArch64ISD::LASTA)
1921 MAKE_CASE(AArch64ISD::LASTB)
1922 MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
1923 MAKE_CASE(AArch64ISD::TBL)
1924 MAKE_CASE(AArch64ISD::FADD_PRED)
1925 MAKE_CASE(AArch64ISD::FADDA_PRED)
1926 MAKE_CASE(AArch64ISD::FADDV_PRED)
1927 MAKE_CASE(AArch64ISD::FDIV_PRED)
1928 MAKE_CASE(AArch64ISD::FMA_PRED)
1929 MAKE_CASE(AArch64ISD::FMAX_PRED)
1930 MAKE_CASE(AArch64ISD::FMAXV_PRED)
1931 MAKE_CASE(AArch64ISD::FMAXNM_PRED)
1932 MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
1933 MAKE_CASE(AArch64ISD::FMIN_PRED)
1934 MAKE_CASE(AArch64ISD::FMINV_PRED)
1935 MAKE_CASE(AArch64ISD::FMINNM_PRED)
1936 MAKE_CASE(AArch64ISD::FMINNMV_PRED)
1937 MAKE_CASE(AArch64ISD::FMUL_PRED)
1938 MAKE_CASE(AArch64ISD::FSUB_PRED)
1939 MAKE_CASE(AArch64ISD::BIT)
1940 MAKE_CASE(AArch64ISD::CBZ)
1941 MAKE_CASE(AArch64ISD::CBNZ)
1942 MAKE_CASE(AArch64ISD::TBZ)
1943 MAKE_CASE(AArch64ISD::TBNZ)
1944 MAKE_CASE(AArch64ISD::TC_RETURN)
1945 MAKE_CASE(AArch64ISD::PREFETCH)
1946 MAKE_CASE(AArch64ISD::SITOF)
1947 MAKE_CASE(AArch64ISD::UITOF)
1948 MAKE_CASE(AArch64ISD::NVCAST)
1949 MAKE_CASE(AArch64ISD::MRS)
1950 MAKE_CASE(AArch64ISD::SQSHL_I)
1951 MAKE_CASE(AArch64ISD::UQSHL_I)
1952 MAKE_CASE(AArch64ISD::SRSHR_I)
1953 MAKE_CASE(AArch64ISD::URSHR_I)
1954 MAKE_CASE(AArch64ISD::SQSHLU_I)
1955 MAKE_CASE(AArch64ISD::WrapperLarge)
1956 MAKE_CASE(AArch64ISD::LD2post)
1957 MAKE_CASE(AArch64ISD::LD3post)
1958 MAKE_CASE(AArch64ISD::LD4post)
1959 MAKE_CASE(AArch64ISD::ST2post)
1960 MAKE_CASE(AArch64ISD::ST3post)
1961 MAKE_CASE(AArch64ISD::ST4post)
1962 MAKE_CASE(AArch64ISD::LD1x2post)
1963 MAKE_CASE(AArch64ISD::LD1x3post)
1964 MAKE_CASE(AArch64ISD::LD1x4post)
1965 MAKE_CASE(AArch64ISD::ST1x2post)
1966 MAKE_CASE(AArch64ISD::ST1x3post)
1967 MAKE_CASE(AArch64ISD::ST1x4post)
1968 MAKE_CASE(AArch64ISD::LD1DUPpost)
1969 MAKE_CASE(AArch64ISD::LD2DUPpost)
1970 MAKE_CASE(AArch64ISD::LD3DUPpost)
1971 MAKE_CASE(AArch64ISD::LD4DUPpost)
1972 MAKE_CASE(AArch64ISD::LD1LANEpost)
1973 MAKE_CASE(AArch64ISD::LD2LANEpost)
1974 MAKE_CASE(AArch64ISD::LD3LANEpost)
1975 MAKE_CASE(AArch64ISD::LD4LANEpost)
1976 MAKE_CASE(AArch64ISD::ST2LANEpost)
1977 MAKE_CASE(AArch64ISD::ST3LANEpost)
1978 MAKE_CASE(AArch64ISD::ST4LANEpost)
1979 MAKE_CASE(AArch64ISD::SMULL)
1980 MAKE_CASE(AArch64ISD::UMULL)
1981 MAKE_CASE(AArch64ISD::FRECPE)
1982 MAKE_CASE(AArch64ISD::FRECPS)
1983 MAKE_CASE(AArch64ISD::FRSQRTE)
1984 MAKE_CASE(AArch64ISD::FRSQRTS)
1985 MAKE_CASE(AArch64ISD::STG)
1986 MAKE_CASE(AArch64ISD::STZG)
1987 MAKE_CASE(AArch64ISD::ST2G)
1988 MAKE_CASE(AArch64ISD::STZ2G)
1989 MAKE_CASE(AArch64ISD::SUNPKHI)
1990 MAKE_CASE(AArch64ISD::SUNPKLO)
1991 MAKE_CASE(AArch64ISD::UUNPKHI)
1992 MAKE_CASE(AArch64ISD::UUNPKLO)
1993 MAKE_CASE(AArch64ISD::INSR)
1994 MAKE_CASE(AArch64ISD::PTEST)
1995 MAKE_CASE(AArch64ISD::PTRUE)
1996 MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
1997 MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
1998 MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
1999 MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
2000 MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
2001 MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
2002 MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
2003 MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
2004 MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
2005 MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
2006 MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
2007 MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
2008 MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
2009 MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
2010 MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
2011 MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
2012 MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
2013 MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
2014 MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
2015 MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
2016 MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
2017 MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
2018 MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
2019 MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
2020 MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
2021 MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
2022 MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
2023 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
2024 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
2025 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
2026 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
2027 MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
2028 MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
2029 MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
2030 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
2031 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
2032 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
2033 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
2034 MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
2035 MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
2036 MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
2037 MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
2038 MAKE_CASE(AArch64ISD::ST1_PRED)
2039 MAKE_CASE(AArch64ISD::SST1_PRED)
2040 MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
2041 MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
2042 MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
2043 MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
2044 MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
2045 MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
2046 MAKE_CASE(AArch64ISD::SSTNT1_PRED)
2047 MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
2048 MAKE_CASE(AArch64ISD::LDP)
2049 MAKE_CASE(AArch64ISD::STP)
2050 MAKE_CASE(AArch64ISD::STNP)
2051 MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
2052 MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
2053 MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
2054 MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
2055 MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
2056 MAKE_CASE(AArch64ISD::INDEX_VECTOR)
2057 MAKE_CASE(AArch64ISD::UABD)
2058 MAKE_CASE(AArch64ISD::SABD)
2059 MAKE_CASE(AArch64ISD::CALL_RVMARKER)
2060 }
2061#undef MAKE_CASE
2062 return nullptr;
2063}
2064
2065MachineBasicBlock *
2066AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
2067 MachineBasicBlock *MBB) const {
2068 // We materialise the F128CSEL pseudo-instruction as some control flow and a
2069 // phi node:
2070
2071 // OrigBB:
2072 // [... previous instrs leading to comparison ...]
2073 // b.ne TrueBB
2074 // b EndBB
2075 // TrueBB:
2076 // ; Fallthrough
2077 // EndBB:
2078 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
2079
2080 MachineFunction *MF = MBB->getParent();
2081 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2082 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2083 DebugLoc DL = MI.getDebugLoc();
2084 MachineFunction::iterator It = ++MBB->getIterator();
2085
2086 Register DestReg = MI.getOperand(0).getReg();
2087 Register IfTrueReg = MI.getOperand(1).getReg();
2088 Register IfFalseReg = MI.getOperand(2).getReg();
2089 unsigned CondCode = MI.getOperand(3).getImm();
2090 bool NZCVKilled = MI.getOperand(4).isKill();
2091
2092 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
2093 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
2094 MF->insert(It, TrueBB);
2095 MF->insert(It, EndBB);
2096
2097 // Transfer rest of current basic-block to EndBB
2098 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
2099 MBB->end());
2100 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
2101
2102 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
2103 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
2104 MBB->addSuccessor(TrueBB);
2105 MBB->addSuccessor(EndBB);
2106
2107 // TrueBB falls through to the end.
2108 TrueBB->addSuccessor(EndBB);
2109
2110 if (!NZCVKilled) {
2111 TrueBB->addLiveIn(AArch64::NZCV);
2112 EndBB->addLiveIn(AArch64::NZCV);
2113 }
2114
2115 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
2116 .addReg(IfTrueReg)
2117 .addMBB(TrueBB)
2118 .addReg(IfFalseReg)
2119 .addMBB(MBB);
2120
2121 MI.eraseFromParent();
2122 return EndBB;
2123}
2124
2125MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
2126 MachineInstr &MI, MachineBasicBlock *BB) const {
2127 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
2128            BB->getParent()->getFunction().getPersonalityFn())) &&
2129        "SEH does not use catchret!");
2130 return BB;
2131}
2132
2133MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
2134 MachineInstr &MI, MachineBasicBlock *BB) const {
2135 switch (MI.getOpcode()) {
2136 default:
2137#ifndef NDEBUG
2138 MI.dump();
2139#endif
2140 llvm_unreachable("Unexpected instruction for custom inserter!")::llvm::llvm_unreachable_internal("Unexpected instruction for custom inserter!"
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2140)
;
2141
2142 case AArch64::F128CSEL:
2143 return EmitF128CSEL(MI, BB);
2144
2145 case TargetOpcode::STACKMAP:
2146 case TargetOpcode::PATCHPOINT:
2147 case TargetOpcode::STATEPOINT:
2148 return emitPatchPoint(MI, BB);
2149
2150 case AArch64::CATCHRET:
2151 return EmitLoweredCatchRet(MI, BB);
2152 }
2153}
2154
2155//===----------------------------------------------------------------------===//
2156// AArch64 Lowering private implementation.
2157//===----------------------------------------------------------------------===//
2158
2159//===----------------------------------------------------------------------===//
2160// Lowering Code
2161//===----------------------------------------------------------------------===//
2162
2163/// isZerosVector - Check whether SDNode N is a zero-filled vector.
2164static bool isZerosVector(const SDNode *N) {
2165 // Look through a bit convert.
2166 while (N->getOpcode() == ISD::BITCAST)
2167 N = N->getOperand(0).getNode();
2168
2169 if (ISD::isConstantSplatVectorAllZeros(N))
2170 return true;
2171
2172 if (N->getOpcode() != AArch64ISD::DUP)
2173 return false;
2174
2175 auto Opnd0 = N->getOperand(0);
2176 auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
2177 auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
2178 return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero());
2179}
2180
2181/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
2182/// CC
2183static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
2184 switch (CC) {
2185 default:
2186 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2186)
;
2187 case ISD::SETNE:
2188 return AArch64CC::NE;
2189 case ISD::SETEQ:
2190 return AArch64CC::EQ;
2191 case ISD::SETGT:
2192 return AArch64CC::GT;
2193 case ISD::SETGE:
2194 return AArch64CC::GE;
2195 case ISD::SETLT:
2196 return AArch64CC::LT;
2197 case ISD::SETLE:
2198 return AArch64CC::LE;
2199 case ISD::SETUGT:
2200 return AArch64CC::HI;
2201 case ISD::SETUGE:
2202 return AArch64CC::HS;
2203 case ISD::SETULT:
2204 return AArch64CC::LO;
2205 case ISD::SETULE:
2206 return AArch64CC::LS;
2207 }
2208}
2209
2210/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
2211static void changeFPCCToAArch64CC(ISD::CondCode CC,
2212 AArch64CC::CondCode &CondCode,
2213 AArch64CC::CondCode &CondCode2) {
2214 CondCode2 = AArch64CC::AL;
2215 switch (CC) {
2216 default:
2217 llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2217)
;
2218 case ISD::SETEQ:
2219 case ISD::SETOEQ:
2220 CondCode = AArch64CC::EQ;
2221 break;
2222 case ISD::SETGT:
2223 case ISD::SETOGT:
2224 CondCode = AArch64CC::GT;
2225 break;
2226 case ISD::SETGE:
2227 case ISD::SETOGE:
2228 CondCode = AArch64CC::GE;
2229 break;
2230 case ISD::SETOLT:
2231 CondCode = AArch64CC::MI;
2232 break;
2233 case ISD::SETOLE:
2234 CondCode = AArch64CC::LS;
2235 break;
2236 case ISD::SETONE:
2237 CondCode = AArch64CC::MI;
2238 CondCode2 = AArch64CC::GT;
2239 break;
2240 case ISD::SETO:
2241 CondCode = AArch64CC::VC;
2242 break;
2243 case ISD::SETUO:
2244 CondCode = AArch64CC::VS;
2245 break;
2246 case ISD::SETUEQ:
2247 CondCode = AArch64CC::EQ;
2248 CondCode2 = AArch64CC::VS;
2249 break;
2250 case ISD::SETUGT:
2251 CondCode = AArch64CC::HI;
2252 break;
2253 case ISD::SETUGE:
2254 CondCode = AArch64CC::PL;
2255 break;
2256 case ISD::SETLT:
2257 case ISD::SETULT:
2258 CondCode = AArch64CC::LT;
2259 break;
2260 case ISD::SETLE:
2261 case ISD::SETULE:
2262 CondCode = AArch64CC::LE;
2263 break;
2264 case ISD::SETNE:
2265 case ISD::SETUNE:
2266 CondCode = AArch64CC::NE;
2267 break;
2268 }
2269}
2270
2271/// Convert a DAG fp condition code to an AArch64 CC.
2272/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
2273/// should be AND'ed instead of OR'ed.
2274static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
2275 AArch64CC::CondCode &CondCode,
2276 AArch64CC::CondCode &CondCode2) {
2277 CondCode2 = AArch64CC::AL;
2278 switch (CC) {
2279 default:
2280 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2281   assert(CondCode2 == AArch64CC::AL);
2282 break;
2283 case ISD::SETONE:
2284 // (a one b)
2285 // == ((a olt b) || (a ogt b))
2286 // == ((a ord b) && (a une b))
2287 CondCode = AArch64CC::VC;
2288 CondCode2 = AArch64CC::NE;
2289 break;
2290 case ISD::SETUEQ:
2291 // (a ueq b)
2292 // == ((a uno b) || (a oeq b))
2293 // == ((a ule b) && (a uge b))
2294 CondCode = AArch64CC::PL;
2295 CondCode2 = AArch64CC::LE;
2296 break;
2297 }
2298}
2299
2300/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
2301/// CC usable with the vector instructions. Fewer operations are available
2302/// without a real NZCV register, so we have to use less efficient combinations
2303/// to get the same effect.
2304static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
2305 AArch64CC::CondCode &CondCode,
2306 AArch64CC::CondCode &CondCode2,
2307 bool &Invert) {
2308 Invert = false;
2309 switch (CC) {
2310 default:
2311 // Mostly the scalar mappings work fine.
2312 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2313 break;
2314 case ISD::SETUO:
2315 Invert = true;
2316   LLVM_FALLTHROUGH;
2317 case ISD::SETO:
2318 CondCode = AArch64CC::MI;
2319 CondCode2 = AArch64CC::GE;
2320 break;
2321 case ISD::SETUEQ:
2322 case ISD::SETULT:
2323 case ISD::SETULE:
2324 case ISD::SETUGT:
2325 case ISD::SETUGE:
2326 // All of the compare-mask comparisons are ordered, but we can switch
2327 // between the two by a double inversion. E.g. ULE == !OGT.
2328 Invert = true;
2329 changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
2330 CondCode, CondCode2);
2331 break;
2332 }
2333}
2334
2335static bool isLegalArithImmed(uint64_t C) {
2336 // Matches AArch64DAGToDAGISel::SelectArithImmed().
2337 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
2338 LLVM_DEBUG(dbgs() << "Is imm " << C
2339                   << " legal: " << (IsLegal ? "yes\n" : "no\n"));
2340 return IsLegal;
2341}
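// A few concrete data points for the predicate above, restated as a
// standalone constexpr sketch (the helper name is hypothetical; the
// expression itself is copied from isLegalArithImmed):
#include <cstdint>

static constexpr bool isLegalArithImmedSketch(uint64_t C) {
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}
static_assert(isLegalArithImmedSketch(0xFFF), "12-bit immediate");
static_assert(isLegalArithImmedSketch(0x1000), "12-bit immediate shifted by 12");
static_assert(!isLegalArithImmedSketch(0x1001), "needs both halves, not encodable");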
2342
2343 // Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
2344// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
2345// can be set differently by this operation. It comes down to whether
2346// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
2347// everything is fine. If not then the optimization is wrong. Thus general
2348// comparisons are only valid if op2 != 0.
2349//
2350// So, finally, the only LLVM-native comparisons that don't mention C and V
2351// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
2352// the absence of information about op2.
2353static bool isCMN(SDValue Op, ISD::CondCode CC) {
2354 return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
2355 (CC == ISD::SETEQ || CC == ISD::SETNE);
2356}
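// A tiny worked example (standalone sketch; helper names are hypothetical) of
// why the rewrite is restricted to SETEQ/SETNE: when op2 == 0, SUBS (op1 - 0)
// and ADDS (op1 + 0) produce different carry flags, so conditions that read C
// (or V) cannot safely use CMN.
#include <cassert>
#include <cstdint>

static bool carryOut8(uint8_t X, uint8_t Y, bool CarryIn) {
  return unsigned(X) + unsigned(Y) + unsigned(CarryIn) > 0xFFu;
}

static void cmnCarryExample() {
  uint8_t Op1 = 5, Op2 = 0;
  // SUBS computes Op1 + ~Op2 + 1, so comparing against -Op2 == 0 gives C = 1.
  bool CFromSubs = carryOut8(Op1, uint8_t(~Op2), /*CarryIn=*/true);
  // CMN (ADDS) computes Op1 + Op2, which gives C = 0 here.
  bool CFromAdds = carryOut8(Op1, Op2, /*CarryIn=*/false);
  assert(CFromSubs && !CFromAdds);
}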
2357
2358static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
2359 SelectionDAG &DAG, SDValue Chain,
2360 bool IsSignaling) {
2361 EVT VT = LHS.getValueType();
2362 assert(VT != MVT::f128);
2363 assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
2364 unsigned Opcode =
2365 IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
2366 return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
2367}
2368
2369static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2370 const SDLoc &dl, SelectionDAG &DAG) {
2371 EVT VT = LHS.getValueType();
2372 const bool FullFP16 =
2373 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2374
2375 if (VT.isFloatingPoint()) {
2376   assert(VT != MVT::f128);
2377 if (VT == MVT::f16 && !FullFP16) {
2378 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
2379 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
2380 VT = MVT::f32;
2381 }
2382 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
2383 }
2384
2385 // The CMP instruction is just an alias for SUBS, and representing it as
2386 // SUBS means that it's possible to get CSE with subtract operations.
2387 // A later phase can perform the optimization of setting the destination
2388 // register to WZR/XZR if it ends up being unused.
2389 unsigned Opcode = AArch64ISD::SUBS;
2390
2391 if (isCMN(RHS, CC)) {
2392 // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
2393 Opcode = AArch64ISD::ADDS;
2394 RHS = RHS.getOperand(1);
2395 } else if (isCMN(LHS, CC)) {
2396 // As we are looking for EQ/NE compares, the operands can be commuted; can
2397 // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
2398 Opcode = AArch64ISD::ADDS;
2399 LHS = LHS.getOperand(1);
2400 } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
2401 if (LHS.getOpcode() == ISD::AND) {
2402 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
2403 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
2404 // of the signed comparisons.
2405 const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
2406 DAG.getVTList(VT, MVT_CC),
2407 LHS.getOperand(0),
2408 LHS.getOperand(1));
2409 // Replace all users of (and X, Y) with newly generated (ands X, Y)
2410 DAG.ReplaceAllUsesWith(LHS, ANDSNode);
2411 return ANDSNode.getValue(1);
2412 } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
2413 // Use result of ANDS
2414 return LHS.getValue(1);
2415 }
2416 }
2417
2418 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
2419 .getValue(1);
2420}
2421
2422/// \defgroup AArch64CCMP CMP;CCMP matching
2423///
2424/// These functions deal with the formation of CMP;CCMP;... sequences.
2425/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
2426/// a comparison. They set the NZCV flags to a predefined value if their
2427 /// predicate is false. This allows us to express arbitrary conjunctions, for
2428/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
2429/// expressed as:
2430/// cmp A
2431/// ccmp B, inv(CB), CA
2432/// check for CB flags
2433///
2434/// This naturally lets us implement chains of AND operations with SETCC
2435/// operands. And we can even implement some other situations by transforming
2436/// them:
2437/// - We can implement (NEG SETCC) i.e. negating a single comparison by
2438/// negating the flags used in a CCMP/FCCMP operations.
2439/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
2440/// by negating the flags we test for afterwards. i.e.
2441/// NEG (CMP CCMP CCCMP ...) can be implemented.
2442/// - Note that we can only ever negate all previously processed results.
2443/// What we can not implement by flipping the flags to test is a negation
2444/// of two sub-trees (because the negation affects all sub-trees emitted so
2445/// far, so the 2nd sub-tree we emit would also affect the first).
2446/// With those tools we can implement some OR operations:
2447/// - (OR (SETCC A) (SETCC B)) can be implemented via:
2448/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
2449/// - After transforming OR to NEG/AND combinations we may be able to use NEG
2450/// elimination rules from earlier to implement the whole thing as a
2451/// CCMP/FCCMP chain.
2452///
2453/// As complete example:
2454/// or (or (setCA (cmp A)) (setCB (cmp B)))
2455/// (and (setCC (cmp C)) (setCD (cmp D)))"
2456/// can be reassociated to:
2457 /// or (and (setCC (cmp C)) (setCD (cmp D)))
2458 ///    (or (setCA (cmp A)) (setCB (cmp B)))
2459/// can be transformed to:
2460/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
2461/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
2462/// which can be implemented as:
2463/// cmp C
2464/// ccmp D, inv(CD), CC
2465/// ccmp A, CA, inv(CD)
2466/// ccmp B, CB, inv(CA)
2467/// check for CB flags
2468///
2469 /// A counterexample is "or (and A B) (and C D)", which translates to
2470 /// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))); we
2471 /// can only implement one of the inner (not) operations, but not both!
2472/// @{
2473
2474 /// Create a conditional comparison; use CCMP, CCMN or FCCMP as appropriate.
2475static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
2476 ISD::CondCode CC, SDValue CCOp,
2477 AArch64CC::CondCode Predicate,
2478 AArch64CC::CondCode OutCC,
2479 const SDLoc &DL, SelectionDAG &DAG) {
2480 unsigned Opcode = 0;
2481 const bool FullFP16 =
2482 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2483
2484 if (LHS.getValueType().isFloatingPoint()) {
2485 assert(LHS.getValueType() != MVT::f128);
2486 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
2487 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
2488 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
2489 }
2490 Opcode = AArch64ISD::FCCMP;
2491 } else if (RHS.getOpcode() == ISD::SUB) {
2492 SDValue SubOp0 = RHS.getOperand(0);
2493 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2494 // See emitComparison() on why we can only do this for SETEQ and SETNE.
2495 Opcode = AArch64ISD::CCMN;
2496 RHS = RHS.getOperand(1);
2497 }
2498 }
2499 if (Opcode == 0)
2500 Opcode = AArch64ISD::CCMP;
2501
2502 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
2503 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
2504 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
2505 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
2506 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
2507}
2508
2509/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
2510/// expressed as a conjunction. See \ref AArch64CCMP.
2511/// \param CanNegate Set to true if we can negate the whole sub-tree just by
2512/// changing the conditions on the SETCC tests.
2513/// (this means we can call emitConjunctionRec() with
2514/// Negate==true on this sub-tree)
2515/// \param MustBeFirst Set to true if this subtree needs to be negated and we
2516/// cannot do the negation naturally. We are required to
2517/// emit the subtree first in this case.
2518 /// \param WillNegate Is true if we are called when the result of this
2519/// subexpression must be negated. This happens when the
2520/// outer expression is an OR. We can use this fact to know
2521/// that we have a double negation (or (or ...) ...) that
2522/// can be implemented for free.
2523static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
2524 bool &MustBeFirst, bool WillNegate,
2525 unsigned Depth = 0) {
2526 if (!Val.hasOneUse())
2527 return false;
2528 unsigned Opcode = Val->getOpcode();
2529 if (Opcode == ISD::SETCC) {
2530 if (Val->getOperand(0).getValueType() == MVT::f128)
2531 return false;
2532 CanNegate = true;
2533 MustBeFirst = false;
2534 return true;
2535 }
2536 // Protect against exponential runtime and stack overflow.
2537 if (Depth > 6)
2538 return false;
2539 if (Opcode == ISD::AND || Opcode == ISD::OR) {
2540 bool IsOR = Opcode == ISD::OR;
2541 SDValue O0 = Val->getOperand(0);
2542 SDValue O1 = Val->getOperand(1);
2543 bool CanNegateL;
2544 bool MustBeFirstL;
2545 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
2546 return false;
2547 bool CanNegateR;
2548 bool MustBeFirstR;
2549 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
2550 return false;
2551
2552 if (MustBeFirstL && MustBeFirstR)
2553 return false;
2554
2555 if (IsOR) {
2556 // For an OR expression we need to be able to naturally negate at least
2557 // one side or we cannot do the transformation at all.
2558 if (!CanNegateL && !CanNegateR)
2559 return false;
2560 // If the result of the OR will be negated and we can naturally negate
2561 // the leaves, then this sub-tree as a whole negates naturally.
2562 CanNegate = WillNegate && CanNegateL && CanNegateR;
2563 // If we cannot naturally negate the whole sub-tree, then this must be
2564 // emitted first.
2565 MustBeFirst = !CanNegate;
2566 } else {
2567 assert(Opcode == ISD::AND && "Must be OR or AND");
2568 // We cannot naturally negate an AND operation.
2569 CanNegate = false;
2570 MustBeFirst = MustBeFirstL || MustBeFirstR;
2571 }
2572 return true;
2573 }
2574 return false;
2575}
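
// [Editor's note] Worked example for the predicate above (editorial, hedged):
// for Val = (or (setcc A) (setcc B)) both leaves report CanNegate = true, so
// when the caller will negate the result anyway (WillNegate = true, i.e. the
// parent is an OR) the whole node stays naturally negatable; otherwise
// CanNegate becomes false and MustBeFirst forces this OR to be emitted first.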
2576
2577/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
2578 /// of CCMP/FCCMP ops. See @ref AArch64CCMP.
2579 /// Tries to transform the given i1-producing node @p Val into a series of
2580 /// compare and conditional compare operations. @returns an NZCV-flags-producing
2581 /// node and sets @p OutCC to the flags that should be tested, or returns
2582 /// SDValue() if the transformation was not possible.
2583 /// \p Negate is true if we want this sub-tree to be negated just by changing
2584 /// SETCC conditions.
2585static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
2586 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
2587 AArch64CC::CondCode Predicate) {
2588 // We're at a tree leaf, produce a conditional comparison operation.
2589 unsigned Opcode = Val->getOpcode();
2590 if (Opcode == ISD::SETCC) {
2591 SDValue LHS = Val->getOperand(0);
2592 SDValue RHS = Val->getOperand(1);
2593 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
2594 bool isInteger = LHS.getValueType().isInteger();
2595 if (Negate)
2596 CC = getSetCCInverse(CC, LHS.getValueType());
2597 SDLoc DL(Val);
2598 // Determine OutCC and handle FP special case.
2599 if (isInteger) {
2600 OutCC = changeIntCCToAArch64CC(CC);
2601 } else {
2602 assert(LHS.getValueType().isFloatingPoint());
2603 AArch64CC::CondCode ExtraCC;
2604 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
2605 // Some floating point conditions can't be tested with a single condition
2606 // code. Construct an additional comparison in this case.
2607 if (ExtraCC != AArch64CC::AL) {
2608 SDValue ExtraCmp;
2609 if (!CCOp.getNode())
2610 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
2611 else
2612 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
2613 ExtraCC, DL, DAG);
2614 CCOp = ExtraCmp;
2615 Predicate = ExtraCC;
2616 }
2617 }
2618
2619 // Produce a normal comparison if we are first in the chain
2620 if (!CCOp)
2621 return emitComparison(LHS, RHS, CC, DL, DAG);
2622 // Otherwise produce a ccmp.
2623 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
2624 DAG);
2625 }
2626 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
2627
2628 bool IsOR = Opcode == ISD::OR;
2629
2630 SDValue LHS = Val->getOperand(0);
2631 bool CanNegateL;
2632 bool MustBeFirstL;
2633 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
2634 assert(ValidL && "Valid conjunction/disjunction tree");
2635 (void)ValidL;
2636
2637 SDValue RHS = Val->getOperand(1);
2638 bool CanNegateR;
2639 bool MustBeFirstR;
2640 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
2641 assert(ValidR && "Valid conjunction/disjunction tree");
2642 (void)ValidR;
2643
2644 // Swap sub-tree that must come first to the right side.
2645 if (MustBeFirstL) {
2646 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
2647 std::swap(LHS, RHS);
2648 std::swap(CanNegateL, CanNegateR);
2649 std::swap(MustBeFirstL, MustBeFirstR);
2650 }
2651
2652 bool NegateR;
2653 bool NegateAfterR;
2654 bool NegateL;
2655 bool NegateAfterAll;
2656 if (Opcode == ISD::OR) {
2657 // Swap the sub-tree that we can negate naturally to the left.
2658 if (!CanNegateL) {
2659 assert(CanNegateR && "at least one side must be negatable");
2660 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
2661 assert(!Negate);
2662 std::swap(LHS, RHS);
2663 NegateR = false;
2664 NegateAfterR = true;
2665 } else {
2666 // Negate the left sub-tree if possible, otherwise negate the result.
2667 NegateR = CanNegateR;
2668 NegateAfterR = !CanNegateR;
2669 }
2670 NegateL = true;
2671 NegateAfterAll = !Negate;
2672 } else {
2673 assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
2674 assert(!Negate && "Valid conjunction/disjunction tree");
2675
2676 NegateL = false;
2677 NegateR = false;
2678 NegateAfterR = false;
2679 NegateAfterAll = false;
2680 }
2681
2682 // Emit sub-trees.
2683 AArch64CC::CondCode RHSCC;
2684 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
2685 if (NegateAfterR)
2686 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
2687 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
2688 if (NegateAfterAll)
2689 OutCC = AArch64CC::getInvertedCondCode(OutCC);
2690 return CmpL;
2691}
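
// [Editor's note] A hedged end-to-end example of the recursion above, not part
// of the original source: "(a == 0) && (b == 1) && (c == 2)" is intended to
// become a single CMP followed by a CCMP chain, roughly:
//   cmp  w0, #0
//   ccmp w1, #1, #0, eq      ; if not eq so far, force NZCV = 0 (eq fails)
//   ccmp w2, #2, #0, eq
//   cset w3, eq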
2692
2693 /// Emit expression as a conjunction (a series of CCMP/FCCMP ops).
2694/// In some cases this is even possible with OR operations in the expression.
2695/// See \ref AArch64CCMP.
2696/// \see emitConjunctionRec().
2697static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
2698 AArch64CC::CondCode &OutCC) {
2699 bool DummyCanNegate;
2700 bool DummyMustBeFirst;
2701 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
2702 return SDValue();
2703
2704 return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
2705}
2706
2707/// @}
2708
2709/// Returns how profitable it is to fold a comparison's operand's shift and/or
2710/// extension operations.
2711static unsigned getCmpOperandFoldingProfit(SDValue Op) {
2712 auto isSupportedExtend = [&](SDValue V) {
2713 if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
2714 return true;
2715
2716 if (V.getOpcode() == ISD::AND)
2717 if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
2718 uint64_t Mask = MaskCst->getZExtValue();
2719 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
2720 }
2721
2722 return false;
2723 };
2724
2725 if (!Op.hasOneUse())
2726 return 0;
2727
2728 if (isSupportedExtend(Op))
2729 return 1;
2730
2731 unsigned Opc = Op.getOpcode();
2732 if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
2733 if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2734 uint64_t Shift = ShiftCst->getZExtValue();
2735 if (isSupportedExtend(Op.getOperand(0)))
2736 return (Shift <= 4) ? 2 : 1;
2737 EVT VT = Op.getValueType();
2738 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
2739 return 1;
2740 }
2741
2742 return 0;
2743}
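
// [Editor's note] Editorial examples of the scoring above (hedged): a supported
// extend such as (and x, 0xFF) or (sign_extend_inreg x, i8) scores 1; an extend
// followed by a small left shift, e.g. (shl (and x, 0xFF), 2), scores 2 because
// both can fold into the compare operand (cmp w1, w0, uxtb #2); a plain
// in-range constant shift scores 1; multi-use operands and anything else score 0.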
2744
2745static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2746 SDValue &AArch64cc, SelectionDAG &DAG,
2747 const SDLoc &dl) {
2748 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2749 EVT VT = RHS.getValueType();
2750 uint64_t C = RHSC->getZExtValue();
2751 if (!isLegalArithImmed(C)) {
2752 // Constant does not fit, try adjusting it by one?
2753 switch (CC) {
2754 default:
2755 break;
2756 case ISD::SETLT:
2757 case ISD::SETGE:
2758 if ((VT == MVT::i32 && C != 0x80000000 &&
2759 isLegalArithImmed((uint32_t)(C - 1))) ||
2760 (VT == MVT::i64 && C != 0x80000000ULL &&
2761 isLegalArithImmed(C - 1ULL))) {
2762 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2763 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2764 RHS = DAG.getConstant(C, dl, VT);
2765 }
2766 break;
2767 case ISD::SETULT:
2768 case ISD::SETUGE:
2769 if ((VT == MVT::i32 && C != 0 &&
2770 isLegalArithImmed((uint32_t)(C - 1))) ||
2771 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
2772 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2773 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2774 RHS = DAG.getConstant(C, dl, VT);
2775 }
2776 break;
2777 case ISD::SETLE:
2778 case ISD::SETGT:
2779 if ((VT == MVT::i32 && C != INT32_MAX &&
2780 isLegalArithImmed((uint32_t)(C + 1))) ||
2781 (VT == MVT::i64 && C != INT64_MAX &&
2782 isLegalArithImmed(C + 1ULL))) {
2783 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2784 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2785 RHS = DAG.getConstant(C, dl, VT);
2786 }
2787 break;
2788 case ISD::SETULE:
2789 case ISD::SETUGT:
2790 if ((VT == MVT::i32 && C != UINT32_MAX &&
2791 isLegalArithImmed((uint32_t)(C + 1))) ||
2792 (VT == MVT::i64 && C != UINT64_MAX &&
2793 isLegalArithImmed(C + 1ULL))) {
2794 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2795 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2796 RHS = DAG.getConstant(C, dl, VT);
2797 }
2798 break;
2799 }
2800 }
2801 }
2802
2803 // Comparisons are canonicalized so that the RHS operand is simpler than the
2804 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
2805 // can fold some shift+extend operations on the RHS operand, so swap the
2806 // operands if that can be done.
2807 //
2808 // For example:
2809 // lsl w13, w11, #1
2810 // cmp w13, w12
2811 // can be turned into:
2812 // cmp w12, w11, lsl #1
2813 if (!isa<ConstantSDNode>(RHS) ||
2814 !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
2815 SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
2816
2817 if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
2818 std::swap(LHS, RHS);
2819 CC = ISD::getSetCCSwappedOperands(CC);
2820 }
2821 }
2822
2823 SDValue Cmp;
2824 AArch64CC::CondCode AArch64CC;
2825 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
2826 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
2827
2828 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
2829 // For the i8 operand, the largest immediate is 255, so this can be easily
2830 // encoded in the compare instruction. For the i16 operand, however, the
2831 // largest immediate cannot be encoded in the compare.
2832 // Therefore, use a sign extending load and cmn to avoid materializing the
2833 // -1 constant. For example,
2834 // movz w1, #65535
2835 // ldrh w0, [x0, #0]
2836 // cmp w0, w1
2837 // >
2838 // ldrsh w0, [x0, #0]
2839 // cmn w0, #1
2840 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
2841 // if and only if (sext LHS) == (sext RHS). The checks are in place to
2842 // ensure both the LHS and RHS are truly zero extended and to make sure the
2843 // transformation is profitable.
2844 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
2845 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
2846 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
2847 LHS.getNode()->hasNUsesOfValue(1, 0)) {
2848 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
2849 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
2850 SDValue SExt =
2851 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
2852 DAG.getValueType(MVT::i16));
2853 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
2854 RHS.getValueType()),
2855 CC, dl, DAG);
2856 AArch64CC = changeIntCCToAArch64CC(CC);
2857 }
2858 }
2859
2860 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
2861 if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
2862 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
2863 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
2864 }
2865 }
2866 }
2867
2868 if (!Cmp) {
2869 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
2870 AArch64CC = changeIntCCToAArch64CC(CC);
2871 }
2872 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
2873 return Cmp;
2874}
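
// [Editor's note] A minimal standalone sketch of the immediate-adjustment idea
// above, assuming the usual AArch64 arithmetic-immediate rule (12 bits,
// optionally shifted left by 12). The helper name is hypothetical and the code
// is illustrative only, not part of this file.
static constexpr bool sketchIsLegalArithImmed(unsigned long long C) {
  return (C >> 12) == 0 || ((C & 0xFFFULL) == 0 && (C >> 24) == 0);
}
// "x < 4097" has an unencodable RHS, but the equivalent "x <= 4096" does not.
static_assert(!sketchIsLegalArithImmed(4097), "needs the C - 1 adjustment");
static_assert(sketchIsLegalArithImmed(4096), "imm12 shifted by 12 is encodable");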
2875
2876static std::pair<SDValue, SDValue>
2877getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
2878 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
2879 "Unsupported value type");
2880 SDValue Value, Overflow;
2881 SDLoc DL(Op);
2882 SDValue LHS = Op.getOperand(0);
2883 SDValue RHS = Op.getOperand(1);
2884 unsigned Opc = 0;
2885 switch (Op.getOpcode()) {
2886 default:
2887 llvm_unreachable("Unknown overflow instruction!");
2888 case ISD::SADDO:
2889 Opc = AArch64ISD::ADDS;
2890 CC = AArch64CC::VS;
2891 break;
2892 case ISD::UADDO:
2893 Opc = AArch64ISD::ADDS;
2894 CC = AArch64CC::HS;
2895 break;
2896 case ISD::SSUBO:
2897 Opc = AArch64ISD::SUBS;
2898 CC = AArch64CC::VS;
2899 break;
2900 case ISD::USUBO:
2901 Opc = AArch64ISD::SUBS;
2902 CC = AArch64CC::LO;
2903 break;
2904 // Multiply needs a little bit of extra work.
2905 case ISD::SMULO:
2906 case ISD::UMULO: {
2907 CC = AArch64CC::NE;
2908 bool IsSigned = Op.getOpcode() == ISD::SMULO;
2909 if (Op.getValueType() == MVT::i32) {
2910 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2911 // For a 32 bit multiply with overflow check we want the instruction
2912 // selector to generate a widening multiply (SMADDL/UMADDL). For that we
2913 // need to generate the following pattern:
2914 // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
2915 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
2916 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
2917 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2918 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
2919 DAG.getConstant(0, DL, MVT::i64));
2920 // On AArch64 the upper 32 bits are always zero extended for a 32 bit
2921 // operation. We need to clear out the upper 32 bits, because we used a
2922 // widening multiply that wrote all 64 bits. In the end this should be a
2923 // noop.
2924 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
2925 if (IsSigned) {
2926 // The signed overflow check requires more than just a simple check for
2927 // any bit set in the upper 32 bits of the result. These bits could be
2928 // just the sign bits of a negative number. To perform the overflow
2929 // check we have to arithmetically shift the lower 32 bits of the result
2930 // right by 31 bits and then compare that to the upper 32 bits.
2931 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
2932 DAG.getConstant(32, DL, MVT::i64));
2933 UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
2934 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
2935 DAG.getConstant(31, DL, MVT::i64));
2936 // It is important that LowerBits is last, otherwise the arithmetic
2937 // shift will not be folded into the compare (SUBS).
2938 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
2939 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2940 .getValue(1);
2941 } else {
2942 // The overflow check for unsigned multiply is easy. We only need to
2943 // check if any of the upper 32 bits are set. This can be done with a
2944 // CMP (shifted register). For that we need to generate the following
2945 // pattern:
2946 // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
2947 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2948 DAG.getConstant(32, DL, MVT::i64));
2949 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2950 Overflow =
2951 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2952 DAG.getConstant(0, DL, MVT::i64),
2953 UpperBits).getValue(1);
2954 }
2955 break;
2956 }
2957 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
2958 // For the 64 bit multiply
2959 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2960 if (IsSigned) {
2961 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
2962 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
2963 DAG.getConstant(63, DL, MVT::i64));
2964 // It is important that LowerBits is last, otherwise the arithmetic
2965 // shift will not be folded into the compare (SUBS).
2966 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2967 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2968 .getValue(1);
2969 } else {
2970 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
2971 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2972 Overflow =
2973 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2974 DAG.getConstant(0, DL, MVT::i64),
2975 UpperBits).getValue(1);
2976 }
2977 break;
2978 }
2979 } // switch (...)
2980
2981 if (Opc) {
2982 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
2983
2984 // Emit the AArch64 operation with overflow check.
2985 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
2986 Overflow = Value.getValue(1);
2987 }
2988 return std::make_pair(Value, Overflow);
2989}
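
// [Editor's note] A standalone, hedged illustration of the unsigned 32-bit
// overflow rule used above (hypothetical helper, not part of this file): the
// product overflows i32 exactly when the upper half of the widened 64-bit
// product is non-zero, which is what the SUBS against (srl Mul, 32) checks.
static constexpr bool sketchUMulOverflow32(unsigned A, unsigned B) {
  unsigned long long Wide = (unsigned long long)A * B; // widening multiply
  return (Wide >> 32) != 0;                            // any upper bit set?
}
static_assert(!sketchUMulOverflow32(65536u, 65535u), "0xFFFF0000 still fits");
static_assert(sketchUMulOverflow32(65536u, 65536u), "2^32 does not fit");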
2990
2991SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
2992 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
2993 return LowerToScalableOp(Op, DAG);
2994
2995 SDValue Sel = Op.getOperand(0);
2996 SDValue Other = Op.getOperand(1);
2997 SDLoc dl(Sel);
2998
2999 // If the operand is an overflow checking operation, invert the condition
3000 // code and kill the Not operation. I.e., transform:
3001 // (xor (overflow_op_bool, 1))
3002 // -->
3003 // (csel 1, 0, invert(cc), overflow_op_bool)
3004 // ... which later gets transformed to just a cset instruction with an
3005 // inverted condition code, rather than a cset + eor sequence.
3006 if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
3007 // Only lower legal XALUO ops.
3008 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
3009 return SDValue();
3010
3011 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3012 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3013 AArch64CC::CondCode CC;
3014 SDValue Value, Overflow;
3015 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
3016 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3017 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
3018 CCVal, Overflow);
3019 }
3020 // If neither operand is a SELECT_CC, give up.
3021 if (Sel.getOpcode() != ISD::SELECT_CC)
3022 std::swap(Sel, Other);
3023 if (Sel.getOpcode() != ISD::SELECT_CC)
3024 return Op;
3025
3026 // The folding we want to perform is:
3027 // (xor x, (select_cc a, b, cc, 0, -1) )
3028 // -->
3029 // (csel x, (xor x, -1), cc ...)
3030 //
3031 // The latter will get matched to a CSINV instruction.
3032
3033 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
3034 SDValue LHS = Sel.getOperand(0);
3035 SDValue RHS = Sel.getOperand(1);
3036 SDValue TVal = Sel.getOperand(2);
3037 SDValue FVal = Sel.getOperand(3);
3038
3039 // FIXME: This could be generalized to non-integer comparisons.
3040 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
3041 return Op;
3042
3043 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
3044 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
3045
3046 // The values aren't constants, this isn't the pattern we're looking for.
3047 if (!CFVal || !CTVal)
3048 return Op;
3049
3050 // We can commute the SELECT_CC by inverting the condition. This
3051 // might be needed to make this fit into a CSINV pattern.
3052 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
3053 std::swap(TVal, FVal);
3054 std::swap(CTVal, CFVal);
3055 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
3056 }
3057
3058 // If the constants line up, perform the transform!
3059 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
3060 SDValue CCVal;
3061 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
3062
3063 FVal = Other;
3064 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
3065 DAG.getConstant(-1ULL, dl, Other.getValueType()));
3066
3067 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
3068 CCVal, Cmp);
3069 }
3070
3071 return Op;
3072}
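
// [Editor's note] A hedged illustration of the CSINV fold above, not part of
// the original source: for "r = x ^ ((a < b) ? 0 : -1)" the intended selection
// is roughly:
//   cmp   w0, w1
//   csinv w2, w2, w2, lt     ; r = (a < b) ? x : ~x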
3073
3074static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
3075 EVT VT = Op.getValueType();
3076
3077 // Let legalize expand this if it isn't a legal type yet.
3078 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3079 return SDValue();
3080
3081 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3082
3083 unsigned Opc;
3084 bool ExtraOp = false;
3085 switch (Op.getOpcode()) {
3086 default:
3087 llvm_unreachable("Invalid code");
3088 case ISD::ADDC:
3089 Opc = AArch64ISD::ADDS;
3090 break;
3091 case ISD::SUBC:
3092 Opc = AArch64ISD::SUBS;
3093 break;
3094 case ISD::ADDE:
3095 Opc = AArch64ISD::ADCS;
3096 ExtraOp = true;
3097 break;
3098 case ISD::SUBE:
3099 Opc = AArch64ISD::SBCS;
3100 ExtraOp = true;
3101 break;
3102 }
3103
3104 if (!ExtraOp)
3105 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
3106 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
3107 Op.getOperand(2));
3108}
3109
3110static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
3111 // Let legalize expand this if it isn't a legal type yet.
3112 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3113 return SDValue();
3114
3115 SDLoc dl(Op);
3116 AArch64CC::CondCode CC;
3117 // The actual operation that sets the overflow or carry flag.
3118 SDValue Value, Overflow;
3119 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
3120
3121 // We use 0 and 1 as false and true values.
3122 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3123 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3124
3125 // We use an inverted condition, because the conditional select is inverted
3126 // too. This will allow it to be selected to a single instruction:
3127 // CSINC Wd, WZR, WZR, invert(cond).
3128 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3129 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
3130 CCVal, Overflow);
3131
3132 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3133 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3134}
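
// [Editor's note] Editorial example (hedged) of the CSEL/CSINC shape produced
// above: for llvm.sadd.with.overflow.i32 the expected selection is roughly:
//   adds w0, w0, w1          ; value plus NZCV
//   cset w1, vs              ; overflow bit, via CSINC with the inverted cond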
3135
3136// Prefetch operands are:
3137// 1: Address to prefetch
3138// 2: bool isWrite
3139// 3: int locality (0 = no locality ... 3 = extreme locality)
3140// 4: bool isDataCache
3141static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
3142 SDLoc DL(Op);
3143 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3144 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
3145 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3146
3147 bool IsStream = !Locality;
3148 // When the locality number is set
3149 if (Locality) {
3150 // The front-end should have filtered out the out-of-range values
3151 assert(Locality <= 3 && "Prefetch locality out-of-range");
3152 // A higher locality degree means a faster (lower-numbered) cache level,
3153 // so flip the number around.
3154 // The encoding starts at 0 for level 1 (L1).
3155 Locality = 3 - Locality;
3156 }
3157
3158 // Build the mask value encoding the expected behavior.
3159 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
3160 (!IsData << 3) | // IsDataCache bit
3161 (Locality << 1) | // Cache level bits
3162 (unsigned)IsStream; // Stream bit
3163 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
3164 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
3165}
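
// [Editor's note] A standalone sketch of the PRFM immediate packing above,
// assuming the bit layout described in the comments (write/instr/level/stream).
// The helper name is hypothetical and the block is illustrative only.
static constexpr unsigned sketchPrfOp(bool IsWrite, bool IsData,
                                      unsigned Locality) {
  return (IsWrite << 4) | (!IsData << 3) |
         ((Locality ? 3 - Locality : 0) << 1) | (Locality ? 0u : 1u);
}
static_assert(sketchPrfOp(false, true, 3) == 0, "PLDL1KEEP");
static_assert(sketchPrfOp(false, true, 0) == 1, "PLDL1STRM");
static_assert(sketchPrfOp(true, true, 3) == 16, "PSTL1KEEP");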
3166
3167SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
3168 SelectionDAG &DAG) const {
3169 if (Op.getValueType().isScalableVector())
3170 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
3171
3172 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
3173 return SDValue();
3174}
3175
3176SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
3177 SelectionDAG &DAG) const {
3178 if (Op.getValueType().isScalableVector())
3179 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
3180
3181 bool IsStrict = Op->isStrictFPOpcode();
3182 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3183 EVT SrcVT = SrcVal.getValueType();
3184
3185 if (SrcVT != MVT::f128) {
3186 // Expand cases where the input is a vector bigger than NEON.
3187 if (useSVEForFixedLengthVectorVT(SrcVT))
3188 return SDValue();
3189
3190 // It's legal except when f128 is involved
3191 return Op;
3192 }
3193
3194 return SDValue();
3195}
3196
3197SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
3198 SelectionDAG &DAG) const {
3199 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3200 // Any additional optimization in this function should be recorded
3201 // in the cost tables.
3202 EVT InVT = Op.getOperand(0).getValueType();
3203 EVT VT = Op.getValueType();
3204
3205 if (VT.isScalableVector()) {
3206 unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
3207 ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
3208 : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
3209 return LowerToPredicatedOp(Op, DAG, Opcode);
3210 }
3211
3212 unsigned NumElts = InVT.getVectorNumElements();
3213
3214 // f16 conversions are promoted to f32 when full fp16 is not supported.
3215 if (InVT.getVectorElementType() == MVT::f16 &&
3216 !Subtarget->hasFullFP16()) {
3217 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
3218 SDLoc dl(Op);
3219 return DAG.getNode(
3220 Op.getOpcode(), dl, Op.getValueType(),
3221 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
3222 }
3223
3224 uint64_t VTSize = VT.getFixedSizeInBits();
3225 uint64_t InVTSize = InVT.getFixedSizeInBits();
3226 if (VTSize < InVTSize) {
3227 SDLoc dl(Op);
3228 SDValue Cv =
3229 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
3230 Op.getOperand(0));
3231 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
3232 }
3233
3234 if (VTSize > InVTSize) {
3235 SDLoc dl(Op);
3236 MVT ExtVT =
3237 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
3238 VT.getVectorNumElements());
3239 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
3240 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
3241 }
3242
3243 // Type changing conversions are illegal.
3244 return Op;
3245}
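
// [Editor's note] Hedged example of the narrowing path above: fptosi from
// <4 x float> to <4 x i16> first converts in the wider integer type and then
// truncates, which is expected to select roughly:
//   fcvtzs v0.4s, v0.4s
//   xtn    v0.4h, v0.4s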
3246
3247SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
3248 SelectionDAG &DAG) const {
3249 bool IsStrict = Op->isStrictFPOpcode();
3250 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3251
3252 if (SrcVal.getValueType().isVector())
3253 return LowerVectorFP_TO_INT(Op, DAG);
3254
3255 // f16 conversions are promoted to f32 when full fp16 is not supported.
3256 if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
3257 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3258 SDLoc dl(Op);
3259 return DAG.getNode(
3260 Op.getOpcode(), dl, Op.getValueType(),
3261 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
3262 }
3263
3264 if (SrcVal.getValueType() != MVT::f128) {
3265 // It's legal except when f128 is involved
3266 return Op;
3267 }
3268
3269 return SDValue();
3270}
3271
3272SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
3273 SelectionDAG &DAG) const {
3274 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3275 // Any additional optimization in this function should be recorded
3276 // in the cost tables.
3277 EVT VT = Op.getValueType();
3278 SDLoc dl(Op);
3279 SDValue In = Op.getOperand(0);
3280 EVT InVT = In.getValueType();
3281 unsigned Opc = Op.getOpcode();
3282 bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
3283
3284 if (VT.isScalableVector()) {
3285 if (InVT.getVectorElementType() == MVT::i1) {
3286 // We can't directly extend an SVE predicate; extend it first.
3287 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3288 EVT CastVT = getPromotedVTForPredicate(InVT);
3289 In = DAG.getNode(CastOpc, dl, CastVT, In);
3290 return DAG.getNode(Opc, dl, VT, In);
3291 }
3292
3293 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
3294 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
3295 return LowerToPredicatedOp(Op, DAG, Opcode);
3296 }
3297
3298 uint64_t VTSize = VT.getFixedSizeInBits();
3299 uint64_t InVTSize = InVT.getFixedSizeInBits();
3300 if (VTSize < InVTSize) {
3301 MVT CastVT =
3302 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
3303 InVT.getVectorNumElements());
3304 In = DAG.getNode(Opc, dl, CastVT, In);
3305 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
3306 }
3307
3308 if (VTSize > InVTSize) {
3309 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3310 EVT CastVT = VT.changeVectorElementTypeToInteger();
3311 In = DAG.getNode(CastOpc, dl, CastVT, In);
3312 return DAG.getNode(Opc, dl, VT, In);
3313 }
3314
3315 return Op;
3316}
3317
3318SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
3319 SelectionDAG &DAG) const {
3320 if (Op.getValueType().isVector())
3321 return LowerVectorINT_TO_FP(Op, DAG);
3322
3323 bool IsStrict = Op->isStrictFPOpcode();
3324 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3325
3326 // f16 conversions are promoted to f32 when full fp16 is not supported.
3327 if (Op.getValueType() == MVT::f16 &&
3328 !Subtarget->hasFullFP16()) {
3329 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3330 SDLoc dl(Op);
3331 return DAG.getNode(
3332 ISD::FP_ROUND, dl, MVT::f16,
3333 DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
3334 DAG.getIntPtrConstant(0, dl));
3335 }
3336
3337 // i128 conversions are libcalls.
3338 if (SrcVal.getValueType() == MVT::i128)
3339 return SDValue();
3340
3341 // Other conversions are legal, unless it's to the completely software-based
3342 // fp128.
3343 if (Op.getValueType() != MVT::f128)
3344 return Op;
3345 return SDValue();
3346}
3347
3348SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
3349 SelectionDAG &DAG) const {
3350 // For iOS, we want to call an alternative entry point: __sincos_stret,
3351 // which returns the values in two S / D registers.
3352 SDLoc dl(Op);
3353 SDValue Arg = Op.getOperand(0);
3354 EVT ArgVT = Arg.getValueType();
3355 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
3356
3357 ArgListTy Args;
3358 ArgListEntry Entry;
3359
3360 Entry.Node = Arg;
3361 Entry.Ty = ArgTy;
3362 Entry.IsSExt = false;
3363 Entry.IsZExt = false;
3364 Args.push_back(Entry);
3365
3366 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
3367 : RTLIB::SINCOS_STRET_F32;
3368 const char *LibcallName = getLibcallName(LC);
3369 SDValue Callee =
3370 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
3371
3372 StructType *RetTy = StructType::get(ArgTy, ArgTy);
3373 TargetLowering::CallLoweringInfo CLI(DAG);
3374 CLI.setDebugLoc(dl)
3375 .setChain(DAG.getEntryNode())
3376 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
3377
3378 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3379 return CallResult.first;
3380}
3381
3382static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
3383 EVT OpVT = Op.getValueType();
3384 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
3385 return SDValue();
3386
3387 assert(Op.getOperand(0).getValueType() == MVT::i16);
3388 SDLoc DL(Op);
3389
3390 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
3391 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
3392 return SDValue(
3393 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
3394 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
3395 0);
3396}
3397
3398static EVT getExtensionTo64Bits(const EVT &OrigVT) {
3399 if (OrigVT.getSizeInBits() >= 64)
3400 return OrigVT;
3401
3402 assert(OrigVT.isSimple() && "Expecting a simple value type");
3403
3404 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
3405 switch (OrigSimpleTy) {
3406 default: llvm_unreachable("Unexpected Vector Type");
3407 case MVT::v2i8:
3408 case MVT::v2i16:
3409 return MVT::v2i32;
3410 case MVT::v4i8:
3411 return MVT::v4i16;
3412 }
3413}
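
// [Editor's note] Editorial example (hedged): a v4i8 operand is widened to
// v4i16 here so that it becomes a 64-bit vector, which is the operand size the
// SMULL/UMULL patterns below expect.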
3414
3415static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
3416 const EVT &OrigTy,
3417 const EVT &ExtTy,
3418 unsigned ExtOpcode) {
3419 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
3420 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
3421 // 64-bits we need to insert a new extension so that it will be 64-bits.
3422 assert(ExtTy.is128BitVector() && "Unexpected extension size");
3423 if (OrigTy.getSizeInBits() >= 64)
3424 return N;
3425
3426 // Must extend size to at least 64 bits to be used as an operand for VMULL.
3427 EVT NewVT = getExtensionTo64Bits(OrigTy);
3428
3429 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
3430}
3431
3432static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
3433 bool isSigned) {
3434 EVT VT = N->getValueType(0);
3435
3436 if (N->getOpcode() != ISD::BUILD_VECTOR)
3437 return false;
3438
3439 for (const SDValue &Elt : N->op_values()) {
3440 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
3441 unsigned EltSize = VT.getScalarSizeInBits();
3442 unsigned HalfSize = EltSize / 2;
3443 if (isSigned) {
3444 if (!isIntN(HalfSize, C->getSExtValue()))
3445 return false;
3446 } else {
3447 if (!isUIntN(HalfSize, C->getZExtValue()))
3448 return false;
3449 }
3450 continue;
3451 }
3452 return false;
3453 }
3454
3455 return true;
3456}
3457
3458static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
3459 if (N->getOpcode() == ISD::SIGN_EXTEND ||
3460 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
3461 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
3462 N->getOperand(0)->getValueType(0),
3463 N->getValueType(0),
3464 N->getOpcode());
3465
3466 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
3467 EVT VT = N->getValueType(0);
3468 SDLoc dl(N);
3469 unsigned EltSize = VT.getScalarSizeInBits() / 2;
3470 unsigned NumElts = VT.getVectorNumElements();
3471 MVT TruncVT = MVT::getIntegerVT(EltSize);
3472 SmallVector<SDValue, 8> Ops;
3473 for (unsigned i = 0; i != NumElts; ++i) {
3474 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
3475 const APInt &CInt = C->getAPIntValue();
3476 // Element types smaller than 32 bits are not legal, so use i32 elements.
3477 // The values are implicitly truncated so sext vs. zext doesn't matter.
3478 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
3479 }
3480 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
3481}
3482
3483static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
3484 return N->getOpcode() == ISD::SIGN_EXTEND ||
3485 N->getOpcode() == ISD::ANY_EXTEND ||
3486 isExtendedBUILD_VECTOR(N, DAG, true);
3487}
3488
3489static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
3490 return N->getOpcode() == ISD::ZERO_EXTEND ||
3491 N->getOpcode() == ISD::ANY_EXTEND ||
3492 isExtendedBUILD_VECTOR(N, DAG, false);
3493}
3494
3495static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
3496 unsigned Opcode = N->getOpcode();
3497 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3498 SDNode *N0 = N->getOperand(0).getNode();
3499 SDNode *N1 = N->getOperand(1).getNode();
3500 return N0->hasOneUse() && N1->hasOneUse() &&
3501 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
3502 }
3503 return false;
3504}
3505
3506static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
3507 unsigned Opcode = N->getOpcode();
3508 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3509 SDNode *N0 = N->getOperand(0).getNode();
3510 SDNode *N1 = N->getOperand(1).getNode();
3511 return N0->hasOneUse() && N1->hasOneUse() &&
3512 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
3513 }
3514 return false;
3515}
3516
3517SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3518 SelectionDAG &DAG) const {
3519 // The rounding mode is in bits 23:22 of the FPCR.
3520 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
3521 // The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3)
3522 // so that the shift + and get folded into a bitfield extract.
3523 SDLoc dl(Op);
3524
3525 SDValue Chain = Op.getOperand(0);
3526 SDValue FPCR_64 = DAG.getNode(
3527 ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
3528 {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
3529 Chain = FPCR_64.getValue(1);
3530 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
3531 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
3532 DAG.getConstant(1U << 22, dl, MVT::i32));
3533 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
3534 DAG.getConstant(22, dl, MVT::i32));
3535 SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
3536 DAG.getConstant(3, dl, MVT::i32));
3537 return DAG.getMergeValues({AND, Chain}, dl);
3538}
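
// [Editor's note] A standalone check of the bitfield formula above (helper name
// is hypothetical; illustrative only): FPCR.RMode in bits 23:22 holds 0=RN,
// 1=RP, 2=RM, 3=RZ, and FLT_ROUNDS wants 1, 2, 3, 0 respectively.
static constexpr unsigned sketchFltRounds(unsigned FPCR) {
  return ((FPCR + (1u << 22)) >> 22) & 3;
}
static_assert(sketchFltRounds(0u << 22) == 1, "round-to-nearest -> 1");
static_assert(sketchFltRounds(3u << 22) == 0, "round-toward-zero -> 0");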
3539
3540SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
3541 SelectionDAG &DAG) const {
3542 SDLoc DL(Op);
3543 SDValue Chain = Op->getOperand(0);
3544 SDValue RMValue = Op->getOperand(1);
3545
3546 // The rounding mode is in bits 23:22 of the FPCR.
3547 // The llvm.set.rounding argument value to the rounding mode in FPCR mapping
3548 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
3549 // (((arg - 1) & 3) << 22).
3550 //
3551 // The argument of llvm.set.rounding must be within the segment [0, 3], so
3552 // NearestTiesToAway (4) is not handled here. It is the responsibility of the
3553 // code that generates llvm.set.rounding to ensure this condition.
3554
3555 // Calculate new value of FPCR[23:22].
3556 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
3557 DAG.getConstant(1, DL, MVT::i32));
3558 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
3559 DAG.getConstant(0x3, DL, MVT::i32));
3560 RMValue =
3561 DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
3562 DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
3563 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
3564
3565 // Get current value of FPCR.
3566 SDValue Ops[] = {
3567 Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
3568 SDValue FPCR =
3569 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
3570 Chain = FPCR.getValue(1);
3571 FPCR = FPCR.getValue(0);
3572
3573 // Put the new rounding mode into FPCR[23:22].
3574 const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
3575 FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
3576 DAG.getConstant(RMMask, DL, MVT::i64));
3577 FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
3578 SDValue Ops2[] = {
3579 Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
3580 FPCR};
3581 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
3582}
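
// [Editor's note] A matching standalone sketch for the inverse mapping above
// (hypothetical helper, illustrative only): the llvm.set.rounding argument
// 0..3 maps to FPCR.RMode 3, 0, 1, 2 via ((arg - 1) & 3) << 22.
static constexpr unsigned sketchRMFieldFor(unsigned Arg) {
  return ((Arg - 1) & 3) << 22; // relies on well-defined unsigned wrap-around
}
static_assert(sketchRMFieldFor(1) == (0u << 22), "to-nearest -> RN (0)");
static_assert(sketchRMFieldFor(0) == (3u << 22), "toward-zero -> RZ (3)");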
3583
3584SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
3585 EVT VT = Op.getValueType();
3586
3587 // If SVE is available then i64 vector multiplications can also be made legal.
3588 bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
3589
3590 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
3591 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
3592
3593 // Multiplications are only custom-lowered for 128-bit vectors so that
3594 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
3595 assert(VT.is128BitVector() && VT.isInteger() &&
3596 "unexpected type for custom-lowering ISD::MUL");
3597 SDNode *N0 = Op.getOperand(0).getNode();
3598 SDNode *N1 = Op.getOperand(1).getNode();
3599 unsigned NewOpc = 0;
3600 bool isMLA = false;
3601 bool isN0SExt = isSignExtended(N0, DAG);
3602 bool isN1SExt = isSignExtended(N1, DAG);
3603 if (isN0SExt && isN1SExt)
3604 NewOpc = AArch64ISD::SMULL;
3605 else {
3606 bool isN0ZExt = isZeroExtended(N0, DAG);
3607 bool isN1ZExt = isZeroExtended(N1, DAG);
3608 if (isN0ZExt && isN1ZExt)
3609 NewOpc = AArch64ISD::UMULL;
3610 else if (isN1SExt || isN1ZExt) {
3611 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
3612 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
3613 if (isN1SExt && isAddSubSExt(N0, DAG)) {
3614 NewOpc = AArch64ISD::SMULL;
3615 isMLA = true;
3616 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
3617 NewOpc = AArch64ISD::UMULL;
3618 isMLA = true;
3619 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
3620 std::swap(N0, N1);
3621 NewOpc = AArch64ISD::UMULL;
3622 isMLA = true;
3623 }
3624 }
3625
3626 if (!NewOpc) {
3627 if (VT == MVT::v2i64)
3628 // Fall through to expand this. It is not legal.
3629 return SDValue();
3630 else
3631 // Other vector multiplications are legal.
3632 return Op;
3633 }
3634 }
3635
3636 // Legalize to an S/UMULL instruction
3637 SDLoc DL(Op);
3638 SDValue Op0;
3639 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
3640 if (!isMLA) {
3641 Op0 = skipExtensionForVectorMULL(N0, DAG);
3642 assert(Op0.getValueType().is64BitVector() &&
3643 Op1.getValueType().is64BitVector() &&
3644 "unexpected types for extended operands to VMULL");
3645 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
3646 }
3647 // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
3648 // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
3649 // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57.
3650 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
3651 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
3652 EVT Op1VT = Op1.getValueType();
3653 return DAG.getNode(N0->getOpcode(), DL, VT,
3654 DAG.getNode(NewOpc, DL, VT,
3655 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
3656 DAG.getNode(NewOpc, DL, VT,
3657 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
3658}
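
// [Editor's note] Hedged illustration of the widening-multiply selection above:
// multiplying two sign-extended <4 x i16> vectors as <4 x i32> is intended to
// become a single widening multiply rather than two extends plus a mul:
//   smull v0.4s, v0.4h, v1.4h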
3659
3660static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
3661 int Pattern) {
3662 return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
3663 DAG.getTargetConstant(Pattern, DL, MVT::i32));
3664}
3665
3666SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3667 SelectionDAG &DAG) const {
3668 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3669 SDLoc dl(Op);
3670 switch (IntNo) {
3671 default: return SDValue(); // Don't custom lower most intrinsics.
3672 case Intrinsic::thread_pointer: {
3673 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3674 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
3675 }
3676 case Intrinsic::aarch64_neon_abs: {
3677 EVT Ty = Op.getValueType();
3678 if (Ty == MVT::i64) {
3679 SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
3680 Op.getOperand(1));
3681 Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
3682 return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
3683 } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
3684 return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
3685 } else {
3686 report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
3687 }
3688 }
3689 case Intrinsic::aarch64_neon_smax:
3690 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
3691 Op.getOperand(1), Op.getOperand(2));
3692 case Intrinsic::aarch64_neon_umax:
3693 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
3694 Op.getOperand(1), Op.getOperand(2));
3695 case Intrinsic::aarch64_neon_smin:
3696 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
3697 Op.getOperand(1), Op.getOperand(2));
3698 case Intrinsic::aarch64_neon_umin:
3699 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
3700 Op.getOperand(1), Op.getOperand(2));
3701
3702 case Intrinsic::aarch64_sve_sunpkhi:
3703 return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
3704 Op.getOperand(1));
3705 case Intrinsic::aarch64_sve_sunpklo:
3706 return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
3707 Op.getOperand(1));
3708 case Intrinsic::aarch64_sve_uunpkhi:
3709 return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
3710 Op.getOperand(1));
3711 case Intrinsic::aarch64_sve_uunpklo:
3712 return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
3713 Op.getOperand(1));
3714 case Intrinsic::aarch64_sve_clasta_n:
3715 return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
3716 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3717 case Intrinsic::aarch64_sve_clastb_n:
3718 return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
3719 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3720 case Intrinsic::aarch64_sve_lasta:
3721 return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
3722 Op.getOperand(1), Op.getOperand(2));
3723 case Intrinsic::aarch64_sve_lastb:
3724 return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
3725 Op.getOperand(1), Op.getOperand(2));
3726 case Intrinsic::aarch64_sve_rev:
3727 return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
3728 Op.getOperand(1));
3729 case Intrinsic::aarch64_sve_tbl:
3730 return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
3731 Op.getOperand(1), Op.getOperand(2));
3732 case Intrinsic::aarch64_sve_trn1:
3733 return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
3734 Op.getOperand(1), Op.getOperand(2));
3735 case Intrinsic::aarch64_sve_trn2:
3736 return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
3737 Op.getOperand(1), Op.getOperand(2));
3738 case Intrinsic::aarch64_sve_uzp1:
3739 return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
3740 Op.getOperand(1), Op.getOperand(2));
3741 case Intrinsic::aarch64_sve_uzp2:
3742 return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
3743 Op.getOperand(1), Op.getOperand(2));
3744 case Intrinsic::aarch64_sve_zip1:
3745 return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
3746 Op.getOperand(1), Op.getOperand(2));
3747 case Intrinsic::aarch64_sve_zip2:
3748 return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
3749 Op.getOperand(1), Op.getOperand(2));
3750 case Intrinsic::aarch64_sve_ptrue:
3751 return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
3752 Op.getOperand(1));
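// For the merging SVE intrinsics below, the IR operand order is
// (passthru, predicate, source), while the *_MERGE_PASSTHRU nodes take
// (predicate, source, passthru); hence the operand order 2, 3, 1.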
3753 case Intrinsic::aarch64_sve_clz:
3754 return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
3755 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3756 case Intrinsic::aarch64_sve_cnt: {
3757 SDValue Data = Op.getOperand(3);
3758 // CTPOP only supports integer operands.
3759 if (Data.getValueType().isFloatingPoint())
3760 Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
3761 return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
3762 Op.getOperand(2), Data, Op.getOperand(1));
3763 }
3764 case Intrinsic::aarch64_sve_dupq_lane:
3765 return LowerDUPQLane(Op, DAG);
3766 case Intrinsic::aarch64_sve_convert_from_svbool:
3767 return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
3768 Op.getOperand(1));
3769 case Intrinsic::aarch64_sve_fneg:
3770 return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
3771 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3772 case Intrinsic::aarch64_sve_frintp:
3773 return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
3774 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3775 case Intrinsic::aarch64_sve_frintm:
3776 return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
3777 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3778 case Intrinsic::aarch64_sve_frinti:
3779 return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
3780 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3781 case Intrinsic::aarch64_sve_frintx:
3782 return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
3783 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3784 case Intrinsic::aarch64_sve_frinta:
3785 return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
3786 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3787 case Intrinsic::aarch64_sve_frintn:
3788 return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
3789 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3790 case Intrinsic::aarch64_sve_frintz:
3791 return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
3792 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3793 case Intrinsic::aarch64_sve_ucvtf:
3794 return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
3795 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3796 Op.getOperand(1));
3797 case Intrinsic::aarch64_sve_scvtf:
3798 return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
3799 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3800 Op.getOperand(1));
3801 case Intrinsic::aarch64_sve_fcvtzu:
3802 return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
3803 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3804 Op.getOperand(1));
3805 case Intrinsic::aarch64_sve_fcvtzs:
3806 return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
3807 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3808 Op.getOperand(1));
3809 case Intrinsic::aarch64_sve_fsqrt:
3810 return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
3811 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3812 case Intrinsic::aarch64_sve_frecpx:
3813 return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
3814 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3815 case Intrinsic::aarch64_sve_fabs:
3816 return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
3817 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3818 case Intrinsic::aarch64_sve_abs:
3819 return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
3820 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3821 case Intrinsic::aarch64_sve_neg:
3822 return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
3823 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3824 case Intrinsic::aarch64_sve_convert_to_svbool: {
3825 EVT OutVT = Op.getValueType();
3826 EVT InVT = Op.getOperand(1).getValueType();
3827 // Return the operand if the cast isn't changing type,
3828 // i.e. <n x 16 x i1> -> <n x 16 x i1>
3829 if (InVT == OutVT)
3830 return Op.getOperand(1);
3831 // Otherwise, zero the newly introduced lanes.
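// For example, reinterpreting an <n x 4 x i1> predicate as <n x 16 x i1>
// leaves the newly introduced lanes undefined; ANDing with an all-true
// predicate of the input type, reinterpreted the same way, clears them.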
3832 SDValue Reinterpret =
3833 DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Op.getOperand(1));
3834 SDValue Mask = getPTrue(DAG, dl, InVT, AArch64SVEPredPattern::all);
3835 SDValue MaskReinterpret =
3836 DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Mask);
3837 return DAG.getNode(ISD::AND, dl, OutVT, Reinterpret, MaskReinterpret);
3838 }
3839
3840 case Intrinsic::aarch64_sve_insr: {
3841 SDValue Scalar = Op.getOperand(2);
3842 EVT ScalarTy = Scalar.getValueType();
3843 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
3844 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
3845
3846 return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
3847 Op.getOperand(1), Scalar);
3848 }
3849 case Intrinsic::aarch64_sve_rbit:
3850 return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
3851 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3852 Op.getOperand(1));
3853 case Intrinsic::aarch64_sve_revb:
3854 return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
3855 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3856 case Intrinsic::aarch64_sve_sxtb:
3857 return DAG.getNode(
3858 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
3859 Op.getOperand(2), Op.getOperand(3),
3860 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
3861 Op.getOperand(1));
3862 case Intrinsic::aarch64_sve_sxth:
3863 return DAG.getNode(
3864 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
3865 Op.getOperand(2), Op.getOperand(3),
3866 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
3867 Op.getOperand(1));
3868 case Intrinsic::aarch64_sve_sxtw:
3869 return DAG.getNode(
3870 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
3871 Op.getOperand(2), Op.getOperand(3),
3872 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
3873 Op.getOperand(1));
3874 case Intrinsic::aarch64_sve_uxtb:
3875 return DAG.getNode(
3876 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
3877 Op.getOperand(2), Op.getOperand(3),
3878 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
3879 Op.getOperand(1));
3880 case Intrinsic::aarch64_sve_uxth:
3881 return DAG.getNode(
3882 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
3883 Op.getOperand(2), Op.getOperand(3),
3884 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
3885 Op.getOperand(1));
3886 case Intrinsic::aarch64_sve_uxtw:
3887 return DAG.getNode(
3888 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
3889 Op.getOperand(2), Op.getOperand(3),
3890 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
3891 Op.getOperand(1));
3892
3893 case Intrinsic::localaddress: {
3894 const auto &MF = DAG.getMachineFunction();
3895 const auto *RegInfo = Subtarget->getRegisterInfo();
3896 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
3897 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
3898 Op.getSimpleValueType());
3899 }
3900
3901 case Intrinsic::eh_recoverfp: {
3902 // FIXME: This needs to be implemented to correctly handle highly aligned
3903 // stack objects. For now we simply return the incoming FP. Refer to D53541
3904 // for more details.
3905 SDValue FnOp = Op.getOperand(1);
3906 SDValue IncomingFPOp = Op.getOperand(2);
3907 GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
3908 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
3909 if (!Fn)
3910 report_fatal_error(
3911 "llvm.eh.recoverfp must take a function as the first argument");
3912 return IncomingFPOp;
3913 }
3914
3915 case Intrinsic::aarch64_neon_vsri:
3916 case Intrinsic::aarch64_neon_vsli: {
3917 EVT Ty = Op.getValueType();
3918
3919 if (!Ty.isVector())
3920 report_fatal_error("Unexpected type for aarch64_neon_vsli");
3921
3922 assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
3923
3924 bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
3925 unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
3926 return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
3927 Op.getOperand(3));
3928 }
3929
3930 case Intrinsic::aarch64_neon_srhadd:
3931 case Intrinsic::aarch64_neon_urhadd:
3932 case Intrinsic::aarch64_neon_shadd:
3933 case Intrinsic::aarch64_neon_uhadd: {
3934 bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
3935 IntNo == Intrinsic::aarch64_neon_shadd);
3936 bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
3937 IntNo == Intrinsic::aarch64_neon_urhadd);
3938 unsigned Opcode =
3939 IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
3940 : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
3941 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
3942 Op.getOperand(2));
3943 }
3944 case Intrinsic::aarch64_neon_sabd:
3945 case Intrinsic::aarch64_neon_uabd: {
3946 unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? AArch64ISD::UABD
3947 : AArch64ISD::SABD;
3948 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
3949 Op.getOperand(2));
3950 }
3951 case Intrinsic::aarch64_neon_sdot:
3952 case Intrinsic::aarch64_neon_udot:
3953 case Intrinsic::aarch64_sve_sdot:
3954 case Intrinsic::aarch64_sve_udot: {
3955 unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
3956 IntNo == Intrinsic::aarch64_sve_udot)
3957 ? AArch64ISD::UDOT
3958 : AArch64ISD::SDOT;
3959 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
3960 Op.getOperand(2), Op.getOperand(3));
3961 }
3962 }
3963}
3964
3965bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
3966 if (VT.getVectorElementType() == MVT::i8 ||
3967 VT.getVectorElementType() == MVT::i16) {
3968 EltTy = MVT::i32;
3969 return true;
3970 }
3971 return false;
3972}
3973
3974bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
3975 if (VT.getVectorElementType() == MVT::i32 &&
3976 VT.getVectorElementCount().getKnownMinValue() >= 4)
3977 return true;
3978
3979 return false;
3980}
3981
3982bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
3983 return ExtVal.getValueType().isScalableVector();
3984}
3985
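// Map (scaled, signed, extend) onto the SVE gather opcode. The signed and
// unsigned variants without extension share an opcode, presumably because a
// 64-bit index needs no sign/zero distinction.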
3986unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
3987 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
3988 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
3989 AArch64ISD::GLD1_MERGE_ZERO},
3990 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
3991 AArch64ISD::GLD1_UXTW_MERGE_ZERO},
3992 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
3993 AArch64ISD::GLD1_MERGE_ZERO},
3994 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
3995 AArch64ISD::GLD1_SXTW_MERGE_ZERO},
3996 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
3997 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
3998 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
3999 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
4000 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4001 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
4002 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4003 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
4004 };
4005 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4006 return AddrModes.find(Key)->second;
4007}
4008
4009unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
4010 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
4011 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
4012 AArch64ISD::SST1_PRED},
4013 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
4014 AArch64ISD::SST1_UXTW_PRED},
4015 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
4016 AArch64ISD::SST1_PRED},
4017 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
4018 AArch64ISD::SST1_SXTW_PRED},
4019 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
4020 AArch64ISD::SST1_SCALED_PRED},
4021 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
4022 AArch64ISD::SST1_UXTW_SCALED_PRED},
4023 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4024 AArch64ISD::SST1_SCALED_PRED},
4025 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4026 AArch64ISD::SST1_SXTW_SCALED_PRED},
4027 };
4028 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4029 return AddrModes.find(Key)->second;
4030}
4031
4032unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
4033 switch (Opcode) {
4034 default:
4035 llvm_unreachable("unimplemented opcode");
4036 return Opcode;
4037 case AArch64ISD::GLD1_MERGE_ZERO:
4038 return AArch64ISD::GLD1S_MERGE_ZERO;
4039 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
4040 return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
4041 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
4042 return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
4043 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
4044 return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
4045 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
4046 return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
4047 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
4048 return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
4049 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
4050 return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
4051 }
4052}
4053
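// Returns true when the gather/scatter index is an extension of a 32-bit
// value: either an explicit SIGN_EXTEND_INREG, or an AND with a splatted
// 0xFFFFFFFF mask modelling a zero-extension.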
4054bool getGatherScatterIndexIsExtended(SDValue Index) {
4055 unsigned Opcode = Index.getOpcode();
4056 if (Opcode == ISD::SIGN_EXTEND_INREG)
4057 return true;
4058
4059 if (Opcode == ISD::AND) {
4060 SDValue Splat = Index.getOperand(1);
4061 if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
4062 return false;
4063 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
4064 if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
4065 return false;
4066 return true;
4067 }
4068
4069 return false;
4070}
4071
4072// If the base pointer of a masked gather or scatter is null, we
4073// may be able to swap BasePtr & Index and use the vector + register
4074// or vector + immediate addressing mode, e.g.
4075// VECTOR + REGISTER:
4076// getelementptr nullptr, <vscale x N x T> (splat(%offset)) + %indices)
4077// -> getelementptr %offset, <vscale x N x T> %indices
4078// VECTOR + IMMEDIATE:
4079// getelementptr nullptr, <vscale x N x T> (splat(#x)) + %indices)
4080// -> getelementptr #x, <vscale x N x T> %indices
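// For example, with 32-bit elements (ScalarSizeInBytes == 4) a splatted
// offset of 124 (31 * 4) still fits the immediate form, whereas 128 or any
// offset that is not a multiple of 4 falls back to the vector + register form.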
4081void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
4082 unsigned &Opcode, bool IsGather,
4083 SelectionDAG &DAG) {
4084 if (!isNullConstant(BasePtr))
4085 return;
4086
4087 ConstantSDNode *Offset = nullptr;
4088 if (Index.getOpcode() == ISD::ADD)
4089 if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
4090 if (isa<ConstantSDNode>(SplatVal))
4091 Offset = cast<ConstantSDNode>(SplatVal);
4092 else {
4093 BasePtr = SplatVal;
4094 Index = Index->getOperand(0);
4095 return;
4096 }
4097 }
4098
4099 unsigned NewOp =
4100 IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
4101
4102 if (!Offset) {
4103 std::swap(BasePtr, Index);
4104 Opcode = NewOp;
4105 return;
4106 }
4107
4108 uint64_t OffsetVal = Offset->getZExtValue();
4109 unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
4110 auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
4111
4112 if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
4113 // Index is out of range for the immediate addressing mode
4114 BasePtr = ConstOffset;
4115 Index = Index->getOperand(0);
4116 return;
4117 }
4118
4119 // Immediate is in range
4120 Opcode = NewOp;
4121 BasePtr = Index->getOperand(0);
4122 Index = ConstOffset;
4123}
4124
4125SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
4126 SelectionDAG &DAG) const {
4127 SDLoc DL(Op);
4128 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
4129 assert(MGT && "Can only custom lower gather load nodes");
4130
4131 SDValue Index = MGT->getIndex();
4132 SDValue Chain = MGT->getChain();
4133 SDValue PassThru = MGT->getPassThru();
4134 SDValue Mask = MGT->getMask();
4135 SDValue BasePtr = MGT->getBasePtr();
4136 ISD::LoadExtType ExtTy = MGT->getExtensionType();
4137
4138 ISD::MemIndexType IndexType = MGT->getIndexType();
4139 bool IsScaled =
4140 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4141 bool IsSigned =
4142 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4143 bool IdxNeedsExtend =
4144 getGatherScatterIndexIsExtended(Index) ||
4145 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4146 bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
4147
4148 EVT VT = PassThru.getSimpleValueType();
4149 EVT MemVT = MGT->getMemoryVT();
4150 SDValue InputVT = DAG.getValueType(MemVT);
4151
4152 if (VT.getVectorElementType() == MVT::bf16 &&
4153 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4154 return SDValue();
4155
4156 // Handle FP data by using an integer gather and casting the result.
4157 if (VT.isFloatingPoint()) {
4158 EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4159 PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
4160 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4161 }
4162
4163 SDVTList VTs = DAG.getVTList(PassThru.getSimpleValueType(), MVT::Other);
4164
4165 if (getGatherScatterIndexIsExtended(Index))
4166 Index = Index.getOperand(0);
4167
4168 unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
4169 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4170 /*isGather=*/true, DAG);
4171
4172 if (ResNeedsSignExtend)
4173 Opcode = getSignExtendedGatherOpcode(Opcode);
4174
4175 SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT, PassThru};
4176 SDValue Gather = DAG.getNode(Opcode, DL, VTs, Ops);
4177
4178 if (VT.isFloatingPoint()) {
4179 SDValue Cast = getSVESafeBitCast(VT, Gather, DAG);
4180 return DAG.getMergeValues({Cast, Gather.getValue(1)}, DL);
4181 }
4182
4183 return Gather;
4184}
4185
4186SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
4187 SelectionDAG &DAG) const {
4188 SDLoc DL(Op);
4189 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
4190 assert(MSC && "Can only custom lower scatter store nodes");
4191
4192 SDValue Index = MSC->getIndex();
4193 SDValue Chain = MSC->getChain();
4194 SDValue StoreVal = MSC->getValue();
4195 SDValue Mask = MSC->getMask();
4196 SDValue BasePtr = MSC->getBasePtr();
4197
4198 ISD::MemIndexType IndexType = MSC->getIndexType();
4199 bool IsScaled =
4200 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4201 bool IsSigned =
4202 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4203 bool NeedsExtend =
4204 getGatherScatterIndexIsExtended(Index) ||
4205 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4206
4207 EVT VT = StoreVal.getSimpleValueType();
4208 SDVTList VTs = DAG.getVTList(MVT::Other);
4209 EVT MemVT = MSC->getMemoryVT();
4210 SDValue InputVT = DAG.getValueType(MemVT);
4211
4212 if (VT.getVectorElementType() == MVT::bf16 &&
4213 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4214 return SDValue();
4215
4216 // Handle FP data by casting the data so an integer scatter can be used.
4217 if (VT.isFloatingPoint()) {
4218 EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4219 StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
4220 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4221 }
4222
4223 if (getGatherScatterIndexIsExtended(Index))
4224 Index = Index.getOperand(0);
4225
4226 unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
4227 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4228 /*isGather=*/false, DAG);
4229
4230 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
4231 return DAG.getNode(Opcode, DL, VTs, Ops);
4232}
4233
4234// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
4235static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
4236 EVT VT, EVT MemVT,
4237 SelectionDAG &DAG) {
4238 assert(VT.isVector() && "VT should be a vector type");
4239 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
4240
4241 SDValue Value = ST->getValue();
4242
4243 // First extend the promoted v4i16 to v8i16, truncate to v8i8, and extract
4244 // the word lane that represents the v4i8 subvector. This optimizes the store
4245 // to:
4246 //
4247 // xtn v0.8b, v0.8h
4248 // str s0, [x0]
4249
4250 SDValue Undef = DAG.getUNDEF(MVT::i16);
4251 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
4252 {Undef, Undef, Undef, Undef});
4253
4254 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
4255 Value, UndefVec);
4256 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
4257
4258 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
4259 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4260 Trunc, DAG.getConstant(0, DL, MVT::i64));
4261
4262 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
4263 ST->getBasePtr(), ST->getMemOperand());
4264}
4265
4266 // Custom lowering for any store, vector or scalar, with or without a
4267 // truncate. Currently we only custom lower truncating stores from v4i16 to
4268 // v4i8 and volatile stores of i128.
4269SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
4270 SelectionDAG &DAG) const {
4271 SDLoc Dl(Op);
4272 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
4273 assert(StoreNode && "Can only custom lower store nodes");
4274
4275 SDValue Value = StoreNode->getValue();
4276
4277 EVT VT = Value.getValueType();
4278 EVT MemVT = StoreNode->getMemoryVT();
4279
4280 if (VT.isVector()) {
4281 if (useSVEForFixedLengthVectorVT(VT))
4282 return LowerFixedLengthVectorStoreToSVE(Op, DAG);
4283
4284 unsigned AS = StoreNode->getAddressSpace();
4285 Align Alignment = StoreNode->getAlign();
4286 if (Alignment < MemVT.getStoreSize() &&
4287 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
4288 StoreNode->getMemOperand()->getFlags(),
4289 nullptr)) {
4290 return scalarizeVectorStore(StoreNode, DAG);
4291 }
4292
4293 if (StoreNode->isTruncatingStore()) {
4294 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
4295 }
4296 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
4297 // the custom lowering, as there are no un-paired non-temporal stores and
4298 // legalization will break up 256 bit inputs.
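// For example, a non-temporal store of v8i32 is split into two v4i32 halves
// and emitted as a single STNP of the pair.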
4299 ElementCount EC = MemVT.getVectorElementCount();
4300 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
4301 EC.isKnownEven() &&
4302 ((MemVT.getScalarSizeInBits() == 8u ||
4303 MemVT.getScalarSizeInBits() == 16u ||
4304 MemVT.getScalarSizeInBits() == 32u ||
4305 MemVT.getScalarSizeInBits() == 64u))) {
4306 SDValue Lo =
4307 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4308 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4309 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
4310 SDValue Hi =
4311 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4312 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4313 StoreNode->getValue(),
4314 DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
4315 SDValue Result = DAG.getMemIntrinsicNode(
4316 AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
4317 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4318 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4319 return Result;
4320 }
4321 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
4322 assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
4323 SDValue Lo =
4324 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4325 DAG.getConstant(0, Dl, MVT::i64));
4326 SDValue Hi =
4327 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4328 DAG.getConstant(1, Dl, MVT::i64));
4329 SDValue Result = DAG.getMemIntrinsicNode(
4330 AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
4331 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4332 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4333 return Result;
4334 }
4335
4336 return SDValue();
4337}
4338
4339// Generate SUBS and CSEL for integer abs.
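// abs(x) == (x >= 0) ? x : 0 - x. The SUBS against zero sets NZCV, and the
// CSEL selects x on PL (non-negative) and the negation otherwise.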
4340SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
4341 MVT VT = Op.getSimpleValueType();
4342
4343 if (VT.isVector())
4344 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
4345
4346 SDLoc DL(Op);
4347 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4348 Op.getOperand(0));
4349 // Generate SUBS & CSEL.
4350 SDValue Cmp =
4351 DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
4352 Op.getOperand(0), DAG.getConstant(0, DL, VT));
4353 return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
4354 DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
4355 Cmp.getValue(1));
4356}
4357
4358SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
4359 SelectionDAG &DAG) const {
4360 LLVM_DEBUG(dbgs() << "Custom lowering: ");
4361 LLVM_DEBUG(Op.dump());
4362
4363 switch (Op.getOpcode()) {
4364 default:
4365 llvm_unreachable("unimplemented operand");
4366 return SDValue();
4367 case ISD::BITCAST:
4368 return LowerBITCAST(Op, DAG);
4369 case ISD::GlobalAddress:
4370 return LowerGlobalAddress(Op, DAG);
4371 case ISD::GlobalTLSAddress:
4372 return LowerGlobalTLSAddress(Op, DAG);
4373 case ISD::SETCC:
4374 case ISD::STRICT_FSETCC:
4375 case ISD::STRICT_FSETCCS:
4376 return LowerSETCC(Op, DAG);
4377 case ISD::BR_CC:
4378 return LowerBR_CC(Op, DAG);
4379 case ISD::SELECT:
4380 return LowerSELECT(Op, DAG);
4381 case ISD::SELECT_CC:
4382 return LowerSELECT_CC(Op, DAG);
4383 case ISD::JumpTable:
4384 return LowerJumpTable(Op, DAG);
4385 case ISD::BR_JT:
4386 return LowerBR_JT(Op, DAG);
4387 case ISD::ConstantPool:
4388 return LowerConstantPool(Op, DAG);
4389 case ISD::BlockAddress:
4390 return LowerBlockAddress(Op, DAG);
4391 case ISD::VASTART:
4392 return LowerVASTART(Op, DAG);
4393 case ISD::VACOPY:
4394 return LowerVACOPY(Op, DAG);
4395 case ISD::VAARG:
4396 return LowerVAARG(Op, DAG);
4397 case ISD::ADDC:
4398 case ISD::ADDE:
4399 case ISD::SUBC:
4400 case ISD::SUBE:
4401 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
4402 case ISD::SADDO:
4403 case ISD::UADDO:
4404 case ISD::SSUBO:
4405 case ISD::USUBO:
4406 case ISD::SMULO:
4407 case ISD::UMULO:
4408 return LowerXALUO(Op, DAG);
4409 case ISD::FADD:
4410 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
4411 case ISD::FSUB:
4412 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
4413 case ISD::FMUL:
4414 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
4415 case ISD::FMA:
4416 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
4417 case ISD::FDIV:
4418 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
4419 case ISD::FNEG:
4420 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
4421 case ISD::FCEIL:
4422 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
4423 case ISD::FFLOOR:
4424 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
4425 case ISD::FNEARBYINT:
4426 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
4427 case ISD::FRINT:
4428 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
4429 case ISD::FROUND:
4430 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
4431 case ISD::FROUNDEVEN:
4432 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
4433 case ISD::FTRUNC:
4434 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
4435 case ISD::FSQRT:
4436 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
4437 case ISD::FABS:
4438 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
4439 case ISD::FP_ROUND:
4440 case ISD::STRICT_FP_ROUND:
4441 return LowerFP_ROUND(Op, DAG);
4442 case ISD::FP_EXTEND:
4443 return LowerFP_EXTEND(Op, DAG);
4444 case ISD::FRAMEADDR:
4445 return LowerFRAMEADDR(Op, DAG);
4446 case ISD::SPONENTRY:
4447 return LowerSPONENTRY(Op, DAG);
4448 case ISD::RETURNADDR:
4449 return LowerRETURNADDR(Op, DAG);
4450 case ISD::ADDROFRETURNADDR:
4451 return LowerADDROFRETURNADDR(Op, DAG);
4452 case ISD::CONCAT_VECTORS:
4453 return LowerCONCAT_VECTORS(Op, DAG);
4454 case ISD::INSERT_VECTOR_ELT:
4455 return LowerINSERT_VECTOR_ELT(Op, DAG);
4456 case ISD::EXTRACT_VECTOR_ELT:
4457 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4458 case ISD::BUILD_VECTOR:
4459 return LowerBUILD_VECTOR(Op, DAG);
4460 case ISD::VECTOR_SHUFFLE:
4461 return LowerVECTOR_SHUFFLE(Op, DAG);
4462 case ISD::SPLAT_VECTOR:
4463 return LowerSPLAT_VECTOR(Op, DAG);
4464 case ISD::STEP_VECTOR:
4465 return LowerSTEP_VECTOR(Op, DAG);
4466 case ISD::EXTRACT_SUBVECTOR:
4467 return LowerEXTRACT_SUBVECTOR(Op, DAG);
4468 case ISD::INSERT_SUBVECTOR:
4469 return LowerINSERT_SUBVECTOR(Op, DAG);
4470 case ISD::SDIV:
4471 case ISD::UDIV:
4472 return LowerDIV(Op, DAG);
4473 case ISD::SMIN:
4474 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
4475 /*OverrideNEON=*/true);
4476 case ISD::UMIN:
4477 return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
4478 /*OverrideNEON=*/true);
4479 case ISD::SMAX:
4480 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
4481 /*OverrideNEON=*/true);
4482 case ISD::UMAX:
4483 return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
4484 /*OverrideNEON=*/true);
4485 case ISD::SRA:
4486 case ISD::SRL:
4487 case ISD::SHL:
4488 return LowerVectorSRA_SRL_SHL(Op, DAG);
4489 case ISD::SHL_PARTS:
4490 return LowerShiftLeftParts(Op, DAG);
4491 case ISD::SRL_PARTS:
4492 case ISD::SRA_PARTS:
4493 return LowerShiftRightParts(Op, DAG);
4494 case ISD::CTPOP:
4495 return LowerCTPOP(Op, DAG);
4496 case ISD::FCOPYSIGN:
4497 return LowerFCOPYSIGN(Op, DAG);
4498 case ISD::OR:
4499 return LowerVectorOR(Op, DAG);
4500 case ISD::XOR:
4501 return LowerXOR(Op, DAG);
4502 case ISD::PREFETCH:
4503 return LowerPREFETCH(Op, DAG);
4504 case ISD::SINT_TO_FP:
4505 case ISD::UINT_TO_FP:
4506 case ISD::STRICT_SINT_TO_FP:
4507 case ISD::STRICT_UINT_TO_FP:
4508 return LowerINT_TO_FP(Op, DAG);
4509 case ISD::FP_TO_SINT:
4510 case ISD::FP_TO_UINT:
4511 case ISD::STRICT_FP_TO_SINT:
4512 case ISD::STRICT_FP_TO_UINT:
4513 return LowerFP_TO_INT(Op, DAG);
4514 case ISD::FSINCOS:
4515 return LowerFSINCOS(Op, DAG);
4516 case ISD::FLT_ROUNDS_:
4517 return LowerFLT_ROUNDS_(Op, DAG);
4518 case ISD::SET_ROUNDING:
4519 return LowerSET_ROUNDING(Op, DAG);
4520 case ISD::MUL:
4521 return LowerMUL(Op, DAG);
4522 case ISD::INTRINSIC_WO_CHAIN:
4523 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4524 case ISD::STORE:
4525 return LowerSTORE(Op, DAG);
4526 case ISD::MGATHER:
4527 return LowerMGATHER(Op, DAG);
4528 case ISD::MSCATTER:
4529 return LowerMSCATTER(Op, DAG);
4530 case ISD::VECREDUCE_SEQ_FADD:
4531 return LowerVECREDUCE_SEQ_FADD(Op, DAG);
4532 case ISD::VECREDUCE_ADD:
4533 case ISD::VECREDUCE_AND:
4534 case ISD::VECREDUCE_OR:
4535 case ISD::VECREDUCE_XOR:
4536 case ISD::VECREDUCE_SMAX:
4537 case ISD::VECREDUCE_SMIN:
4538 case ISD::VECREDUCE_UMAX:
4539 case ISD::VECREDUCE_UMIN:
4540 case ISD::VECREDUCE_FADD:
4541 case ISD::VECREDUCE_FMAX:
4542 case ISD::VECREDUCE_FMIN:
4543 return LowerVECREDUCE(Op, DAG);
4544 case ISD::ATOMIC_LOAD_SUB:
4545 return LowerATOMIC_LOAD_SUB(Op, DAG);
4546 case ISD::ATOMIC_LOAD_AND:
4547 return LowerATOMIC_LOAD_AND(Op, DAG);
4548 case ISD::DYNAMIC_STACKALLOC:
4549 return LowerDYNAMIC_STACKALLOC(Op, DAG);
4550 case ISD::VSCALE:
4551 return LowerVSCALE(Op, DAG);
4552 case ISD::ANY_EXTEND:
4553 case ISD::SIGN_EXTEND:
4554 case ISD::ZERO_EXTEND:
4555 return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
4556 case ISD::SIGN_EXTEND_INREG: {
4557 // Only custom lower when ExtraVT has a legal byte based element type.
4558 EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4559 EVT ExtraEltVT = ExtraVT.getVectorElementType();
4560 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
4561 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
4562 return SDValue();
4563
4564 return LowerToPredicatedOp(Op, DAG,
4565 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
4566 }
4567 case ISD::TRUNCATE:
4568 return LowerTRUNCATE(Op, DAG);
4569 case ISD::LOAD:
4570 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
4571 return LowerFixedLengthVectorLoadToSVE(Op, DAG);
4572 llvm_unreachable("Unexpected request to lower ISD::LOAD");
4573 case ISD::ADD:
4574 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
4575 case ISD::AND:
4576 return LowerToScalableOp(Op, DAG);
4577 case ISD::SUB:
4578 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
4579 case ISD::FMAXIMUM:
4580 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
4581 case ISD::FMAXNUM:
4582 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
4583 case ISD::FMINIMUM:
4584 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
4585 case ISD::FMINNUM:
4586 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
4587 case ISD::VSELECT:
4588 return LowerFixedLengthVectorSelectToSVE(Op, DAG);
4589 case ISD::ABS:
4590 return LowerABS(Op, DAG);
4591 case ISD::BITREVERSE:
4592 return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
4593 /*OverrideNEON=*/true);
4594 case ISD::BSWAP:
4595 return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
4596 case ISD::CTLZ:
4597 return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
4598 /*OverrideNEON=*/true);
4599 case ISD::CTTZ:
4600 return LowerCTTZ(Op, DAG);
4601 }
4602}
4603
4604bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
4605 return !Subtarget->useSVEForFixedLengthVectors();
4606}
4607
4608bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
4609 EVT VT, bool OverrideNEON) const {
4610 if (!Subtarget->useSVEForFixedLengthVectors())
4611 return false;
4612
4613 if (!VT.isFixedLengthVector())
4614 return false;
4615
4616 // Don't use SVE for vectors we cannot scalarize if required.
4617 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
4618 // Fixed length predicates should be promoted to i8.
4619 // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
4620 case MVT::i1:
4621 default:
4622 return false;
4623 case MVT::i8:
4624 case MVT::i16:
4625 case MVT::i32:
4626 case MVT::i64:
4627 case MVT::f16:
4628 case MVT::f32:
4629 case MVT::f64:
4630 break;
4631 }
4632
4633 // All SVE implementations support NEON sized vectors.
4634 if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
4635 return true;
4636
4637 // Ensure NEON MVTs only belong to a single register class.
4638 if (VT.getFixedSizeInBits() <= 128)
4639 return false;
4640
4641 // Don't use SVE for types that don't fit.
4642 if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
4643 return false;
4644
4645 // TODO: Perhaps an artificial restriction, but worth having whilst getting
4646 // the base fixed length SVE support in place.
4647 if (!VT.isPow2VectorType())
4648 return false;
4649
4650 return true;
4651}
4652
4653//===----------------------------------------------------------------------===//
4654// Calling Convention Implementation
4655//===----------------------------------------------------------------------===//
4656
4657/// Selects the correct CCAssignFn for a given CallingConvention value.
4658CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
4659 bool IsVarArg) const {
4660 switch (CC) {
4661 default:
4662 report_fatal_error("Unsupported calling convention.");
4663 case CallingConv::WebKit_JS:
4664 return CC_AArch64_WebKit_JS;
4665 case CallingConv::GHC:
4666 return CC_AArch64_GHC;
4667 case CallingConv::C:
4668 case CallingConv::Fast:
4669 case CallingConv::PreserveMost:
4670 case CallingConv::CXX_FAST_TLS:
4671 case CallingConv::Swift:
4672 if (Subtarget->isTargetWindows() && IsVarArg)
4673 return CC_AArch64_Win64_VarArg;
4674 if (!Subtarget->isTargetDarwin())
4675 return CC_AArch64_AAPCS;
4676 if (!IsVarArg)
4677 return CC_AArch64_DarwinPCS;
4678 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
4679 : CC_AArch64_DarwinPCS_VarArg;
4680 case CallingConv::Win64:
4681 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
4682 case CallingConv::CFGuard_Check:
4683 return CC_AArch64_Win64_CFGuard_Check;
4684 case CallingConv::AArch64_VectorCall:
4685 case CallingConv::AArch64_SVE_VectorCall:
4686 return CC_AArch64_AAPCS;
4687 }
4688}
4689
4690CCAssignFn *
4691AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
4692 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
4693 : RetCC_AArch64_AAPCS;
4694}
4695
4696SDValue AArch64TargetLowering::LowerFormalArguments(
4697 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4698 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
4699 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4700 MachineFunction &MF = DAG.getMachineFunction();
4701 MachineFrameInfo &MFI = MF.getFrameInfo();
4702 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
4703
4704 // Assign locations to all of the incoming arguments.
4705 SmallVector<CCValAssign, 16> ArgLocs;
4706 DenseMap<unsigned, SDValue> CopiedRegs;
4707 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4708 *DAG.getContext());
4709
4710 // At this point, Ins[].VT may already be promoted to i32. To correctly
4711 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
4712 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
4713 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
4714 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
4715 // LocVT.
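// For example, an i8 parameter arrives with Ins[i].VT promoted to i32;
// passing ValVT == MVT::i8 below lets the calling convention treat it as a
// byte-sized value on the stack rather than a promoted word.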
4716 unsigned NumArgs = Ins.size();
4717 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
4718 unsigned CurArgIdx = 0;
4719 for (unsigned i = 0; i != NumArgs; ++i) {
4720 MVT ValVT = Ins[i].VT;
4721 if (Ins[i].isOrigArg()) {
4722 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
4723 CurArgIdx = Ins[i].getOrigArgIndex();
4724
4725 // Get type of the original argument.
4726 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
4727 /*AllowUnknown*/ true);
4728 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
4729 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
4730 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
4731 ValVT = MVT::i8;
4732 else if (ActualMVT == MVT::i16)
4733 ValVT = MVT::i16;
4734 }
4735 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
4736 bool Res =
4737 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
4738 assert(!Res && "Call operand has unhandled type");
4739 (void)Res;
4740 }
4741 SmallVector<SDValue, 16> ArgValues;
4742 unsigned ExtraArgLocs = 0;
4743 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4744 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
4745
4746 if (Ins[i].Flags.isByVal()) {
4747 // Byval is used for HFAs in the PCS, but the system should work in a
4748 // non-compliant manner for larger structs.
4749 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4750 int Size = Ins[i].Flags.getByValSize();
4751 unsigned NumRegs = (Size + 7) / 8;
4752
4753 // FIXME: This works on big-endian for composite byvals, which are the common
4754 // case. It should also work for fundamental types.
4755 unsigned FrameIdx =
4756 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
4757 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
4758 InVals.push_back(FrameIdxN);
4759
4760 continue;
4761 }
4762
4763 SDValue ArgValue;
4764 if (VA.isRegLoc()) {
4765 // Arguments stored in registers.
4766 EVT RegVT = VA.getLocVT();
4767 const TargetRegisterClass *RC;
4768
4769 if (RegVT == MVT::i32)
4770 RC = &AArch64::GPR32RegClass;
4771 else if (RegVT == MVT::i64)
4772 RC = &AArch64::GPR64RegClass;
4773 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4774 RC = &AArch64::FPR16RegClass;
4775 else if (RegVT == MVT::f32)
4776 RC = &AArch64::FPR32RegClass;
4777 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
4778 RC = &AArch64::FPR64RegClass;
4779 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
4780 RC = &AArch64::FPR128RegClass;
4781 else if (RegVT.isScalableVector() &&
4782 RegVT.getVectorElementType() == MVT::i1)
4783 RC = &AArch64::PPRRegClass;
4784 else if (RegVT.isScalableVector())
4785 RC = &AArch64::ZPRRegClass;
4786 else
4787 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4788
4789 // Transform the arguments in physical registers into virtual ones.
4790 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
4791 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
4792
4793 // If this is an 8, 16 or 32-bit value, it is really passed promoted
4794 // to 64 bits. Insert an assert[sz]ext to capture this, then
4795 // truncate to the right size.
4796 switch (VA.getLocInfo()) {
4797 default:
4798 llvm_unreachable("Unknown loc info!");
4799 case CCValAssign::Full:
4800 break;
4801 case CCValAssign::Indirect:
4802 assert(VA.getValVT().isScalableVector() &&
4803 "Only scalable vectors can be passed indirectly");
4804 break;
4805 case CCValAssign::BCvt:
4806 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
4807 break;
4808 case CCValAssign::AExt:
4809 case CCValAssign::SExt:
4810 case CCValAssign::ZExt:
4811 break;
4812 case CCValAssign::AExtUpper:
4813 ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
4814 DAG.getConstant(32, DL, RegVT));
4815 ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
4816 break;
4817 }
4818 } else { // VA.isRegLoc()
4819 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
4820 unsigned ArgOffset = VA.getLocMemOffset();
4821 unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
4822 ? VA.getLocVT().getSizeInBits()
4823 : VA.getValVT().getSizeInBits()) / 8;
4824
4825 uint32_t BEAlign = 0;
4826 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
4827 !Ins[i].Flags.isInConsecutiveRegs())
4828 BEAlign = 8 - ArgSize;
4829
4830 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
4831
4832 // Create load nodes to retrieve arguments from the stack.
4833 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4834
4835 // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
4836 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
4837 MVT MemVT = VA.getValVT();
4838
4839 switch (VA.getLocInfo()) {
4840 default:
4841 break;
4842 case CCValAssign::Trunc:
4843 case CCValAssign::BCvt:
4844 MemVT = VA.getLocVT();
4845 break;
4846 case CCValAssign::Indirect:
4847 assert(VA.getValVT().isScalableVector() &&
4848 "Only scalable vectors can be passed indirectly");
4849 MemVT = VA.getLocVT();
4850 break;
4851 case CCValAssign::SExt:
4852 ExtType = ISD::SEXTLOAD;
4853 break;
4854 case CCValAssign::ZExt:
4855 ExtType = ISD::ZEXTLOAD;
4856 break;
4857 case CCValAssign::AExt:
4858 ExtType = ISD::EXTLOAD;
4859 break;
4860 }
4861
4862 ArgValue = DAG.getExtLoad(
4863 ExtType, DL, VA.getLocVT(), Chain, FIN,
4864 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
4865 MemVT);
4866
4867 }
4868
4869 if (VA.getLocInfo() == CCValAssign::Indirect) {
4870 assert(VA.getValVT().isScalableVector() &&
4871 "Only scalable vectors can be passed indirectly");
4872
4873 uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
4874 unsigned NumParts = 1;
4875 if (Ins[i].Flags.isInConsecutiveRegs()) {
4876 assert(!Ins[i].Flags.isInConsecutiveRegsLast());
4877 while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
4878 ++NumParts;
4879 }
4880
4881 MVT PartLoad = VA.getValVT();
4882 SDValue Ptr = ArgValue;
4883
4884 // Ensure we generate all loads for each tuple part, whilst updating the
4885 // pointer after each load correctly using vscale.
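// For example, a two-part tuple argument performs two loads, bumping Ptr by
// vscale * PartSize bytes between them.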
4886 while (NumParts > 0) {
4887 ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
4888 InVals.push_back(ArgValue);
4889 NumParts--;
4890 if (NumParts > 0) {
4891 SDValue BytesIncrement = DAG.getVScale(
4892 DL, Ptr.getValueType(),
4893 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
4894 SDNodeFlags Flags;
4895 Flags.setNoUnsignedWrap(true);
4896 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
4897 BytesIncrement, Flags);
4898 ExtraArgLocs++;
4899 i++;
4900 }
4901 }
4902 } else {
4903 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
4904 ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
4905 ArgValue, DAG.getValueType(MVT::i32));
4906 InVals.push_back(ArgValue);
4907 }
4908 }
4909 assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
4910
4911 // varargs
4912 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4913 if (isVarArg) {
4914 if (!Subtarget->isTargetDarwin() || IsWin64) {
4915 // The AAPCS variadic function ABI is identical to the non-variadic
4916 // one. As a result there may be more arguments in registers and we should
4917 // save them for future reference.
4918 // Win64 variadic functions also pass arguments in registers, but all float
4919 // arguments are passed in integer registers.
4920 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
4921 }
4922
4923 // This will point to the next argument passed via stack.
4924 unsigned StackOffset = CCInfo.getNextStackOffset();
4925 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
4926 StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
4927 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
4928
4929 if (MFI.hasMustTailInVarArgFunc()) {
4930 SmallVector<MVT, 2> RegParmTypes;
4931 RegParmTypes.push_back(MVT::i64);
4932 RegParmTypes.push_back(MVT::f128);
4933 // Compute the set of forwarded registers. The rest are scratch.
4934 SmallVectorImpl<ForwardedRegister> &Forwards =
4935 FuncInfo->getForwardedMustTailRegParms();
4936 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
4937 CC_AArch64_AAPCS);
4938
4939 // Conservatively forward X8, since it might be used for aggregate return.
4940 if (!CCInfo.isAllocated(AArch64::X8)) {
4941 unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
4942 Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
4943 }
4944 }
4945 }
4946
4947 // On Windows, InReg pointers must be returned, so record the pointer in a
4948 // virtual register at the start of the function so it can be returned in the
4949 // epilogue.
4950 if (IsWin64) {
4951 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
4952 if (Ins[I].Flags.isInReg()) {
4953 assert(!FuncInfo->getSRetReturnReg());
4954
4955 MVT PtrTy = getPointerTy(DAG.getDataLayout());
4956 Register Reg =
4957 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
4958 FuncInfo->setSRetReturnReg(Reg);
4959
4960 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
4961 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
4962 break;
4963 }
4964 }
4965 }
4966
4967 unsigned StackArgSize = CCInfo.getNextStackOffset();
4968 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4969 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
4970 // This is a non-standard ABI so by fiat I say we're allowed to make full
4971 // use of the stack area to be popped, which must be aligned to 16 bytes in
4972 // any case:
4973 StackArgSize = alignTo(StackArgSize, 16);
4974
4975 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
4976 // a multiple of 16.
4977 FuncInfo->setArgumentStackToRestore(StackArgSize);
4978
4979 // This realignment carries over to the available bytes below. Our own
4980 // callers will guarantee the space is free by giving an aligned value to
4981 // CALLSEQ_START.
4982 }
4983 // Even if we're not expected to free up the space, it's useful to know how
4984 // much is there while considering tail calls (because we can reuse it).
4985 FuncInfo->setBytesInStackArgArea(StackArgSize);
4986
4987 if (Subtarget->hasCustomCallingConv())
4988 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
4989
4990 return Chain;
4991}
4992
4993void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
4994 SelectionDAG &DAG,
4995 const SDLoc &DL,
4996 SDValue &Chain) const {
4997 MachineFunction &MF = DAG.getMachineFunction();
4998 MachineFrameInfo &MFI = MF.getFrameInfo();
4999 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5000 auto PtrVT = getPointerTy(DAG.getDataLayout());
5001 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
5002
5003 SmallVector<SDValue, 8> MemOps;
5004
5005 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
5006 AArch64::X3, AArch64::X4, AArch64::X5,
5007 AArch64::X6, AArch64::X7 };
5008 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
5009 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
5010
5011 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
5012 int GPRIdx = 0;
5013 if (GPRSaveSize != 0) {
5014 if (IsWin64) {
5015 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
5016 if (GPRSaveSize & 15)
5017 // The extra size here, if triggered, will always be 8.
5018 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
5019 } else
5020 GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
5021
5022 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
5023
5024 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
5025 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
5026 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
5027 SDValue Store = DAG.getStore(
5028 Val.getValue(1), DL, Val, FIN,
5029 IsWin64
5030 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
5031 GPRIdx,
5032 (i - FirstVariadicGPR) * 8)
5033 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
5034 MemOps.push_back(Store);
5035 FIN =
5036 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
5037 }
5038 }
5039 FuncInfo->setVarArgsGPRIndex(GPRIdx);
5040 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
5041
5042 if (Subtarget->hasFPARMv8() && !IsWin64) {
5043 static const MCPhysReg FPRArgRegs[] = {
5044 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
5045 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
5046 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
5047 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
5048
5049 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
5050 int FPRIdx = 0;
5051 if (FPRSaveSize != 0) {
5052 FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
5053
5054 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
5055
5056 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
5057 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
5058 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
5059
5060 SDValue Store = DAG.getStore(
5061 Val.getValue(1), DL, Val, FIN,
5062 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
5063 MemOps.push_back(Store);
5064 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
5065 DAG.getConstant(16, DL, PtrVT));
5066 }
5067 }
5068 FuncInfo->setVarArgsFPRIndex(FPRIdx);
5069 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
5070 }
5071
5072 if (!MemOps.empty()) {
5073 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
5074 }
5075}
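// Illustrative sketch for the AAPCS path above, assuming a non-Darwin,
// non-Win64 target and a hypothetical variadic callee:
//
//   int sum(int n, ...);   // 'n' occupies w0/x0, so FirstVariadicGPR == 1
//
// For that signature the code above spills x1-x7 into a 56-byte GPR save
// area and, when FPARMv8 is available, q0-q7 into a 128-byte FPR save area;
// the frame indices and sizes recorded in AArch64FunctionInfo are what the
// va_start/va_arg lowering consults later.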
5076
5077/// LowerCallResult - Lower the result values of a call into the
5078/// appropriate copies out of appropriate physical registers.
5079SDValue AArch64TargetLowering::LowerCallResult(
5080 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5081 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5082 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
5083 SDValue ThisVal) const {
5084 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
5085 // Assign locations to each value returned by this call.
5086 SmallVector<CCValAssign, 16> RVLocs;
5087 DenseMap<unsigned, SDValue> CopiedRegs;
5088 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5089 *DAG.getContext());
5090 CCInfo.AnalyzeCallResult(Ins, RetCC);
5091
5092 // Copy all of the result registers out of their specified physreg.
5093 for (unsigned i = 0; i != RVLocs.size(); ++i) {
5094 CCValAssign VA = RVLocs[i];
5095
5096 // Pass 'this' value directly from the argument to return value, to avoid
5097 // reg unit interference
5098 if (i == 0 && isThisReturn) {
5099 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
5100 "unexpected return calling convention register assignment");
5101 InVals.push_back(ThisVal);
5102 continue;
5103 }
5104
5105 // Avoid copying a physreg twice since RegAllocFast is incompetent and only
5106 // allows one use of a physreg per block.
5107 SDValue Val = CopiedRegs.lookup(VA.getLocReg());
5108 if (!Val) {
5109 Val =
5110 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
5111 Chain = Val.getValue(1);
5112 InFlag = Val.getValue(2);
5113 CopiedRegs[VA.getLocReg()] = Val;
5114 }
5115
5116 switch (VA.getLocInfo()) {
5117 default:
5118 llvm_unreachable("Unknown loc info!");
5119 case CCValAssign::Full:
5120 break;
5121 case CCValAssign::BCvt:
5122 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5123 break;
5124 case CCValAssign::AExtUpper:
5125 Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
5126 DAG.getConstant(32, DL, VA.getLocVT()));
5127 LLVM_FALLTHROUGH;
5128 case CCValAssign::AExt:
5129 LLVM_FALLTHROUGH;
5130 case CCValAssign::ZExt:
5131 Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
5132 break;
5133 }
5134
5135 InVals.push_back(Val);
5136 }
5137
5138 return Chain;
5139}
5140
5141/// Return true if the calling convention is one that we can guarantee TCO for.
5142static bool canGuaranteeTCO(CallingConv::ID CC) {
5143 return CC == CallingConv::Fast;
5144}
5145
5146/// Return true if we might ever do TCO for calls with this calling convention.
5147static bool mayTailCallThisCC(CallingConv::ID CC) {
5148 switch (CC) {
5149 case CallingConv::C:
5150 case CallingConv::AArch64_SVE_VectorCall:
5151 case CallingConv::PreserveMost:
5152 case CallingConv::Swift:
5153 return true;
5154 default:
5155 return canGuaranteeTCO(CC);
5156 }
5157}
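// Taken together, the two helpers above admit C, AArch64_SVE_VectorCall,
// PreserveMost, Swift and Fast callees as tail-call candidates; any other
// callee convention is rejected before the detailed eligibility checks below
// run.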
5158
5159bool AArch64TargetLowering::isEligibleForTailCallOptimization(
5160 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
5161 const SmallVectorImpl<ISD::OutputArg> &Outs,
5162 const SmallVectorImpl<SDValue> &OutVals,
5163 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
5164 if (!mayTailCallThisCC(CalleeCC))
5165 return false;
5166
5167 MachineFunction &MF = DAG.getMachineFunction();
5168 const Function &CallerF = MF.getFunction();
5169 CallingConv::ID CallerCC = CallerF.getCallingConv();
5170
5171 // Functions using the C or Fast calling convention that have an SVE signature
5172 // preserve more registers and should assume the SVE_VectorCall CC.
5173 // The check for matching callee-saved regs will determine whether it is
5174 // eligible for TCO.
5175 if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
5176 AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
5177 CallerCC = CallingConv::AArch64_SVE_VectorCall;
5178
5179 bool CCMatch = CallerCC == CalleeCC;
5180
5181 // When using the Windows calling convention on a non-windows OS, we want
5182 // to back up and restore X18 in such functions; we can't do a tail call
5183 // from those functions.
5184 if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
5185 CalleeCC != CallingConv::Win64)
5186 return false;
5187
5188 // Byval parameters hand the function a pointer directly into the stack area
5189 // we want to reuse during a tail call. Working around this *is* possible (see
5190 // X86) but less efficient and uglier in LowerCall.
5191 for (Function::const_arg_iterator i = CallerF.arg_begin(),
5192 e = CallerF.arg_end();
5193 i != e; ++i) {
5194 if (i->hasByValAttr())
5195 return false;
5196
5197 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
5198 // In this case, it is necessary to save/restore X0 in the callee. Tail
5199 // call opt interferes with this. So we disable tail call opt when the
5200 // caller has an argument with "inreg" attribute.
5201
5202 // FIXME: Check whether the callee also has an "inreg" argument.
5203 if (i->hasInRegAttr())
5204 return false;
5205 }
5206
5207 if (getTargetMachine().Options.GuaranteedTailCallOpt)
5208 return canGuaranteeTCO(CalleeCC) && CCMatch;
5209
5210 // Externally-defined functions with weak linkage should not be
5211 // tail-called on AArch64 when the OS does not support dynamic
5212 // pre-emption of symbols, as the AAELF spec requires normal calls
5213 // to undefined weak functions to be replaced with a NOP or jump to the
5214 // next instruction. The behaviour of branch instructions in this
5215 // situation (as used for tail calls) is implementation-defined, so we
5216 // cannot rely on the linker replacing the tail call with a return.
5217 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5218 const GlobalValue *GV = G->getGlobal();
5219 const Triple &TT = getTargetMachine().getTargetTriple();
5220 if (GV->hasExternalWeakLinkage() &&
5221 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
5222 return false;
5223 }
5224
5225 // Now we search for cases where we can use a tail call without changing the
5226 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
5227 // concept.
5228
5229 // I want anyone implementing a new calling convention to think long and hard
5230 // about this assert.
5231 assert((!isVarArg || CalleeCC == CallingConv::C) &&
5232 "Unexpected variadic calling convention");
5233
5234 LLVMContext &C = *DAG.getContext();
5235 if (isVarArg && !Outs.empty()) {
5236 // At least two cases here: if caller is fastcc then we can't have any
5237 // memory arguments (we'd be expected to clean up the stack afterwards). If
5238 // caller is C then we could potentially use its argument area.
5239
5240 // FIXME: for now we take the most conservative of these in both cases:
5241 // disallow all variadic memory operands.
5242 SmallVector<CCValAssign, 16> ArgLocs;
5243 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5244
5245 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
5246 for (const CCValAssign &ArgLoc : ArgLocs)
5247 if (!ArgLoc.isRegLoc())
5248 return false;
5249 }
5250
5251 // Check that the call results are passed in the same way.
5252 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
5253 CCAssignFnForCall(CalleeCC, isVarArg),
5254 CCAssignFnForCall(CallerCC, isVarArg)))
5255 return false;
5256 // The callee has to preserve all registers the caller needs to preserve.
5257 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5258 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5259 if (!CCMatch) {
5260 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5261 if (Subtarget->hasCustomCallingConv()) {
5262 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
5263 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
5264 }
5265 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5266 return false;
5267 }
5268
5269 // Nothing more to check if the callee is taking no arguments
5270 if (Outs.empty())
5271 return true;
5272
5273 SmallVector<CCValAssign, 16> ArgLocs;
5274 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5275
5276 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
5277
5278 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5279
5280 // If any of the arguments is passed indirectly, it must be SVE, so
5281 // 'getBytesInStackArgArea' is not sufficient to determine whether we need
5282 // to allocate space on the stack. That is why, in that case, we explicitly
5283 // decide here that the call cannot be a tail call.
5284 if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
5285 assert((A.getLocInfo() != CCValAssign::Indirect ||
5286 A.getValVT().isScalableVector()) &&
5287 "Expected value to be scalable");
5288 return A.getLocInfo() == CCValAssign::Indirect;
5289 }))
5290 return false;
5291
5292 // If the stack arguments for this call do not fit into our own save area then
5293 // the call cannot be made tail.
5294 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
5295 return false;
5296
5297 const MachineRegisterInfo &MRI = MF.getRegInfo();
5298 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5299 return false;
5300
5301 return true;
5302}
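// Illustrative IR (hypothetical functions, not taken from the source): under
// the checks above, a call such as
//
//   declare void @callee(i64)
//   define void @caller(i64 %x) {
//     tail call void @callee(i64 %x)
//     ret void
//   }
//
// is sibcall-eligible (matching C conventions, the argument stays in x0, no
// stack area needed), whereas a byval or "inreg" argument in the caller, an
// extern_weak callee on ELF, or stack arguments that exceed the caller's own
// incoming argument area each trigger an early 'return false' above.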
5303
5304SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
5305 SelectionDAG &DAG,
5306 MachineFrameInfo &MFI,
5307 int ClobberedFI) const {
5308 SmallVector<SDValue, 8> ArgChains;
5309 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
5310 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
5311
5312 // Include the original chain at the beginning of the list. When this is
5313 // used by target LowerCall hooks, this helps legalize find the
5314 // CALLSEQ_BEGIN node.
5315 ArgChains.push_back(Chain);
5316
5317 // Add a chain value for each load from a stack argument that overlaps the clobbered object.
5318 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
5319 UE = DAG.getEntryNode().getNode()->use_end();
5320 U != UE; ++U)
5321 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
5322 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
5323 if (FI->getIndex() < 0) {
5324 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
5325 int64_t InLastByte = InFirstByte;
5326 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
5327
5328 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
5329 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
5330 ArgChains.push_back(SDValue(L, 1));
5331 }
5332
5333 // Build a tokenfactor for all the chains.
5334 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
5335}
5336
5337bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
5338 bool TailCallOpt) const {
5339 return CallCC == CallingConv::Fast && TailCallOpt;
5340}
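// In other words, only fastcc callees built with GuaranteedTailCallOpt pop
// their own stack-argument area; for every other convention the caller does
// the cleanup, which is why CalleePopBytes in LowerCall below is usually
// zero.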
5341
5342/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
5343/// and add input and output parameter nodes.
5344SDValue
5345AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
5346 SmallVectorImpl<SDValue> &InVals) const {
5347 SelectionDAG &DAG = CLI.DAG;
5348 SDLoc &DL = CLI.DL;
5349 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
5350 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
5351 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
5352 SDValue Chain = CLI.Chain;
5353 SDValue Callee = CLI.Callee;
5354 bool &IsTailCall = CLI.IsTailCall;
5355 CallingConv::ID CallConv = CLI.CallConv;
5356 bool IsVarArg = CLI.IsVarArg;
5357
5358 MachineFunction &MF = DAG.getMachineFunction();
5359 MachineFunction::CallSiteInfo CSInfo;
5360 bool IsThisReturn = false;
5361
5362 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5363 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5364 bool IsSibCall = false;
5365
5366 // Check callee args/returns for SVE registers and set calling convention
5367 // accordingly.
5368 if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
5369 bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
5370 return Out.VT.isScalableVector();
5371 });
5372 bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
5373 return In.VT.isScalableVector();
5374 });
5375
5376 if (CalleeInSVE || CalleeOutSVE)
5377 CallConv = CallingConv::AArch64_SVE_VectorCall;
5378 }
5379
5380 if (IsTailCall) {
5381 // Check if it's really possible to do a tail call.
5382 IsTailCall = isEligibleForTailCallOptimization(
5383 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
5384 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
5385 report_fatal_error("failed to perform tail call elimination on a call "
5386 "site marked musttail");
5387
5388 // A sibling call is one where we're under the usual C ABI and not planning
5389 // to change that but can still do a tail call:
5390 if (!TailCallOpt && IsTailCall)
5391 IsSibCall = true;
5392
5393 if (IsTailCall)
5394 ++NumTailCalls;
5395 }
5396
5397 // Analyze operands of the call, assigning locations to each operand.
5398 SmallVector<CCValAssign, 16> ArgLocs;
5399 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
5400 *DAG.getContext());
5401
5402 if (IsVarArg) {
5403 // Handle fixed and variable vector arguments differently.
5404 // Variable vector arguments always go into memory.
5405 unsigned NumArgs = Outs.size();
5406
5407 for (unsigned i = 0; i != NumArgs; ++i) {
5408 MVT ArgVT = Outs[i].VT;
5409 if (!Outs[i].IsFixed && ArgVT.isScalableVector())
5410 report_fatal_error("Passing SVE types to variadic functions is "
5411 "currently not supported");
5412
5413 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5414 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
5415 /*IsVarArg=*/ !Outs[i].IsFixed);
5416 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
5417 assert(!Res && "Call operand has unhandled type");
5418 (void)Res;
5419 }
5420 } else {
5421 // At this point, Outs[].VT may already be promoted to i32. To correctly
5422 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5423 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5424 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
5425 // we use a special version of AnalyzeCallOperands to pass in ValVT and
5426 // LocVT.
5427 unsigned NumArgs = Outs.size();
5428 for (unsigned i = 0; i != NumArgs; ++i) {
5429 MVT ValVT = Outs[i].VT;
5430 // Get type of the original argument.
5431 EVT ActualVT = getValueType(DAG.getDataLayout(),
5432 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
5433 /*AllowUnknown*/ true);
5434 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
5435 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5436 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5437 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5438 ValVT = MVT::i8;
5439 else if (ActualMVT == MVT::i16)
5440 ValVT = MVT::i16;
5441
5442 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
5443 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
5444 assert(!Res && "Call operand has unhandled type");
5445 (void)Res;
5446 }
5447 }
5448
5449 // Get a count of how many bytes are to be pushed on the stack.
5450 unsigned NumBytes = CCInfo.getNextStackOffset();
5451
5452 if (IsSibCall) {
5453 // Since we're not changing the ABI to make this a tail call, the memory
5454 // operands are already available in the caller's incoming argument space.
5455 NumBytes = 0;
5456 }
5457
5458 // FPDiff is the byte offset of the call's argument area from the callee's.
5459 // Stores to callee stack arguments will be placed in FixedStackSlots offset
5460 // by this amount for a tail call. In a sibling call it must be 0 because the
5461 // caller will deallocate the entire stack and the callee still expects its
5462 // arguments to begin at SP+0. Completely unused for non-tail calls.
5463 int FPDiff = 0;
5464
5465 if (IsTailCall && !IsSibCall) {
5466 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
5467
5468 // Since callee will pop argument stack as a tail call, we must keep the
5469 // popped size 16-byte aligned.
5470 NumBytes = alignTo(NumBytes, 16);
5471
5472 // FPDiff will be negative if this tail call requires more space than we
5473 // would automatically have in our incoming argument space. Positive if we
5474 // can actually shrink the stack.
5475 FPDiff = NumReusableBytes - NumBytes;
5476
5477 // The stack pointer must be 16-byte aligned at all times it's used for a
5478 // memory operation, which in practice means at *all* times and in
5479 // particular across call boundaries. Therefore our own arguments started at
5480 // a 16-byte aligned SP and the delta applied for the tail call should
5481 // satisfy the same constraint.
5482 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
5483 }
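// Worked example for the computation above (hypothetical numbers): if the
// caller's incoming stack-argument area is 32 bytes (NumReusableBytes) and
// this tail call needs 48 bytes once rounded up to 16, then
// FPDiff = 32 - 48 = -16, i.e. the outgoing arguments start 16 bytes below
// where the caller's own arguments started; that delta is later attached to
// the TC_RETURN node.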
5484
5485 // Adjust the stack pointer for the new arguments...
5486 // These operations are automatically eliminated by the prolog/epilog pass
5487 if (!IsSibCall)
5488 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
5489
5490 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
5491 getPointerTy(DAG.getDataLayout()));
5492
5493 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5494 SmallSet<unsigned, 8> RegsUsed;
5495 SmallVector<SDValue, 8> MemOpChains;
5496 auto PtrVT = getPointerTy(DAG.getDataLayout());
5497
5498 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
5499 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
5500 for (const auto &F : Forwards) {
5501 SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
5502 RegsToPass.emplace_back(F.PReg, Val);
5503 }
5504 }
5505
5506 // Walk the register/memloc assignments, inserting copies/loads.
5507 unsigned ExtraArgLocs = 0;
5508 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5509 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5510 SDValue Arg = OutVals[i];
5511 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5512
5513 // Promote the value if needed.
5514 switch (VA.getLocInfo()) {
5515 default:
5516 llvm_unreachable("Unknown loc info!");
5517 case CCValAssign::Full:
5518 break;
5519 case CCValAssign::SExt:
5520 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
5521 break;
5522 case CCValAssign::ZExt:
5523 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
5524 break;
5525 case CCValAssign::AExt:
5526 if (Outs[i].ArgVT == MVT::i1) {
5527 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
5528 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
5529 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
5530 }
5531 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5532 break;
5533 case CCValAssign::AExtUpper:
5534 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
5535 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5536 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
5537 DAG.getConstant(32, DL, VA.getLocVT()));
5538 break;
5539 case CCValAssign::BCvt:
5540 Arg = DAG.getBitcast(VA.getLocVT(), Arg);
5541 break;
5542 case CCValAssign::Trunc:
5543 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
5544 break;
5545 case CCValAssign::FPExt:
5546 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
5547 break;
5548 case CCValAssign::Indirect:
5549 assert(VA.getValVT().isScalableVector() &&
5550 "Only scalable vectors can be passed indirectly");
5551
5552 uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
5553 uint64_t PartSize = StoreSize;
5554 unsigned NumParts = 1;
5555 if (Outs[i].Flags.isInConsecutiveRegs()) {
5556 assert(!Outs[i].Flags.isInConsecutiveRegsLast());
5557 while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
5558 ++NumParts;
5559 StoreSize *= NumParts;
5560 }
5561
5562 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5563 Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
5564 Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
5565 int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
5566 MFI.setStackID(FI, TargetStackID::ScalableVector);
5567
5568 MachinePointerInfo MPI =
5569 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
5570 SDValue Ptr = DAG.getFrameIndex(
5571 FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
5572 SDValue SpillSlot = Ptr;
5573
5574 // Ensure we generate all stores for each tuple part, whilst updating the
5575 // pointer after each store correctly using vscale.
5576 while (NumParts) {
5577 Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
5578 NumParts--;
5579 if (NumParts > 0) {
5580 SDValue BytesIncrement = DAG.getVScale(
5581 DL, Ptr.getValueType(),
5582 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
5583 SDNodeFlags Flags;
5584 Flags.setNoUnsignedWrap(true);
5585
5586 MPI = MachinePointerInfo(MPI.getAddrSpace());
5587 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5588 BytesIncrement, Flags);
5589 ExtraArgLocs++;
5590 i++;
5591 }
5592 }
5593
5594 Arg = SpillSlot;
5595 break;
5596 }
5597
5598 if (VA.isRegLoc()) {
5599 if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
5600 Outs[0].VT == MVT::i64) {
5601 assert(VA.getLocVT() == MVT::i64 &&
5602 "unexpected calling convention register assignment");
5603 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
5604 "unexpected use of 'returned'");
5605 IsThisReturn = true;
5606 }
5607 if (RegsUsed.count(VA.getLocReg())) {
5608 // If this register has already been used then we're trying to pack
5609 // parts of an [N x i32] into an X-register. The extension type will
5610 // take care of putting the two halves in the right place but we have to
5611 // combine them.
5612 SDValue &Bits =
5613 llvm::find_if(RegsToPass,
5614 [=](const std::pair<unsigned, SDValue> &Elt) {
5615 return Elt.first == VA.getLocReg();
5616 })
5617 ->second;
5618 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
5619 // Call site info is used for a function's parameter entry value
5620 // tracking. For now we track only simple cases where the parameter
5621 // is transferred through a whole register.
5622 llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
5623 return ArgReg.Reg == VA.getLocReg();
5624 });
5625 } else {
5626 RegsToPass.emplace_back(VA.getLocReg(), Arg);
5627 RegsUsed.insert(VA.getLocReg());
5628 const TargetOptions &Options = DAG.getTarget().Options;
5629 if (Options.EmitCallSiteInfo)
5630 CSInfo.emplace_back(VA.getLocReg(), i);
5631 }
5632 } else {
5633 assert(VA.isMemLoc());
5634
5635 SDValue DstAddr;
5636 MachinePointerInfo DstInfo;
5637
5638 // FIXME: This works on big-endian for composite byvals, which are the
5639 // common case. It should also work for fundamental types too.
5640 uint32_t BEAlign = 0;
5641 unsigned OpSize;
5642 if (VA.getLocInfo() == CCValAssign::Indirect)
5643 OpSize = VA.getLocVT().getFixedSizeInBits();
5644 else
5645 OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
5646 : VA.getValVT().getSizeInBits();
5647 OpSize = (OpSize + 7) / 8;
5648 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
5649 !Flags.isInConsecutiveRegs()) {
5650 if (OpSize < 8)
5651 BEAlign = 8 - OpSize;
5652 }
5653 unsigned LocMemOffset = VA.getLocMemOffset();
5654 int32_t Offset = LocMemOffset + BEAlign;
5655 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
5656 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
5657
5658 if (IsTailCall) {
5659 Offset = Offset + FPDiff;
5660 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5661
5662 DstAddr = DAG.getFrameIndex(FI, PtrVT);
5663 DstInfo =
5664 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
5665
5666 // Make sure any stack arguments overlapping with where we're storing
5667 // are loaded before this eventual operation. Otherwise they'll be
5668 // clobbered.
5669 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
5670 } else {
5671 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
5672
5673 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
5674 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
5675 LocMemOffset);
5676 }
5677
5678 if (Outs[i].Flags.isByVal()) {
5679 SDValue SizeNode =
5680 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
5681 SDValue Cpy = DAG.getMemcpy(
5682 Chain, DL, DstAddr, Arg, SizeNode,
5683 Outs[i].Flags.getNonZeroByValAlign(),
5684 /*isVol = */ false, /*AlwaysInline = */ false,
5685 /*isTailCall = */ false, DstInfo, MachinePointerInfo());
5686
5687 MemOpChains.push_back(Cpy);
5688 } else {
5689 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
5690 // promoted to a legal register type i32, we should truncate Arg back to
5691 // i1/i8/i16.
5692 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
5693 VA.getValVT() == MVT::i16)
5694 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
5695
5696 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
5697 MemOpChains.push_back(Store);
5698 }
5699 }
5700 }
5701
5702 if (!MemOpChains.empty())
5703 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5704
5705 // Build a sequence of copy-to-reg nodes chained together with token chain
5706 // and flag operands which copy the outgoing args into the appropriate regs.
5707 SDValue InFlag;
5708 for (auto &RegToPass : RegsToPass) {
5709 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
5710 RegToPass.second, InFlag);
5711 InFlag = Chain.getValue(1);
5712 }
5713
5714 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
5715 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
5716 // node so that legalize doesn't hack it.
5717 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5718 auto GV = G->getGlobal();
5719 unsigned OpFlags =
5720 Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
5721 if (OpFlags & AArch64II::MO_GOT) {
5722 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
5723 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
5724 } else {
5725 const GlobalValue *GV = G->getGlobal();
5726 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
5727 }
5728 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5729 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
5730 Subtarget->isTargetMachO()) {
5731 const char *Sym = S->getSymbol();
5732 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
5733 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
5734 } else {
5735 const char *Sym = S->getSymbol();
5736 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
5737 }
5738 }
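// For example, when classifyGlobalFunctionReference reports MO_GOT (typical
// for a preemptible, default-visibility external function under PIC), the
// branch above wraps the TargetGlobalAddress in AArch64ISD::LOADgot so the
// call is made through the GOT; otherwise the direct TargetGlobalAddress is
// used and the call relocates straight to the symbol.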
5739
5740 // We don't usually want to end the call-sequence here because we would tidy
5741 // the frame up *after* the call. However, in the ABI-changing tail-call case
5742 // we've carefully laid out the parameters so that when sp is reset they'll be
5743 // in the correct location.
5744 if (IsTailCall && !IsSibCall) {
5745 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
5746 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
5747 InFlag = Chain.getValue(1);
5748 }
5749
5750 std::vector<SDValue> Ops;
5751 Ops.push_back(Chain);
5752 Ops.push_back(Callee);
5753
5754 if (IsTailCall) {
5755 // Each tail call may have to adjust the stack by a different amount, so
5756 // this information must travel along with the operation for eventual
5757 // consumption by emitEpilogue.
5758 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
5759 }
5760
5761 // Add argument registers to the end of the list so that they are known live
5762 // into the call.
5763 for (auto &RegToPass : RegsToPass)
5764 Ops.push_back(DAG.getRegister(RegToPass.first,
5765 RegToPass.second.getValueType()));
5766
5767 // Add a register mask operand representing the call-preserved registers.
5768 const uint32_t *Mask;
5769 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5770 if (IsThisReturn) {
5771 // For 'this' returns, use the X0-preserving mask if applicable
5772 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
5773 if (!Mask) {
5774 IsThisReturn = false;
5775 Mask = TRI->getCallPreservedMask(MF, CallConv);
5776 }
5777 } else
5778 Mask = TRI->getCallPreservedMask(MF, CallConv);
5779
5780 if (Subtarget->hasCustomCallingConv())
5781 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
5782
5783 if (TRI->isAnyArgRegReserved(MF))
5784 TRI->emitReservedArgRegCallError(MF);
5785
5786 assert(Mask && "Missing call preserved mask for calling convention");
5787 Ops.push_back(DAG.getRegisterMask(Mask));
5788
5789 if (InFlag.getNode())
5790 Ops.push_back(InFlag);
5791
5792 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5793
5794 // If we're doing a tail call, use a TC_RETURN here rather than an
5795 // actual call instruction.
5796 if (IsTailCall) {
5797 MF.getFrameInfo().setHasTailCall();
5798 SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
5799 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
5800 return Ret;
5801 }
5802
5803 unsigned CallOpc = AArch64ISD::CALL;
5804 // Calls with operand bundle "clang.arc.attachedcall" are special. They should
5805 // be expanded to the call, directly followed by a special marker sequence.
5806 // Use the CALL_RVMARKER to do that.
5807 if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
5808 assert(!IsTailCall &&
5809 "tail calls cannot be marked with clang.arc.attachedcall");
5810 CallOpc = AArch64ISD::CALL_RVMARKER;
5811 }
5812
5813 // Returns a chain and a flag for retval copy to use.
5814 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
5815 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5816 InFlag = Chain.getValue(1);
5817 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
5818
5819 uint64_t CalleePopBytes =
5820 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
5821
5822 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
5823 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
5824 InFlag, DL);
5825 if (!Ins.empty())
5826 InFlag = Chain.getValue(1);
5827
5828 // Handle result values, copying them out of physregs into vregs that we
5829 // return.
5830 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
5831 InVals, IsThisReturn,
5832 IsThisReturn ? OutVals[0] : SDValue());
5833}
5834
5835bool AArch64TargetLowering::CanLowerReturn(
5836 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
5837 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
5838 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
5839 SmallVector<CCValAssign, 16> RVLocs;
5840 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
5841 return CCInfo.CheckReturn(Outs, RetCC);
5842}
5843
5844SDValue
5845AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
5846 bool isVarArg,
5847 const SmallVectorImpl<ISD::OutputArg> &Outs,
5848 const SmallVectorImpl<SDValue> &OutVals,
5849 const SDLoc &DL, SelectionDAG &DAG) const {
5850 auto &MF = DAG.getMachineFunction();
5851 auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5852
5853 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
5854 SmallVector<CCValAssign, 16> RVLocs;
5855 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5856 *DAG.getContext());
5857 CCInfo.AnalyzeReturn(Outs, RetCC);
5858
5859 // Copy the result values into the output registers.
5860 SDValue Flag;
5861 SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
5862 SmallSet<unsigned, 4> RegsUsed;
5863 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
5864 ++i, ++realRVLocIdx) {
5865 CCValAssign &VA = RVLocs[i];
5866 assert(VA.isRegLoc() && "Can only return in registers!");
5867 SDValue Arg = OutVals[realRVLocIdx];
5868
5869 switch (VA.getLocInfo()) {
5870 default:
5871 llvm_unreachable("Unknown loc info!");
5872 case CCValAssign::Full:
5873 if (Outs[i].ArgVT == MVT::i1) {
5874 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
5875 // value. This is strictly redundant on Darwin (which uses "zeroext
5876 // i1"), but will be optimised out before ISel.
5877 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
5878 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
5879 }
5880 break;
5881 case CCValAssign::BCvt:
5882 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
5883 break;
5884 case CCValAssign::AExt:
5885 case CCValAssign::ZExt:
5886 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
5887 break;
5888 case CCValAssign::AExtUpper:
5889 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
5890 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
5891 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
5892 DAG.getConstant(32, DL, VA.getLocVT()));
5893 break;
5894 }
5895
5896 if (RegsUsed.count(VA.getLocReg())) {
5897 SDValue &Bits =
5898 llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
5899 return Elt.first == VA.getLocReg();
5900 })->second;
5901 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
5902 } else {
5903 RetVals.emplace_back(VA.getLocReg(), Arg);
5904 RegsUsed.insert(VA.getLocReg());
5905 }
5906 }
5907
5908 SmallVector<SDValue, 4> RetOps(1, Chain);
5909 for (auto &RetVal : RetVals) {
5910 Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
5911 Flag = Chain.getValue(1);
5912 RetOps.push_back(
5913 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
5914 }
5915
5916 // Windows AArch64 ABIs require that for returning structs by value we copy
5917 // the sret argument into X0 for the return.
5918 // We saved the argument into a virtual register in the entry block,
5919 // so now we copy the value out and into X0.
5920 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
5921 SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
5922 getPointerTy(MF.getDataLayout()));
5923
5924 unsigned RetValReg = AArch64::X0;
5925 Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
5926 Flag = Chain.getValue(1);
5927
5928 RetOps.push_back(
5929 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
5930 }
5931
5932 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5933 const MCPhysReg *I =
5934 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
5935 if (I) {
5936 for (; *I; ++I) {
5937 if (AArch64::GPR64RegClass.contains(*I))
5938 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
5939 else if (AArch64::FPR64RegClass.contains(*I))
5940 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
5941 else
5942 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
5943 }
5944 }
5945
5946 RetOps[0] = Chain; // Update chain.
5947
5948 // Add the flag if we have it.
5949 if (Flag.getNode())
5950 RetOps.push_back(Flag);
5951
5952 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
5953}
5954
5955//===----------------------------------------------------------------------===//
5956// Other Lowering Code
5957//===----------------------------------------------------------------------===//
5958
5959SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
5960 SelectionDAG &DAG,
5961 unsigned Flag) const {
5962 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
5963 N->getOffset(), Flag);
5964}
5965
5966SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
5967 SelectionDAG &DAG,
5968 unsigned Flag) const {
5969 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
5970}
5971
5972SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
5973 SelectionDAG &DAG,
5974 unsigned Flag) const {
5975 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
5976 N->getOffset(), Flag);
5977}
5978
5979SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
5980 SelectionDAG &DAG,
5981 unsigned Flag) const {
5982 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
5983}
5984
5985// (loadGOT sym)
5986template <class NodeTy>
5987SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
5988 unsigned Flags) const {
5989 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
5990 SDLoc DL(N);
5991 EVT Ty = getPointerTy(DAG.getDataLayout());
5992 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
5993 // FIXME: Once remat is capable of dealing with instructions with register
5994 // operands, expand this into two nodes instead of using a wrapper node.
5995 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
5996}
5997
5998// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
5999template <class NodeTy>
6000SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
6001 unsigned Flags) const {
6002 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
6003 SDLoc DL(N);
6004 EVT Ty = getPointerTy(DAG.getDataLayout());
6005 const unsigned char MO_NC = AArch64II::MO_NC;
6006 return DAG.getNode(
6007 AArch64ISD::WrapperLarge, DL, Ty,
6008 getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
6009 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
6010 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
6011 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
6012}
6013
6014// (addlow (adrp %hi(sym)) %lo(sym))
6015template <class NodeTy>
6016SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
6017 unsigned Flags) const {
6018 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
6019 SDLoc DL(N);
6020 EVT Ty = getPointerTy(DAG.getDataLayout());
6021 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
6022 SDValue Lo = getTargetNode(N, Ty, DAG,
6023 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
6024 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
6025 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
6026}
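// Illustrative assembly (not from the source): for a small-code-model symbol
// the ADRP/ADDlow pair built above typically selects to
//
//   adrp x8, sym              // MO_PAGE
//   add  x8, x8, :lo12:sym    // MO_PAGEOFF | MO_NC
//
// whereas getAddrLarge materialises the full 64-bit address from four 16-bit
// fragments (movz/movk) and getAddrTiny below emits a single pc-relative adr.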
6027
6028// (adr sym)
6029template <class NodeTy>
6030SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
6031 unsigned Flags) const {
6032 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
6033 SDLoc DL(N);
6034 EVT Ty = getPointerTy(DAG.getDataLayout());
6035 SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
6036 return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
6037}
6038
6039SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
6040 SelectionDAG &DAG) const {
6041 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);