Bug Summary

File: build/source/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Warning: line 13643, column 22
Called C++ object pointer is null
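
For readers unfamiliar with this diagnostic: it is produced by the analyzer's core checkers (typically core.CallAndMessage) when a member function is invoked through a pointer that may be null on at least one path. A minimal sketch of the defect class, not taken from AArch64ISelLowering.cpp:

    struct Callee {
      int width() const { return 32; }
    };

    int widthOf(const Callee *C, bool HasCallee) {
      const Callee *Ptr = HasCallee ? C : nullptr;
      // When HasCallee is false, Ptr is null here, so the call below is
      // exactly a "Called C++ object pointer is null" defect.
      return Ptr->width();
    }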

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AArch64 -I /build/source/llvm/lib/Target/AArch64 -I include -I /build/source/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1668078801 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-11-10-135928-647445-1 -x c++ /build/source/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64ISelLowering.h"
14#include "AArch64CallingConvention.h"
15#include "AArch64ExpandImm.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/LoopInfo.h"
33#include "llvm/Analysis/MemoryLocation.h"
34#include "llvm/Analysis/ObjCARCUtil.h"
35#include "llvm/Analysis/TargetTransformInfo.h"
36#include "llvm/Analysis/VectorUtils.h"
37#include "llvm/CodeGen/Analysis.h"
38#include "llvm/CodeGen/CallingConvLower.h"
39#include "llvm/CodeGen/ISDOpcodes.h"
40#include "llvm/CodeGen/MachineBasicBlock.h"
41#include "llvm/CodeGen/MachineFrameInfo.h"
42#include "llvm/CodeGen/MachineFunction.h"
43#include "llvm/CodeGen/MachineInstr.h"
44#include "llvm/CodeGen/MachineInstrBuilder.h"
45#include "llvm/CodeGen/MachineMemOperand.h"
46#include "llvm/CodeGen/MachineRegisterInfo.h"
47#include "llvm/CodeGen/RuntimeLibcalls.h"
48#include "llvm/CodeGen/SelectionDAG.h"
49#include "llvm/CodeGen/SelectionDAGNodes.h"
50#include "llvm/CodeGen/TargetCallingConv.h"
51#include "llvm/CodeGen/TargetInstrInfo.h"
52#include "llvm/CodeGen/ValueTypes.h"
53#include "llvm/IR/Attributes.h"
54#include "llvm/IR/Constants.h"
55#include "llvm/IR/DataLayout.h"
56#include "llvm/IR/DebugLoc.h"
57#include "llvm/IR/DerivedTypes.h"
58#include "llvm/IR/Function.h"
59#include "llvm/IR/GetElementPtrTypeIterator.h"
60#include "llvm/IR/GlobalValue.h"
61#include "llvm/IR/IRBuilder.h"
62#include "llvm/IR/Instruction.h"
63#include "llvm/IR/Instructions.h"
64#include "llvm/IR/IntrinsicInst.h"
65#include "llvm/IR/Intrinsics.h"
66#include "llvm/IR/IntrinsicsAArch64.h"
67#include "llvm/IR/Module.h"
68#include "llvm/IR/OperandTraits.h"
69#include "llvm/IR/PatternMatch.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/Value.h"
73#include "llvm/MC/MCRegisterInfo.h"
74#include "llvm/Support/Casting.h"
75#include "llvm/Support/CodeGen.h"
76#include "llvm/Support/CommandLine.h"
77#include "llvm/Support/Compiler.h"
78#include "llvm/Support/Debug.h"
79#include "llvm/Support/ErrorHandling.h"
80#include "llvm/Support/InstructionCost.h"
81#include "llvm/Support/KnownBits.h"
82#include "llvm/Support/MachineValueType.h"
83#include "llvm/Support/MathExtras.h"
84#include "llvm/Support/raw_ostream.h"
85#include "llvm/Target/TargetMachine.h"
86#include "llvm/Target/TargetOptions.h"
87#include <algorithm>
88#include <bitset>
89#include <cassert>
90#include <cctype>
91#include <cstdint>
92#include <cstdlib>
93#include <iterator>
94#include <limits>
95#include <tuple>
96#include <utility>
97#include <vector>
98
99using namespace llvm;
100using namespace llvm::PatternMatch;
101
102#define DEBUG_TYPE "aarch64-lower"
103
104STATISTIC(NumTailCalls, "Number of tail calls");
105STATISTIC(NumShiftInserts, "Number of vector shift inserts");
106STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
107
108// FIXME: The necessary dtprel relocations don't seem to be supported
109// well in the GNU bfd and gold linkers at the moment. Therefore, by
110// default, for now, fall back to GeneralDynamic code generation.
111cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
112 "aarch64-elf-ldtls-generation", cl::Hidden,
113 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
114 cl::init(false));
115
116static cl::opt<bool>
117EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
118 cl::desc("Enable AArch64 logical imm instruction "
119 "optimization"),
120 cl::init(true));
121
122// Temporary option added for the purpose of testing functionality added
123// to DAGCombiner.cpp in D92230. It is expected that this can be removed
124// in future when both implementations will be based off MGATHER rather
125// than the GLD1 nodes added for the SVE gather load intrinsics.
126static cl::opt<bool>
127EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
128 cl::desc("Combine extends of AArch64 masked "
129 "gather intrinsics"),
130 cl::init(true));
131
132/// Value type used for condition codes.
133static const MVT MVT_CC = MVT::i32;
134
135static inline EVT getPackedSVEVectorVT(EVT VT) {
136 switch (VT.getSimpleVT().SimpleTy) {
137 default:
138 llvm_unreachable("unexpected element type for vector");
139 case MVT::i8:
140 return MVT::nxv16i8;
141 case MVT::i16:
142 return MVT::nxv8i16;
143 case MVT::i32:
144 return MVT::nxv4i32;
145 case MVT::i64:
146 return MVT::nxv2i64;
147 case MVT::f16:
148 return MVT::nxv8f16;
149 case MVT::f32:
150 return MVT::nxv4f32;
151 case MVT::f64:
152 return MVT::nxv2f64;
153 case MVT::bf16:
154 return MVT::nxv8bf16;
155 }
156}
157
158// NOTE: Currently there's only a need to return integer vector types. If this
159// changes then just add an extra "type" parameter.
160static inline EVT getPackedSVEVectorVT(ElementCount EC) {
161 switch (EC.getKnownMinValue()) {
162 default:
163 llvm_unreachable("unexpected element count for vector");
164 case 16:
165 return MVT::nxv16i8;
166 case 8:
167 return MVT::nxv8i16;
168 case 4:
169 return MVT::nxv4i32;
170 case 2:
171 return MVT::nxv2i64;
172 }
173}
174
175static inline EVT getPromotedVTForPredicate(EVT VT) {
176 assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
177 "Expected scalable predicate vector type!");
178 switch (VT.getVectorMinNumElements()) {
179 default:
180 llvm_unreachable("unexpected element count for vector");
181 case 2:
182 return MVT::nxv2i64;
183 case 4:
184 return MVT::nxv4i32;
185 case 8:
186 return MVT::nxv8i16;
187 case 16:
188 return MVT::nxv16i8;
189 }
190}
191
192/// Returns true if VT's elements occupy the lowest bit positions of its
193/// associated register class without any intervening space.
194///
195/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
196/// same register class, but only nxv8f16 can be treated as a packed vector.
197static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
198 assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
199 "Expected legal vector type!");
200 return VT.isFixedLengthVector() ||
201 VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
202}
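// Illustration (not part of the upstream file): with AArch64::SVEBitsPerBlock
// equal to 128, nxv8f16 has a known-minimum size of 8 x 16 = 128 bits and is
// therefore packed, while nxv4f16 (64 bits) and nxv2f16 (32 bits) leave the
// upper portion of the Z register unused and are not. Legal fixed-length
// vectors are treated as packed by construction.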
203
204// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
205// predicate and end with a passthru value matching the result type.
206static bool isMergePassthruOpcode(unsigned Opc) {
207 switch (Opc) {
208 default:
209 return false;
210 case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
211 case AArch64ISD::BSWAP_MERGE_PASSTHRU:
212 case AArch64ISD::REVH_MERGE_PASSTHRU:
213 case AArch64ISD::REVW_MERGE_PASSTHRU:
214 case AArch64ISD::REVD_MERGE_PASSTHRU:
215 case AArch64ISD::CTLZ_MERGE_PASSTHRU:
216 case AArch64ISD::CTPOP_MERGE_PASSTHRU:
217 case AArch64ISD::DUP_MERGE_PASSTHRU:
218 case AArch64ISD::ABS_MERGE_PASSTHRU:
219 case AArch64ISD::NEG_MERGE_PASSTHRU:
220 case AArch64ISD::FNEG_MERGE_PASSTHRU:
221 case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
222 case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
223 case AArch64ISD::FCEIL_MERGE_PASSTHRU:
224 case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
225 case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
226 case AArch64ISD::FRINT_MERGE_PASSTHRU:
227 case AArch64ISD::FROUND_MERGE_PASSTHRU:
228 case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
229 case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
230 case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
231 case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
232 case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
233 case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
234 case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
235 case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
236 case AArch64ISD::FSQRT_MERGE_PASSTHRU:
237 case AArch64ISD::FRECPX_MERGE_PASSTHRU:
238 case AArch64ISD::FABS_MERGE_PASSTHRU:
239 return true;
240 }
241}
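// Illustration (not part of the upstream file): a merge-passthru node such as
// AArch64ISD::FABS_MERGE_PASSTHRU takes (Pg, Vn, Passthru). Result lanes where
// the governing predicate Pg is active hold fabs(Vn); inactive lanes are taken
// unchanged from Passthru, whose type matches the result type.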
242
243// Returns true if inactive lanes are known to be zeroed by construction.
244static bool isZeroingInactiveLanes(SDValue Op) {
245 switch (Op.getOpcode()) {
246 default:
247 // We guarantee i1 splat_vectors to zero the other lanes by
248 // implementing it with ptrue and possibly a punpklo for nxv1i1.
249 if (ISD::isConstantSplatVectorAllOnes(Op.getNode()))
250 return true;
251 return false;
252 case AArch64ISD::PTRUE:
253 case AArch64ISD::SETCC_MERGE_ZERO:
254 return true;
255 case ISD::INTRINSIC_WO_CHAIN:
256 switch (Op.getConstantOperandVal(0)) {
257 default:
258 return false;
259 case Intrinsic::aarch64_sve_ptrue:
260 case Intrinsic::aarch64_sve_pnext:
261 case Intrinsic::aarch64_sve_cmpeq:
262 case Intrinsic::aarch64_sve_cmpne:
263 case Intrinsic::aarch64_sve_cmpge:
264 case Intrinsic::aarch64_sve_cmpgt:
265 case Intrinsic::aarch64_sve_cmphs:
266 case Intrinsic::aarch64_sve_cmphi:
267 case Intrinsic::aarch64_sve_cmpeq_wide:
268 case Intrinsic::aarch64_sve_cmpne_wide:
269 case Intrinsic::aarch64_sve_cmpge_wide:
270 case Intrinsic::aarch64_sve_cmpgt_wide:
271 case Intrinsic::aarch64_sve_cmplt_wide:
272 case Intrinsic::aarch64_sve_cmple_wide:
273 case Intrinsic::aarch64_sve_cmphs_wide:
274 case Intrinsic::aarch64_sve_cmphi_wide:
275 case Intrinsic::aarch64_sve_cmplo_wide:
276 case Intrinsic::aarch64_sve_cmpls_wide:
277 case Intrinsic::aarch64_sve_fcmpeq:
278 case Intrinsic::aarch64_sve_fcmpne:
279 case Intrinsic::aarch64_sve_fcmpge:
280 case Intrinsic::aarch64_sve_fcmpgt:
281 case Intrinsic::aarch64_sve_fcmpuo:
282 return true;
283 }
284 }
285}
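// Illustration (not part of the upstream file): PTRUE sets only the lanes
// selected by its pattern and leaves the remaining lanes false, and
// SETCC_MERGE_ZERO writes zero to lanes where its governing predicate is
// inactive, so both yield predicates whose inactive lanes are known zero.
// The SVE compare intrinsics listed above share this property, which is why
// they are accepted here.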
286
287AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
288 const AArch64Subtarget &STI)
289 : TargetLowering(TM), Subtarget(&STI) {
290 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
291 // we have to make something up. Arbitrarily, choose ZeroOrOne.
292 setBooleanContents(ZeroOrOneBooleanContent);
293 // When comparing vectors the result sets the different elements in the
294 // vector to all-one or all-zero.
295 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
296
297 // Set up the register classes.
298 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
299 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
300
301 if (Subtarget->hasLS64()) {
302 addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
303 setOperationAction(ISD::LOAD, MVT::i64x8, Custom);
304 setOperationAction(ISD::STORE, MVT::i64x8, Custom);
305 }
306
307 if (Subtarget->hasFPARMv8()) {
308 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
309 addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
310 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
311 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
312 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
313 }
314
315 if (Subtarget->hasNEON()) {
316 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
317 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
318 // Someone set us up the NEON.
319 addDRTypeForNEON(MVT::v2f32);
320 addDRTypeForNEON(MVT::v8i8);
321 addDRTypeForNEON(MVT::v4i16);
322 addDRTypeForNEON(MVT::v2i32);
323 addDRTypeForNEON(MVT::v1i64);
324 addDRTypeForNEON(MVT::v1f64);
325 addDRTypeForNEON(MVT::v4f16);
326 if (Subtarget->hasBF16())
327 addDRTypeForNEON(MVT::v4bf16);
328
329 addQRTypeForNEON(MVT::v4f32);
330 addQRTypeForNEON(MVT::v2f64);
331 addQRTypeForNEON(MVT::v16i8);
332 addQRTypeForNEON(MVT::v8i16);
333 addQRTypeForNEON(MVT::v4i32);
334 addQRTypeForNEON(MVT::v2i64);
335 addQRTypeForNEON(MVT::v8f16);
336 if (Subtarget->hasBF16())
337 addQRTypeForNEON(MVT::v8bf16);
338 }
339
340 if (Subtarget->hasSVE() || Subtarget->hasSME()) {
341 // Add legal sve predicate types
342 addRegisterClass(MVT::nxv1i1, &AArch64::PPRRegClass);
343 addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
344 addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
345 addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
346 addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
347
348 // Add legal sve data types
349 addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
350 addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
351 addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
352 addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
353
354 addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
355 addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
356 addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
357 addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
358 addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
359 addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
360
361 if (Subtarget->hasBF16()) {
362 addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
363 addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
364 addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
365 }
366
367 if (Subtarget->useSVEForFixedLengthVectors()) {
368 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
369 if (useSVEForFixedLengthVectorVT(VT))
370 addRegisterClass(VT, &AArch64::ZPRRegClass);
371
372 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
373 if (useSVEForFixedLengthVectorVT(VT))
374 addRegisterClass(VT, &AArch64::ZPRRegClass);
375 }
376 }
377
378 // Compute derived properties from the register classes
379 computeRegisterProperties(Subtarget->getRegisterInfo());
380
381 // Provide all sorts of operation actions
382 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
383 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
384 setOperationAction(ISD::SETCC, MVT::i32, Custom);
385 setOperationAction(ISD::SETCC, MVT::i64, Custom);
386 setOperationAction(ISD::SETCC, MVT::f16, Custom);
387 setOperationAction(ISD::SETCC, MVT::f32, Custom);
388 setOperationAction(ISD::SETCC, MVT::f64, Custom);
389 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
390 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
391 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
392 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
393 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
394 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
395 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
396 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
397 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
398 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
399 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
400 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
401 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
402 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
403 setOperationAction(ISD::SELECT, MVT::i32, Custom);
404 setOperationAction(ISD::SELECT, MVT::i64, Custom);
405 setOperationAction(ISD::SELECT, MVT::f16, Custom);
406 setOperationAction(ISD::SELECT, MVT::bf16, Custom);
407 setOperationAction(ISD::SELECT, MVT::f32, Custom);
408 setOperationAction(ISD::SELECT, MVT::f64, Custom);
409 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
410 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
411 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
412 setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
413 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
414 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
415 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
416 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
417 setOperationAction(ISD::SETCCCARRY, MVT::i64, Custom);
418
419 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
420 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
421 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
422
423 setOperationAction(ISD::FREM, MVT::f32, Expand);
424 setOperationAction(ISD::FREM, MVT::f64, Expand);
425 setOperationAction(ISD::FREM, MVT::f80, Expand);
426
427 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
428
429 // Custom lowering hooks are needed for XOR
430 // to fold it into CSINC/CSINV.
431 setOperationAction(ISD::XOR, MVT::i32, Custom);
432 setOperationAction(ISD::XOR, MVT::i64, Custom);
433
434 // Virtually no operation on f128 is legal, but LLVM can't expand them when
435 // there's a valid register class, so we need custom operations in most cases.
436 setOperationAction(ISD::FABS, MVT::f128, Expand);
437 setOperationAction(ISD::FADD, MVT::f128, LibCall);
438 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
439 setOperationAction(ISD::FCOS, MVT::f128, Expand);
440 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
441 setOperationAction(ISD::FMA, MVT::f128, Expand);
442 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
443 setOperationAction(ISD::FNEG, MVT::f128, Expand);
444 setOperationAction(ISD::FPOW, MVT::f128, Expand);
445 setOperationAction(ISD::FREM, MVT::f128, Expand);
446 setOperationAction(ISD::FRINT, MVT::f128, Expand);
447 setOperationAction(ISD::FSIN, MVT::f128, Expand);
448 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
449 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
450 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
451 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
452 setOperationAction(ISD::SETCC, MVT::f128, Custom);
453 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
454 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
455 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
456 setOperationAction(ISD::SELECT, MVT::f128, Custom);
457 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
458 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
459 // FIXME: f128 FMINIMUM and FMAXIMUM (including STRICT versions) currently
460 // aren't handled.
461
462 // Lowering for many of the conversions is actually specified by the non-f128
463 // type. The LowerXXX function will be trivial when f128 isn't involved.
464 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
465 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
466 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
467 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
468 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
469 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
470 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
471 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
472 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
473 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
474 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
475 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
476 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
477 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
478 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
479 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
480 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
481 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
482 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
483 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
484 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
485 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
486 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
487 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
488 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
489 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
490 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
491 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
492 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
493 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
494
495 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
496 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
497 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
498 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
499
500 // Variable arguments.
501 setOperationAction(ISD::VASTART, MVT::Other, Custom);
502 setOperationAction(ISD::VAARG, MVT::Other, Custom);
503 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
504 setOperationAction(ISD::VAEND, MVT::Other, Expand);
505
506 // Variable-sized objects.
507 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
508 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
509
510 if (Subtarget->isTargetWindows())
511 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
512 else
513 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
514
515 // Constant pool entries
516 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
517
518 // BlockAddress
519 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
520
521 // AArch64 lacks both left-rotate and popcount instructions.
522 setOperationAction(ISD::ROTL, MVT::i32, Expand);
523 setOperationAction(ISD::ROTL, MVT::i64, Expand);
524 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
525 setOperationAction(ISD::ROTL, VT, Expand);
526 setOperationAction(ISD::ROTR, VT, Expand);
527 }
528
529 // AArch64 doesn't have i32 MULH{S|U}.
530 setOperationAction(ISD::MULHU, MVT::i32, Expand);
531 setOperationAction(ISD::MULHS, MVT::i32, Expand);
532
533 // AArch64 doesn't have {U|S}MUL_LOHI.
534 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
535 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
536
537 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
538 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
539 setOperationAction(ISD::CTPOP, MVT::i128, Custom);
540
541 setOperationAction(ISD::PARITY, MVT::i64, Custom);
542 setOperationAction(ISD::PARITY, MVT::i128, Custom);
543
544 setOperationAction(ISD::ABS, MVT::i32, Custom);
545 setOperationAction(ISD::ABS, MVT::i64, Custom);
546
547 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
548 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
549 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
550 setOperationAction(ISD::SDIVREM, VT, Expand);
551 setOperationAction(ISD::UDIVREM, VT, Expand);
552 }
553 setOperationAction(ISD::SREM, MVT::i32, Expand);
554 setOperationAction(ISD::SREM, MVT::i64, Expand);
555 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
556 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
557 setOperationAction(ISD::UREM, MVT::i32, Expand);
558 setOperationAction(ISD::UREM, MVT::i64, Expand);
559
560 // Custom lower Add/Sub/Mul with overflow.
561 setOperationAction(ISD::SADDO, MVT::i32, Custom);
562 setOperationAction(ISD::SADDO, MVT::i64, Custom);
563 setOperationAction(ISD::UADDO, MVT::i32, Custom);
564 setOperationAction(ISD::UADDO, MVT::i64, Custom);
565 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
566 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
567 setOperationAction(ISD::USUBO, MVT::i32, Custom);
568 setOperationAction(ISD::USUBO, MVT::i64, Custom);
569 setOperationAction(ISD::SMULO, MVT::i32, Custom);
570 setOperationAction(ISD::SMULO, MVT::i64, Custom);
571 setOperationAction(ISD::UMULO, MVT::i32, Custom);
572 setOperationAction(ISD::UMULO, MVT::i64, Custom);
573
574 setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
575 setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
576 setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
577 setOperationAction(ISD::SUBCARRY, MVT::i64, Custom);
578 setOperationAction(ISD::SADDO_CARRY, MVT::i32, Custom);
579 setOperationAction(ISD::SADDO_CARRY, MVT::i64, Custom);
580 setOperationAction(ISD::SSUBO_CARRY, MVT::i32, Custom);
581 setOperationAction(ISD::SSUBO_CARRY, MVT::i64, Custom);
582
583 setOperationAction(ISD::FSIN, MVT::f32, Expand);
584 setOperationAction(ISD::FSIN, MVT::f64, Expand);
585 setOperationAction(ISD::FCOS, MVT::f32, Expand);
586 setOperationAction(ISD::FCOS, MVT::f64, Expand);
587 setOperationAction(ISD::FPOW, MVT::f32, Expand);
588 setOperationAction(ISD::FPOW, MVT::f64, Expand);
589 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
590 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
591 if (Subtarget->hasFullFP16())
592 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
593 else
594 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
595
596 for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
597 ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
598 ISD::FEXP, ISD::FEXP2, ISD::FLOG,
599 ISD::FLOG2, ISD::FLOG10, ISD::STRICT_FREM,
600 ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
601 ISD::STRICT_FSIN, ISD::STRICT_FEXP, ISD::STRICT_FEXP2,
602 ISD::STRICT_FLOG, ISD::STRICT_FLOG2, ISD::STRICT_FLOG10}) {
603 setOperationAction(Op, MVT::f16, Promote);
604 setOperationAction(Op, MVT::v4f16, Expand);
605 setOperationAction(Op, MVT::v8f16, Expand);
606 }
607
608 if (!Subtarget->hasFullFP16()) {
609 for (auto Op :
610 {ISD::SETCC, ISD::SELECT_CC,
611 ISD::BR_CC, ISD::FADD, ISD::FSUB,
612 ISD::FMUL, ISD::FDIV, ISD::FMA,
613 ISD::FNEG, ISD::FABS, ISD::FCEIL,
614 ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
615 ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN,
616 ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM,
617 ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD,
618 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
619 ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
620 ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT,
621 ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
622 ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM,
623 ISD::STRICT_FMAXIMUM})
624 setOperationAction(Op, MVT::f16, Promote);
625
 626 // Round-to-integer needs custom lowering for fp16, as Promote doesn't work
627 // because the result type is integer.
628 for (auto Op : {ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
629 ISD::STRICT_LLRINT})
630 setOperationAction(Op, MVT::f16, Custom);
631
632 // promote v4f16 to v4f32 when that is known to be safe.
633 setOperationPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
634 setOperationPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
635 setOperationPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
636 setOperationPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
637
638 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
639 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
640 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
641 setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
642 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
643 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
644 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
645 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
646 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
647 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
648 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
649 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
650 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
651 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
652 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
653 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
654
655 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
656 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
657 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
658 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
659 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
660 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
661 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
662 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
663 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
664 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
665 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
666 setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
667 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
668 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
669 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
670 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
671 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
672 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
673 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
674 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
675 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
676 }
677
678 // AArch64 has implementations of a lot of rounding-like FP operations.
679 for (auto Op :
680 {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL,
681 ISD::FRINT, ISD::FTRUNC, ISD::FROUND,
682 ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
683 ISD::FMINIMUM, ISD::FMAXIMUM, ISD::LROUND,
684 ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
685 ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FNEARBYINT,
686 ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
687 ISD::STRICT_FROUND, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM,
688 ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_LROUND,
689 ISD::STRICT_LLROUND, ISD::STRICT_LRINT, ISD::STRICT_LLRINT}) {
690 for (MVT Ty : {MVT::f32, MVT::f64})
691 setOperationAction(Op, Ty, Legal);
692 if (Subtarget->hasFullFP16())
693 setOperationAction(Op, MVT::f16, Legal);
694 }
695
696 // Basic strict FP operations are legal
697 for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
698 ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT}) {
699 for (MVT Ty : {MVT::f32, MVT::f64})
700 setOperationAction(Op, Ty, Legal);
701 if (Subtarget->hasFullFP16())
702 setOperationAction(Op, MVT::f16, Legal);
703 }
704
705 // Strict conversion to a larger type is legal
706 for (auto VT : {MVT::f32, MVT::f64})
707 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
708
709 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
710
711 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
712 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
713
714 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
715 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
716 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
717 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
718 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
719
720 // Generate outline atomics library calls only if LSE was not specified for
721 // subtarget
722 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
723 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
724 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
725 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
726 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
727 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
728 setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
729 setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
730 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
731 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
732 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
733 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
734 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
735 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
736 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
737 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
738 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
739 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
740 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
741 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
742 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
743 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
744 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
745 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
746 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
747 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
748#define LCALLNAMES(A, B, N) \
749 setLibcallName(A##N##_RELAX, #B #N "_relax"); \
750 setLibcallName(A##N##_ACQ, #B #N "_acq"); \
751 setLibcallName(A##N##_REL, #B #N "_rel"); \
752 setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
753#define LCALLNAME4(A, B) \
754 LCALLNAMES(A, B, 1) \
755 LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
756#define LCALLNAME5(A, B) \
757 LCALLNAMES(A, B, 1) \
758 LCALLNAMES(A, B, 2) \
759 LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
760 LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
761 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
762 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
763 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
764 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
765 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
766#undef LCALLNAMES
767#undef LCALLNAME4
768#undef LCALLNAME5
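// Worked expansion (illustrative): LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP,
// __aarch64_swp) instantiates LCALLNAMES for N = 1, 2, 4 and 8; for N = 4
// this emits, among others,
//   setLibcallName(RTLIB::OUTLINE_ATOMIC_SWP4_ACQ, "__aarch64_swp4_acq");
// i.e. one outlined-atomic library call name per access width and
// memory-ordering suffix (_relax, _acq, _rel, _acq_rel).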
769 }
770
771 // 128-bit loads and stores can be done without expanding
772 setOperationAction(ISD::LOAD, MVT::i128, Custom);
773 setOperationAction(ISD::STORE, MVT::i128, Custom);
774
775 // Aligned 128-bit loads and stores are single-copy atomic according to the
776 // v8.4a spec.
777 if (Subtarget->hasLSE2()) {
778 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
779 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
780 }
781
782 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
783 // custom lowering, as there are no un-paired non-temporal stores and
784 // legalization will break up 256 bit inputs.
785 setOperationAction(ISD::STORE, MVT::v32i8, Custom);
786 setOperationAction(ISD::STORE, MVT::v16i16, Custom);
787 setOperationAction(ISD::STORE, MVT::v16f16, Custom);
788 setOperationAction(ISD::STORE, MVT::v8i32, Custom);
789 setOperationAction(ISD::STORE, MVT::v8f32, Custom);
790 setOperationAction(ISD::STORE, MVT::v4f64, Custom);
791 setOperationAction(ISD::STORE, MVT::v4i64, Custom);
792
793 // 256 bit non-temporal loads can be lowered to LDNP. This is done using
 794 // custom lowering, as there are no un-paired non-temporal loads and
795 // will break up 256 bit inputs.
796 setOperationAction(ISD::LOAD, MVT::v32i8, Custom);
797 setOperationAction(ISD::LOAD, MVT::v16i16, Custom);
798 setOperationAction(ISD::LOAD, MVT::v16f16, Custom);
799 setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
800 setOperationAction(ISD::LOAD, MVT::v8f32, Custom);
801 setOperationAction(ISD::LOAD, MVT::v4f64, Custom);
802 setOperationAction(ISD::LOAD, MVT::v4i64, Custom);
803
804 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
805 // This requires the Performance Monitors extension.
806 if (Subtarget->hasPerfMon())
807 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
808
809 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
810 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
811 // Issue __sincos_stret if available.
812 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
813 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
814 } else {
815 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
816 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
817 }
818
819 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
820 // MSVCRT doesn't have powi; fall back to pow
821 setLibcallName(RTLIB::POWI_F32, nullptr);
822 setLibcallName(RTLIB::POWI_F64, nullptr);
823 }
824
825 // Make floating-point constants legal for the large code model, so they don't
826 // become loads from the constant pool.
827 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
828 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
829 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
830 }
831
832 // AArch64 does not have floating-point extending loads, i1 sign-extending
833 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
834 for (MVT VT : MVT::fp_valuetypes()) {
835 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
836 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
837 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
838 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
839 }
840 for (MVT VT : MVT::integer_valuetypes())
841 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
842
843 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
844 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
845 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
846 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
847 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
848 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
849 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
850
851 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
852 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
853 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
854
855 // Indexed loads and stores are supported.
856 for (unsigned im = (unsigned)ISD::PRE_INC;
857 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
858 setIndexedLoadAction(im, MVT::i8, Legal);
859 setIndexedLoadAction(im, MVT::i16, Legal);
860 setIndexedLoadAction(im, MVT::i32, Legal);
861 setIndexedLoadAction(im, MVT::i64, Legal);
862 setIndexedLoadAction(im, MVT::f64, Legal);
863 setIndexedLoadAction(im, MVT::f32, Legal);
864 setIndexedLoadAction(im, MVT::f16, Legal);
865 setIndexedLoadAction(im, MVT::bf16, Legal);
866 setIndexedStoreAction(im, MVT::i8, Legal);
867 setIndexedStoreAction(im, MVT::i16, Legal);
868 setIndexedStoreAction(im, MVT::i32, Legal);
869 setIndexedStoreAction(im, MVT::i64, Legal);
870 setIndexedStoreAction(im, MVT::f64, Legal);
871 setIndexedStoreAction(im, MVT::f32, Legal);
872 setIndexedStoreAction(im, MVT::f16, Legal);
873 setIndexedStoreAction(im, MVT::bf16, Legal);
874 }
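// Illustration (not part of the upstream file): the pre-/post-indexed modes
// made Legal above correspond to AArch64 addressing forms such as
//   ldr x0, [x1, #16]!   // pre-indexed: load from x1+16, write x1+16 back
//   ldr x0, [x1], #16    // post-indexed: load from x1, then add 16 to x1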
875
876 // Trap.
877 setOperationAction(ISD::TRAP, MVT::Other, Legal);
878 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
879 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
880
881 // We combine OR nodes for bitfield operations.
882 setTargetDAGCombine(ISD::OR);
883 // Try to create BICs for vector ANDs.
884 setTargetDAGCombine(ISD::AND);
885
886 // Vector add and sub nodes may conceal a high-half opportunity.
 887 // Also, try to fold ADD into CSINC/CSINV.
888 setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP,
889 ISD::UINT_TO_FP});
890
891 setTargetDAGCombine({ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT,
892 ISD::FP_TO_UINT_SAT, ISD::FDIV});
893
894 // Try and combine setcc with csel
895 setTargetDAGCombine(ISD::SETCC);
896
897 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
898
899 setTargetDAGCombine({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND,
900 ISD::VECTOR_SPLICE, ISD::SIGN_EXTEND_INREG,
901 ISD::CONCAT_VECTORS, ISD::EXTRACT_SUBVECTOR,
902 ISD::INSERT_SUBVECTOR, ISD::STORE, ISD::BUILD_VECTOR});
903 setTargetDAGCombine(ISD::LOAD);
904
905 setTargetDAGCombine(ISD::MSTORE);
906
907 setTargetDAGCombine(ISD::MUL);
908
909 setTargetDAGCombine({ISD::SELECT, ISD::VSELECT});
910
911 setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
912 ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
913 ISD::VECREDUCE_ADD, ISD::STEP_VECTOR});
914
915 setTargetDAGCombine({ISD::MGATHER, ISD::MSCATTER});
916
917 setTargetDAGCombine(ISD::FP_EXTEND);
918
919 setTargetDAGCombine(ISD::GlobalAddress);
920
921 // In case of strict alignment, avoid an excessive number of byte wide stores.
922 MaxStoresPerMemsetOptSize = 8;
923 MaxStoresPerMemset =
924 Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;
925
926 MaxGluedStoresPerMemcpy = 4;
927 MaxStoresPerMemcpyOptSize = 4;
928 MaxStoresPerMemcpy =
929 Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
930
931 MaxStoresPerMemmoveOptSize = 4;
932 MaxStoresPerMemmove = 4;
933
934 MaxLoadsPerMemcmpOptSize = 4;
935 MaxLoadsPerMemcmp =
936 Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8;
937
938 setStackPointerRegisterToSaveRestore(AArch64::SP);
939
940 setSchedulingPreference(Sched::Hybrid);
941
942 EnableExtLdPromotion = true;
943
944 // Set required alignment.
945 setMinFunctionAlignment(Align(4));
946 // Set preferred alignments.
947 setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
948 setMaxBytesForAlignment(STI.getMaxBytesForLoopAlignment());
949 setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
950
951 // Only change the limit for entries in a jump table if specified by
952 // the sub target, but not at the command line.
953 unsigned MaxJT = STI.getMaximumJumpTableSize();
 954 if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
955 setMaximumJumpTableSize(MaxJT);
956
957 setHasExtractBitsInsn(true);
958
959 setMaxDivRemBitWidthSupported(128);
960
961 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
962
963 if (Subtarget->hasNEON()) {
964 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
965 // silliness like this:
966 for (auto Op :
967 {ISD::SELECT, ISD::SELECT_CC, ISD::SETCC,
968 ISD::BR_CC, ISD::FADD, ISD::FSUB,
969 ISD::FMUL, ISD::FDIV, ISD::FMA,
970 ISD::FNEG, ISD::FABS, ISD::FCEIL,
971 ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
972 ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN,
973 ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM,
974 ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD,
975 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
976 ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
977 ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT,
978 ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
979 ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM,
980 ISD::STRICT_FMAXIMUM})
981 setOperationAction(Op, MVT::v1f64, Expand);
982
983 for (auto Op :
984 {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP,
985 ISD::FP_ROUND, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, ISD::MUL,
986 ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT,
987 ISD::STRICT_SINT_TO_FP, ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_ROUND})
988 setOperationAction(Op, MVT::v1i64, Expand);
989
990 // AArch64 doesn't have a direct vector ->f32 conversion instructions for
991 // elements smaller than i32, so promote the input to i32 first.
992 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
993 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
994
995 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
 996 // Or, direct i32 -> f16 vector conversion. Set it to Custom, so the
997 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
998 for (auto Op : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
999 ISD::STRICT_UINT_TO_FP})
1000 for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
1001 setOperationAction(Op, VT, Custom);
1002
1003 if (Subtarget->hasFullFP16()) {
1004 setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
1005
1006 setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom);
1007 setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
1008 setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Custom);
1009 setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
1010 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1011 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1012 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
1013 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1014 } else {
1015 // when AArch64 doesn't have fullfp16 support, promote the input
1016 // to i32 first.
1017 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
1018 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
1019 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
1020 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
1021 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
1022 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
1023 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
1024 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
1025 }
1026
1027 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
1028 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
1029 setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
1030 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
1031 setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
1032 setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
1033 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1034 setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
1035 for (auto VT : {MVT::v1i64, MVT::v2i64}) {
1036 setOperationAction(ISD::UMAX, VT, Custom);
1037 setOperationAction(ISD::SMAX, VT, Custom);
1038 setOperationAction(ISD::UMIN, VT, Custom);
1039 setOperationAction(ISD::SMIN, VT, Custom);
1040 }
1041
1042 // AArch64 doesn't have MUL.2d:
1043 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
1044 // Custom handling for some quad-vector types to detect MULL.
1045 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
1046 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1047 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1048
1049 // Saturates
1050 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1051 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1052 setOperationAction(ISD::SADDSAT, VT, Legal);
1053 setOperationAction(ISD::UADDSAT, VT, Legal);
1054 setOperationAction(ISD::SSUBSAT, VT, Legal);
1055 setOperationAction(ISD::USUBSAT, VT, Legal);
1056 }
1057
1058 for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1059 MVT::v4i32}) {
1060 setOperationAction(ISD::AVGFLOORS, VT, Legal);
1061 setOperationAction(ISD::AVGFLOORU, VT, Legal);
1062 setOperationAction(ISD::AVGCEILS, VT, Legal);
1063 setOperationAction(ISD::AVGCEILU, VT, Legal);
1064 setOperationAction(ISD::ABDS, VT, Legal);
1065 setOperationAction(ISD::ABDU, VT, Legal);
1066 }
1067
1068 // Vector reductions
1069 for (MVT VT : { MVT::v4f16, MVT::v2f32,
1070 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1071 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1072 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1073 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1074
1075 setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
1076 }
1077 }
1078 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1079 MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1080 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1081 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1082 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1083 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1084 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1085 }
1086 setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
1087
1088 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
1089 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
1090 // Likewise, narrowing and extending vector loads/stores aren't handled
1091 // directly.
1092 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1093 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
1094
1095 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
1096 setOperationAction(ISD::MULHS, VT, Legal);
1097 setOperationAction(ISD::MULHU, VT, Legal);
1098 } else {
1099 setOperationAction(ISD::MULHS, VT, Expand);
1100 setOperationAction(ISD::MULHU, VT, Expand);
1101 }
1102 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1103 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1104
1105 setOperationAction(ISD::BSWAP, VT, Expand);
1106 setOperationAction(ISD::CTTZ, VT, Expand);
1107
1108 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1109 setTruncStoreAction(VT, InnerVT, Expand);
1110 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1111 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1112 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1113 }
1114 }
1115
1116 // AArch64 has implementations of a lot of rounding-like FP operations.
1117 for (auto Op :
1118 {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
1119 ISD::FROUND, ISD::FROUNDEVEN, ISD::STRICT_FFLOOR,
1120 ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL, ISD::STRICT_FRINT,
1121 ISD::STRICT_FTRUNC, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN}) {
1122 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
1123 setOperationAction(Op, Ty, Legal);
1124 if (Subtarget->hasFullFP16())
1125 for (MVT Ty : {MVT::v4f16, MVT::v8f16})
1126 setOperationAction(Op, Ty, Legal);
1127 }
1128
1129 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
1130
1131 setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1132 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1133 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1134 setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1135 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1136 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1137
1138 // ADDP custom lowering
1139 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1140 setOperationAction(ISD::ADD, VT, Custom);
1141 // FADDP custom lowering
1142 for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
1143 setOperationAction(ISD::FADD, VT, Custom);
1144 }
1145
1146 if (Subtarget->hasSME()) {
1147 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1148 }
1149
1150 // FIXME: Move lowering for more nodes here if those are common between
1151 // SVE and SME.
1152 if (Subtarget->hasSVE() || Subtarget->hasSME()) {
1153 for (auto VT :
1154 {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
1155 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1156 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1157 }
1158 }
1159
1160 if (Subtarget->hasSME())
1161 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1162
1163 if (Subtarget->hasSVE()) {
1164 for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
1165 setOperationAction(ISD::BITREVERSE, VT, Custom);
1166 setOperationAction(ISD::BSWAP, VT, Custom);
1167 setOperationAction(ISD::CTLZ, VT, Custom);
1168 setOperationAction(ISD::CTPOP, VT, Custom);
1169 setOperationAction(ISD::CTTZ, VT, Custom);
1170 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1171 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1172 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1173 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1174 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1175 setOperationAction(ISD::MGATHER, VT, Custom);
1176 setOperationAction(ISD::MSCATTER, VT, Custom);
1177 setOperationAction(ISD::MLOAD, VT, Custom);
1178 setOperationAction(ISD::MUL, VT, Custom);
1179 setOperationAction(ISD::MULHS, VT, Custom);
1180 setOperationAction(ISD::MULHU, VT, Custom);
1181 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
1182 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1183 setOperationAction(ISD::SELECT, VT, Custom);
1184 setOperationAction(ISD::SETCC, VT, Custom);
1185 setOperationAction(ISD::SDIV, VT, Custom);
1186 setOperationAction(ISD::UDIV, VT, Custom);
1187 setOperationAction(ISD::SMIN, VT, Custom);
1188 setOperationAction(ISD::UMIN, VT, Custom);
1189 setOperationAction(ISD::SMAX, VT, Custom);
1190 setOperationAction(ISD::UMAX, VT, Custom);
1191 setOperationAction(ISD::SHL, VT, Custom);
1192 setOperationAction(ISD::SRL, VT, Custom);
1193 setOperationAction(ISD::SRA, VT, Custom);
1194 setOperationAction(ISD::ABS, VT, Custom);
1195 setOperationAction(ISD::ABDS, VT, Custom);
1196 setOperationAction(ISD::ABDU, VT, Custom);
1197 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1198 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1199 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1200 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1201 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1202 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1203 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1204 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1205 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1206
1207 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1208 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1209 setOperationAction(ISD::SELECT_CC, VT, Expand);
1210 setOperationAction(ISD::ROTL, VT, Expand);
1211 setOperationAction(ISD::ROTR, VT, Expand);
1212
1213 setOperationAction(ISD::SADDSAT, VT, Legal);
1214 setOperationAction(ISD::UADDSAT, VT, Legal);
1215 setOperationAction(ISD::SSUBSAT, VT, Legal);
1216 setOperationAction(ISD::USUBSAT, VT, Legal);
1217 setOperationAction(ISD::UREM, VT, Expand);
1218 setOperationAction(ISD::SREM, VT, Expand);
1219 setOperationAction(ISD::SDIVREM, VT, Expand);
1220 setOperationAction(ISD::UDIVREM, VT, Expand);
1221 }
1222
1223 // Illegal unpacked integer vector types.
1224 for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
1225 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1226 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1227 }
1228
1229 // Legalize unpacked bitcasts to REINTERPRET_CAST.
1230 for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
1231 MVT::nxv4bf16, MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
1232 setOperationAction(ISD::BITCAST, VT, Custom);
1233
1234 for (auto VT :
1235 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
1236 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
1237 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
1238
1239 for (auto VT :
1240 {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
1241 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1242 setOperationAction(ISD::SELECT, VT, Custom);
1243 setOperationAction(ISD::SETCC, VT, Custom);
1244 setOperationAction(ISD::TRUNCATE, VT, Custom);
1245 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1246 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1247 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1248
1249 setOperationAction(ISD::SELECT_CC, VT, Expand);
1250 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1251 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1252
1253 // There are no legal MVT::nxv16f## based types.
1254 if (VT != MVT::nxv16i1) {
1255 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1256 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1257 }
1258 }
1259
1260 // NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
1261 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
1262 MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1263 MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1264 setOperationAction(ISD::MLOAD, VT, Custom);
1265 setOperationAction(ISD::MSTORE, VT, Custom);
1266 setOperationAction(ISD::MGATHER, VT, Custom);
1267 setOperationAction(ISD::MSCATTER, VT, Custom);
1268 }
1269
1270 // First, exclude all scalable vector extending loads/truncating stores,
1271 // covering both integer and floating-point scalable vectors.
1272 for (MVT VT : MVT::scalable_vector_valuetypes()) {
1273 for (MVT InnerVT : MVT::scalable_vector_valuetypes()) {
1274 setTruncStoreAction(VT, InnerVT, Expand);
1275 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1276 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1277 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1278 }
1279 }
1280
1281 // Then, selectively enable those which we directly support.
1282 setTruncStoreAction(MVT::nxv2i64, MVT::nxv2i8, Legal);
1283 setTruncStoreAction(MVT::nxv2i64, MVT::nxv2i16, Legal);
1284 setTruncStoreAction(MVT::nxv2i64, MVT::nxv2i32, Legal);
1285 setTruncStoreAction(MVT::nxv4i32, MVT::nxv4i8, Legal);
1286 setTruncStoreAction(MVT::nxv4i32, MVT::nxv4i16, Legal);
1287 setTruncStoreAction(MVT::nxv8i16, MVT::nxv8i8, Legal);
1288 for (auto Op : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
1289 setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i8, Legal);
1290 setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i16, Legal);
1291 setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i32, Legal);
1292 setLoadExtAction(Op, MVT::nxv4i32, MVT::nxv4i8, Legal);
1293 setLoadExtAction(Op, MVT::nxv4i32, MVT::nxv4i16, Legal);
1294 setLoadExtAction(Op, MVT::nxv8i16, MVT::nxv8i8, Legal);
1295 }
1296
1297 // SVE supports truncating stores of 64- and 128-bit vectors.
1298 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
1299 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
1300 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
1301 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
1302 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
1303
1304 for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
1305 MVT::nxv4f32, MVT::nxv2f64}) {
1306 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1307 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1308 setOperationAction(ISD::MGATHER, VT, Custom);
1309 setOperationAction(ISD::MSCATTER, VT, Custom);
1310 setOperationAction(ISD::MLOAD, VT, Custom);
1311 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
1312 setOperationAction(ISD::SELECT, VT, Custom);
1313 setOperationAction(ISD::FADD, VT, Custom);
1314 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1315 setOperationAction(ISD::FDIV, VT, Custom);
1316 setOperationAction(ISD::FMA, VT, Custom);
1317 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1318 setOperationAction(ISD::FMAXNUM, VT, Custom);
1319 setOperationAction(ISD::FMINIMUM, VT, Custom);
1320 setOperationAction(ISD::FMINNUM, VT, Custom);
1321 setOperationAction(ISD::FMUL, VT, Custom);
1322 setOperationAction(ISD::FNEG, VT, Custom);
1323 setOperationAction(ISD::FSUB, VT, Custom);
1324 setOperationAction(ISD::FCEIL, VT, Custom);
1325 setOperationAction(ISD::FFLOOR, VT, Custom);
1326 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1327 setOperationAction(ISD::FRINT, VT, Custom);
1328 setOperationAction(ISD::FROUND, VT, Custom);
1329 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1330 setOperationAction(ISD::FTRUNC, VT, Custom);
1331 setOperationAction(ISD::FSQRT, VT, Custom);
1332 setOperationAction(ISD::FABS, VT, Custom);
1333 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1334 setOperationAction(ISD::FP_ROUND, VT, Custom);
1335 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1336 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1337 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1338 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1339 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1340
1341 setOperationAction(ISD::SELECT_CC, VT, Expand);
1342 setOperationAction(ISD::FREM, VT, Expand);
1343 setOperationAction(ISD::FPOW, VT, Expand);
1344 setOperationAction(ISD::FPOWI, VT, Expand);
1345 setOperationAction(ISD::FCOS, VT, Expand);
1346 setOperationAction(ISD::FSIN, VT, Expand);
1347 setOperationAction(ISD::FSINCOS, VT, Expand);
1348 setOperationAction(ISD::FEXP, VT, Expand);
1349 setOperationAction(ISD::FEXP2, VT, Expand);
1350 setOperationAction(ISD::FLOG, VT, Expand);
1351 setOperationAction(ISD::FLOG2, VT, Expand);
1352 setOperationAction(ISD::FLOG10, VT, Expand);
1353
1354 setCondCodeAction(ISD::SETO, VT, Expand);
1355 setCondCodeAction(ISD::SETOLT, VT, Expand);
1356 setCondCodeAction(ISD::SETLT, VT, Expand);
1357 setCondCodeAction(ISD::SETOLE, VT, Expand);
1358 setCondCodeAction(ISD::SETLE, VT, Expand);
1359 setCondCodeAction(ISD::SETULT, VT, Expand);
1360 setCondCodeAction(ISD::SETULE, VT, Expand);
1361 setCondCodeAction(ISD::SETUGE, VT, Expand);
1362 setCondCodeAction(ISD::SETUGT, VT, Expand);
1363 setCondCodeAction(ISD::SETUEQ, VT, Expand);
1364 setCondCodeAction(ISD::SETONE, VT, Expand);
1365 }
1366
1367 for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1368 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1369 setOperationAction(ISD::MGATHER, VT, Custom);
1370 setOperationAction(ISD::MSCATTER, VT, Custom);
1371 setOperationAction(ISD::MLOAD, VT, Custom);
1372 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1373 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
1374 }
1375
1376 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
1377 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
1378
1379 // NEON doesn't support integer divides, but SVE does
1380 for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
1381 MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1382 setOperationAction(ISD::SDIV, VT, Custom);
1383 setOperationAction(ISD::UDIV, VT, Custom);
1384 }
1385
1386 // NEON doesn't support 64-bit vector integer muls, but SVE does.
1387 setOperationAction(ISD::MUL, MVT::v1i64, Custom);
1388 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1389
1390 // NEON doesn't support across-vector reductions, but SVE does.
1391 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
1392 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1393
1394 if (Subtarget->forceStreamingCompatibleSVE()) {
1395 for (MVT VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
1396 MVT::v4i32, MVT::v2i64})
1397 addTypeForStreamingSVE(VT);
1398
1399 for (MVT VT :
1400 {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
1401 addTypeForStreamingSVE(VT);
1402 }
1403
1404 // NOTE: Currently this has to happen after computeRegisterProperties rather
1405 // than the preferred option of combining it with the addRegisterClass call.
1406 if (Subtarget->useSVEForFixedLengthVectors()) {
1407 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
1408 if (useSVEForFixedLengthVectorVT(VT))
1409 addTypeForFixedLengthSVE(VT);
1410 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
1411 if (useSVEForFixedLengthVectorVT(VT))
1412 addTypeForFixedLengthSVE(VT);
1413
1414 // 64-bit results can come from an input wider than a NEON vector.
1415 for (auto VT : {MVT::v8i8, MVT::v4i16})
1416 setOperationAction(ISD::TRUNCATE, VT, Custom);
1417 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
1418
1419 // 128-bit results imply an input wider than a NEON vector.
1420 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
1421 setOperationAction(ISD::TRUNCATE, VT, Custom);
1422 for (auto VT : {MVT::v8f16, MVT::v4f32})
1423 setOperationAction(ISD::FP_ROUND, VT, Custom);
1424
1425 // These operations are not supported on NEON but SVE can do them.
1426 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1427 setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
1428 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1429 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
1430 setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
1431 setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
1432 setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
1433 setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
1434 setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
1435 setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
1436 setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
1437 setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
1438 setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
1439 setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
1440 setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
1441 setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
1442 setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
1443 setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
1444 setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
1445 setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
1446
1447 // Int operations with no NEON support.
1448 for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1449 MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
1450 setOperationAction(ISD::BITREVERSE, VT, Custom);
1451 setOperationAction(ISD::CTTZ, VT, Custom);
1452 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1453 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1454 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1455 }
1456
1457
1458 // Use SVE for vectors with more than 2 elements.
1459 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
1460 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1461 }
1462
1463 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64);
1464 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32);
1465 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16);
1466 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
1467
1468 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
1469 }
1470
1471 if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
1472 // Only required for llvm.aarch64.mops.memset.tag
1473 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
1474 }
1475
1476 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
1477
1478 IsStrictFPEnabled = true;
1479}
1480
1481void AArch64TargetLowering::addTypeForNEON(MVT VT) {
1482 assert(VT.isVector() && "VT should be a vector type");
1483
1484 if (VT.isFloatingPoint()) {
1485 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
1486 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
1487 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
1488 }
1489
1490 // Mark vector float intrinsics as expand.
1491 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
1492 setOperationAction(ISD::FSIN, VT, Expand);
1493 setOperationAction(ISD::FCOS, VT, Expand);
1494 setOperationAction(ISD::FPOW, VT, Expand);
1495 setOperationAction(ISD::FLOG, VT, Expand);
1496 setOperationAction(ISD::FLOG2, VT, Expand);
1497 setOperationAction(ISD::FLOG10, VT, Expand);
1498 setOperationAction(ISD::FEXP, VT, Expand);
1499 setOperationAction(ISD::FEXP2, VT, Expand);
1500 }
1501
1502 // But we do support custom-lowering for FCOPYSIGN.
1503 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
1504 ((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
1505 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1506
1507 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1508 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1509 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1510 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1511 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1512 setOperationAction(ISD::SRA, VT, Custom);
1513 setOperationAction(ISD::SRL, VT, Custom);
1514 setOperationAction(ISD::SHL, VT, Custom);
1515 setOperationAction(ISD::OR, VT, Custom);
1516 setOperationAction(ISD::SETCC, VT, Custom);
1517 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
1518
1519 setOperationAction(ISD::SELECT, VT, Expand);
1520 setOperationAction(ISD::SELECT_CC, VT, Expand);
1521 setOperationAction(ISD::VSELECT, VT, Expand);
1522 for (MVT InnerVT : MVT::all_valuetypes())
1523 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
1524
1525 // CNT supports only the B element size, so use UADDLP to widen the per-byte counts.
1526 if (VT != MVT::v8i8 && VT != MVT::v16i8)
1527 setOperationAction(ISD::CTPOP, VT, Custom);
1528
1529 setOperationAction(ISD::UDIV, VT, Expand);
1530 setOperationAction(ISD::SDIV, VT, Expand);
1531 setOperationAction(ISD::UREM, VT, Expand);
1532 setOperationAction(ISD::SREM, VT, Expand);
1533 setOperationAction(ISD::FREM, VT, Expand);
1534
1535 for (unsigned Opcode :
1536 {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT,
1537 ISD::FP_TO_UINT_SAT, ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
1538 setOperationAction(Opcode, VT, Custom);
1539
1540 if (!VT.isFloatingPoint())
1541 setOperationAction(ISD::ABS, VT, Legal);
1542
1543 // [SU][MIN|MAX] are available for all NEON types apart from i64.
1544 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
1545 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
1546 setOperationAction(Opcode, VT, Legal);
1547
1548 // F[MIN|MAX][NUM|NAN] and simple strict operations are available for all FP
1549 // NEON types.
1550 if (VT.isFloatingPoint() &&
1551 VT.getVectorElementType() != MVT::bf16 &&
1552 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
1553 for (unsigned Opcode :
1554 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM,
1555 ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_FMINNUM,
1556 ISD::STRICT_FMAXNUM, ISD::STRICT_FADD, ISD::STRICT_FSUB,
1557 ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FMA,
1558 ISD::STRICT_FSQRT})
1559 setOperationAction(Opcode, VT, Legal);
1560
1561 // Strict fp extend and trunc are legal
1562 if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 16)
1563 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
1564 if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 64)
1565 setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
1566
1567 // FIXME: We could potentially make use of the vector comparison instructions
1568 // for STRICT_FSETCC and STRICT_FSETCCS, but there are a number of
1569 // complications:
1570 // * FCMPEQ/NE are quiet comparisons, the rest are signalling comparisons,
1571 // so we would need to expand when the condition code doesn't match the
1572 // kind of comparison.
1573 // * Some kinds of comparison require more than one FCMXY instruction so
1574 // would need to be expanded instead.
1575 // * The lowering of the non-strict versions involves target-specific ISD
1576 // nodes so we would likely need to add strict versions of all of them and
1577 // handle them appropriately.
1578 setOperationAction(ISD::STRICT_FSETCC, VT, Expand);
1579 setOperationAction(ISD::STRICT_FSETCCS, VT, Expand);
1580
1581 if (Subtarget->isLittleEndian()) {
1582 for (unsigned im = (unsigned)ISD::PRE_INC;
1583 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1584 setIndexedLoadAction(im, VT, Legal);
1585 setIndexedStoreAction(im, VT, Legal);
1586 }
1587 }
1588}
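
// A minimal standalone sketch (not part of AArch64ISelLowering.cpp) of the
// CTPOP strategy referenced in the comment inside addTypeForNEON above: CNT
// produces per-byte population counts, and UADDLP then adds adjacent byte
// counts pairwise to form the wider lanes. Plain arrays stand in for the NEON
// registers; the values are illustrative only.
#include <cassert>
#include <cstdint>

int main() {
  uint16_t Lanes[4] = {0x00FF, 0x0F0F, 0x0001, 0x8000}; // a v4i16-style input
  uint16_t PopCnt[4];
  for (int i = 0; i < 4; ++i) {
    // CNT: population count of each byte.
    unsigned Lo = __builtin_popcount(Lanes[i] & 0xFFu);
    unsigned Hi = __builtin_popcount((Lanes[i] >> 8) & 0xFFu);
    // UADDLP: widen by adding the two adjacent byte counts into one i16 lane.
    PopCnt[i] = static_cast<uint16_t>(Lo + Hi);
  }
  assert(PopCnt[0] == 8 && PopCnt[1] == 8 && PopCnt[2] == 1 && PopCnt[3] == 1);
  return 0;
}
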
1589
1590bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
1591 EVT OpVT) const {
1592 // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
1593 if (!Subtarget->hasSVE())
1594 return true;
1595
1596 // We can only support legal predicate result types. We can use the SVE
1597 // whilelo instruction for generating fixed-width predicates too.
1598 if (ResVT != MVT::nxv2i1 && ResVT != MVT::nxv4i1 && ResVT != MVT::nxv8i1 &&
1599 ResVT != MVT::nxv16i1 && ResVT != MVT::v2i1 && ResVT != MVT::v4i1 &&
1600 ResVT != MVT::v8i1 && ResVT != MVT::v16i1)
1601 return true;
1602
1603 // The whilelo instruction only works with i32 or i64 scalar inputs.
1604 if (OpVT != MVT::i32 && OpVT != MVT::i64)
1605 return true;
1606
1607 return false;
1608}
1609
1610void AArch64TargetLowering::addTypeForStreamingSVE(MVT VT) {
1611 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1612 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1613 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1614 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1615 setOperationAction(ISD::AND, VT, Custom);
1616}
1617
1618void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
1619 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
1620
1621 // By default everything must be expanded.
1622 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1623 setOperationAction(Op, VT, Expand);
1624
1625 // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
1626 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1627
1628 if (VT.isFloatingPoint()) {
1629 setCondCodeAction(ISD::SETO, VT, Expand);
1630 setCondCodeAction(ISD::SETOLT, VT, Expand);
1631 setCondCodeAction(ISD::SETLT, VT, Expand);
1632 setCondCodeAction(ISD::SETOLE, VT, Expand);
1633 setCondCodeAction(ISD::SETLE, VT, Expand);
1634 setCondCodeAction(ISD::SETULT, VT, Expand);
1635 setCondCodeAction(ISD::SETULE, VT, Expand);
1636 setCondCodeAction(ISD::SETUGE, VT, Expand);
1637 setCondCodeAction(ISD::SETUGT, VT, Expand);
1638 setCondCodeAction(ISD::SETUEQ, VT, Expand);
1639 setCondCodeAction(ISD::SETONE, VT, Expand);
1640 }
1641
1642 // Mark integer truncating stores/extending loads as having custom lowering
1643 if (VT.isInteger()) {
1644 MVT InnerVT = VT.changeVectorElementType(MVT::i8);
1645 while (InnerVT != VT) {
1646 setTruncStoreAction(VT, InnerVT, Custom);
1647 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom);
1648 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom);
1649 InnerVT = InnerVT.changeVectorElementType(
1650 MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
1651 }
1652 }
1653
1654 // Mark floating-point truncating stores/extending loads as having custom
1655 // lowering
1656 if (VT.isFloatingPoint()) {
1657 MVT InnerVT = VT.changeVectorElementType(MVT::f16);
1658 while (InnerVT != VT) {
1659 setTruncStoreAction(VT, InnerVT, Custom);
1660 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Custom);
1661 InnerVT = InnerVT.changeVectorElementType(
1662 MVT::getFloatingPointVT(2 * InnerVT.getScalarSizeInBits()));
1663 }
1664 }
1665
1666 // Lower fixed length vector operations to scalable equivalents.
1667 setOperationAction(ISD::ABS, VT, Custom);
1668 setOperationAction(ISD::ADD, VT, Custom);
1669 setOperationAction(ISD::AND, VT, Custom);
1670 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1671 setOperationAction(ISD::BITCAST, VT, Custom);
1672 setOperationAction(ISD::BITREVERSE, VT, Custom);
1673 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1674 setOperationAction(ISD::BSWAP, VT, Custom);
1675 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1676 setOperationAction(ISD::CTLZ, VT, Custom);
1677 setOperationAction(ISD::CTPOP, VT, Custom);
1678 setOperationAction(ISD::CTTZ, VT, Custom);
1679 setOperationAction(ISD::FABS, VT, Custom);
1680 setOperationAction(ISD::FADD, VT, Custom);
1681 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1682 setOperationAction(ISD::FCEIL, VT, Custom);
1683 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1684 setOperationAction(ISD::FDIV, VT, Custom);
1685 setOperationAction(ISD::FFLOOR, VT, Custom);
1686 setOperationAction(ISD::FMA, VT, Custom);
1687 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1688 setOperationAction(ISD::FMAXNUM, VT, Custom);
1689 setOperationAction(ISD::FMINIMUM, VT, Custom);
1690 setOperationAction(ISD::FMINNUM, VT, Custom);
1691 setOperationAction(ISD::FMUL, VT, Custom);
1692 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1693 setOperationAction(ISD::FNEG, VT, Custom);
1694 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1695 setOperationAction(ISD::FP_ROUND, VT, Custom);
1696 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1697 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1698 setOperationAction(ISD::FRINT, VT, Custom);
1699 setOperationAction(ISD::FROUND, VT, Custom);
1700 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1701 setOperationAction(ISD::FSQRT, VT, Custom);
1702 setOperationAction(ISD::FSUB, VT, Custom);
1703 setOperationAction(ISD::FTRUNC, VT, Custom);
1704 setOperationAction(ISD::LOAD, VT, Custom);
1705 setOperationAction(ISD::MGATHER, VT, Custom);
1706 setOperationAction(ISD::MLOAD, VT, Custom);
1707 setOperationAction(ISD::MSCATTER, VT, Custom);
1708 setOperationAction(ISD::MSTORE, VT, Custom);
1709 setOperationAction(ISD::MUL, VT, Custom);
1710 setOperationAction(ISD::MULHS, VT, Custom);
1711 setOperationAction(ISD::MULHU, VT, Custom);
1712 setOperationAction(ISD::OR, VT, Custom);
1713 setOperationAction(ISD::SDIV, VT, Custom);
1714 setOperationAction(ISD::SELECT, VT, Custom);
1715 setOperationAction(ISD::SETCC, VT, Custom);
1716 setOperationAction(ISD::SHL, VT, Custom);
1717 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1718 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1719 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1720 setOperationAction(ISD::SMAX, VT, Custom);
1721 setOperationAction(ISD::SMIN, VT, Custom);
1722 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1723 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1724 setOperationAction(ISD::SRA, VT, Custom);
1725 setOperationAction(ISD::SRL, VT, Custom);
1726 setOperationAction(ISD::STORE, VT, Custom);
1727 setOperationAction(ISD::SUB, VT, Custom);
1728 setOperationAction(ISD::TRUNCATE, VT, Custom);
1729 setOperationAction(ISD::UDIV, VT, Custom);
1730 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1731 setOperationAction(ISD::UMAX, VT, Custom);
1732 setOperationAction(ISD::UMIN, VT, Custom);
1733 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1734 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1735 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1736 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1737 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1738 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1739 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1740 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1741 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1742 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1743 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1744 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1745 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1746 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1747 setOperationAction(ISD::VSELECT, VT, Custom);
1748 setOperationAction(ISD::XOR, VT, Custom);
1749 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1750}
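
// A standalone sketch (not part of AArch64ISelLowering.cpp) of the walk the
// two truncating-store/extending-load loops in addTypeForFixedLengthSVE
// perform: start at the narrowest element type and keep doubling the element
// width until the vector's own element width is reached. For a hypothetical
// v4i32 this marks the (v4i32, v4i8) and (v4i32, v4i16) pairs; the names
// printed below are illustrative only.
#include <cstdio>

int main() {
  const unsigned NumElts = 4;
  const unsigned EltBits = 32; // element width of the outer vector type
  for (unsigned InnerBits = 8; InnerBits < EltBits; InnerBits *= 2)
    std::printf("custom trunc-store/ext-load pair: v%ui%u <-> v%ui%u\n",
                NumElts, EltBits, NumElts, InnerBits);
  return 0;
}
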
1751
1752void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
1753 addRegisterClass(VT, &AArch64::FPR64RegClass);
1754 addTypeForNEON(VT);
1755}
1756
1757void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
1758 addRegisterClass(VT, &AArch64::FPR128RegClass);
1759 addTypeForNEON(VT);
1760}
1761
1762EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
1763 LLVMContext &C, EVT VT) const {
1764 if (!VT.isVector())
1765 return MVT::i32;
1766 if (VT.isScalableVector())
1767 return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
1768 return VT.changeVectorElementTypeToInteger();
1769}
1770
1771// isIntImmediate - This method tests to see if the node is a constant
1772 // operand. If so, Imm will receive the value.
1773static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
1774 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
1775 Imm = C->getZExtValue();
1776 return true;
1777 }
1778 return false;
1779}
1780
1781// isOpcWithIntImmediate - This method tests to see if the node is a specific
1782 // opcode and that it has an immediate integer right operand.
1783 // If so, Imm will receive the value.
1784static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
1785 uint64_t &Imm) {
1786 return N->getOpcode() == Opc &&
1787 isIntImmediate(N->getOperand(1).getNode(), Imm);
1788}
1789
1790static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
1791 const APInt &Demanded,
1792 TargetLowering::TargetLoweringOpt &TLO,
1793 unsigned NewOpc) {
1794 uint64_t OldImm = Imm, NewImm, Enc;
1795 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
1796
1797 // Return if the immediate is already all zeros, all ones, a bimm32 or a
1798 // bimm64.
1799 if (Imm == 0 || Imm == Mask ||
1800 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
1801 return false;
1802
1803 unsigned EltSize = Size;
1804 uint64_t DemandedBits = Demanded.getZExtValue();
1805
1806 // Clear bits that are not demanded.
1807 Imm &= DemandedBits;
1808
1809 while (true) {
1810 // The goal here is to set the non-demanded bits in a way that minimizes
1811 // the number of transitions between 0 and 1. In order to achieve this goal,
1812 // we set the non-demanded bits to the value of the preceding demanded bits.
1813 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
1814 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
1815 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
1816 // The final result is 0b11000011.
1817 uint64_t NonDemandedBits = ~DemandedBits;
1818 uint64_t InvertedImm = ~Imm & DemandedBits;
1819 uint64_t RotatedImm =
1820 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
1821 NonDemandedBits;
1822 uint64_t Sum = RotatedImm + NonDemandedBits;
1823 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
1824 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
1825 NewImm = (Imm | Ones) & Mask;
1826
1827 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
1828 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
1829 // we halve the element size and continue the search.
1830 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
1831 break;
1832
1833 // We cannot shrink the element size any further if it is already 2 bits.
1834 if (EltSize == 2)
1835 return false;
1836
1837 EltSize /= 2;
1838 Mask >>= EltSize;
1839 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
1840
1841 // Return if there is a mismatch in any of the demanded bits of Imm and Hi.
1842 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
1843 return false;
1844
1845 // Merge the upper and lower halves of Imm and DemandedBits.
1846 Imm |= Hi;
1847 DemandedBits |= DemandedBitsHi;
1848 }
1849
1850 ++NumOptimizedImms;
1851
1852 // Replicate the element across the register width.
1853 while (EltSize < Size) {
1854 NewImm |= NewImm << EltSize;
1855 EltSize *= 2;
1856 }
1857
1858 (void)OldImm;
1859 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
1860 "demanded bits should never be altered");
1861 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
1862
1863 // Create the new constant immediate node.
1864 EVT VT = Op.getValueType();
1865 SDLoc DL(Op);
1866 SDValue New;
1867
1868 // If the new constant immediate is all-zeros or all-ones, let the target
1869 // independent DAG combine optimize this node.
1870 if (NewImm == 0 || NewImm == OrigMask) {
1871 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
1872 TLO.DAG.getConstant(NewImm, DL, VT));
1873 // Otherwise, create a machine node so that target independent DAG combine
1874 // doesn't undo this optimization.
1875 } else {
1876 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
1877 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
1878 New = SDValue(
1879 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
1880 }
1881
1882 return TLO.CombineTo(Op, New);
1883}
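
// A standalone sketch (not part of AArch64ISelLowering.cpp) reproducing the
// bit-copying example from the comment inside optimizeLogicalImm above: for
// the 8-bit immediate 0bx10xx0x1 (demanded mask 0b01100101, demanded values
// 0b01000001) the non-demanded 'x' bits are filled from the preceding
// demanded bits, giving 0b11000011. The function mirrors the rotate/add/carry
// arithmetic of the loop body for a single element size; all names are local
// to the sketch.
#include <cassert>
#include <cstdint>

static uint64_t fillNonDemandedBits(uint64_t Imm, uint64_t DemandedBits,
                                    unsigned EltSize) {
  uint64_t Mask = (EltSize == 64) ? ~0ULL : ((1ULL << EltSize) - 1);
  uint64_t NonDemandedBits = ~DemandedBits & Mask;
  Imm &= DemandedBits;
  // Demanded bits that are zero, rotated left by one (with wrap) and
  // restricted to the non-demanded positions.
  uint64_t InvertedImm = ~Imm & DemandedBits & Mask;
  uint64_t RotatedImm =
      ((InvertedImm << 1) | ((InvertedImm >> (EltSize - 1)) & 1)) &
      NonDemandedBits;
  uint64_t Sum = RotatedImm + NonDemandedBits;
  // Carry mirrors the wrap-around handling in the original loop body.
  uint64_t Carry = (NonDemandedBits & ~Sum & (1ULL << (EltSize - 1))) ? 1 : 0;
  uint64_t Ones = (Sum + Carry) & NonDemandedBits;
  return (Imm | Ones) & Mask;
}

int main() {
  assert(fillNonDemandedBits(0b01000001, 0b01100101, 8) == 0b11000011);
  return 0;
}
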
1884
1885bool AArch64TargetLowering::targetShrinkDemandedConstant(
1886 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1887 TargetLoweringOpt &TLO) const {
1888 // Delay this optimization to as late as possible.
1889 if (!TLO.LegalOps)
1890 return false;
1891
1892 if (!EnableOptimizeLogicalImm)
1893 return false;
1894
1895 EVT VT = Op.getValueType();
1896 if (VT.isVector())
1897 return false;
1898
1899 unsigned Size = VT.getSizeInBits();
1900 assert((Size == 32 || Size == 64) &&
1901 "i32 or i64 is expected after legalization.");
1902
1903 // Exit early if we demand all bits.
1904 if (DemandedBits.countPopulation() == Size)
1905 return false;
1906
1907 unsigned NewOpc;
1908 switch (Op.getOpcode()) {
1909 default:
1910 return false;
1911 case ISD::AND:
1912 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
1913 break;
1914 case ISD::OR:
1915 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1916 break;
1917 case ISD::XOR:
1918 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1919 break;
1920 }
1921 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1922 if (!C)
1923 return false;
1924 uint64_t Imm = C->getZExtValue();
1925 return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
1926}
1927
1928/// computeKnownBitsForTargetNode - Determine which of the bits specified in
1929 /// Mask are known to be either zero or one and return them in Known.
1930void AArch64TargetLowering::computeKnownBitsForTargetNode(
1931 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1932 const SelectionDAG &DAG, unsigned Depth) const {
1933 switch (Op.getOpcode()) {
1934 default:
1935 break;
1936 case AArch64ISD::DUP: {
1937 SDValue SrcOp = Op.getOperand(0);
1938 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
1939 if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
1940 assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
1941 "Expected DUP implicit truncation");
1942 Known = Known.trunc(Op.getScalarValueSizeInBits());
1943 }
1944 break;
1945 }
1946 case AArch64ISD::CSEL: {
1947 KnownBits Known2;
1948 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1949 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1950 Known = KnownBits::commonBits(Known, Known2);
1951 break;
1952 }
1953 case AArch64ISD::BICi: {
1954 // Compute the bit cleared value.
1955 uint64_t Mask =
1956 ~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2));
1957 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1958 Known &= KnownBits::makeConstant(APInt(Known.getBitWidth(), Mask));
1959 break;
1960 }
1961 case AArch64ISD::VLSHR: {
1962 KnownBits Known2;
1963 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1964 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1965 Known = KnownBits::lshr(Known, Known2);
1966 break;
1967 }
1968 case AArch64ISD::VASHR: {
1969 KnownBits Known2;
1970 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1971 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1972 Known = KnownBits::ashr(Known, Known2);
1973 break;
1974 }
1975 case AArch64ISD::MOVI: {
1976 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(0));
1977 Known =
1978 KnownBits::makeConstant(APInt(Known.getBitWidth(), CN->getZExtValue()));
1979 break;
1980 }
1981 case AArch64ISD::LOADgot:
1982 case AArch64ISD::ADDlow: {
1983 if (!Subtarget->isTargetILP32())
1984 break;
1985 // In ILP32 mode, all valid pointers are in the low 4GB of the address space.
1986 Known.Zero = APInt::getHighBitsSet(64, 32);
1987 break;
1988 }
1989 case AArch64ISD::ASSERT_ZEXT_BOOL: {
1990 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1991 Known.Zero |= APInt(Known.getBitWidth(), 0xFE);
1992 break;
1993 }
1994 case ISD::INTRINSIC_W_CHAIN: {
1995 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1996 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1997 switch (IntID) {
1998 default: return;
1999 case Intrinsic::aarch64_ldaxr:
2000 case Intrinsic::aarch64_ldxr: {
2001 unsigned BitWidth = Known.getBitWidth();
2002 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
2003 unsigned MemBits = VT.getScalarSizeInBits();
2004 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
2005 return;
2006 }
2007 }
2008 break;
2009 }
2010 case ISD::INTRINSIC_WO_CHAIN:
2011 case ISD::INTRINSIC_VOID: {
2012 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2013 switch (IntNo) {
2014 default:
2015 break;
2016 case Intrinsic::aarch64_neon_umaxv:
2017 case Intrinsic::aarch64_neon_uminv: {
2018 // Figure out the datatype of the vector operand. The UMINV instruction
2019 // will zero extend the result, so we can mark as known zero all the
2020 // bits larger than the element datatype. 32-bit or larger doesn't need
2021 // this as those are legal types and will be handled by isel directly.
2022 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
2023 unsigned BitWidth = Known.getBitWidth();
2024 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
2025 assert(BitWidth >= 8 && "Unexpected width!");
2026 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
2027 Known.Zero |= Mask;
2028 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
2029 assert(BitWidth >= 16 && "Unexpected width!");
2030 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
2031 Known.Zero |= Mask;
2032 }
2033 break;
2034 } break;
2035 }
2036 }
2037 }
2038}
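
// A standalone sketch (not part of AArch64ISelLowering.cpp) of the CSEL case
// above: for a conditional select, the only bits that remain known are those
// on which both possible inputs agree. The tiny Known struct stands in for
// llvm::KnownBits; the bit patterns are illustrative only.
#include <cassert>
#include <cstdint>

struct Known {
  uint64_t Zero; // bits known to be 0
  uint64_t One;  // bits known to be 1
};

static Known commonBits(Known A, Known B) {
  return {A.Zero & B.Zero, A.One & B.One};
}

int main() {
  Known A{0xFFFFFF00u, 0x000000F0u};
  Known B{0xFFFF0000u, 0x000000F3u};
  Known C = commonBits(A, B);
  assert(C.Zero == 0xFFFF0000u && C.One == 0x000000F0u);
  return 0;
}
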
2039
2040MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
2041 EVT) const {
2042 return MVT::i64;
2043}
2044
2045bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
2046 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
2047 bool *Fast) const {
2048 if (Subtarget->requiresStrictAlign())
2049 return false;
2050
2051 if (Fast) {
2052 // Some CPUs are fine with unaligned stores except for 128-bit ones.
2053 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
2054 // See comments in performSTORECombine() for more details about
2055 // these conditions.
2056
2057 // Code that uses clang vector extensions can mark that it
2058 // wants unaligned accesses to be treated as fast by
2059 // underspecifying alignment to be 1 or 2.
2060 Alignment <= 2 ||
2061
2062 // Disregard v2i64. Memcpy lowering produces those and splitting
2063 // them regresses performance on micro-benchmarks and olden/bh.
2064 VT == MVT::v2i64;
2065 }
2066 return true;
2067}
2068
2069// Same as above but handling LLTs instead.
2070bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
2071 LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
2072 bool *Fast) const {
2073 if (Subtarget->requiresStrictAlign())
2074 return false;
2075
2076 if (Fast) {
2077 // Some CPUs are fine with unaligned stores except for 128-bit ones.
2078 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
2079 Ty.getSizeInBytes() != 16 ||
2080 // See comments in performSTORECombine() for more details about
2081 // these conditions.
2082
2083 // Code that uses clang vector extensions can mark that it
2084 // wants unaligned accesses to be treated as fast by
2085 // underspecifying alignment to be 1 or 2.
2086 Alignment <= 2 ||
2087
2088 // Disregard v2i64. Memcpy lowering produces those and splitting
2089 // them regresses performance on micro-benchmarks and olden/bh.
2090 Ty == LLT::fixed_vector(2, 64);
2091 }
2092 return true;
2093}
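
// A standalone sketch (not part of AArch64ISelLowering.cpp) of the *Fast
// heuristic used by both overloads above, with plain integers standing in for
// the EVT/LLT queries. The thresholds (16-byte stores, alignment <= 2, the
// v2i64 exception) come from the code above; everything else is illustrative.
#include <cstdio>

static bool unalignedAccessIsFast(bool Misaligned128StoreSlow,
                                  unsigned StoreSizeInBytes,
                                  unsigned AlignmentInBytes, bool IsV2i64) {
  return !Misaligned128StoreSlow || StoreSizeInBytes != 16 ||
         AlignmentInBytes <= 2 || IsV2i64;
}

int main() {
  // On a core where misaligned 128-bit stores are slow, a 16-byte store with
  // 8-byte alignment is reported as not fast unless the type is v2i64.
  std::printf("%d\n", unalignedAccessIsFast(true, 16, 8, false)); // prints 0
  std::printf("%d\n", unalignedAccessIsFast(true, 16, 8, true));  // prints 1
  return 0;
}
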
2094
2095FastISel *
2096AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
2097 const TargetLibraryInfo *libInfo) const {
2098 return AArch64::createFastISel(funcInfo, libInfo);
2099}
2100
2101const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
2102#define MAKE_CASE(V) \
2103 case V: \
2104 return #V;
2105 switch ((AArch64ISD::NodeType)Opcode) {
2106 case AArch64ISD::FIRST_NUMBER:
2107 break;
2108 MAKE_CASE(AArch64ISD::OBSCURE_COPY)
2109 MAKE_CASE(AArch64ISD::SMSTART)
2110 MAKE_CASE(AArch64ISD::SMSTOP)
2111 MAKE_CASE(AArch64ISD::RESTORE_ZA)
2112 MAKE_CASE(AArch64ISD::CALL)
2113 MAKE_CASE(AArch64ISD::ADRP)
2114 MAKE_CASE(AArch64ISD::ADR)
2115 MAKE_CASE(AArch64ISD::ADDlow)
2116 MAKE_CASE(AArch64ISD::LOADgot)
2117 MAKE_CASE(AArch64ISD::RET_FLAG)
2118 MAKE_CASE(AArch64ISD::BRCOND)
2119 MAKE_CASE(AArch64ISD::CSEL)
2120 MAKE_CASE(AArch64ISD::CSINV)
2121 MAKE_CASE(AArch64ISD::CSNEG)
2122 MAKE_CASE(AArch64ISD::CSINC)
2123 MAKE_CASE(AArch64ISD::THREAD_POINTER)
2124 MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
2125 MAKE_CASE(AArch64ISD::ABDS_PRED)
2126 MAKE_CASE(AArch64ISD::ABDU_PRED)
2127 MAKE_CASE(AArch64ISD::MUL_PRED)
2128 MAKE_CASE(AArch64ISD::MULHS_PRED)
2129 MAKE_CASE(AArch64ISD::MULHU_PRED)
2130 MAKE_CASE(AArch64ISD::SDIV_PRED)
2131 MAKE_CASE(AArch64ISD::SHL_PRED)
2132 MAKE_CASE(AArch64ISD::SMAX_PRED)
2133 MAKE_CASE(AArch64ISD::SMIN_PRED)
2134 MAKE_CASE(AArch64ISD::SRA_PRED)
2135 MAKE_CASE(AArch64ISD::SRL_PRED)
2136 MAKE_CASE(AArch64ISD::UDIV_PRED)
2137 MAKE_CASE(AArch64ISD::UMAX_PRED)
2138 MAKE_CASE(AArch64ISD::UMIN_PRED)
2139 MAKE_CASE(AArch64ISD::SRAD_MERGE_OP1)
2140 MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
2141 MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
2142 MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
2143 MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
2144 MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
2145 MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
2146 MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
2147 MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
2148 MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
2149 MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
2150 MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
2151 MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
2152 MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
2153 MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
2154 MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
2155 MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
2156 MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
2157 MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
2158 MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
2159 MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
2160 MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
2161 MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
2162 MAKE_CASE(AArch64ISD::ADC)
2163 MAKE_CASE(AArch64ISD::SBC)
2164 MAKE_CASE(AArch64ISD::ADDS)
2165 MAKE_CASE(AArch64ISD::SUBS)
2166 MAKE_CASE(AArch64ISD::ADCS)
2167 MAKE_CASE(AArch64ISD::SBCS)
2168 MAKE_CASE(AArch64ISD::ANDS)
2169 MAKE_CASE(AArch64ISD::CCMP)
2170 MAKE_CASE(AArch64ISD::CCMN)
2171 MAKE_CASE(AArch64ISD::FCCMP)
2172 MAKE_CASE(AArch64ISD::FCMP)
2173 MAKE_CASE(AArch64ISD::STRICT_FCMP)
2174 MAKE_CASE(AArch64ISD::STRICT_FCMPE)
2175 MAKE_CASE(AArch64ISD::DUP)
2176 MAKE_CASE(AArch64ISD::DUPLANE8)
2177 MAKE_CASE(AArch64ISD::DUPLANE16)
2178 MAKE_CASE(AArch64ISD::DUPLANE32)
2179 MAKE_CASE(AArch64ISD::DUPLANE64)
2180 MAKE_CASE(AArch64ISD::DUPLANE128)
2181 MAKE_CASE(AArch64ISD::MOVI)
2182 MAKE_CASE(AArch64ISD::MOVIshift)
2183 MAKE_CASE(AArch64ISD::MOVIedit)
2184 MAKE_CASE(AArch64ISD::MOVImsl)
2185 MAKE_CASE(AArch64ISD::FMOV)
2186 MAKE_CASE(AArch64ISD::MVNIshift)
2187 MAKE_CASE(AArch64ISD::MVNImsl)
2188 MAKE_CASE(AArch64ISD::BICi)
2189 MAKE_CASE(AArch64ISD::ORRi)
2190 MAKE_CASE(AArch64ISD::BSP)
2191 MAKE_CASE(AArch64ISD::EXTR)
2192 MAKE_CASE(AArch64ISD::ZIP1)
2193 MAKE_CASE(AArch64ISD::ZIP2)
2194 MAKE_CASE(AArch64ISD::UZP1)
2195 MAKE_CASE(AArch64ISD::UZP2)
2196 MAKE_CASE(AArch64ISD::TRN1)
2197 MAKE_CASE(AArch64ISD::TRN2)
2198 MAKE_CASE(AArch64ISD::REV16)
2199 MAKE_CASE(AArch64ISD::REV32)
2200 MAKE_CASE(AArch64ISD::REV64)
2201 MAKE_CASE(AArch64ISD::EXT)
2202 MAKE_CASE(AArch64ISD::SPLICE)
2203 MAKE_CASE(AArch64ISD::VSHL)
2204 MAKE_CASE(AArch64ISD::VLSHR)
2205 MAKE_CASE(AArch64ISD::VASHR)
2206 MAKE_CASE(AArch64ISD::VSLI)
2207 MAKE_CASE(AArch64ISD::VSRI)
2208 MAKE_CASE(AArch64ISD::CMEQ)
2209 MAKE_CASE(AArch64ISD::CMGE)
2210 MAKE_CASE(AArch64ISD::CMGT)
2211 MAKE_CASE(AArch64ISD::CMHI)
2212 MAKE_CASE(AArch64ISD::CMHS)
2213 MAKE_CASE(AArch64ISD::FCMEQ)
2214 MAKE_CASE(AArch64ISD::FCMGE)
2215 MAKE_CASE(AArch64ISD::FCMGT)
2216 MAKE_CASE(AArch64ISD::CMEQz)
2217 MAKE_CASE(AArch64ISD::CMGEz)
2218 MAKE_CASE(AArch64ISD::CMGTz)
2219 MAKE_CASE(AArch64ISD::CMLEz)
2220 MAKE_CASE(AArch64ISD::CMLTz)
2221 MAKE_CASE(AArch64ISD::FCMEQz)
2222 MAKE_CASE(AArch64ISD::FCMGEz)
2223 MAKE_CASE(AArch64ISD::FCMGTz)
2224 MAKE_CASE(AArch64ISD::FCMLEz)
2225 MAKE_CASE(AArch64ISD::FCMLTz)
2226 MAKE_CASE(AArch64ISD::SADDV)
2227 MAKE_CASE(AArch64ISD::UADDV)
2228 MAKE_CASE(AArch64ISD::SDOT)
2229 MAKE_CASE(AArch64ISD::UDOT)
2230 MAKE_CASE(AArch64ISD::SMINV)
2231 MAKE_CASE(AArch64ISD::UMINV)
2232 MAKE_CASE(AArch64ISD::SMAXV)
2233 MAKE_CASE(AArch64ISD::UMAXV)
2234 MAKE_CASE(AArch64ISD::SADDV_PRED)
2235 MAKE_CASE(AArch64ISD::UADDV_PRED)
2236 MAKE_CASE(AArch64ISD::SMAXV_PRED)
2237 MAKE_CASE(AArch64ISD::UMAXV_PRED)
2238 MAKE_CASE(AArch64ISD::SMINV_PRED)
2239 MAKE_CASE(AArch64ISD::UMINV_PRED)
2240 MAKE_CASE(AArch64ISD::ORV_PRED)
2241 MAKE_CASE(AArch64ISD::EORV_PRED)
2242 MAKE_CASE(AArch64ISD::ANDV_PRED)
2243 MAKE_CASE(AArch64ISD::CLASTA_N)
2244 MAKE_CASE(AArch64ISD::CLASTB_N)
2245 MAKE_CASE(AArch64ISD::LASTA)
2246 MAKE_CASE(AArch64ISD::LASTB)
2247 MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
2248 MAKE_CASE(AArch64ISD::LS64_BUILD)
2249 MAKE_CASE(AArch64ISD::LS64_EXTRACT)
2250 MAKE_CASE(AArch64ISD::TBL)
2251 MAKE_CASE(AArch64ISD::FADD_PRED)
2252 MAKE_CASE(AArch64ISD::FADDA_PRED)
2253 MAKE_CASE(AArch64ISD::FADDV_PRED)
2254 MAKE_CASE(AArch64ISD::FDIV_PRED)
2255 MAKE_CASE(AArch64ISD::FMA_PRED)
2256 MAKE_CASE(AArch64ISD::FMAX_PRED)
2257 MAKE_CASE(AArch64ISD::FMAXV_PRED)
2258 MAKE_CASE(AArch64ISD::FMAXNM_PRED)
2259 MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
2260 MAKE_CASE(AArch64ISD::FMIN_PRED)
2261 MAKE_CASE(AArch64ISD::FMINV_PRED)
2262 MAKE_CASE(AArch64ISD::FMINNM_PRED)
2263 MAKE_CASE(AArch64ISD::FMINNMV_PRED)
2264 MAKE_CASE(AArch64ISD::FMUL_PRED)
2265 MAKE_CASE(AArch64ISD::FSUB_PRED)
2266 MAKE_CASE(AArch64ISD::RDSVL)
2267 MAKE_CASE(AArch64ISD::BIC)
2268 MAKE_CASE(AArch64ISD::BIT)
2269 MAKE_CASE(AArch64ISD::CBZ)
2270 MAKE_CASE(AArch64ISD::CBNZ)
2271 MAKE_CASE(AArch64ISD::TBZ)
2272 MAKE_CASE(AArch64ISD::TBNZ)
2273 MAKE_CASE(AArch64ISD::TC_RETURN)
2274 MAKE_CASE(AArch64ISD::PREFETCH)
2275 MAKE_CASE(AArch64ISD::SITOF)
2276 MAKE_CASE(AArch64ISD::UITOF)
2277 MAKE_CASE(AArch64ISD::NVCAST)
2278 MAKE_CASE(AArch64ISD::MRS)
2279 MAKE_CASE(AArch64ISD::SQSHL_I)
2280 MAKE_CASE(AArch64ISD::UQSHL_I)
2281 MAKE_CASE(AArch64ISD::SRSHR_I)
2282 MAKE_CASE(AArch64ISD::URSHR_I)
2283 MAKE_CASE(AArch64ISD::SQSHLU_I)
2284 MAKE_CASE(AArch64ISD::WrapperLarge)
2285 MAKE_CASE(AArch64ISD::LD2post)
2286 MAKE_CASE(AArch64ISD::LD3post)
2287 MAKE_CASE(AArch64ISD::LD4post)
2288 MAKE_CASE(AArch64ISD::ST2post)
2289 MAKE_CASE(AArch64ISD::ST3post)
2290 MAKE_CASE(AArch64ISD::ST4post)
2291 MAKE_CASE(AArch64ISD::LD1x2post)
2292 MAKE_CASE(AArch64ISD::LD1x3post)
2293 MAKE_CASE(AArch64ISD::LD1x4post)
2294 MAKE_CASE(AArch64ISD::ST1x2post)
2295 MAKE_CASE(AArch64ISD::ST1x3post)
2296 MAKE_CASE(AArch64ISD::ST1x4post)
2297 MAKE_CASE(AArch64ISD::LD1DUPpost)
2298 MAKE_CASE(AArch64ISD::LD2DUPpost)
2299 MAKE_CASE(AArch64ISD::LD3DUPpost)
2300 MAKE_CASE(AArch64ISD::LD4DUPpost)
2301 MAKE_CASE(AArch64ISD::LD1LANEpost)
2302 MAKE_CASE(AArch64ISD::LD2LANEpost)
2303 MAKE_CASE(AArch64ISD::LD3LANEpost)
2304 MAKE_CASE(AArch64ISD::LD4LANEpost)
2305 MAKE_CASE(AArch64ISD::ST2LANEpost)
2306 MAKE_CASE(AArch64ISD::ST3LANEpost)
2307 MAKE_CASE(AArch64ISD::ST4LANEpost)
2308 MAKE_CASE(AArch64ISD::SMULL)
2309 MAKE_CASE(AArch64ISD::UMULL)
2310 MAKE_CASE(AArch64ISD::PMULL)
2311 MAKE_CASE(AArch64ISD::FRECPE)
2312 MAKE_CASE(AArch64ISD::FRECPS)
2313 MAKE_CASE(AArch64ISD::FRSQRTE)
2314 MAKE_CASE(AArch64ISD::FRSQRTS)
2315 MAKE_CASE(AArch64ISD::STG)
2316 MAKE_CASE(AArch64ISD::STZG)
2317 MAKE_CASE(AArch64ISD::ST2G)
2318 MAKE_CASE(AArch64ISD::STZ2G)
2319 MAKE_CASE(AArch64ISD::SUNPKHI)
2320 MAKE_CASE(AArch64ISD::SUNPKLO)
2321 MAKE_CASE(AArch64ISD::UUNPKHI)
2322 MAKE_CASE(AArch64ISD::UUNPKLO)
2323 MAKE_CASE(AArch64ISD::INSR)
2324 MAKE_CASE(AArch64ISD::PTEST)
2325 MAKE_CASE(AArch64ISD::PTRUE)
2326 MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
2327 MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
2328 MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
2329 MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
2330 MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
2331 MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
2332 MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
2333 MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
2334 MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
2335 MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
2336 MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
2337 MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
2338 MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
2339 MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
2340 MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
2341 MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
2342 MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
2343 MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
2344 MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
2345 MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
2346 MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
2347 MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
2348 MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
2349 MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
2350 MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
2351 MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
2352 MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
2353 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
2354 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
2355 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
2356 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
2357 MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
2358 MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
2359 MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
2360 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
2361 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
2362 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
2363 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
2364 MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
2365 MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
2366 MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
2367 MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
2368 MAKE_CASE(AArch64ISD::ST1_PRED)
2369 MAKE_CASE(AArch64ISD::SST1_PRED)
2370 MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
2371 MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
2372 MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
2373 MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
2374 MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
2375 MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
2376 MAKE_CASE(AArch64ISD::SSTNT1_PRED)
2377 MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
2378 MAKE_CASE(AArch64ISD::LDP)
2379 MAKE_CASE(AArch64ISD::LDNP)
2380 MAKE_CASE(AArch64ISD::STP)
2381 MAKE_CASE(AArch64ISD::STNP)
2382 MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
2383 MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
2384 MAKE_CASE(AArch64ISD::REVH_MERGE_PASSTHRU)
2385 MAKE_CASE(AArch64ISD::REVW_MERGE_PASSTHRU)
2386 MAKE_CASE(AArch64ISD::REVD_MERGE_PASSTHRU)
2387 MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
2388 MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
2389 MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
2390 MAKE_CASE(AArch64ISD::INDEX_VECTOR)
2391 MAKE_CASE(AArch64ISD::ADDP)
2392 MAKE_CASE(AArch64ISD::SADDLP)
2393 MAKE_CASE(AArch64ISD::UADDLP)
2394 MAKE_CASE(AArch64ISD::CALL_RVMARKER)
2395 MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL)
2396 MAKE_CASE(AArch64ISD::MOPS_MEMSET)
2397 MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING)
2398 MAKE_CASE(AArch64ISD::MOPS_MEMCOPY)
2399 MAKE_CASE(AArch64ISD::MOPS_MEMMOVE)
2400 MAKE_CASE(AArch64ISD::CALL_BTI)
2401 }
2402#undef MAKE_CASE
2403 return nullptr;
2404}
2405
2406MachineBasicBlock *
2407AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
2408 MachineBasicBlock *MBB) const {
2409 // We materialise the F128CSEL pseudo-instruction as some control flow and a
2410 // phi node:
2411
2412 // OrigBB:
2413 // [... previous instrs leading to comparison ...]
2414 // b.ne TrueBB
2415 // b EndBB
2416 // TrueBB:
2417 // ; Fallthrough
2418 // EndBB:
2419 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
2420
2421 MachineFunction *MF = MBB->getParent();
2422 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2423 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2424 DebugLoc DL = MI.getDebugLoc();
2425 MachineFunction::iterator It = ++MBB->getIterator();
2426
2427 Register DestReg = MI.getOperand(0).getReg();
2428 Register IfTrueReg = MI.getOperand(1).getReg();
2429 Register IfFalseReg = MI.getOperand(2).getReg();
2430 unsigned CondCode = MI.getOperand(3).getImm();
2431 bool NZCVKilled = MI.getOperand(4).isKill();
2432
2433 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
2434 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
2435 MF->insert(It, TrueBB);
2436 MF->insert(It, EndBB);
2437
2438 // Transfer the rest of the current basic block to EndBB.
2439 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
2440 MBB->end());
2441 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
2442
2443 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
2444 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
2445 MBB->addSuccessor(TrueBB);
2446 MBB->addSuccessor(EndBB);
2447
2448 // TrueBB falls through to the end.
2449 TrueBB->addSuccessor(EndBB);
2450
2451 if (!NZCVKilled) {
2452 TrueBB->addLiveIn(AArch64::NZCV);
2453 EndBB->addLiveIn(AArch64::NZCV);
2454 }
2455
2456 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
2457 .addReg(IfTrueReg)
2458 .addMBB(TrueBB)
2459 .addReg(IfFalseReg)
2460 .addMBB(MBB);
2461
2462 MI.eraseFromParent();
2463 return EndBB;
2464}
2465
2466MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
2467 MachineInstr &MI, MachineBasicBlock *BB) const {
2468 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
2469 BB->getParent()->getFunction().getPersonalityFn())) &&
2470 "SEH does not use catchret!");
2471 return BB;
2472}
2473
2474MachineBasicBlock *
2475AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg,
2476 MachineInstr &MI,
2477 MachineBasicBlock *BB) const {
2478 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2479 MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
2480
2481 MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
2482 MIB.add(MI.getOperand(1)); // slice index register
2483 MIB.add(MI.getOperand(2)); // slice index offset
2484 MIB.add(MI.getOperand(3)); // pg
2485 MIB.add(MI.getOperand(4)); // base
2486 MIB.add(MI.getOperand(5)); // offset
2487
2488 MI.eraseFromParent(); // The pseudo is gone now.
2489 return BB;
2490}
2491
2492MachineBasicBlock *
2493AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
2494 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2495 MachineInstrBuilder MIB =
2496 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_ZA));
2497
2498 MIB.addReg(AArch64::ZA, RegState::Define);
2499 MIB.add(MI.getOperand(0)); // Vector select register
2500 MIB.add(MI.getOperand(1)); // Vector select offset
2501 MIB.add(MI.getOperand(2)); // Base
2502 MIB.add(MI.getOperand(1)); // Offset, same as vector select offset
2503
2504 MI.eraseFromParent(); // The pseudo is gone now.
2505 return BB;
2506}
2507
2508MachineBasicBlock *
2509AArch64TargetLowering::EmitMopa(unsigned Opc, unsigned BaseReg,
2510 MachineInstr &MI, MachineBasicBlock *BB) const {
2511 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2512 MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
2513
2514 MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
2515 MIB.addReg(BaseReg + MI.getOperand(0).getImm());
2516 MIB.add(MI.getOperand(1)); // pn
2517 MIB.add(MI.getOperand(2)); // pm
2518 MIB.add(MI.getOperand(3)); // zn
2519 MIB.add(MI.getOperand(4)); // zm
2520
2521 MI.eraseFromParent(); // The pseudo is gone now.
2522 return BB;
2523}
2524
2525MachineBasicBlock *
2526AArch64TargetLowering::EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
2527 MachineInstr &MI,
2528 MachineBasicBlock *BB) const {
2529 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2530 MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
2531
2532 MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
2533 MIB.addReg(BaseReg + MI.getOperand(0).getImm());
2534 MIB.add(MI.getOperand(1)); // Slice index register
2535 MIB.add(MI.getOperand(2)); // Slice index offset
2536 MIB.add(MI.getOperand(3)); // pg
2537 MIB.add(MI.getOperand(4)); // zn
2538
2539 MI.eraseFromParent(); // The pseudo is gone now.
2540 return BB;
2541}
2542
2543MachineBasicBlock *
2544AArch64TargetLowering::EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const {
2545 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2546 MachineInstrBuilder MIB =
2547 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::ZERO_M));
2548 MIB.add(MI.getOperand(0)); // Mask
2549
2550 unsigned Mask = MI.getOperand(0).getImm();
2551 for (unsigned I = 0; I < 8; I++) {
2552 if (Mask & (1 << I))
2553 MIB.addDef(AArch64::ZAD0 + I, RegState::ImplicitDefine);
2554 }
2555
2556 MI.eraseFromParent(); // The pseudo is gone now.
2557 return BB;
2558}
2559
2560MachineBasicBlock *
2561AArch64TargetLowering::EmitAddVectorToTile(unsigned Opc, unsigned BaseReg,
2562 MachineInstr &MI,
2563 MachineBasicBlock *BB) const {
2564 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2565 MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
2566
2567 MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
2568 MIB.addReg(BaseReg + MI.getOperand(0).getImm());
2569 MIB.add(MI.getOperand(1)); // pn
2570 MIB.add(MI.getOperand(2)); // pm
2571 MIB.add(MI.getOperand(3)); // zn
2572
2573 MI.eraseFromParent(); // The pseudo is gone now.
2574 return BB;
2575}
2576
2577MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
2578 MachineInstr &MI, MachineBasicBlock *BB) const {
2579 switch (MI.getOpcode()) {
2580 default:
2581#ifndef NDEBUG
2582 MI.dump();
2583#endif
2584     llvm_unreachable("Unexpected instruction for custom inserter!");
2585
2586 case AArch64::F128CSEL:
2587 return EmitF128CSEL(MI, BB);
2588 case TargetOpcode::STATEPOINT:
2589 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
2590 // while bl call instruction (where statepoint will be lowered at the end)
2591 // has implicit def. This def is early-clobber as it will be set at
2592 // the moment of the call and earlier than any use is read.
2593 // Add this implicit dead def here as a workaround.
2594 MI.addOperand(*MI.getMF(),
2595 MachineOperand::CreateReg(
2596 AArch64::LR, /*isDef*/ true,
2597 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
2598 /*isUndef*/ false, /*isEarlyClobber*/ true));
2599 [[fallthrough]];
2600 case TargetOpcode::STACKMAP:
2601 case TargetOpcode::PATCHPOINT:
2602 return emitPatchPoint(MI, BB);
2603
2604 case AArch64::CATCHRET:
2605 return EmitLoweredCatchRet(MI, BB);
2606 case AArch64::LD1_MXIPXX_H_PSEUDO_B:
2607 return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
2608 case AArch64::LD1_MXIPXX_H_PSEUDO_H:
2609 return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB);
2610 case AArch64::LD1_MXIPXX_H_PSEUDO_S:
2611 return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB);
2612 case AArch64::LD1_MXIPXX_H_PSEUDO_D:
2613 return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB);
2614 case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
2615 return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB);
2616 case AArch64::LD1_MXIPXX_V_PSEUDO_B:
2617 return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB);
2618 case AArch64::LD1_MXIPXX_V_PSEUDO_H:
2619 return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB);
2620 case AArch64::LD1_MXIPXX_V_PSEUDO_S:
2621 return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB);
2622 case AArch64::LD1_MXIPXX_V_PSEUDO_D:
2623 return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB);
2624 case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
2625 return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
2626 case AArch64::LDR_ZA_PSEUDO:
2627 return EmitFill(MI, BB);
2628 case AArch64::BFMOPA_MPPZZ_PSEUDO:
2629 return EmitMopa(AArch64::BFMOPA_MPPZZ, AArch64::ZAS0, MI, BB);
2630 case AArch64::BFMOPS_MPPZZ_PSEUDO:
2631 return EmitMopa(AArch64::BFMOPS_MPPZZ, AArch64::ZAS0, MI, BB);
2632 case AArch64::FMOPAL_MPPZZ_PSEUDO:
2633 return EmitMopa(AArch64::FMOPAL_MPPZZ, AArch64::ZAS0, MI, BB);
2634 case AArch64::FMOPSL_MPPZZ_PSEUDO:
2635 return EmitMopa(AArch64::FMOPSL_MPPZZ, AArch64::ZAS0, MI, BB);
2636 case AArch64::FMOPA_MPPZZ_S_PSEUDO:
2637 return EmitMopa(AArch64::FMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
2638 case AArch64::FMOPS_MPPZZ_S_PSEUDO:
2639 return EmitMopa(AArch64::FMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
2640 case AArch64::FMOPA_MPPZZ_D_PSEUDO:
2641 return EmitMopa(AArch64::FMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
2642 case AArch64::FMOPS_MPPZZ_D_PSEUDO:
2643 return EmitMopa(AArch64::FMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
2644 case AArch64::SMOPA_MPPZZ_S_PSEUDO:
2645 return EmitMopa(AArch64::SMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
2646 case AArch64::SMOPS_MPPZZ_S_PSEUDO:
2647 return EmitMopa(AArch64::SMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
2648 case AArch64::UMOPA_MPPZZ_S_PSEUDO:
2649 return EmitMopa(AArch64::UMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
2650 case AArch64::UMOPS_MPPZZ_S_PSEUDO:
2651 return EmitMopa(AArch64::UMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
2652 case AArch64::SUMOPA_MPPZZ_S_PSEUDO:
2653 return EmitMopa(AArch64::SUMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
2654 case AArch64::SUMOPS_MPPZZ_S_PSEUDO:
2655 return EmitMopa(AArch64::SUMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
2656 case AArch64::USMOPA_MPPZZ_S_PSEUDO:
2657 return EmitMopa(AArch64::USMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
2658 case AArch64::USMOPS_MPPZZ_S_PSEUDO:
2659 return EmitMopa(AArch64::USMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
2660 case AArch64::SMOPA_MPPZZ_D_PSEUDO:
2661 return EmitMopa(AArch64::SMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
2662 case AArch64::SMOPS_MPPZZ_D_PSEUDO:
2663 return EmitMopa(AArch64::SMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
2664 case AArch64::UMOPA_MPPZZ_D_PSEUDO:
2665 return EmitMopa(AArch64::UMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
2666 case AArch64::UMOPS_MPPZZ_D_PSEUDO:
2667 return EmitMopa(AArch64::UMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
2668 case AArch64::SUMOPA_MPPZZ_D_PSEUDO:
2669 return EmitMopa(AArch64::SUMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
2670 case AArch64::SUMOPS_MPPZZ_D_PSEUDO:
2671 return EmitMopa(AArch64::SUMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
2672 case AArch64::USMOPA_MPPZZ_D_PSEUDO:
2673 return EmitMopa(AArch64::USMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
2674 case AArch64::USMOPS_MPPZZ_D_PSEUDO:
2675 return EmitMopa(AArch64::USMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
2676 case AArch64::INSERT_MXIPZ_H_PSEUDO_B:
2677 return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_B, AArch64::ZAB0, MI,
2678 BB);
2679 case AArch64::INSERT_MXIPZ_H_PSEUDO_H:
2680 return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_H, AArch64::ZAH0, MI,
2681 BB);
2682 case AArch64::INSERT_MXIPZ_H_PSEUDO_S:
2683 return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_S, AArch64::ZAS0, MI,
2684 BB);
2685 case AArch64::INSERT_MXIPZ_H_PSEUDO_D:
2686 return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_D, AArch64::ZAD0, MI,
2687 BB);
2688 case AArch64::INSERT_MXIPZ_H_PSEUDO_Q:
2689 return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_Q, AArch64::ZAQ0, MI,
2690 BB);
2691 case AArch64::INSERT_MXIPZ_V_PSEUDO_B:
2692 return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_B, AArch64::ZAB0, MI,
2693 BB);
2694 case AArch64::INSERT_MXIPZ_V_PSEUDO_H:
2695 return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_H, AArch64::ZAH0, MI,
2696 BB);
2697 case AArch64::INSERT_MXIPZ_V_PSEUDO_S:
2698 return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_S, AArch64::ZAS0, MI,
2699 BB);
2700 case AArch64::INSERT_MXIPZ_V_PSEUDO_D:
2701 return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_D, AArch64::ZAD0, MI,
2702 BB);
2703 case AArch64::INSERT_MXIPZ_V_PSEUDO_Q:
2704 return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_Q, AArch64::ZAQ0, MI,
2705 BB);
2706 case AArch64::ZERO_M_PSEUDO:
2707 return EmitZero(MI, BB);
2708 case AArch64::ADDHA_MPPZ_PSEUDO_S:
2709 return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_S, AArch64::ZAS0, MI, BB);
2710 case AArch64::ADDVA_MPPZ_PSEUDO_S:
2711 return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_S, AArch64::ZAS0, MI, BB);
2712 case AArch64::ADDHA_MPPZ_PSEUDO_D:
2713 return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_D, AArch64::ZAD0, MI, BB);
2714 case AArch64::ADDVA_MPPZ_PSEUDO_D:
2715 return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_D, AArch64::ZAD0, MI, BB);
2716 }
2717}
2718
2719//===----------------------------------------------------------------------===//
2720// AArch64 Lowering private implementation.
2721//===----------------------------------------------------------------------===//
2722
2723//===----------------------------------------------------------------------===//
2724// Lowering Code
2725//===----------------------------------------------------------------------===//
2726
2727// Forward declarations of SVE fixed length lowering helpers
2728static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT);
2729static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
2730static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
2731static SDValue convertFixedMaskToScalableVector(SDValue Mask,
2732 SelectionDAG &DAG);
2733static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
2734 EVT VT);
2735
2736/// isZerosVector - Check whether SDNode N is a zero-filled vector.
2737static bool isZerosVector(const SDNode *N) {
2738 // Look through a bit convert.
2739 while (N->getOpcode() == ISD::BITCAST)
2740 N = N->getOperand(0).getNode();
2741
2742 if (ISD::isConstantSplatVectorAllZeros(N))
2743 return true;
2744
2745 if (N->getOpcode() != AArch64ISD::DUP)
2746 return false;
2747
2748 auto Opnd0 = N->getOperand(0);
2749 return isNullConstant(Opnd0) || isNullFPConstant(Opnd0);
2750}
2751
2752/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
2753/// CC
2754static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
2755 switch (CC) {
2756 default:
2757     llvm_unreachable("Unknown condition code!");
2758 case ISD::SETNE:
2759 return AArch64CC::NE;
2760 case ISD::SETEQ:
2761 return AArch64CC::EQ;
2762 case ISD::SETGT:
2763 return AArch64CC::GT;
2764 case ISD::SETGE:
2765 return AArch64CC::GE;
2766 case ISD::SETLT:
2767 return AArch64CC::LT;
2768 case ISD::SETLE:
2769 return AArch64CC::LE;
2770 case ISD::SETUGT:
2771 return AArch64CC::HI;
2772 case ISD::SETUGE:
2773 return AArch64CC::HS;
2774 case ISD::SETULT:
2775 return AArch64CC::LO;
2776 case ISD::SETULE:
2777 return AArch64CC::LS;
2778 }
2779}
2780
2781/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
2782static void changeFPCCToAArch64CC(ISD::CondCode CC,
2783 AArch64CC::CondCode &CondCode,
2784 AArch64CC::CondCode &CondCode2) {
2785 CondCode2 = AArch64CC::AL;
2786 switch (CC) {
2787 default:
2788     llvm_unreachable("Unknown FP condition!");
2789 case ISD::SETEQ:
2790 case ISD::SETOEQ:
2791 CondCode = AArch64CC::EQ;
2792 break;
2793 case ISD::SETGT:
2794 case ISD::SETOGT:
2795 CondCode = AArch64CC::GT;
2796 break;
2797 case ISD::SETGE:
2798 case ISD::SETOGE:
2799 CondCode = AArch64CC::GE;
2800 break;
2801 case ISD::SETOLT:
2802 CondCode = AArch64CC::MI;
2803 break;
2804 case ISD::SETOLE:
2805 CondCode = AArch64CC::LS;
2806 break;
2807 case ISD::SETONE:
2808 CondCode = AArch64CC::MI;
2809 CondCode2 = AArch64CC::GT;
2810 break;
2811 case ISD::SETO:
2812 CondCode = AArch64CC::VC;
2813 break;
2814 case ISD::SETUO:
2815 CondCode = AArch64CC::VS;
2816 break;
2817 case ISD::SETUEQ:
2818 CondCode = AArch64CC::EQ;
2819 CondCode2 = AArch64CC::VS;
2820 break;
2821 case ISD::SETUGT:
2822 CondCode = AArch64CC::HI;
2823 break;
2824 case ISD::SETUGE:
2825 CondCode = AArch64CC::PL;
2826 break;
2827 case ISD::SETLT:
2828 case ISD::SETULT:
2829 CondCode = AArch64CC::LT;
2830 break;
2831 case ISD::SETLE:
2832 case ISD::SETULE:
2833 CondCode = AArch64CC::LE;
2834 break;
2835 case ISD::SETNE:
2836 case ISD::SETUNE:
2837 CondCode = AArch64CC::NE;
2838 break;
2839 }
2840}
2841
2842/// Convert a DAG fp condition code to an AArch64 CC.
2843/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
2844/// should be AND'ed instead of OR'ed.
2845static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
2846 AArch64CC::CondCode &CondCode,
2847 AArch64CC::CondCode &CondCode2) {
2848 CondCode2 = AArch64CC::AL;
2849 switch (CC) {
2850 default:
2851 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2852     assert(CondCode2 == AArch64CC::AL);
2853 break;
2854 case ISD::SETONE:
2855 // (a one b)
2856 // == ((a olt b) || (a ogt b))
2857 // == ((a ord b) && (a une b))
2858 CondCode = AArch64CC::VC;
2859 CondCode2 = AArch64CC::NE;
2860 break;
2861 case ISD::SETUEQ:
2862 // (a ueq b)
2863 // == ((a uno b) || (a oeq b))
2864 // == ((a ule b) && (a uge b))
2865 CondCode = AArch64CC::PL;
2866 CondCode2 = AArch64CC::LE;
2867 break;
2868 }
2869}
2870
2871/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
2872/// CC usable with the vector instructions. Fewer operations are available
2873/// without a real NZCV register, so we have to use less efficient combinations
2874/// to get the same effect.
2875static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
2876 AArch64CC::CondCode &CondCode,
2877 AArch64CC::CondCode &CondCode2,
2878 bool &Invert) {
2879 Invert = false;
2880 switch (CC) {
2881 default:
2882 // Mostly the scalar mappings work fine.
2883 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2884 break;
2885 case ISD::SETUO:
2886 Invert = true;
2887 [[fallthrough]];
2888 case ISD::SETO:
2889 CondCode = AArch64CC::MI;
2890 CondCode2 = AArch64CC::GE;
2891 break;
2892 case ISD::SETUEQ:
2893 case ISD::SETULT:
2894 case ISD::SETULE:
2895 case ISD::SETUGT:
2896 case ISD::SETUGE:
2897 // All of the compare-mask comparisons are ordered, but we can switch
2898 // between the two by a double inversion. E.g. ULE == !OGT.
2899 Invert = true;
2900 changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
2901 CondCode, CondCode2);
2902 break;
2903 }
2904}
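// Illustrative note, not part of the original source: as an example of the
// double-inversion path above, SETULE is handled by inverting it to SETOGT,
// mapping that to AArch64CC::GT, and reporting Invert = true. The caller then
// emits the ordered compare mask (an FCMGT) and bitwise-inverts the result,
// since "a ule b" is exactly !(a ogt b), NaNs included.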
2905
2906static bool isLegalArithImmed(uint64_t C) {
2907 // Matches AArch64DAGToDAGISel::SelectArithImmed().
2908 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
2909   LLVM_DEBUG(dbgs() << "Is imm " << C
2910                     << " legal: " << (IsLegal ? "yes\n" : "no\n"));
2911 return IsLegal;
2912}
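// Illustrative sketch, not part of the original file: the same predicate as a
// standalone constexpr with a few sample values. A legal arithmetic immediate
// is any 12-bit value, or a 12-bit value shifted left by 12 (the "LSL #12"
// form of ADD/SUB/CMP). The helper name below is hypothetical.
constexpr bool isLegalArithImmedSketch(unsigned long long C) {
  // Mirrors the check above: low 12 bits, or 12 bits starting at bit 12.
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}
static_assert(isLegalArithImmedSketch(0xFFF), "fits in 12 bits");
static_assert(isLegalArithImmedSketch(0x1000), "0x1 shifted left by 12");
static_assert(!isLegalArithImmedSketch(0x1001), "would need two instructions");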
2913
2914// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
2915// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
2916// can be set differently by this operation. It comes down to whether
2917// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
2918// everything is fine. If not then the optimization is wrong. Thus general
2919// comparisons are only valid if op2 != 0.
2920//
2921// So, finally, the only LLVM-native comparisons that don't mention C and V
2922// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
2923// the absence of information about op2.
2924static bool isCMN(SDValue Op, ISD::CondCode CC) {
2925 return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
2926 (CC == ISD::SETEQ || CC == ISD::SETNE);
2927}
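// Illustrative note, not part of the original source: a concrete case of the
// flag mismatch described above. With op1 = 1 and op2 = 0, "CMP op1, (0-op2)"
// computes 1 - 0 and sets C = 1 (no borrow), while the rewritten
// "CMN op1, op2" computes 1 + 0 and sets C = 0 (no carry). Unsigned
// conditions such as HS/LO would therefore observe different flags, but Z and
// N agree, which is why the rewrite is restricted to SETEQ/SETNE.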
2928
2929static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
2930 SelectionDAG &DAG, SDValue Chain,
2931 bool IsSignaling) {
2932 EVT VT = LHS.getValueType();
2933   assert(VT != MVT::f128);
2934
2935 const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
2936
2937 if (VT == MVT::f16 && !FullFP16) {
2938 LHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
2939 {Chain, LHS});
2940 RHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
2941 {LHS.getValue(1), RHS});
2942 Chain = RHS.getValue(1);
2943 VT = MVT::f32;
2944 }
2945 unsigned Opcode =
2946 IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
2947 return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
2948}
2949
2950static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2951 const SDLoc &dl, SelectionDAG &DAG) {
2952 EVT VT = LHS.getValueType();
2953 const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
2954
2955 if (VT.isFloatingPoint()) {
2956     assert(VT != MVT::f128);
2957 if (VT == MVT::f16 && !FullFP16) {
2958 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
2959 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
2960 VT = MVT::f32;
2961 }
2962 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
2963 }
2964
2965 // The CMP instruction is just an alias for SUBS, and representing it as
2966 // SUBS means that it's possible to get CSE with subtract operations.
2967 // A later phase can perform the optimization of setting the destination
2968 // register to WZR/XZR if it ends up being unused.
2969 unsigned Opcode = AArch64ISD::SUBS;
2970
2971 if (isCMN(RHS, CC)) {
2972 // Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ?
2973 Opcode = AArch64ISD::ADDS;
2974 RHS = RHS.getOperand(1);
2975 } else if (isCMN(LHS, CC)) {
2976     // As we are looking for EQ/NE compares, the operands can be commuted; can
2977 // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
2978 Opcode = AArch64ISD::ADDS;
2979 LHS = LHS.getOperand(1);
2980 } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
2981 if (LHS.getOpcode() == ISD::AND) {
2982 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
2983 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
2984 // of the signed comparisons.
2985 const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
2986 DAG.getVTList(VT, MVT_CC),
2987 LHS.getOperand(0),
2988 LHS.getOperand(1));
2989 // Replace all users of (and X, Y) with newly generated (ands X, Y)
2990 DAG.ReplaceAllUsesWith(LHS, ANDSNode);
2991 return ANDSNode.getValue(1);
2992 } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
2993 // Use result of ANDS
2994 return LHS.getValue(1);
2995 }
2996 }
2997
2998 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
2999 .getValue(1);
3000}
3001
3002/// \defgroup AArch64CCMP CMP;CCMP matching
3003///
3004/// These functions deal with the formation of CMP;CCMP;... sequences.
3005/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
3006/// a comparison. They set the NZCV flags to a predefined value if their
3007/// predicate is false. This allows us to express arbitrary conjunctions, for
3008/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
3009/// expressed as:
3010/// cmp A
3011/// ccmp B, inv(CB), CA
3012/// check for CB flags
3013///
3014/// This naturally lets us implement chains of AND operations with SETCC
3015/// operands. And we can even implement some other situations by transforming
3016/// them:
3017/// - We can implement (NEG SETCC) i.e. negating a single comparison by
3018/// negating the flags used in a CCMP/FCCMP operations.
3019/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
3020/// by negating the flags we test for afterwards. i.e.
3021/// NEG (CMP CCMP CCMP ...) can be implemented.
3022/// - Note that we can only ever negate all previously processed results.
3023/// What we can not implement by flipping the flags to test is a negation
3024/// of two sub-trees (because the negation affects all sub-trees emitted so
3025/// far, so the 2nd sub-tree we emit would also affect the first).
3026/// With those tools we can implement some OR operations:
3027/// - (OR (SETCC A) (SETCC B)) can be implemented via:
3028/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
3029/// - After transforming OR to NEG/AND combinations we may be able to use NEG
3030/// elimination rules from earlier to implement the whole thing as a
3031/// CCMP/FCCMP chain.
3032///
3033/// As complete example:
3034/// or (or (setCA (cmp A)) (setCB (cmp B)))
3035/// (and (setCC (cmp C)) (setCD (cmp D)))"
3036/// can be reassociated to:
3037/// or (and (setCC (cmp C)) (setCD (cmp D)))
3038///    (or (setCA (cmp A)) (setCB (cmp B)))
3039/// can be transformed to:
3040/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
3041/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
3042/// which can be implemented as:
3043/// cmp C
3044/// ccmp D, inv(CD), CC
3045/// ccmp A, CA, inv(CD)
3046/// ccmp B, CB, inv(CA)
3047/// check for CB flags
3048///
3049/// A counterexample is "or (and A B) (and C D)" which translates to
3050/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), we
3051/// can only implement 1 of the inner (not) operations, but not both!
3052/// @{
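// Illustrative note, not part of the original source: for a simple conjunction
// such as "(a == 0) && (b > 5)" on 32-bit values, the machinery below would
// typically emit a sequence along the lines of:
//   cmp  w0, #0          // sets NZCV for "a == 0"
//   ccmp w1, #5, #4, eq  // if EQ holds, compare b with 5; otherwise set Z
//   cset w2, gt          // GT fails when Z was forced on, as required
// i.e. the CCMP only performs the second compare when the first condition
// held, and otherwise writes an NZCV constant that makes the final test fail.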
3053
3054/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
3055static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
3056 ISD::CondCode CC, SDValue CCOp,
3057 AArch64CC::CondCode Predicate,
3058 AArch64CC::CondCode OutCC,
3059 const SDLoc &DL, SelectionDAG &DAG) {
3060 unsigned Opcode = 0;
3061 const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
3062
3063 if (LHS.getValueType().isFloatingPoint()) {
3064     assert(LHS.getValueType() != MVT::f128);
3065 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
3066 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
3067 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
3068 }
3069 Opcode = AArch64ISD::FCCMP;
3070 } else if (RHS.getOpcode() == ISD::SUB) {
3071 SDValue SubOp0 = RHS.getOperand(0);
3072 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3073 // See emitComparison() on why we can only do this for SETEQ and SETNE.
3074 Opcode = AArch64ISD::CCMN;
3075 RHS = RHS.getOperand(1);
3076 }
3077 }
3078 if (Opcode == 0)
3079 Opcode = AArch64ISD::CCMP;
3080
3081 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
3082 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
3083 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
3084 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
3085 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
3086}
3087
3088/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
3089/// expressed as a conjunction. See \ref AArch64CCMP.
3090/// \param CanNegate Set to true if we can negate the whole sub-tree just by
3091/// changing the conditions on the SETCC tests.
3092/// (this means we can call emitConjunctionRec() with
3093/// Negate==true on this sub-tree)
3094/// \param MustBeFirst Set to true if this subtree needs to be negated and we
3095/// cannot do the negation naturally. We are required to
3096/// emit the subtree first in this case.
3097/// \param WillNegate   Is true if we are called when the result of this
3098/// subexpression must be negated. This happens when the
3099/// outer expression is an OR. We can use this fact to know
3100/// that we have a double negation (or (or ...) ...) that
3101/// can be implemented for free.
3102static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
3103 bool &MustBeFirst, bool WillNegate,
3104 unsigned Depth = 0) {
3105 if (!Val.hasOneUse())
3106 return false;
3107 unsigned Opcode = Val->getOpcode();
3108 if (Opcode == ISD::SETCC) {
3109 if (Val->getOperand(0).getValueType() == MVT::f128)
3110 return false;
3111 CanNegate = true;
3112 MustBeFirst = false;
3113 return true;
3114 }
3115 // Protect against exponential runtime and stack overflow.
3116 if (Depth > 6)
3117 return false;
3118 if (Opcode == ISD::AND || Opcode == ISD::OR) {
3119 bool IsOR = Opcode == ISD::OR;
3120 SDValue O0 = Val->getOperand(0);
3121 SDValue O1 = Val->getOperand(1);
3122 bool CanNegateL;
3123 bool MustBeFirstL;
3124 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
3125 return false;
3126 bool CanNegateR;
3127 bool MustBeFirstR;
3128 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
3129 return false;
3130
3131 if (MustBeFirstL && MustBeFirstR)
3132 return false;
3133
3134 if (IsOR) {
3135 // For an OR expression we need to be able to naturally negate at least
3136 // one side or we cannot do the transformation at all.
3137 if (!CanNegateL && !CanNegateR)
3138 return false;
3139       // If the result of the OR will be negated and we can naturally negate
3140       // the leaves, then this sub-tree as a whole negates naturally.
3141 CanNegate = WillNegate && CanNegateL && CanNegateR;
3142 // If we cannot naturally negate the whole sub-tree, then this must be
3143 // emitted first.
3144 MustBeFirst = !CanNegate;
3145 } else {
3146       assert(Opcode == ISD::AND && "Must be OR or AND");
3147 // We cannot naturally negate an AND operation.
3148 CanNegate = false;
3149 MustBeFirst = MustBeFirstL || MustBeFirstR;
3150 }
3151 return true;
3152 }
3153 return false;
3154}
3155
3156/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
3157/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
3158/// Tries to transform the given i1 producing node @p Val to a series compare
3159/// and conditional compare operations. @returns an NZCV flags producing node
3160/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
3161/// transformation was not possible.
3162/// \p Negate is true if we want this sub-tree being negated just by changing
3163/// SETCC conditions.
3164static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
3165 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
3166 AArch64CC::CondCode Predicate) {
3167 // We're at a tree leaf, produce a conditional comparison operation.
3168 unsigned Opcode = Val->getOpcode();
3169 if (Opcode == ISD::SETCC) {
3170 SDValue LHS = Val->getOperand(0);
3171 SDValue RHS = Val->getOperand(1);
3172 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
3173 bool isInteger = LHS.getValueType().isInteger();
3174 if (Negate)
3175 CC = getSetCCInverse(CC, LHS.getValueType());
3176 SDLoc DL(Val);
3177 // Determine OutCC and handle FP special case.
3178 if (isInteger) {
3179 OutCC = changeIntCCToAArch64CC(CC);
3180 } else {
3181       assert(LHS.getValueType().isFloatingPoint());
3182 AArch64CC::CondCode ExtraCC;
3183 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
3184 // Some floating point conditions can't be tested with a single condition
3185 // code. Construct an additional comparison in this case.
3186 if (ExtraCC != AArch64CC::AL) {
3187 SDValue ExtraCmp;
3188 if (!CCOp.getNode())
3189 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
3190 else
3191 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
3192 ExtraCC, DL, DAG);
3193 CCOp = ExtraCmp;
3194 Predicate = ExtraCC;
3195 }
3196 }
3197
3198 // Produce a normal comparison if we are first in the chain
3199 if (!CCOp)
3200 return emitComparison(LHS, RHS, CC, DL, DAG);
3201 // Otherwise produce a ccmp.
3202 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
3203 DAG);
3204 }
3205   assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
3206
3207 bool IsOR = Opcode == ISD::OR;
3208
3209 SDValue LHS = Val->getOperand(0);
3210 bool CanNegateL;
3211 bool MustBeFirstL;
3212 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
3213   assert(ValidL && "Valid conjunction/disjunction tree");
3214 (void)ValidL;
3215
3216 SDValue RHS = Val->getOperand(1);
3217 bool CanNegateR;
3218 bool MustBeFirstR;
3219 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
3220   assert(ValidR && "Valid conjunction/disjunction tree");
3221 (void)ValidR;
3222
3223 // Swap sub-tree that must come first to the right side.
3224 if (MustBeFirstL) {
3225     assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
3226 std::swap(LHS, RHS);
3227 std::swap(CanNegateL, CanNegateR);
3228 std::swap(MustBeFirstL, MustBeFirstR);
3229 }
3230
3231 bool NegateR;
3232 bool NegateAfterR;
3233 bool NegateL;
3234 bool NegateAfterAll;
3235 if (Opcode == ISD::OR) {
3236 // Swap the sub-tree that we can negate naturally to the left.
3237 if (!CanNegateL) {
3238       assert(CanNegateR && "at least one side must be negatable");
3239       assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
3240       assert(!Negate);
3241 std::swap(LHS, RHS);
3242 NegateR = false;
3243 NegateAfterR = true;
3244 } else {
3245 // Negate the left sub-tree if possible, otherwise negate the result.
3246 NegateR = CanNegateR;
3247 NegateAfterR = !CanNegateR;
3248 }
3249 NegateL = true;
3250 NegateAfterAll = !Negate;
3251 } else {
3252     assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
3253     assert(!Negate && "Valid conjunction/disjunction tree");
3254
3255 NegateL = false;
3256 NegateR = false;
3257 NegateAfterR = false;
3258 NegateAfterAll = false;
3259 }
3260
3261 // Emit sub-trees.
3262 AArch64CC::CondCode RHSCC;
3263 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
3264 if (NegateAfterR)
3265 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
3266 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
3267 if (NegateAfterAll)
3268 OutCC = AArch64CC::getInvertedCondCode(OutCC);
3269 return CmpL;
3270}
3271
3272/// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
3273/// In some cases this is even possible with OR operations in the expression.
3274/// See \ref AArch64CCMP.
3275/// \see emitConjunctionRec().
3276static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
3277 AArch64CC::CondCode &OutCC) {
3278 bool DummyCanNegate;
3279 bool DummyMustBeFirst;
3280 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
3281 return SDValue();
3282
3283 return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
3284}
3285
3286/// @}
3287
3288/// Returns how profitable it is to fold a comparison's operand's shift and/or
3289/// extension operations.
3290static unsigned getCmpOperandFoldingProfit(SDValue Op) {
3291 auto isSupportedExtend = [&](SDValue V) {
3292 if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
3293 return true;
3294
3295 if (V.getOpcode() == ISD::AND)
3296 if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
3297 uint64_t Mask = MaskCst->getZExtValue();
3298 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
3299 }
3300
3301 return false;
3302 };
3303
3304 if (!Op.hasOneUse())
3305 return 0;
3306
3307 if (isSupportedExtend(Op))
3308 return 1;
3309
3310 unsigned Opc = Op.getOpcode();
3311 if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
3312 if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3313 uint64_t Shift = ShiftCst->getZExtValue();
3314 if (isSupportedExtend(Op.getOperand(0)))
3315 return (Shift <= 4) ? 2 : 1;
3316 EVT VT = Op.getValueType();
3317 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
3318 return 1;
3319 }
3320
3321 return 0;
3322}
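// Illustrative note, not part of the original source: the returned "profit" is
// only used below to decide whether to swap the compare operands. For example,
// (shl (and x, 0xFF), 2) scores 2 because the extend and the small shift fold
// into one operand ("cmp w1, w0, uxtb #2"), a plain (shl x, 3) scores 1
// ("cmp w1, w0, lsl #3"), and any operand with more than one use scores 0
// since the folded form could not replace its other users.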
3323
3324static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3325 SDValue &AArch64cc, SelectionDAG &DAG,
3326 const SDLoc &dl) {
3327 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3328 EVT VT = RHS.getValueType();
3329 uint64_t C = RHSC->getZExtValue();
3330 if (!isLegalArithImmed(C)) {
3331 // Constant does not fit, try adjusting it by one?
3332 switch (CC) {
3333 default:
3334 break;
3335 case ISD::SETLT:
3336 case ISD::SETGE:
3337 if ((VT == MVT::i32 && C != 0x80000000 &&
3338 isLegalArithImmed((uint32_t)(C - 1))) ||
3339 (VT == MVT::i64 && C != 0x80000000ULL &&
3340 isLegalArithImmed(C - 1ULL))) {
3341 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3342 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
3343 RHS = DAG.getConstant(C, dl, VT);
3344 }
3345 break;
3346 case ISD::SETULT:
3347 case ISD::SETUGE:
3348 if ((VT == MVT::i32 && C != 0 &&
3349 isLegalArithImmed((uint32_t)(C - 1))) ||
3350 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
3351 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3352 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
3353 RHS = DAG.getConstant(C, dl, VT);
3354 }
3355 break;
3356 case ISD::SETLE:
3357 case ISD::SETGT:
3358         if ((VT == MVT::i32 && C != INT32_MAX &&
3359 isLegalArithImmed((uint32_t)(C + 1))) ||
3360             (VT == MVT::i64 && C != INT64_MAX &&
3361 isLegalArithImmed(C + 1ULL))) {
3362 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3363 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
3364 RHS = DAG.getConstant(C, dl, VT);
3365 }
3366 break;
3367 case ISD::SETULE:
3368 case ISD::SETUGT:
3369         if ((VT == MVT::i32 && C != UINT32_MAX &&
3370 isLegalArithImmed((uint32_t)(C + 1))) ||
3371             (VT == MVT::i64 && C != UINT64_MAX &&
3372 isLegalArithImmed(C + 1ULL))) {
3373 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3374 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
3375 RHS = DAG.getConstant(C, dl, VT);
3376 }
3377 break;
3378 }
3379 }
3380 }
3381
3382 // Comparisons are canonicalized so that the RHS operand is simpler than the
3383 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
3384 // can fold some shift+extend operations on the RHS operand, so swap the
3385 // operands if that can be done.
3386 //
3387 // For example:
3388 // lsl w13, w11, #1
3389 // cmp w13, w12
3390 // can be turned into:
3391 // cmp w12, w11, lsl #1
3392 if (!isa<ConstantSDNode>(RHS) ||
3393 !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
3394 SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
3395
3396 if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
3397 std::swap(LHS, RHS);
3398 CC = ISD::getSetCCSwappedOperands(CC);
3399 }
3400 }
3401
3402 SDValue Cmp;
3403 AArch64CC::CondCode AArch64CC;
3404 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
3405 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
3406
3407 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
3408 // For the i8 operand, the largest immediate is 255, so this can be easily
3409 // encoded in the compare instruction. For the i16 operand, however, the
3410 // largest immediate cannot be encoded in the compare.
3411 // Therefore, use a sign extending load and cmn to avoid materializing the
3412 // -1 constant. For example,
3413 // movz w1, #65535
3414 // ldrh w0, [x0, #0]
3415 // cmp w0, w1
3416 // >
3417 // ldrsh w0, [x0, #0]
3418 // cmn w0, #1
3419     // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
3420 // if and only if (sext LHS) == (sext RHS). The checks are in place to
3421 // ensure both the LHS and RHS are truly zero extended and to make sure the
3422 // transformation is profitable.
3423 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
3424 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
3425 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
3426 LHS.getNode()->hasNUsesOfValue(1, 0)) {
3427 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
3428 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
3429 SDValue SExt =
3430 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
3431 DAG.getValueType(MVT::i16));
3432 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
3433 RHS.getValueType()),
3434 CC, dl, DAG);
3435 AArch64CC = changeIntCCToAArch64CC(CC);
3436 }
3437 }
3438
3439 if (!Cmp && (RHSC->isZero() || RHSC->isOne())) {
3440 if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
3441 if ((CC == ISD::SETNE) ^ RHSC->isZero())
3442 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
3443 }
3444 }
3445 }
3446
3447 if (!Cmp) {
3448 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
3449 AArch64CC = changeIntCCToAArch64CC(CC);
3450 }
3451 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
3452 return Cmp;
3453}
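// Illustrative note, not part of the original source: a concrete instance of
// the immediate adjustment above. For "x s< 0x1001" (SETLT), 0x1001 is not a
// legal arithmetic immediate, but C - 1 = 0x1000 is (a 12-bit value shifted by
// 12), so the comparison is rewritten as "x s<= 0x1000" (SETLE) and can be
// emitted as a single "cmp x0, #1, lsl #12" followed by an LE test.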
3454
3455static std::pair<SDValue, SDValue>
3456getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
3457   assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
3458          "Unsupported value type");
3459 SDValue Value, Overflow;
3460 SDLoc DL(Op);
3461 SDValue LHS = Op.getOperand(0);
3462 SDValue RHS = Op.getOperand(1);
3463 unsigned Opc = 0;
3464 switch (Op.getOpcode()) {
3465 default:
3466     llvm_unreachable("Unknown overflow instruction!");
3467 case ISD::SADDO:
3468 Opc = AArch64ISD::ADDS;
3469 CC = AArch64CC::VS;
3470 break;
3471 case ISD::UADDO:
3472 Opc = AArch64ISD::ADDS;
3473 CC = AArch64CC::HS;
3474 break;
3475 case ISD::SSUBO:
3476 Opc = AArch64ISD::SUBS;
3477 CC = AArch64CC::VS;
3478 break;
3479 case ISD::USUBO:
3480 Opc = AArch64ISD::SUBS;
3481 CC = AArch64CC::LO;
3482 break;
3483 // Multiply needs a little bit extra work.
3484 case ISD::SMULO:
3485 case ISD::UMULO: {
3486 CC = AArch64CC::NE;
3487 bool IsSigned = Op.getOpcode() == ISD::SMULO;
3488 if (Op.getValueType() == MVT::i32) {
3489 // Extend to 64-bits, then perform a 64-bit multiply.
3490 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3491 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
3492 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
3493 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3494 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3495
3496 // Check that the result fits into a 32-bit integer.
3497 SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
3498 if (IsSigned) {
3499 // cmp xreg, wreg, sxtw
3500 SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
3501 Overflow =
3502 DAG.getNode(AArch64ISD::SUBS, DL, VTs, Mul, SExtMul).getValue(1);
3503 } else {
3504 // tst xreg, #0xffffffff00000000
3505 SDValue UpperBits = DAG.getConstant(0xFFFFFFFF00000000, DL, MVT::i64);
3506 Overflow =
3507 DAG.getNode(AArch64ISD::ANDS, DL, VTs, Mul, UpperBits).getValue(1);
3508 }
3509 break;
3510 }
3511     assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
3512 // For the 64 bit multiply
3513 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3514 if (IsSigned) {
3515 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
3516 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
3517 DAG.getConstant(63, DL, MVT::i64));
3518 // It is important that LowerBits is last, otherwise the arithmetic
3519 // shift will not be folded into the compare (SUBS).
3520 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3521 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
3522 .getValue(1);
3523 } else {
3524 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
3525 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3526 Overflow =
3527 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
3528 DAG.getConstant(0, DL, MVT::i64),
3529 UpperBits).getValue(1);
3530 }
3531 break;
3532 }
3533 } // switch (...)
3534
3535 if (Opc) {
3536 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
3537
3538 // Emit the AArch64 operation with overflow check.
3539 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
3540 Overflow = Value.getValue(1);
3541 }
3542 return std::make_pair(Value, Overflow);
3543}
3544
3545SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
3546 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
3547 return LowerToScalableOp(Op, DAG);
3548
3549 SDValue Sel = Op.getOperand(0);
3550 SDValue Other = Op.getOperand(1);
3551 SDLoc dl(Sel);
3552
3553 // If the operand is an overflow checking operation, invert the condition
3554 // code and kill the Not operation. I.e., transform:
3555 // (xor (overflow_op_bool, 1))
3556 // -->
3557 // (csel 1, 0, invert(cc), overflow_op_bool)
3558 // ... which later gets transformed to just a cset instruction with an
3559 // inverted condition code, rather than a cset + eor sequence.
3560 if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
3561 // Only lower legal XALUO ops.
3562 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
3563 return SDValue();
3564
3565 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3566 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3567 AArch64CC::CondCode CC;
3568 SDValue Value, Overflow;
3569 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
3570 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3571 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
3572 CCVal, Overflow);
3573 }
3574 // If neither operand is a SELECT_CC, give up.
3575 if (Sel.getOpcode() != ISD::SELECT_CC)
3576 std::swap(Sel, Other);
3577 if (Sel.getOpcode() != ISD::SELECT_CC)
3578 return Op;
3579
3580 // The folding we want to perform is:
3581 // (xor x, (select_cc a, b, cc, 0, -1) )
3582 // -->
3583 // (csel x, (xor x, -1), cc ...)
3584 //
3585 // The latter will get matched to a CSINV instruction.
3586
3587 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
3588 SDValue LHS = Sel.getOperand(0);
3589 SDValue RHS = Sel.getOperand(1);
3590 SDValue TVal = Sel.getOperand(2);
3591 SDValue FVal = Sel.getOperand(3);
3592
3593 // FIXME: This could be generalized to non-integer comparisons.
3594 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
3595 return Op;
3596
3597 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
3598 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
3599
3600 // The values aren't constants, this isn't the pattern we're looking for.
3601 if (!CFVal || !CTVal)
3602 return Op;
3603
3604 // We can commute the SELECT_CC by inverting the condition. This
3605 // might be needed to make this fit into a CSINV pattern.
3606 if (CTVal->isAllOnes() && CFVal->isZero()) {
3607 std::swap(TVal, FVal);
3608 std::swap(CTVal, CFVal);
3609 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
3610 }
3611
3612 // If the constants line up, perform the transform!
3613 if (CTVal->isZero() && CFVal->isAllOnes()) {
3614 SDValue CCVal;
3615 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
3616
3617 FVal = Other;
3618 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
3619 DAG.getConstant(-1ULL, dl, Other.getValueType()));
3620
3621 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
3622 CCVal, Cmp);
3623 }
3624
3625 return Op;
3626}
3627
3628// If Invert is false, sets 'C' bit of NZCV to 0 if value is 0, else sets 'C'
3629// bit to 1. If Invert is true, sets 'C' bit of NZCV to 1 if value is 0, else
3630// sets 'C' bit to 0.
3631static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert) {
3632 SDLoc DL(Value);
3633 EVT VT = Value.getValueType();
3634 SDValue Op0 = Invert ? DAG.getConstant(0, DL, VT) : Value;
3635 SDValue Op1 = Invert ? Value : DAG.getConstant(1, DL, VT);
3636 SDValue Cmp =
3637 DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::Glue), Op0, Op1);
3638 return Cmp.getValue(1);
3639}
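// Illustrative note, not part of the original source: with Invert == false the
// node above is "SUBS Value, 1", so the C flag (no borrow) ends up set exactly
// when Value != 0. With Invert == true it is "SUBS 0, Value", which borrows
// (clearing C) for any non-zero Value, i.e. C is set only when Value == 0,
// matching the inverted carry convention that SBCS expects.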
3640
3641// If Invert is false, value is 1 if 'C' bit of NZCV is 1, else 0.
3642// If Invert is true, value is 0 if 'C' bit of NZCV is 1, else 1.
3643static SDValue carryFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG,
3644 bool Invert) {
3645   assert(Flag.getResNo() == 1);
3646 SDLoc DL(Flag);
3647 SDValue Zero = DAG.getConstant(0, DL, VT);
3648 SDValue One = DAG.getConstant(1, DL, VT);
3649 unsigned Cond = Invert ? AArch64CC::LO : AArch64CC::HS;
3650 SDValue CC = DAG.getConstant(Cond, DL, MVT::i32);
3651 return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag);
3652}
3653
3654// Value is 1 if 'V' bit of NZCV is 1, else 0
3655static SDValue overflowFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG) {
3656   assert(Flag.getResNo() == 1);
3657 SDLoc DL(Flag);
3658 SDValue Zero = DAG.getConstant(0, DL, VT);
3659 SDValue One = DAG.getConstant(1, DL, VT);
3660 SDValue CC = DAG.getConstant(AArch64CC::VS, DL, MVT::i32);
3661 return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag);
3662}
3663
3664// This lowering is inefficient, but it will get cleaned up by
3665// `foldOverflowCheck`
3666static SDValue lowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG, unsigned Opcode,
3667 bool IsSigned) {
3668 EVT VT0 = Op.getValue(0).getValueType();
3669 EVT VT1 = Op.getValue(1).getValueType();
3670
3671 if (VT0 != MVT::i32 && VT0 != MVT::i64)
3672 return SDValue();
3673
3674 bool InvertCarry = Opcode == AArch64ISD::SBCS;
3675 SDValue OpLHS = Op.getOperand(0);
3676 SDValue OpRHS = Op.getOperand(1);
3677 SDValue OpCarryIn = valueToCarryFlag(Op.getOperand(2), DAG, InvertCarry);
3678
3679 SDLoc DL(Op);
3680 SDVTList VTs = DAG.getVTList(VT0, VT1);
3681
3682 SDValue Sum = DAG.getNode(Opcode, DL, DAG.getVTList(VT0, MVT::Glue), OpLHS,
3683 OpRHS, OpCarryIn);
3684
3685 SDValue OutFlag =
3686 IsSigned ? overflowFlagToValue(Sum.getValue(1), VT1, DAG)
3687 : carryFlagToValue(Sum.getValue(1), VT1, DAG, InvertCarry);
3688
3689 return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, Sum, OutFlag);
3690}
3691
3692static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
3693 // Let legalize expand this if it isn't a legal type yet.
3694 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3695 return SDValue();
3696
3697 SDLoc dl(Op);
3698 AArch64CC::CondCode CC;
3699 // The actual operation that sets the overflow or carry flag.
3700 SDValue Value, Overflow;
3701 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
3702
3703 // We use 0 and 1 as false and true values.
3704 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3705 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3706
3707 // We use an inverted condition, because the conditional select is inverted
3708 // too. This will allow it to be selected to a single instruction:
3709 // CSINC Wd, WZR, WZR, invert(cond).
3710 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3711 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
3712 CCVal, Overflow);
3713
3714 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3715 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3716}
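// Illustrative note, not part of the original source: for an i32
// llvm.uadd.with.overflow this typically selects to
//   adds w0, w0, w1          // value plus NZCV
//   cset w1, hs              // alias of "csinc w1, wzr, wzr, lo"
// where the inverted LO condition in the CSEL above is what allows the single
// CSINC/CSET form to be picked.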
3717
3718// Prefetch operands are:
3719// 1: Address to prefetch
3720// 2: bool isWrite
3721// 3: int locality (0 = no locality ... 3 = extreme locality)
3722// 4: bool isDataCache
3723static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
3724 SDLoc DL(Op);
3725 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3726 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
3727 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3728
3729 bool IsStream = !Locality;
3730 // When the locality number is set
3731 if (Locality) {
3732 // The front-end should have filtered out the out-of-range values
3733     assert(Locality <= 3 && "Prefetch locality out-of-range");
3734 // The locality degree is the opposite of the cache speed.
3735 // Put the number the other way around.
3736 // The encoding starts at 0 for level 1
3737 Locality = 3 - Locality;
3738 }
3739
3740   // Build the mask value encoding the expected behavior.
3741 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
3742 (!IsData << 3) | // IsDataCache bit
3743 (Locality << 1) | // Cache level bits
3744 (unsigned)IsStream; // Stream bit
3745 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
3746 DAG.getTargetConstant(PrfOp, DL, MVT::i32),
3747 Op.getOperand(1));
3748}
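// Illustrative sketch, not part of the original file: the PrfOp bitfield built
// above, written as a standalone helper with a couple of worked values. Bits
// [4:3] select PLD/PLI/PST, bits [2:1] the target cache level, and bit 0 the
// keep/stream policy. The helper name is hypothetical.
constexpr unsigned encodePrfOpSketch(bool IsWrite, bool IsData,
                                     unsigned Locality) {
  bool IsStream = (Locality == 0);
  unsigned Level = Locality ? 3 - Locality : 0; // locality 3 -> L1, 1 -> L3
  return (IsWrite << 4) | (!IsData << 3) | (Level << 1) | (unsigned)IsStream;
}
static_assert(encodePrfOpSketch(false, true, 3) == 0, "PLDL1KEEP");
static_assert(encodePrfOpSketch(false, true, 0) == 1, "PLDL1STRM");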
3749
3750SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
3751 SelectionDAG &DAG) const {
3752 EVT VT = Op.getValueType();
3753 if (VT.isScalableVector())
3754 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
3755
3756 if (useSVEForFixedLengthVectorVT(VT))
3757 return LowerFixedLengthFPExtendToSVE(Op, DAG);
3758
3759   assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
3760 return SDValue();
3761}
3762
3763SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
3764 SelectionDAG &DAG) const {
3765 if (Op.getValueType().isScalableVector())
3766 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
3767
3768 bool IsStrict = Op->isStrictFPOpcode();
3769 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3770 EVT SrcVT = SrcVal.getValueType();
3771
3772 if (useSVEForFixedLengthVectorVT(SrcVT))
3773 return LowerFixedLengthFPRoundToSVE(Op, DAG);
3774
3775 if (SrcVT != MVT::f128) {
3776 // Expand cases where the input is a vector bigger than NEON.
3777 if (useSVEForFixedLengthVectorVT(SrcVT))
3778 return SDValue();
3779
3780 // It's legal except when f128 is involved
3781 return Op;
3782 }
3783
3784 return SDValue();
3785}
3786
3787SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
3788 SelectionDAG &DAG) const {
3789 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3790 // Any additional optimization in this function should be recorded
3791 // in the cost tables.
3792 bool IsStrict = Op->isStrictFPOpcode();
3793 EVT InVT = Op.getOperand(IsStrict ? 1 : 0).getValueType();
3794 EVT VT = Op.getValueType();
3795
3796 if (VT.isScalableVector()) {
3797 unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
3798 ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
3799 : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
3800 return LowerToPredicatedOp(Op, DAG, Opcode);
3801 }
3802
3803 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3804 return LowerFixedLengthFPToIntToSVE(Op, DAG);
3805
3806 unsigned NumElts = InVT.getVectorNumElements();
3807
3808 // f16 conversions are promoted to f32 when full fp16 is not supported.
3809 if (InVT.getVectorElementType() == MVT::f16 &&
3810 !Subtarget->hasFullFP16()) {
3811 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
3812 SDLoc dl(Op);
3813 if (IsStrict) {
3814 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NewVT, MVT::Other},
3815 {Op.getOperand(0), Op.getOperand(1)});
3816 return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
3817 {Ext.getValue(1), Ext.getValue(0)});
3818 }
3819 return DAG.getNode(
3820 Op.getOpcode(), dl, Op.getValueType(),
3821 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
3822 }
3823
3824 uint64_t VTSize = VT.getFixedSizeInBits();
3825 uint64_t InVTSize = InVT.getFixedSizeInBits();
3826 if (VTSize < InVTSize) {
3827 SDLoc dl(Op);
3828 if (IsStrict) {
3829 InVT = InVT.changeVectorElementTypeToInteger();
3830 SDValue Cv = DAG.getNode(Op.getOpcode(), dl, {InVT, MVT::Other},
3831 {Op.getOperand(0), Op.getOperand(1)});
3832 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
3833 return DAG.getMergeValues({Trunc, Cv.getValue(1)}, dl);
3834 }
3835 SDValue Cv =
3836 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
3837 Op.getOperand(0));
3838 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
3839 }
3840
3841 if (VTSize > InVTSize) {
3842 SDLoc dl(Op);
3843 MVT ExtVT =
3844 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
3845 VT.getVectorNumElements());
3846 if (IsStrict) {
3847 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {ExtVT, MVT::Other},
3848 {Op.getOperand(0), Op.getOperand(1)});
3849 return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
3850 {Ext.getValue(1), Ext.getValue(0)});
3851 }
3852 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
3853 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
3854 }
3855
3856 // Use a scalar operation for conversions between single-element vectors of
3857 // the same size.
3858 if (NumElts == 1) {
3859 SDLoc dl(Op);
3860 SDValue Extract = DAG.getNode(
3861 ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(),
3862 Op.getOperand(IsStrict ? 1 : 0), DAG.getConstant(0, dl, MVT::i64));
3863 EVT ScalarVT = VT.getScalarType();
3864 if (IsStrict)
3865 return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
3866 {Op.getOperand(0), Extract});
3867 return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
3868 }
3869
3870 // Type changing conversions are illegal.
3871 return Op;
3872}
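// [Editor's note -- illustrative, not part of the original source.] A concrete
// instance of the f16 promotion path above: without +fullfp16, a node such as
//   fptosi <4 x half> %v to <4 x i32>
// is rewritten as
//   fptosi (fpext <4 x half> %v to <4 x float>) to <4 x i32>
// which then matches the ordinary f32 vector FCVTZS patterns.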
3873
3874SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
3875 SelectionDAG &DAG) const {
3876 bool IsStrict = Op->isStrictFPOpcode();
3877 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3878
3879 if (SrcVal.getValueType().isVector())
3880 return LowerVectorFP_TO_INT(Op, DAG);
3881
3882 // f16 conversions are promoted to f32 when full fp16 is not supported.
3883 if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
3884 SDLoc dl(Op);
3885 if (IsStrict) {
3886 SDValue Ext =
3887 DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
3888 {Op.getOperand(0), SrcVal});
3889 return DAG.getNode(Op.getOpcode(), dl, {Op.getValueType(), MVT::Other},
3890 {Ext.getValue(1), Ext.getValue(0)});
3891 }
3892 return DAG.getNode(
3893 Op.getOpcode(), dl, Op.getValueType(),
3894 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
3895 }
3896
3897 if (SrcVal.getValueType() != MVT::f128) {
3898 // It's legal except when f128 is involved
3899 return Op;
3900 }
3901
3902 return SDValue();
3903}
3904
3905SDValue
3906AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
3907 SelectionDAG &DAG) const {
3908 // AArch64 FP-to-int conversions saturate to the destination element size, so
3909 // we can lower common saturating conversions to simple instructions.
3910 SDValue SrcVal = Op.getOperand(0);
3911 EVT SrcVT = SrcVal.getValueType();
3912 EVT DstVT = Op.getValueType();
3913 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3914
3915 uint64_t SrcElementWidth = SrcVT.getScalarSizeInBits();
3916 uint64_t DstElementWidth = DstVT.getScalarSizeInBits();
3917 uint64_t SatWidth = SatVT.getScalarSizeInBits();
3918  assert(SatWidth <= DstElementWidth &&
3919         "Saturation width cannot exceed result width");
3920
3921 // TODO: Consider lowering to SVE operations, as in LowerVectorFP_TO_INT.
3922 // Currently, the `llvm.fpto[su]i.sat.*` intrinsics don't accept scalable
3923 // types, so this is hard to reach.
3924 if (DstVT.isScalableVector())
3925 return SDValue();
3926
3927 EVT SrcElementVT = SrcVT.getVectorElementType();
3928
3929 // In the absence of FP16 support, promote f16 to f32 and saturate the result.
3930 if (SrcElementVT == MVT::f16 &&
3931 (!Subtarget->hasFullFP16() || DstElementWidth > 16)) {
3932 MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
3933 SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal);
3934 SrcVT = F32VT;
3935 SrcElementVT = MVT::f32;
3936 SrcElementWidth = 32;
3937 } else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
3938 SrcElementVT != MVT::f16)
3939 return SDValue();
3940
3941 SDLoc DL(Op);
3942 // Cases that we can emit directly.
3943 if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
3944 return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
3945 DAG.getValueType(DstVT.getScalarType()));
3946
3947 // Otherwise we emit a cvt that saturates to a higher BW, and saturate the
3948 // result. This is only valid if the legal cvt is larger than the saturate
3949 // width. For double, as we don't have MIN/MAX, it can be simpler to scalarize
3950 // (at least until sqxtn is selected).
3951 if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
3952 return SDValue();
3953
3954 EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
3955 SDValue NativeCvt = DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal,
3956 DAG.getValueType(IntVT.getScalarType()));
3957 SDValue Sat;
3958 if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
3959 SDValue MinC = DAG.getConstant(
3960 APInt::getSignedMaxValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
3961 SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC);
3962 SDValue MaxC = DAG.getConstant(
3963 APInt::getSignedMinValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
3964 Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
3965 } else {
3966 SDValue MinC = DAG.getConstant(
3967 APInt::getAllOnesValue(SatWidth).zext(SrcElementWidth), DL, IntVT);
3968 Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
3969 }
3970
3971 return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
3972}
3973
3974SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
3975 SelectionDAG &DAG) const {
3976 // AArch64 FP-to-int conversions saturate to the destination register size, so
3977 // we can lower common saturating conversions to simple instructions.
3978 SDValue SrcVal = Op.getOperand(0);
3979 EVT SrcVT = SrcVal.getValueType();
3980
3981 if (SrcVT.isVector())
3982 return LowerVectorFP_TO_INT_SAT(Op, DAG);
3983
3984 EVT DstVT = Op.getValueType();
3985 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3986 uint64_t SatWidth = SatVT.getScalarSizeInBits();
3987 uint64_t DstWidth = DstVT.getScalarSizeInBits();
3988  assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
3989
3990 // In the absence of FP16 support, promote f16 to f32 and saturate the result.
3991 if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) {
3992 SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal);
3993 SrcVT = MVT::f32;
3994 } else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16)
3995 return SDValue();
3996
3997 SDLoc DL(Op);
3998 // Cases that we can emit directly.
3999 if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
4000 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
4001 DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
4002 return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
4003 DAG.getValueType(DstVT));
4004
4005 // Otherwise we emit a cvt that saturates to a higher BW, and saturate the
4006 // result. This is only valid if the legal cvt is larger than the saturate
4007 // width.
4008 if (DstWidth < SatWidth)
4009 return SDValue();
4010
4011 SDValue NativeCvt =
4012 DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, DAG.getValueType(DstVT));
4013 SDValue Sat;
4014 if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
4015 SDValue MinC = DAG.getConstant(
4016 APInt::getSignedMaxValue(SatWidth).sext(DstWidth), DL, DstVT);
4017 SDValue Min = DAG.getNode(ISD::SMIN, DL, DstVT, NativeCvt, MinC);
4018 SDValue MaxC = DAG.getConstant(
4019 APInt::getSignedMinValue(SatWidth).sext(DstWidth), DL, DstVT);
4020 Sat = DAG.getNode(ISD::SMAX, DL, DstVT, Min, MaxC);
4021 } else {
4022 SDValue MinC = DAG.getConstant(
4023 APInt::getAllOnesValue(SatWidth).zext(DstWidth), DL, DstVT);
4024 Sat = DAG.getNode(ISD::UMIN, DL, DstVT, NativeCvt, MinC);
4025 }
4026
4027 return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
4028}
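// [Editor's note -- illustrative sketch, not part of the original source.] A
// scalar C++ model of the signed clamp emitted above, assuming SatWidth <
// DstWidth <= 64 (saturateSigned is a hypothetical helper):
//
//   long long saturateSigned(long long NativeCvt, unsigned SatWidth) {
//     long long Max = (1LL << (SatWidth - 1)) - 1;    // APInt::getSignedMaxValue
//     long long Min = -(1LL << (SatWidth - 1));       // APInt::getSignedMinValue
//     long long V = NativeCvt < Max ? NativeCvt : Max;   // ISD::SMIN
//     return V > Min ? V : Min;                          // ISD::SMAX
//   }
//
// The unsigned case only needs the UMIN against (1ULL << SatWidth) - 1.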
4029
4030SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
4031 SelectionDAG &DAG) const {
4032 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
4033 // Any additional optimization in this function should be recorded
4034 // in the cost tables.
4035 bool IsStrict = Op->isStrictFPOpcode();
4036 EVT VT = Op.getValueType();
4037 SDLoc dl(Op);
4038 SDValue In = Op.getOperand(IsStrict ? 1 : 0);
4039 EVT InVT = In.getValueType();
4040 unsigned Opc = Op.getOpcode();
4041 bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
4042
4043 if (VT.isScalableVector()) {
4044 if (InVT.getVectorElementType() == MVT::i1) {
4045      // We can't convert an SVE predicate directly to FP; extend it to an integer vector first.
4046 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
4047 EVT CastVT = getPromotedVTForPredicate(InVT);
4048 In = DAG.getNode(CastOpc, dl, CastVT, In);
4049 return DAG.getNode(Opc, dl, VT, In);
4050 }
4051
4052 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
4053 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
4054 return LowerToPredicatedOp(Op, DAG, Opcode);
4055 }
4056
4057 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
4058 return LowerFixedLengthIntToFPToSVE(Op, DAG);
4059
4060 uint64_t VTSize = VT.getFixedSizeInBits();
4061 uint64_t InVTSize = InVT.getFixedSizeInBits();
4062 if (VTSize < InVTSize) {
4063 MVT CastVT =
4064 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
4065 InVT.getVectorNumElements());
4066 if (IsStrict) {
4067 In = DAG.getNode(Opc, dl, {CastVT, MVT::Other},
4068 {Op.getOperand(0), In});
4069 return DAG.getNode(
4070 ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other},
4071 {In.getValue(1), In.getValue(0), DAG.getIntPtrConstant(0, dl)});
4072 }
4073 In = DAG.getNode(Opc, dl, CastVT, In);
4074 return DAG.getNode(ISD::FP_ROUND, dl, VT, In,
4075 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
4076 }
4077
4078 if (VTSize > InVTSize) {
4079 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
4080 EVT CastVT = VT.changeVectorElementTypeToInteger();
4081 In = DAG.getNode(CastOpc, dl, CastVT, In);
4082 if (IsStrict)
4083 return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op.getOperand(0), In});
4084 return DAG.getNode(Opc, dl, VT, In);
4085 }
4086
4087 // Use a scalar operation for conversions between single-element vectors of
4088 // the same size.
4089 if (VT.getVectorNumElements() == 1) {
4090 SDValue Extract = DAG.getNode(
4091 ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(),
4092 In, DAG.getConstant(0, dl, MVT::i64));
4093 EVT ScalarVT = VT.getScalarType();
4094 if (IsStrict)
4095 return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
4096 {Op.getOperand(0), Extract});
4097 return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
4098 }
4099
4100 return Op;
4101}
4102
4103SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
4104 SelectionDAG &DAG) const {
4105 if (Op.getValueType().isVector())
4106 return LowerVectorINT_TO_FP(Op, DAG);
4107
4108 bool IsStrict = Op->isStrictFPOpcode();
4109 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
4110
4111 // f16 conversions are promoted to f32 when full fp16 is not supported.
4112 if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4113 SDLoc dl(Op);
4114 if (IsStrict) {
4115 SDValue Val = DAG.getNode(Op.getOpcode(), dl, {MVT::f32, MVT::Other},
4116 {Op.getOperand(0), SrcVal});
4117 return DAG.getNode(
4118 ISD::STRICT_FP_ROUND, dl, {MVT::f16, MVT::Other},
4119 {Val.getValue(1), Val.getValue(0), DAG.getIntPtrConstant(0, dl)});
4120 }
4121 return DAG.getNode(
4122 ISD::FP_ROUND, dl, MVT::f16,
4123 DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
4124 DAG.getIntPtrConstant(0, dl));
4125 }
4126
4127 // i128 conversions are libcalls.
4128 if (SrcVal.getValueType() == MVT::i128)
4129 return SDValue();
4130
4131 // Other conversions are legal, unless it's to the completely software-based
4132 // fp128.
4133 if (Op.getValueType() != MVT::f128)
4134 return Op;
4135 return SDValue();
4136}
4137
4138SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
4139 SelectionDAG &DAG) const {
4140 // For iOS, we want to call an alternative entry point: __sincos_stret,
4141 // which returns the values in two S / D registers.
4142 SDLoc dl(Op);
4143 SDValue Arg = Op.getOperand(0);
4144 EVT ArgVT = Arg.getValueType();
4145 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
4146
4147 ArgListTy Args;
4148 ArgListEntry Entry;
4149
4150 Entry.Node = Arg;
4151 Entry.Ty = ArgTy;
4152 Entry.IsSExt = false;
4153 Entry.IsZExt = false;
4154 Args.push_back(Entry);
4155
4156 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
4157 : RTLIB::SINCOS_STRET_F32;
4158 const char *LibcallName = getLibcallName(LC);
4159 SDValue Callee =
4160 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
4161
4162 StructType *RetTy = StructType::get(ArgTy, ArgTy);
4163 TargetLowering::CallLoweringInfo CLI(DAG);
4164 CLI.setDebugLoc(dl)
4165 .setChain(DAG.getEntryNode())
4166 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
4167
4168 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4169 return CallResult.first;
4170}
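// [Editor's note -- illustrative, not part of the original source.] On targets
// that provide __sincos_stret (e.g. iOS), a sin/cos pair over the same argument
// is expected to become a single call, roughly:
//   bl ___sincos_stret   // {sin, cos} returned in s0/s1 (d0/d1 for double)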
4171
4172static MVT getSVEContainerType(EVT ContentTy);
4173
4174SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
4175 SelectionDAG &DAG) const {
4176 EVT OpVT = Op.getValueType();
4177 EVT ArgVT = Op.getOperand(0).getValueType();
4178
4179 if (useSVEForFixedLengthVectorVT(OpVT))
4180 return LowerFixedLengthBitcastToSVE(Op, DAG);
4181
4182 if (OpVT.isScalableVector()) {
4183 // Bitcasting between unpacked vector types of different element counts is
4184 // not a NOP because the live elements are laid out differently.
4185    //                01234567
4186    // e.g. nxv2i32 = XX??XX??
4187    //      nxv4f16 = X?X?X?X?
4188 if (OpVT.getVectorElementCount() != ArgVT.getVectorElementCount())
4189 return SDValue();
4190
4191 if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
4192      assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
4193             "Expected int->fp bitcast!");
4194 SDValue ExtResult =
4195 DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
4196 Op.getOperand(0));
4197 return getSVESafeBitCast(OpVT, ExtResult, DAG);
4198 }
4199 return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
4200 }
4201
4202 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
4203 return SDValue();
4204
4205 // Bitcasts between f16 and bf16 are legal.
4206 if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
4207 return Op;
4208
4209  assert(ArgVT == MVT::i16);
4210 SDLoc DL(Op);
4211
4212 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
4213 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
4214 return SDValue(
4215 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
4216 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
4217 0);
4218}
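// [Editor's note -- illustrative, not part of the original source.] The scalar
// i16 -> f16/bf16 tail above works by widening rather than narrowing: the i16 is
// any-extended to i32, bitcast (typically an fmov) into an FPR as f32, and the
// final value is simply the h-subregister (hsub) of that FPR.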
4219
4220static EVT getExtensionTo64Bits(const EVT &OrigVT) {
4221 if (OrigVT.getSizeInBits() >= 64)
4222 return OrigVT;
4223
4224  assert(OrigVT.isSimple() && "Expecting a simple value type");
4225
4226 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
4227 switch (OrigSimpleTy) {
4228  default: llvm_unreachable("Unexpected Vector Type");
4229 case MVT::v2i8:
4230 case MVT::v2i16:
4231 return MVT::v2i32;
4232 case MVT::v4i8:
4233 return MVT::v4i16;
4234 }
4235}
4236
4237static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
4238 const EVT &OrigTy,
4239 const EVT &ExtTy,
4240 unsigned ExtOpcode) {
4241 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
4242 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
4243 // 64-bits we need to insert a new extension so that it will be 64-bits.
4244  assert(ExtTy.is128BitVector() && "Unexpected extension size");
4245 if (OrigTy.getSizeInBits() >= 64)
4246 return N;
4247
4248 // Must extend size to at least 64 bits to be used as an operand for VMULL.
4249 EVT NewVT = getExtensionTo64Bits(OrigTy);
4250
4251 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
4252}
4253
4254// Returns lane if Op extracts from a two-element vector and lane is constant
4255// (i.e., extractelt(<2 x Ty> %v, ConstantLane)), and None otherwise.
4256static Optional<uint64_t> getConstantLaneNumOfExtractHalfOperand(SDValue &Op) {
4257 SDNode *OpNode = Op.getNode();
4258 if (OpNode->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
4259 return None;
4260
4261 EVT VT = OpNode->getOperand(0).getValueType();
4262 ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpNode->getOperand(1));
4263 if (!VT.isFixedLengthVector() || VT.getVectorNumElements() != 2 || !C)
4264 return None;
4265
4266 return C->getZExtValue();
4267}
4268
4269static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
4270 bool isSigned) {
4271 EVT VT = N->getValueType(0);
4272
4273 if (N->getOpcode() != ISD::BUILD_VECTOR)
4274 return false;
4275
4276 for (const SDValue &Elt : N->op_values()) {
4277 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
4278 unsigned EltSize = VT.getScalarSizeInBits();
4279 unsigned HalfSize = EltSize / 2;
4280 if (isSigned) {
4281 if (!isIntN(HalfSize, C->getSExtValue()))
4282 return false;
4283 } else {
4284 if (!isUIntN(HalfSize, C->getZExtValue()))
4285 return false;
4286 }
4287 continue;
4288 }
4289 return false;
4290 }
4291
4292 return true;
4293}
4294
4295static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
4296 if (N->getOpcode() == ISD::SIGN_EXTEND ||
4297 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
4298 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
4299 N->getOperand(0)->getValueType(0),
4300 N->getValueType(0),
4301 N->getOpcode());
4302
4303  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
4304 EVT VT = N->getValueType(0);
4305 SDLoc dl(N);
4306 unsigned EltSize = VT.getScalarSizeInBits() / 2;
4307 unsigned NumElts = VT.getVectorNumElements();
4308 MVT TruncVT = MVT::getIntegerVT(EltSize);
4309 SmallVector<SDValue, 8> Ops;
4310 for (unsigned i = 0; i != NumElts; ++i) {
4311 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
4312 const APInt &CInt = C->getAPIntValue();
4313 // Element types smaller than 32 bits are not legal, so use i32 elements.
4314 // The values are implicitly truncated so sext vs. zext doesn't matter.
4315 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
4316 }
4317 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
4318}
4319
4320static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
4321 return N->getOpcode() == ISD::SIGN_EXTEND ||
4322 N->getOpcode() == ISD::ANY_EXTEND ||
4323 isExtendedBUILD_VECTOR(N, DAG, true);
4324}
4325
4326static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
4327 return N->getOpcode() == ISD::ZERO_EXTEND ||
4328 N->getOpcode() == ISD::ANY_EXTEND ||
4329 isExtendedBUILD_VECTOR(N, DAG, false);
4330}
4331
4332static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
4333 unsigned Opcode = N->getOpcode();
4334 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
4335 SDNode *N0 = N->getOperand(0).getNode();
4336 SDNode *N1 = N->getOperand(1).getNode();
4337 return N0->hasOneUse() && N1->hasOneUse() &&
4338 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
4339 }
4340 return false;
4341}
4342
4343static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
4344 unsigned Opcode = N->getOpcode();
4345 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
4346 SDNode *N0 = N->getOperand(0).getNode();
4347 SDNode *N1 = N->getOperand(1).getNode();
4348 return N0->hasOneUse() && N1->hasOneUse() &&
4349 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
4350 }
4351 return false;
4352}
4353
4354SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
4355 SelectionDAG &DAG) const {
4356  // The rounding mode is in bits 23:22 of the FPCR.
4357  // The AArch64 rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
4358  // The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3)
4359  // so that the shift + and get folded into a bitfield extract.
4360 SDLoc dl(Op);
4361
4362 SDValue Chain = Op.getOperand(0);
4363 SDValue FPCR_64 = DAG.getNode(
4364 ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
4365 {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
4366 Chain = FPCR_64.getValue(1);
4367 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
4368 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
4369 DAG.getConstant(1U << 22, dl, MVT::i32));
4370 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
4371 DAG.getConstant(22, dl, MVT::i32));
4372 SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
4373 DAG.getConstant(3, dl, MVT::i32));
4374 return DAG.getMergeValues({AND, Chain}, dl);
4375}
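// [Editor's note -- illustrative sketch, not part of the original source.] A
// standalone model of the FLT_ROUNDS computation above (fltRounds is a
// hypothetical helper; FPCR.RMode is assumed to occupy bits 23:22):
//
//   int fltRounds(unsigned long long fpcr) {
//     // RMode 0 (RN) -> 1, 1 (RP) -> 2, 2 (RM) -> 3, 3 (RZ) -> 0
//     return (int)(((fpcr + (1ULL << 22)) >> 22) & 3);
//   }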
4376
4377SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
4378 SelectionDAG &DAG) const {
4379 SDLoc DL(Op);
4380 SDValue Chain = Op->getOperand(0);
4381 SDValue RMValue = Op->getOperand(1);
4382
4383 // The rounding mode is in bits 23:22 of the FPCR.
4384 // The llvm.set.rounding argument value to the rounding mode in FPCR mapping
4385 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
4386  // (((arg - 1) & 3) << 22).
4387 //
4388 // The argument of llvm.set.rounding must be within the segment [0, 3], so
4389  // NearestTiesToAway (4) is not handled here. It is the responsibility of the
4390  // code that generates llvm.set.rounding to ensure this condition.
4391
4392 // Calculate new value of FPCR[23:22].
4393 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
4394 DAG.getConstant(1, DL, MVT::i32));
4395 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
4396 DAG.getConstant(0x3, DL, MVT::i32));
4397 RMValue =
4398 DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
4399 DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
4400 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
4401
4402 // Get current value of FPCR.
4403 SDValue Ops[] = {
4404 Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
4405 SDValue FPCR =
4406 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
4407 Chain = FPCR.getValue(1);
4408 FPCR = FPCR.getValue(0);
4409
4410  // Put the new rounding mode into FPCR[23:22].
4411 const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
4412 FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
4413 DAG.getConstant(RMMask, DL, MVT::i64));
4414 FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
4415 SDValue Ops2[] = {
4416 Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
4417 FPCR};
4418 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
4419}
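// [Editor's note -- illustrative sketch, not part of the original source.] A
// standalone model of the FPCR update above (setRoundingFPCR is a hypothetical
// helper; AArch64::RoundingBitsPos is assumed to be 22, per the comment above):
//
//   unsigned long long setRoundingFPCR(unsigned long long fpcr, unsigned arg) {
//     // llvm.set.rounding arg: 0 (toward zero) -> 3, 1 (nearest) -> 0,
//     //                        2 (+inf) -> 1, 3 (-inf) -> 2
//     unsigned long long rmode = ((unsigned long long)((arg - 1) & 3)) << 22;
//     return (fpcr & ~(3ULL << 22)) | rmode;
//   }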
4420
4421static unsigned selectUmullSmull(SDNode *&N0, SDNode *&N1, SelectionDAG &DAG,
4422 bool &IsMLA) {
4423 bool IsN0SExt = isSignExtended(N0, DAG);
4424 bool IsN1SExt = isSignExtended(N1, DAG);
4425 if (IsN0SExt && IsN1SExt)
4426 return AArch64ISD::SMULL;
4427
4428 bool IsN0ZExt = isZeroExtended(N0, DAG);
4429 bool IsN1ZExt = isZeroExtended(N1, DAG);
4430
4431 if (IsN0ZExt && IsN1ZExt)
4432 return AArch64ISD::UMULL;
4433
4434 if (!IsN1SExt && !IsN1ZExt)
4435 return 0;
4436 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
4437 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
4438 if (IsN1SExt && isAddSubSExt(N0, DAG)) {
4439 IsMLA = true;
4440 return AArch64ISD::SMULL;
4441 }
4442 if (IsN1ZExt && isAddSubZExt(N0, DAG)) {
4443 IsMLA = true;
4444 return AArch64ISD::UMULL;
4445 }
4446 if (IsN0ZExt && isAddSubZExt(N1, DAG)) {
4447 std::swap(N0, N1);
4448 IsMLA = true;
4449 return AArch64ISD::UMULL;
4450 }
4451 return 0;
4452}
4453
4454SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
4455 EVT VT = Op.getValueType();
4456
4457 // If SVE is available then i64 vector multiplications can also be made legal.
4458 bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
4459
4460 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
4461 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
4462
4463 // Multiplications are only custom-lowered for 128-bit vectors so that
4464 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
4465  assert(VT.is128BitVector() && VT.isInteger() &&
4466         "unexpected type for custom-lowering ISD::MUL");
4467 SDNode *N0 = Op.getOperand(0).getNode();
4468 SDNode *N1 = Op.getOperand(1).getNode();
4469 bool isMLA = false;
4470 unsigned NewOpc = selectUmullSmull(N0, N1, DAG, isMLA);
4471
4472 if (!NewOpc) {
4473 if (VT == MVT::v2i64)
4474 // Fall through to expand this. It is not legal.
4475 return SDValue();
4476 else
4477 // Other vector multiplications are legal.
4478 return Op;
4479 }
4480
4481 // Legalize to a S/UMULL instruction
4482 SDLoc DL(Op);
4483 SDValue Op0;
4484 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
4485 if (!isMLA) {
4486 Op0 = skipExtensionForVectorMULL(N0, DAG);
4487    assert(Op0.getValueType().is64BitVector() &&
4488           Op1.getValueType().is64BitVector() &&
4489           "unexpected types for extended operands to VMULL");
4490 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
4491 }
4492  // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
4493  // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
4494  // This is beneficial for CPUs with accumulate forwarding such as Cortex-A53/A57.
4495 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
4496 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
4497 EVT Op1VT = Op1.getValueType();
4498 return DAG.getNode(N0->getOpcode(), DL, VT,
4499 DAG.getNode(NewOpc, DL, VT,
4500 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
4501 DAG.getNode(NewOpc, DL, VT,
4502 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
4503}
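// [Editor's note -- illustrative, not part of the original source.] A concrete
// instance of the isMLA path above: for v8i8 values a, b, c,
//   mul (add (zext a), (zext b)), (zext c)
// is rebuilt as
//   add (umull a, c), (umull b, c)
// so both products use UMULL and the add can be folded into a UMLAL.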
4504
4505static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
4506 int Pattern) {
4507 if (VT == MVT::nxv1i1 && Pattern == AArch64SVEPredPattern::all)
4508 return DAG.getConstant(1, DL, MVT::nxv1i1);
4509 return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
4510 DAG.getTargetConstant(Pattern, DL, MVT::i32));
4511}
4512
4513// Returns a safe bitcast between two scalable vector predicates, where
4514// any newly created lanes from a widening bitcast are defined as zero.
4515static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {
4516 SDLoc DL(Op);
4517 EVT InVT = Op.getValueType();
4518
4519  assert(InVT.getVectorElementType() == MVT::i1 &&
4520         VT.getVectorElementType() == MVT::i1 &&
4521         "Expected a predicate-to-predicate bitcast");
4522  assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
4523         InVT.isScalableVector() &&
4524         DAG.getTargetLoweringInfo().isTypeLegal(InVT) &&
4525         "Only expect to cast between legal scalable predicate types!");
4526
4527 // Return the operand if the cast isn't changing type,
4528 // e.g. <n x 16 x i1> -> <n x 16 x i1>
4529 if (InVT == VT)
4530 return Op;
4531
4532 SDValue Reinterpret = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
4533
4534 // We only have to zero the lanes if new lanes are being defined, e.g. when
4535 // casting from <vscale x 2 x i1> to <vscale x 16 x i1>. If this is not the
4536 // case (e.g. when casting from <vscale x 16 x i1> -> <vscale x 2 x i1>) then
4537 // we can return here.
4538 if (InVT.bitsGT(VT))
4539 return Reinterpret;
4540
4541 // Check if the other lanes are already known to be zeroed by
4542 // construction.
4543 if (isZeroingInactiveLanes(Op))
4544 return Reinterpret;
4545
4546 // Zero the newly introduced lanes.
4547 SDValue Mask = DAG.getConstant(1, DL, InVT);
4548 Mask = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Mask);
4549 return DAG.getNode(ISD::AND, DL, VT, Reinterpret, Mask);
4550}
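// [Editor's note -- illustrative, not part of the original source.] The zeroing
// at the end of getSVEPredicateBitCast relies on the mask being built in the
// narrow type first: e.g. when widening nxv2i1 -> nxv16i1, the splat of 1 in
// nxv2i1 reinterprets to an nxv16i1 value that is set only in every eighth lane,
// so the AND clears exactly the lanes the reinterpret left undefined.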
4551
4552SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain,
4553 SMEAttrs Attrs, SDLoc DL,
4554 EVT VT) const {
4555 if (Attrs.hasStreamingInterfaceOrBody())
4556 return DAG.getConstant(1, DL, VT);
4557
4558 if (Attrs.hasNonStreamingInterfaceAndBody())
4559 return DAG.getConstant(0, DL, VT);
4560
4561  assert(Attrs.hasStreamingCompatibleInterface() && "Unexpected interface");
4562
4563 SDValue Callee = DAG.getExternalSymbol("__arm_sme_state",
4564 getPointerTy(DAG.getDataLayout()));
4565 Type *Int64Ty = Type::getInt64Ty(*DAG.getContext());
4566 Type *RetTy = StructType::get(Int64Ty, Int64Ty);
4567 TargetLowering::CallLoweringInfo CLI(DAG);
4568 ArgListTy Args;
4569 CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
4570 CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2,
4571 RetTy, Callee, std::move(Args));
4572 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4573 SDValue Mask = DAG.getConstant(/*PSTATE.SM*/ 1, DL, MVT::i64);
4574 return DAG.getNode(ISD::AND, DL, MVT::i64, CallResult.first.getOperand(0),
4575 Mask);
4576}
4577
4578static Optional<SMEAttrs> getCalleeAttrsFromExternalFunction(SDValue V) {
4579 if (auto *ES = dyn_cast<ExternalSymbolSDNode>(V)) {
4580 StringRef S(ES->getSymbol());
4581 if (S == "__arm_sme_state" || S == "__arm_tpidr2_save")
4582 return SMEAttrs(SMEAttrs::SM_Compatible | SMEAttrs::ZA_Preserved);
4583 if (S == "__arm_tpidr2_restore")
4584 return SMEAttrs(SMEAttrs::SM_Compatible | SMEAttrs::ZA_Shared);
4585 }
4586 return None;
4587}
4588
4589SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
4590 SelectionDAG &DAG) const {
4591 unsigned IntNo = Op.getConstantOperandVal(1);
4592 SDLoc DL(Op);
4593 switch (IntNo) {
4594 default:
4595 return SDValue(); // Don't custom lower most intrinsics.
4596 case Intrinsic::aarch64_mops_memset_tag: {
4597 auto Node = cast<MemIntrinsicSDNode>(Op.getNode());
4598 SDValue Chain = Node->getChain();
4599 SDValue Dst = Op.getOperand(2);
4600 SDValue Val = Op.getOperand(3);
4601 Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64);
4602 SDValue Size = Op.getOperand(4);
4603 auto Alignment = Node->getMemOperand()->getAlign();
4604 bool IsVol = Node->isVolatile();
4605 auto DstPtrInfo = Node->getPointerInfo();
4606
4607 const auto &SDI =
4608 static_cast<const AArch64SelectionDAGInfo &>(DAG.getSelectionDAGInfo());
4609 SDValue MS =
4610 SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, Dst, Val,
4611 Size, Alignment, IsVol, DstPtrInfo, MachinePointerInfo{});
4612
4613 // MOPS_MEMSET_TAGGING has 3 results (DstWb, SizeWb, Chain) whereas the
4614 // intrinsic has 2. So hide SizeWb using MERGE_VALUES. Otherwise
4615 // LowerOperationWrapper will complain that the number of results has
4616 // changed.
4617 return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL);
4618 }
4619 case Intrinsic::aarch64_sme_za_enable:
4620 return DAG.getNode(
4621 AArch64ISD::SMSTART, DL, MVT::Other,
4622 Op->getOperand(0), // Chain
4623 DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
4624 DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
4625 case Intrinsic::aarch64_sme_za_disable:
4626 return DAG.getNode(
4627 AArch64ISD::SMSTOP, DL, MVT::Other,
4628 Op->getOperand(0), // Chain
4629 DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
4630 DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
4631 }
4632}
4633
4634SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
4635 SelectionDAG &DAG) const {
4636 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4637 SDLoc dl(Op);
4638 switch (IntNo) {
4639 default: return SDValue(); // Don't custom lower most intrinsics.
4640 case Intrinsic::thread_pointer: {
4641 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4642 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
4643 }
4644 case Intrinsic::aarch64_neon_abs: {
4645 EVT Ty = Op.getValueType();
4646 if (Ty == MVT::i64) {
4647 SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
4648 Op.getOperand(1));
4649 Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
4650 return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
4651 } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
4652 return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
4653 } else {
4654      report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
4655 }
4656 }
4657 case Intrinsic::aarch64_neon_pmull64: {
4658 SDValue LHS = Op.getOperand(1);
4659 SDValue RHS = Op.getOperand(2);
4660
4661 Optional<uint64_t> LHSLane = getConstantLaneNumOfExtractHalfOperand(LHS);
4662 Optional<uint64_t> RHSLane = getConstantLaneNumOfExtractHalfOperand(RHS);
4663
4664    assert((!LHSLane || *LHSLane < 2) && "Expect lane to be None or 0 or 1");
4665    assert((!RHSLane || *RHSLane < 2) && "Expect lane to be None or 0 or 1");
4666
4667 // 'aarch64_neon_pmull64' takes i64 parameters; while pmull/pmull2
4668 // instructions execute on SIMD registers. So canonicalize i64 to v1i64,
4669 // which ISel recognizes better. For example, generate a ldr into d*
4670 // registers as opposed to a GPR load followed by a fmov.
4671 auto TryVectorizeOperand =
4672 [](SDValue N, Optional<uint64_t> NLane, Optional<uint64_t> OtherLane,
4673 const SDLoc &dl, SelectionDAG &DAG) -> SDValue {
4674      // If the operand is a higher half itself, rewrite it to
4675 // extract_high_v2i64; this way aarch64_neon_pmull64 could
4676 // re-use the dag-combiner function with aarch64_neon_{pmull,smull,umull}.
4677 if (NLane && *NLane == 1)
4678 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i64,
4679 N.getOperand(0), DAG.getConstant(1, dl, MVT::i64));
4680
4681 // Operand N is not a higher half but the other operand is.
4682 if (OtherLane && *OtherLane == 1) {
4683 // If this operand is a lower half, rewrite it to
4684 // extract_high_v2i64(duplane(<2 x Ty>, 0)). This saves a roundtrip to
4685 // align lanes of two operands. A roundtrip sequence (to move from lane
4686 // 1 to lane 0) is like this:
4687 // mov x8, v0.d[1]
4688 // fmov d0, x8
4689 if (NLane && *NLane == 0)
4690 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i64,
4691 DAG.getNode(AArch64ISD::DUPLANE64, dl, MVT::v2i64,
4692 N.getOperand(0),
4693 DAG.getConstant(0, dl, MVT::i64)),
4694 DAG.getConstant(1, dl, MVT::i64));
4695
4696 // Otherwise just dup from main to all lanes.
4697 return DAG.getNode(AArch64ISD::DUP, dl, MVT::v1i64, N);
4698 }
4699
4700 // Neither operand is an extract of higher half, so codegen may just use
4701 // the non-high version of PMULL instruction. Use v1i64 to represent i64.
4702      assert(N.getValueType() == MVT::i64 &&
4703             "Intrinsic aarch64_neon_pmull64 requires i64 parameters");
4704 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, N);
4705 };
4706
4707 LHS = TryVectorizeOperand(LHS, LHSLane, RHSLane, dl, DAG);
4708 RHS = TryVectorizeOperand(RHS, RHSLane, LHSLane, dl, DAG);
4709
4710 return DAG.getNode(AArch64ISD::PMULL, dl, Op.getValueType(), LHS, RHS);
4711 }
4712 case Intrinsic::aarch64_neon_smax:
4713 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
4714 Op.getOperand(1), Op.getOperand(2));
4715 case Intrinsic::aarch64_neon_umax:
4716 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
4717 Op.getOperand(1), Op.getOperand(2));
4718 case Intrinsic::aarch64_neon_smin:
4719 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
4720 Op.getOperand(1), Op.getOperand(2));
4721 case Intrinsic::aarch64_neon_umin:
4722 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
4723 Op.getOperand(1), Op.getOperand(2));
4724 case Intrinsic::aarch64_neon_scalar_sqxtn:
4725 case Intrinsic::aarch64_neon_scalar_sqxtun:
4726 case Intrinsic::aarch64_neon_scalar_uqxtn: {
4727    assert(Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::f32);
4728 if (Op.getValueType() == MVT::i32)
4729 return DAG.getNode(ISD::BITCAST, dl, MVT::i32,
4730 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::f32,
4731 Op.getOperand(0),
4732 DAG.getNode(ISD::BITCAST, dl, MVT::f64,
4733 Op.getOperand(1))));
4734 return SDValue();
4735 }
4736 case Intrinsic::aarch64_sve_sunpkhi:
4737 return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
4738 Op.getOperand(1));
4739 case Intrinsic::aarch64_sve_sunpklo:
4740 return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
4741 Op.getOperand(1));
4742 case Intrinsic::aarch64_sve_uunpkhi:
4743 return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
4744 Op.getOperand(1));
4745 case Intrinsic::aarch64_sve_uunpklo:
4746 return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
4747 Op.getOperand(1));
4748 case Intrinsic::aarch64_sve_clasta_n:
4749 return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
4750 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4751 case Intrinsic::aarch64_sve_clastb_n:
4752 return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
4753 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4754 case Intrinsic::aarch64_sve_lasta:
4755 return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
4756 Op.getOperand(1), Op.getOperand(2));
4757 case Intrinsic::aarch64_sve_lastb:
4758 return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
4759 Op.getOperand(1), Op.getOperand(2));
4760 case Intrinsic::aarch64_sve_rev:
4761 return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
4762 Op.getOperand(1));
4763 case Intrinsic::aarch64_sve_tbl:
4764 return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
4765 Op.getOperand(1), Op.getOperand(2));
4766 case Intrinsic::aarch64_sve_trn1:
4767 return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
4768 Op.getOperand(1), Op.getOperand(2));
4769 case Intrinsic::aarch64_sve_trn2:
4770 return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
4771 Op.getOperand(1), Op.getOperand(2));
4772 case Intrinsic::aarch64_sve_uzp1:
4773 return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
4774 Op.getOperand(1), Op.getOperand(2));
4775 case Intrinsic::aarch64_sve_uzp2:
4776 return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
4777 Op.getOperand(1), Op.getOperand(2));
4778 case Intrinsic::aarch64_sve_zip1:
4779 return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
4780 Op.getOperand(1), Op.getOperand(2));
4781 case Intrinsic::aarch64_sve_zip2:
4782 return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
4783 Op.getOperand(1), Op.getOperand(2));
4784 case Intrinsic::aarch64_sve_splice:
4785 return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
4786 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4787 case Intrinsic::aarch64_sve_ptrue:
4788 return getPTrue(DAG, dl, Op.getValueType(),
4789 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
4790 case Intrinsic::aarch64_sve_clz:
4791 return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
4792 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4793 case Intrinsic::aarch64_sme_cntsb:
4794 return DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
4795 DAG.getConstant(1, dl, MVT::i32));
4796 case Intrinsic::aarch64_sme_cntsh: {
4797 SDValue One = DAG.getConstant(1, dl, MVT::i32);
4798 SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(), One);
4799 return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes, One);
4800 }
4801 case Intrinsic::aarch64_sme_cntsw: {
4802 SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
4803 DAG.getConstant(1, dl, MVT::i32));
4804 return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes,
4805 DAG.getConstant(2, dl, MVT::i32));
4806 }
4807 case Intrinsic::aarch64_sme_cntsd: {
4808 SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
4809 DAG.getConstant(1, dl, MVT::i32));
4810 return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes,
4811 DAG.getConstant(3, dl, MVT::i32));
4812 }
4813 case Intrinsic::aarch64_sve_cnt: {
4814 SDValue Data = Op.getOperand(3);
4815 // CTPOP only supports integer operands.
4816 if (Data.getValueType().isFloatingPoint())
4817 Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
4818 return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
4819 Op.getOperand(2), Data, Op.getOperand(1));
4820 }
4821 case Intrinsic::aarch64_sve_dupq_lane:
4822 return LowerDUPQLane(Op, DAG);
4823 case Intrinsic::aarch64_sve_convert_from_svbool:
4824 return getSVEPredicateBitCast(Op.getValueType(), Op.getOperand(1), DAG);
4825 case Intrinsic::aarch64_sve_convert_to_svbool:
4826 return getSVEPredicateBitCast(MVT::nxv16i1, Op.getOperand(1), DAG);
4827 case Intrinsic::aarch64_sve_fneg:
4828 return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
4829 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4830 case Intrinsic::aarch64_sve_frintp:
4831 return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
4832 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4833 case Intrinsic::aarch64_sve_frintm:
4834 return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
4835 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4836 case Intrinsic::aarch64_sve_frinti:
4837 return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
4838 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4839 case Intrinsic::aarch64_sve_frintx:
4840 return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
4841 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4842 case Intrinsic::aarch64_sve_frinta:
4843 return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
4844 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4845 case Intrinsic::aarch64_sve_frintn:
4846 return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
4847 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4848 case Intrinsic::aarch64_sve_frintz:
4849 return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
4850 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4851 case Intrinsic::aarch64_sve_ucvtf:
4852 return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
4853 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4854 Op.getOperand(1));
4855 case Intrinsic::aarch64_sve_scvtf:
4856 return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
4857 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4858 Op.getOperand(1));
4859 case Intrinsic::aarch64_sve_fcvtzu:
4860 return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
4861 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4862 Op.getOperand(1));
4863 case Intrinsic::aarch64_sve_fcvtzs:
4864 return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
4865 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4866 Op.getOperand(1));
4867 case Intrinsic::aarch64_sve_fsqrt:
4868 return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
4869 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4870 case Intrinsic::aarch64_sve_frecpx:
4871 return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
4872 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4873 case Intrinsic::aarch64_sve_frecpe_x:
4874 return DAG.getNode(AArch64ISD::FRECPE, dl, Op.getValueType(),
4875 Op.getOperand(1));
4876 case Intrinsic::aarch64_sve_frecps_x:
4877 return DAG.getNode(AArch64ISD::FRECPS, dl, Op.getValueType(),
4878 Op.getOperand(1), Op.getOperand(2));
4879 case Intrinsic::aarch64_sve_frsqrte_x:
4880 return DAG.getNode(AArch64ISD::FRSQRTE, dl, Op.getValueType(),
4881 Op.getOperand(1));
4882 case Intrinsic::aarch64_sve_frsqrts_x:
4883 return DAG.getNode(AArch64ISD::FRSQRTS, dl, Op.getValueType(),
4884 Op.getOperand(1), Op.getOperand(2));
4885 case Intrinsic::aarch64_sve_fabs:
4886 return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4887 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4888 case Intrinsic::aarch64_sve_abs:
4889 return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4890 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4891 case Intrinsic::aarch64_sve_neg:
4892 return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
4893 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4894 case Intrinsic::aarch64_sve_insr: {
4895 SDValue Scalar = Op.getOperand(2);
4896 EVT ScalarTy = Scalar.getValueType();
4897 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
4898 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
4899
4900 return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
4901 Op.getOperand(1), Scalar);
4902 }
4903 case Intrinsic::aarch64_sve_rbit:
4904 return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
4905 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4906 Op.getOperand(1));
4907 case Intrinsic::aarch64_sve_revb:
4908 return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
4909 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4910 case Intrinsic::aarch64_sve_revh:
4911 return DAG.getNode(AArch64ISD::REVH_MERGE_PASSTHRU, dl, Op.getValueType(),
4912 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4913 case Intrinsic::aarch64_sve_revw:
4914 return DAG.getNode(AArch64ISD::REVW_MERGE_PASSTHRU, dl, Op.getValueType(),
4915 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4916 case Intrinsic::aarch64_sve_revd:
4917 return DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, dl, Op.getValueType(),
4918 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4919 case Intrinsic::aarch64_sve_sxtb:
4920 return DAG.getNode(
4921 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4922 Op.getOperand(2), Op.getOperand(3),
4923 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4924 Op.getOperand(1));
4925 case Intrinsic::aarch64_sve_sxth:
4926 return DAG.getNode(
4927 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4928 Op.getOperand(2), Op.getOperand(3),
4929 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4930 Op.getOperand(1));
4931 case Intrinsic::aarch64_sve_sxtw:
4932 return DAG.getNode(
4933 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4934 Op.getOperand(2), Op.getOperand(3),
4935 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4936 Op.getOperand(1));
4937 case Intrinsic::aarch64_sve_uxtb:
4938 return DAG.getNode(
4939 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4940 Op.getOperand(2), Op.getOperand(3),
4941 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4942 Op.getOperand(1));
4943 case Intrinsic::aarch64_sve_uxth:
4944 return DAG.getNode(
4945 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4946 Op.getOperand(2), Op.getOperand(3),
4947 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4948 Op.getOperand(1));
4949 case Intrinsic::aarch64_sve_uxtw:
4950 return DAG.getNode(
4951 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4952 Op.getOperand(2), Op.getOperand(3),
4953 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4954 Op.getOperand(1));
4955 case Intrinsic::localaddress: {
4956 const auto &MF = DAG.getMachineFunction();
4957 const auto *RegInfo = Subtarget->getRegisterInfo();
4958 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
4959 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
4960 Op.getSimpleValueType());
4961 }
4962
4963 case Intrinsic::eh_recoverfp: {
4964 // FIXME: This needs to be implemented to correctly handle highly aligned
4965 // stack objects. For now we simply return the incoming FP. Refer D53541
4966 // for more details.
4967 SDValue FnOp = Op.getOperand(1);
4968 SDValue IncomingFPOp = Op.getOperand(2);
4969 GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
4970 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
4971 if (!Fn)
4972 report_fatal_error(
4973 "llvm.eh.recoverfp must take a function as the first argument");
4974 return IncomingFPOp;
4975 }
4976
4977 case Intrinsic::aarch64_neon_vsri:
4978 case Intrinsic::aarch64_neon_vsli: {
4979 EVT Ty = Op.getValueType();
4980
4981 if (!Ty.isVector())
4982 report_fatal_error("Unexpected type for aarch64_neon_vsli");
4983
4984    assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
4985
4986 bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
4987 unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
4988 return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
4989 Op.getOperand(3));
4990 }
4991
4992 case Intrinsic::aarch64_neon_srhadd:
4993 case Intrinsic::aarch64_neon_urhadd:
4994 case Intrinsic::aarch64_neon_shadd:
4995 case Intrinsic::aarch64_neon_uhadd: {
4996 bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4997 IntNo == Intrinsic::aarch64_neon_shadd);
4998 bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4999 IntNo == Intrinsic::aarch64_neon_urhadd);
5000 unsigned Opcode = IsSignedAdd
5001 ? (IsRoundingAdd ? ISD::AVGCEILS : ISD::AVGFLOORS)
5002 : (IsRoundingAdd ? ISD::AVGCEILU : ISD::AVGFLOORU);
5003 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
5004 Op.getOperand(2));
5005 }
5006 case Intrinsic::aarch64_neon_sabd:
5007 case Intrinsic::aarch64_neon_uabd: {
5008 unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU
5009 : ISD::ABDS;
5010 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
5011 Op.getOperand(2));
5012 }
5013 case Intrinsic::aarch64_neon_saddlp:
5014 case Intrinsic::aarch64_neon_uaddlp: {
5015 unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
5016 ? AArch64ISD::UADDLP
5017 : AArch64ISD::SADDLP;
5018 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
5019 }
5020 case Intrinsic::aarch64_neon_sdot:
5021 case Intrinsic::aarch64_neon_udot:
5022 case Intrinsic::aarch64_sve_sdot:
5023 case Intrinsic::aarch64_sve_udot: {
5024 unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
5025 IntNo == Intrinsic::aarch64_sve_udot)
5026 ? AArch64ISD::UDOT
5027 : AArch64ISD::SDOT;
5028 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
5029 Op.getOperand(2), Op.getOperand(3));
5030 }
5031 case Intrinsic::get_active_lane_mask: {
5032 SDValue ID =
5033 DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl, MVT::i64);
5034 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), ID,
5035 Op.getOperand(1), Op.getOperand(2));
5036 }
5037 }
5038}
5039
5040bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
5041 if (VT.getVectorElementType() == MVT::i8 ||
5042 VT.getVectorElementType() == MVT::i16) {
5043 EltTy = MVT::i32;
5044 return true;
5045 }
5046 return false;
5047}
5048
5049bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
5050 EVT DataVT) const {
5051 // SVE only supports implicit extension of 32-bit indices.
5052 if (!Subtarget->hasSVE() || IndexVT.getVectorElementType() != MVT::i32)
5053 return false;
5054
5055 // Indices cannot be smaller than the main data type.
5056 if (IndexVT.getScalarSizeInBits() < DataVT.getScalarSizeInBits())
5057 return false;
5058
5059 // Scalable vectors with "vscale * 2" or fewer elements sit within a 64-bit
5060 // element container type, which would violate the previous clause.
5061 return DataVT.isFixedLengthVector() || DataVT.getVectorMinNumElements() > 2;
5062}
5063
5064bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
5065 return ExtVal.getValueType().isScalableVector() ||
5066 useSVEForFixedLengthVectorVT(
5067 ExtVal.getValueType(),
5068 /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors());
5069}
5070
5071unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
5072 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
5073 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
5074 AArch64ISD::GLD1_MERGE_ZERO},
5075 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
5076 AArch64ISD::GLD1_UXTW_MERGE_ZERO},
5077 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
5078 AArch64ISD::GLD1_MERGE_ZERO},
5079 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
5080 AArch64ISD::GLD1_SXTW_MERGE_ZERO},
5081 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
5082 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
5083 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
5084 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
5085 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
5086 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
5087 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
5088 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
5089 };
5090 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
5091 return AddrModes.find(Key)->second;
5092}
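
For reference only (not part of the annotated source): the Scaled/Signed/Extend to opcode mapping above can also be sketched as a flat table indexed by the three flags, avoiding the std::map built on every call. The helper name getGatherVecOpcodeTable is hypothetical; the sketch assumes the same AArch64ISD opcodes and headers already used in this file.

static unsigned getGatherVecOpcodeTable(bool IsScaled, bool IsSigned,
                                        bool NeedsExtend) {
  // Index bits: (IsScaled << 2) | (IsSigned << 1) | NeedsExtend, mirroring the
  // eight std::map entries above.
  static const unsigned Opcodes[8] = {
      AArch64ISD::GLD1_MERGE_ZERO,             // unscaled, unsigned, no extend
      AArch64ISD::GLD1_UXTW_MERGE_ZERO,        // unscaled, unsigned, extend
      AArch64ISD::GLD1_MERGE_ZERO,             // unscaled, signed,   no extend
      AArch64ISD::GLD1_SXTW_MERGE_ZERO,        // unscaled, signed,   extend
      AArch64ISD::GLD1_SCALED_MERGE_ZERO,      // scaled,   unsigned, no extend
      AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO, // scaled,   unsigned, extend
      AArch64ISD::GLD1_SCALED_MERGE_ZERO,      // scaled,   signed,   no extend
      AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO, // scaled,   signed,   extend
  };
  return Opcodes[(IsScaled << 2) | (IsSigned << 1) | (NeedsExtend ? 1 : 0)];
}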
5093
5094unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
5095 switch (Opcode) {
5096 default:
5097    llvm_unreachable("unimplemented opcode");
5098 return Opcode;
5099 case AArch64ISD::GLD1_MERGE_ZERO:
5100 return AArch64ISD::GLD1S_MERGE_ZERO;
5101 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
5102 return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
5103 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
5104 return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
5105 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
5106 return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
5107 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
5108 return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
5109 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
5110 return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
5111 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
5112 return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
5113 }
5114}
5115
5116SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
5117 SelectionDAG &DAG) const {
5118 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
5119
5120 SDLoc DL(Op);
5121 SDValue Chain = MGT->getChain();
5122 SDValue PassThru = MGT->getPassThru();
5123 SDValue Mask = MGT->getMask();
5124 SDValue BasePtr = MGT->getBasePtr();
5125 SDValue Index = MGT->getIndex();
5126 SDValue Scale = MGT->getScale();
5127 EVT VT = Op.getValueType();
5128 EVT MemVT = MGT->getMemoryVT();
5129 ISD::LoadExtType ExtType = MGT->getExtensionType();
5130 ISD::MemIndexType IndexType = MGT->getIndexType();
5131
5132  // SVE supports zero (and so undef) passthrough values only; everything else
5133  // must be handled manually by an explicit select on the load's output.
5134 if (!PassThru->isUndef() && !isZerosVector(PassThru.getNode())) {
5135 SDValue Ops[] = {Chain, DAG.getUNDEF(VT), Mask, BasePtr, Index, Scale};
5136 SDValue Load =
5137 DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
5138 MGT->getMemOperand(), IndexType, ExtType);
5139 SDValue Select = DAG.getSelect(DL, VT, Mask, Load, PassThru);
5140 return DAG.getMergeValues({Select, Load.getValue(1)}, DL);
5141 }
5142
5143 bool IsScaled = MGT->isIndexScaled();
5144 bool IsSigned = MGT->isIndexSigned();
5145
5146  // SVE supports an index scaled by sizeof(MemVT.elt) only; everything else
5147  // must be calculated beforehand.
5148 uint64_t ScaleVal = cast<ConstantSDNode>(Scale)->getZExtValue();
5149 if (IsScaled && ScaleVal != MemVT.getScalarStoreSize()) {
5150    assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
5151 EVT IndexVT = Index.getValueType();
5152 Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index,
5153 DAG.getConstant(Log2_32(ScaleVal), DL, IndexVT));
5154 Scale = DAG.getTargetConstant(1, DL, Scale.getValueType());
5155
5156 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
5157 return DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
5158 MGT->getMemOperand(), IndexType, ExtType);
5159 }
5160
5161 // Lower fixed length gather to a scalable equivalent.
5162 if (VT.isFixedLengthVector()) {
5163    assert(Subtarget->useSVEForFixedLengthVectors() &&
5164           "Cannot lower when not using SVE for fixed vectors!");
5165
5166 // NOTE: Handle floating-point as if integer then bitcast the result.
5167 EVT DataVT = VT.changeVectorElementTypeToInteger();
5168 MemVT = MemVT.changeVectorElementTypeToInteger();
5169
5170 // Find the smallest integer fixed length vector we can use for the gather.
5171 EVT PromotedVT = VT.changeVectorElementType(MVT::i32);
5172 if (DataVT.getVectorElementType() == MVT::i64 ||
5173 Index.getValueType().getVectorElementType() == MVT::i64 ||
5174 Mask.getValueType().getVectorElementType() == MVT::i64)
5175 PromotedVT = VT.changeVectorElementType(MVT::i64);
5176
5177 // Promote vector operands except for passthrough, which we know is either
5178 // undef or zero, and thus best constructed directly.
5179 unsigned ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5180 Index = DAG.getNode(ExtOpcode, DL, PromotedVT, Index);
5181 Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, PromotedVT, Mask);
5182
5183 // A promoted result type forces the need for an extending load.
5184 if (PromotedVT != DataVT && ExtType == ISD::NON_EXTLOAD)
5185 ExtType = ISD::EXTLOAD;
5186
5187 EVT ContainerVT = getContainerForFixedLengthVector(DAG, PromotedVT);
5188
5189 // Convert fixed length vector operands to scalable.
5190 MemVT = ContainerVT.changeVectorElementType(MemVT.getVectorElementType());
5191 Index = convertToScalableVector(DAG, ContainerVT, Index);
5192 Mask = convertFixedMaskToScalableVector(Mask, DAG);
5193 PassThru = PassThru->isUndef() ? DAG.getUNDEF(ContainerVT)
5194 : DAG.getConstant(0, DL, ContainerVT);
5195
5196 // Emit equivalent scalable vector gather.
5197 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
5198 SDValue Load =
5199 DAG.getMaskedGather(DAG.getVTList(ContainerVT, MVT::Other), MemVT, DL,
5200 Ops, MGT->getMemOperand(), IndexType, ExtType);
5201
5202 // Extract fixed length data then convert to the required result type.
5203 SDValue Result = convertFromScalableVector(DAG, PromotedVT, Load);
5204 Result = DAG.getNode(ISD::TRUNCATE, DL, DataVT, Result);
5205 if (VT.isFloatingPoint())
5206 Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
5207
5208 return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
5209 }
5210
5211 // Everything else is legal.
5212 return Op;
5213}
5214
5215SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
5216 SelectionDAG &DAG) const {
5217 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
5218
5219 SDLoc DL(Op);
5220 SDValue Chain = MSC->getChain();
5221 SDValue StoreVal = MSC->getValue();
5222 SDValue Mask = MSC->getMask();
5223 SDValue BasePtr = MSC->getBasePtr();
5224 SDValue Index = MSC->getIndex();
5225 SDValue Scale = MSC->getScale();
5226 EVT VT = StoreVal.getValueType();
5227 EVT MemVT = MSC->getMemoryVT();
5228 ISD::MemIndexType IndexType = MSC->getIndexType();
5229 bool Truncating = MSC->isTruncatingStore();
5230
5231 bool IsScaled = MSC->isIndexScaled();
5232 bool IsSigned = MSC->isIndexSigned();
5233
5234  // SVE supports an index scaled by sizeof(MemVT.elt) only; everything else
5235  // must be calculated beforehand.
5236 uint64_t ScaleVal = cast<ConstantSDNode>(Scale)->getZExtValue();
5237 if (IsScaled && ScaleVal != MemVT.getScalarStoreSize()) {
5238    assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
5239 EVT IndexVT = Index.getValueType();
5240 Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index,
5241 DAG.getConstant(Log2_32(ScaleVal), DL, IndexVT));
5242 Scale = DAG.getTargetConstant(1, DL, Scale.getValueType());
5243
5244 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
5245 return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
5246 MSC->getMemOperand(), IndexType, Truncating);
5247 }
5248
5249 // Lower fixed length scatter to a scalable equivalent.
5250 if (VT.isFixedLengthVector()) {
5251    assert(Subtarget->useSVEForFixedLengthVectors() &&
5252           "Cannot lower when not using SVE for fixed vectors!");
5253
5254 // Once bitcast we treat floating-point scatters as if integer.
5255 if (VT.isFloatingPoint()) {
5256 VT = VT.changeVectorElementTypeToInteger();
5257 MemVT = MemVT.changeVectorElementTypeToInteger();
5258 StoreVal = DAG.getNode(ISD::BITCAST, DL, VT, StoreVal);
5259 }
5260
5261 // Find the smallest integer fixed length vector we can use for the scatter.
5262 EVT PromotedVT = VT.changeVectorElementType(MVT::i32);
5263 if (VT.getVectorElementType() == MVT::i64 ||
5264 Index.getValueType().getVectorElementType() == MVT::i64 ||
5265 Mask.getValueType().getVectorElementType() == MVT::i64)
5266 PromotedVT = VT.changeVectorElementType(MVT::i64);
5267
5268 // Promote vector operands.
5269 unsigned ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5270 Index = DAG.getNode(ExtOpcode, DL, PromotedVT, Index);
5271 Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, PromotedVT, Mask);
5272 StoreVal = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, StoreVal);
5273
5274 // A promoted value type forces the need for a truncating store.
5275 if (PromotedVT != VT)
5276 Truncating = true;
5277
5278 EVT ContainerVT = getContainerForFixedLengthVector(DAG, PromotedVT);
5279
5280 // Convert fixed length vector operands to scalable.
5281 MemVT = ContainerVT.changeVectorElementType(MemVT.getVectorElementType());
5282 Index = convertToScalableVector(DAG, ContainerVT, Index);
5283 Mask = convertFixedMaskToScalableVector(Mask, DAG);
5284 StoreVal = convertToScalableVector(DAG, ContainerVT, StoreVal);
5285
5286 // Emit equivalent scalable vector scatter.
5287 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
5288 return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
5289 MSC->getMemOperand(), IndexType, Truncating);
5290 }
5291
5292 // Everything else is legal.
5293 return Op;
5294}
5295
5296SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
5297 SDLoc DL(Op);
5298 MaskedLoadSDNode *LoadNode = cast<MaskedLoadSDNode>(Op);
5299  assert(LoadNode && "Expected custom lowering of a masked load node");
5300 EVT VT = Op->getValueType(0);
5301
5302 if (useSVEForFixedLengthVectorVT(
5303 VT,
5304 /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
5305 return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
5306
5307 SDValue PassThru = LoadNode->getPassThru();
5308 SDValue Mask = LoadNode->getMask();
5309
5310 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
5311 return Op;
5312
5313 SDValue Load = DAG.getMaskedLoad(
5314 VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
5315 LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
5316 LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
5317 LoadNode->getExtensionType());
5318
5319 SDValue Result = DAG.getSelect(DL, VT, Mask, Load, PassThru);
5320
5321 return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
5322}
5323
5324// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
5325static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
5326 EVT VT, EVT MemVT,
5327 SelectionDAG &DAG) {
5328  assert(VT.isVector() && "VT should be a vector type");
5329  assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
5330
5331 SDValue Value = ST->getValue();
5332
5333  // It first extends the promoted v4i16 to v8i16, truncates to v8i8, and
5334  // extracts the word lane which represents the v4i8 subvector. It optimizes
5335  // the store to:
5336 //
5337 // xtn v0.8b, v0.8h
5338 // str s0, [x0]
5339
5340 SDValue Undef = DAG.getUNDEF(MVT::i16);
5341 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
5342 {Undef, Undef, Undef, Undef});
5343
5344 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
5345 Value, UndefVec);
5346 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
5347
5348 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
5349 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
5350 Trunc, DAG.getConstant(0, DL, MVT::i64));
5351
5352 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
5353 ST->getBasePtr(), ST->getMemOperand());
5354}
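
For orientation (an illustrative assumption, not taken from this file): a source-level pattern that can produce the v4i16-to-v4i8 truncating store handled above, which the xtn/str lowering then turns into a single 32-bit store of the four low bytes.

#include <cstdint>

// Hypothetical example: when the vectorizer forms a <4 x i16> -> <4 x i8>
// truncating store for this loop, it is the kind of node the custom lowering
// above handles.
void storeLowBytes(const uint16_t *Src, uint8_t *Dst) {
  for (int I = 0; I < 4; ++I)
    Dst[I] = static_cast<uint8_t>(Src[I]);
}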
5355
5356// Custom lowering for any store, vector or scalar, with or without a
5357// truncating operation. Currently we only custom lower truncating stores
5358// from vector v4i16 to v4i8 and volatile stores of i128.
5359SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
5360 SelectionDAG &DAG) const {
5361 SDLoc Dl(Op);
5362 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
5363  assert(StoreNode && "Can only custom lower store nodes");
5364
5365 SDValue Value = StoreNode->getValue();
5366
5367 EVT VT = Value.getValueType();
5368 EVT MemVT = StoreNode->getMemoryVT();
5369
5370 if (VT.isVector()) {
5371 if (useSVEForFixedLengthVectorVT(
5372 VT,
5373 /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
5374 return LowerFixedLengthVectorStoreToSVE(Op, DAG);
5375
5376 unsigned AS = StoreNode->getAddressSpace();
5377 Align Alignment = StoreNode->getAlign();
5378 if (Alignment < MemVT.getStoreSize() &&
5379 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
5380 StoreNode->getMemOperand()->getFlags(),
5381 nullptr)) {
5382 return scalarizeVectorStore(StoreNode, DAG);
5383 }
5384
5385 if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
5386 MemVT == MVT::v4i8) {
5387 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
5388 }
5389 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
5390 // the custom lowering, as there are no un-paired non-temporal stores and
5391 // legalization will break up 256 bit inputs.
5392 ElementCount EC = MemVT.getVectorElementCount();
5393 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
5394 EC.isKnownEven() &&
5395 ((MemVT.getScalarSizeInBits() == 8u ||
5396 MemVT.getScalarSizeInBits() == 16u ||
5397 MemVT.getScalarSizeInBits() == 32u ||
5398 MemVT.getScalarSizeInBits() == 64u))) {
5399 SDValue Lo =
5400 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
5401 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
5402 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
5403 SDValue Hi =
5404 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
5405 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
5406 StoreNode->getValue(),
5407 DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
5408 SDValue Result = DAG.getMemIntrinsicNode(
5409 AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
5410 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
5411 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
5412 return Result;
5413 }
5414 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
5415 return LowerStore128(Op, DAG);
5416 } else if (MemVT == MVT::i64x8) {
5417 SDValue Value = StoreNode->getValue();
5418    assert(Value->getValueType(0) == MVT::i64x8);
5419 SDValue Chain = StoreNode->getChain();
5420 SDValue Base = StoreNode->getBasePtr();
5421 EVT PtrVT = Base.getValueType();
5422 for (unsigned i = 0; i < 8; i++) {
5423 SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
5424 Value, DAG.getConstant(i, Dl, MVT::i32));
5425 SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
5426 DAG.getConstant(i * 8, Dl, PtrVT));
5427 Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
5428 StoreNode->getOriginalAlign());
5429 }
5430 return Chain;
5431 }
5432
5433 return SDValue();
5434}
5435
5436/// Lower atomic or volatile 128-bit stores to a single STP instruction.
5437SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
5438 SelectionDAG &DAG) const {
5439 MemSDNode *StoreNode = cast<MemSDNode>(Op);
5440  assert(StoreNode->getMemoryVT() == MVT::i128);
5441  assert(StoreNode->isVolatile() || StoreNode->isAtomic());
5442  assert(!StoreNode->isAtomic() ||
5443         StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
5444         StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
5445
5446 SDValue Value = StoreNode->getOpcode() == ISD::STORE
5447 ? StoreNode->getOperand(1)
5448 : StoreNode->getOperand(2);
5449 SDLoc DL(Op);
5450 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
5451 DAG.getConstant(0, DL, MVT::i64));
5452 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
5453 DAG.getConstant(1, DL, MVT::i64));
5454 SDValue Result = DAG.getMemIntrinsicNode(
5455 AArch64ISD::STP, DL, DAG.getVTList(MVT::Other),
5456 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
5457 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
5458 return Result;
5459}
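
As a rough illustration (an assumption, not part of the report): a volatile 128-bit store at the C++ level, the kind of access this path can lower by splitting the value into its low and high 64-bit halves and emitting one paired STP.

// Hypothetical example of a store that may reach LowerStore128: the value is
// conceptually split as Lo = bits [63:0] and Hi = bits [127:64], then stored
// together with a single STP of the two halves.
void storeVolatileI128(volatile __int128 *P, __int128 V) {
  *P = V;
}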
5460
5461SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
5462 SelectionDAG &DAG) const {
5463 SDLoc DL(Op);
5464 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
5465  assert(LoadNode && "Expected custom lowering of a load node");
5466
5467 if (LoadNode->getMemoryVT() == MVT::i64x8) {
5468 SmallVector<SDValue, 8> Ops;
5469 SDValue Base = LoadNode->getBasePtr();
5470 SDValue Chain = LoadNode->getChain();
5471 EVT PtrVT = Base.getValueType();
5472 for (unsigned i = 0; i < 8; i++) {
5473 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
5474 DAG.getConstant(i * 8, DL, PtrVT));
5475 SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
5476 LoadNode->getPointerInfo(),
5477 LoadNode->getOriginalAlign());
5478 Ops.push_back(Part);
5479 Chain = SDValue(Part.getNode(), 1);
5480 }
5481 SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
5482 return DAG.getMergeValues({Loaded, Chain}, DL);
5483 }
5484
5485 // Custom lowering for extending v4i8 vector loads.
5486 EVT VT = Op->getValueType(0);
5487  assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
5488
5489 if (LoadNode->getMemoryVT() != MVT::v4i8)
5490 return SDValue();
5491
5492 unsigned ExtType;
5493 if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
5494 ExtType = ISD::SIGN_EXTEND;
5495 else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
5496 LoadNode->getExtensionType() == ISD::EXTLOAD)
5497 ExtType = ISD::ZERO_EXTEND;
5498 else
5499 return SDValue();
5500
5501 SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
5502 LoadNode->getBasePtr(), MachinePointerInfo());
5503 SDValue Chain = Load.getValue(1);
5504 SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
5505 SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
5506 SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
5507 Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
5508 DAG.getConstant(0, DL, MVT::i64));
5509 if (VT == MVT::v4i32)
5510 Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
5511 return DAG.getMergeValues({Ext, Chain}, DL);
5512}
5513
5514// Generate SUBS and CSEL for integer abs.
5515SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
5516 MVT VT = Op.getSimpleValueType();
5517
5518 if (VT.isVector())
5519 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
5520
5521 SDLoc DL(Op);
5522 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
5523 Op.getOperand(0));
5524 // Generate SUBS & CSEL.
5525 SDValue Cmp =
5526 DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
5527 Op.getOperand(0), DAG.getConstant(0, DL, VT));
5528 return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
5529 DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
5530 Cmp.getValue(1));
5531}
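
A minimal scalar sketch (an illustrative assumption, not part of the source) of the SUBS + CSEL expansion produced above: the flags are set from the input compared against zero, then either the input or its negation is selected.

#include <cstdint>

// Hedged sketch: integer abs as lowered above. SUBS computes X - 0 and sets
// the flags; CSEL with the PL (non-negative) condition keeps X, otherwise it
// selects the negated value.
static int64_t absViaSubsCsel(int64_t X) {
  // Negate via unsigned arithmetic to mirror the wrapping SUB node and avoid
  // signed-overflow UB for INT64_MIN in this C++ sketch.
  int64_t Neg = static_cast<int64_t>(0ULL - static_cast<uint64_t>(X));
  return (X >= 0) ? X : Neg;
}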
5532
5533static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
5534 SDValue Chain = Op.getOperand(0);
5535 SDValue Cond = Op.getOperand(1);
5536 SDValue Dest = Op.getOperand(2);
5537
5538 AArch64CC::CondCode CC;
5539 if (SDValue Cmp = emitConjunction(DAG, Cond, CC)) {
5540 SDLoc dl(Op);
5541 SDValue CCVal = DAG.getConstant(CC, dl, MVT::i32);
5542 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
5543 Cmp);
5544 }
5545
5546 return SDValue();
5547}
5548
5549SDValue AArch64TargetLowering::LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const {
5550  assert(Op->getOpcode() == ISD::ZERO_EXTEND && "Expected ZERO_EXTEND");
5551
5552 if (Op.getValueType().isFixedLengthVector())
5553 return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
5554
5555 // Try to lower to VSELECT to allow zext to transform into
5556 // a predicated instruction like add, sub or mul.
5557 SDValue Value = Op->getOperand(0);
5558 if (!Value->getValueType(0).isScalableVector() ||
5559 Value->getValueType(0).getScalarType() != MVT::i1)
5560 return SDValue();
5561
5562 SDLoc DL = SDLoc(Op);
5563 EVT VT = Op->getValueType(0);
5564 SDValue Ones = DAG.getConstant(1, DL, VT);
5565 SDValue Zeros = DAG.getConstant(0, DL, VT);
5566 return DAG.getNode(ISD::VSELECT, DL, VT, Value, Ones, Zeros);
5567}
5568
5569SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
5570 SelectionDAG &DAG) const {
5571  LLVM_DEBUG(dbgs() << "Custom lowering: ");
5572  LLVM_DEBUG(Op.dump());
5573
5574 switch (Op.getOpcode()) {
5575 default:
5576    llvm_unreachable("unimplemented operand");
5577 return SDValue();
5578 case ISD::BITCAST:
5579 return LowerBITCAST(Op, DAG);
5580 case ISD::GlobalAddress:
5581 return LowerGlobalAddress(Op, DAG);
5582 case ISD::GlobalTLSAddress:
5583 return LowerGlobalTLSAddress(Op, DAG);
5584 case ISD::SETCC:
5585 case ISD::STRICT_FSETCC:
5586 case ISD::STRICT_FSETCCS:
5587 return LowerSETCC(Op, DAG);
5588 case ISD::SETCCCARRY:
5589 return LowerSETCCCARRY(Op, DAG);
5590 case ISD::BRCOND:
5591 return LowerBRCOND(Op, DAG);
5592 case ISD::BR_CC:
5593 return LowerBR_CC(Op, DAG);
5594 case ISD::SELECT:
5595 return LowerSELECT(Op, DAG);
5596 case ISD::SELECT_CC:
5597 return LowerSELECT_CC(Op, DAG);
5598 case ISD::JumpTable:
5599 return LowerJumpTable(Op, DAG);
5600 case ISD::BR_JT:
5601 return LowerBR_JT(Op, DAG);
5602 case ISD::ConstantPool:
5603 return LowerConstantPool(Op, DAG);
5604 case ISD::BlockAddress:
5605 return LowerBlockAddress(Op, DAG);
5606 case ISD::VASTART:
5607 return LowerVASTART(Op, DAG);
5608 case ISD::VACOPY:
5609 return LowerVACOPY(Op, DAG);
5610 case ISD::VAARG:
5611 return LowerVAARG(Op, DAG);
5612 case ISD::ADDCARRY:
5613 return lowerADDSUBCARRY(Op, DAG, AArch64ISD::ADCS, false /*unsigned*/);
5614 case ISD::SUBCARRY:
5615 return lowerADDSUBCARRY(Op, DAG, AArch64ISD::SBCS, false /*unsigned*/);
5616 case ISD::SADDO_CARRY:
5617 return lowerADDSUBCARRY(Op, DAG, AArch64ISD::ADCS, true /*signed*/);
5618 case ISD::SSUBO_CARRY:
5619 return lowerADDSUBCARRY(Op, DAG, AArch64ISD::SBCS, true /*signed*/);
5620 case ISD::SADDO:
5621 case ISD::UADDO:
5622 case ISD::SSUBO:
5623 case ISD::USUBO:
5624 case ISD::SMULO:
5625 case ISD::UMULO:
5626 return LowerXALUO(Op, DAG);
5627 case ISD::FADD:
5628 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
5629 case ISD::FSUB:
5630 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
5631 case ISD::FMUL:
5632 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
5633 case ISD::FMA:
5634 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
5635 case ISD::FDIV:
5636 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
5637 case ISD::FNEG:
5638 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
5639 case ISD::FCEIL:
5640 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
5641 case ISD::FFLOOR:
5642 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
5643 case ISD::FNEARBYINT:
5644 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
5645 case ISD::FRINT:
5646 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
5647 case ISD::FROUND:
5648 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
5649 case ISD::FROUNDEVEN:
5650 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
5651 case ISD::FTRUNC:
5652 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
5653 case ISD::FSQRT:
5654 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
5655 case ISD::FABS:
5656 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
5657 case ISD::FP_ROUND:
5658 case ISD::STRICT_FP_ROUND:
5659 return LowerFP_ROUND(Op, DAG);
5660 case ISD::FP_EXTEND:
5661 return LowerFP_EXTEND(Op, DAG);
5662 case ISD::FRAMEADDR:
5663 return LowerFRAMEADDR(Op, DAG);
5664 case ISD::SPONENTRY:
5665 return LowerSPONENTRY(Op, DAG);
5666 case ISD::RETURNADDR:
5667 return LowerRETURNADDR(Op, DAG);
5668 case ISD::ADDROFRETURNADDR:
5669 return LowerADDROFRETURNADDR(Op, DAG);
5670 case ISD::CONCAT_VECTORS:
5671 return LowerCONCAT_VECTORS(Op, DAG);
5672 case ISD::INSERT_VECTOR_ELT:
5673 return LowerINSERT_VECTOR_ELT(Op, DAG);
5674 case ISD::EXTRACT_VECTOR_ELT:
5675 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
5676 case ISD::BUILD_VECTOR:
5677 return LowerBUILD_VECTOR(Op, DAG);
5678 case ISD::VECTOR_SHUFFLE:
5679 return LowerVECTOR_SHUFFLE(Op, DAG);
5680 case ISD::SPLAT_VECTOR:
5681 return LowerSPLAT_VECTOR(Op, DAG);
5682 case ISD::EXTRACT_SUBVECTOR:
5683 return LowerEXTRACT_SUBVECTOR(Op, DAG);
5684 case ISD::INSERT_SUBVECTOR:
5685 return LowerINSERT_SUBVECTOR(Op, DAG);
5686 case ISD::SDIV:
5687 case ISD::UDIV:
5688 return LowerDIV(Op, DAG);
5689 case ISD::SMIN:
5690 case ISD::UMIN:
5691 case ISD::SMAX:
5692 case ISD::UMAX:
5693 return LowerMinMax(Op, DAG);
5694 case ISD::SRA:
5695 case ISD::SRL:
5696 case ISD::SHL:
5697 return LowerVectorSRA_SRL_SHL(Op, DAG);
5698 case ISD::SHL_PARTS:
5699 case ISD::SRL_PARTS:
5700 case ISD::SRA_PARTS:
5701 return LowerShiftParts(Op, DAG);
5702 case ISD::CTPOP:
5703 case ISD::PARITY:
5704 return LowerCTPOP_PARITY(Op, DAG);
5705 case ISD::FCOPYSIGN:
5706 return LowerFCOPYSIGN(Op, DAG);
5707 case ISD::OR:
5708 return LowerVectorOR(Op, DAG);
5709 case ISD::XOR:
5710 return LowerXOR(Op, DAG);
5711 case ISD::PREFETCH:
5712 return LowerPREFETCH(Op, DAG);
5713 case ISD::SINT_TO_FP:
5714 case ISD::UINT_TO_FP:
5715 case ISD::STRICT_SINT_TO_FP:
5716 case ISD::STRICT_UINT_TO_FP:
5717 return LowerINT_TO_FP(Op, DAG);
5718 case ISD::FP_TO_SINT:
5719 case ISD::FP_TO_UINT:
5720 case ISD::STRICT_FP_TO_SINT:
5721 case ISD::STRICT_FP_TO_UINT:
5722 return LowerFP_TO_INT(Op, DAG);
5723 case ISD::FP_TO_SINT_SAT:
5724 case ISD::FP_TO_UINT_SAT:
5725 return LowerFP_TO_INT_SAT(Op, DAG);
5726 case ISD::FSINCOS:
5727 return LowerFSINCOS(Op, DAG);
5728 case ISD::FLT_ROUNDS_:
5729 return LowerFLT_ROUNDS_(Op, DAG);
5730 case ISD::SET_ROUNDING:
5731 return LowerSET_ROUNDING(Op, DAG);
5732 case ISD::MUL:
5733 return LowerMUL(Op, DAG);
5734 case ISD::MULHS:
5735 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED);
5736 case ISD::MULHU:
5737 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED);
5738 case ISD::INTRINSIC_VOID:
5739 case ISD::INTRINSIC_W_CHAIN:
5740 return LowerINTRINSIC_W_CHAIN(Op, DAG);
5741 case ISD::INTRINSIC_WO_CHAIN:
5742 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
5743 case ISD::ATOMIC_STORE:
5744 if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
5745      assert(Subtarget->hasLSE2());
5746 return LowerStore128(Op, DAG);
5747 }
5748 return SDValue();
5749 case ISD::STORE:
5750 return LowerSTORE(Op, DAG);
5751 case ISD::MSTORE:
5752 return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
5753 case ISD::MGATHER:
5754 return LowerMGATHER(Op, DAG);
5755 case ISD::MSCATTER:
5756 return LowerMSCATTER(Op, DAG);
5757 case ISD::VECREDUCE_SEQ_FADD:
5758 return LowerVECREDUCE_SEQ_FADD(Op, DAG);
5759 case ISD::VECREDUCE_ADD:
5760 case ISD::VECREDUCE_AND:
5761 case ISD::VECREDUCE_OR:
5762 case ISD::VECREDUCE_XOR:
5763 case ISD::VECREDUCE_SMAX:
5764 case ISD::VECREDUCE_SMIN:
5765 case ISD::VECREDUCE_UMAX:
5766 case ISD::VECREDUCE_UMIN:
5767 case ISD::VECREDUCE_FADD:
5768 case ISD::VECREDUCE_FMAX:
5769 case ISD::VECREDUCE_FMIN:
5770 return LowerVECREDUCE(Op, DAG);
5771 case ISD::ATOMIC_LOAD_SUB:
5772 return LowerATOMIC_LOAD_SUB(Op, DAG);
5773 case ISD::ATOMIC_LOAD_AND:
5774 return LowerATOMIC_LOAD_AND(Op, DAG);
5775 case ISD::DYNAMIC_STACKALLOC:
5776 return LowerDYNAMIC_STACKALLOC(Op, DAG);
5777 case ISD::VSCALE:
5778 return LowerVSCALE(Op, DAG);
5779 case ISD::ANY_EXTEND:
5780 case ISD::SIGN_EXTEND:
5781 return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
5782 case ISD::ZERO_EXTEND:
5783 return LowerZERO_EXTEND(Op, DAG);
5784 case ISD::SIGN_EXTEND_INREG: {
5785 // Only custom lower when ExtraVT has a legal byte based element type.
5786 EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5787 EVT ExtraEltVT = ExtraVT.getVectorElementType();
5788 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
5789 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
5790 return SDValue();
5791
5792 return LowerToPredicatedOp(Op, DAG,
5793 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
5794 }
5795 case ISD::TRUNCATE:
5796 return LowerTRUNCATE(Op, DAG);
5797 case ISD::MLOAD:
5798 return LowerMLOAD(Op, DAG);
5799 case ISD::LOAD:
5800 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
5801 return LowerFixedLengthVectorLoadToSVE(Op, DAG);
5802 return LowerLOAD(Op, DAG);
5803 case ISD::ADD:
5804 case ISD::AND:
5805 case ISD::SUB:
5806 return LowerToScalableOp(Op, DAG);
5807 case ISD::FMAXIMUM:
5808 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
5809 case ISD::FMAXNUM:
5810 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
5811 case ISD::FMINIMUM:
5812 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
5813 case ISD::FMINNUM:
5814 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
5815 case ISD::VSELECT:
5816 return LowerFixedLengthVectorSelectToSVE(Op, DAG);
5817 case ISD::ABS:
5818 return LowerABS(Op, DAG);
5819 case ISD::ABDS:
5820 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
5821 case ISD::ABDU:
5822 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
5823 case ISD::BITREVERSE:
5824 return LowerBitreverse(Op, DAG);
5825 case ISD::BSWAP:
5826 return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
5827 case ISD::CTLZ:
5828 return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU);
5829 case ISD::CTTZ:
5830 return LowerCTTZ(Op, DAG);
5831 case ISD::VECTOR_SPLICE:
5832 return LowerVECTOR_SPLICE(Op, DAG);
5833 case ISD::STRICT_LROUND:
5834 case ISD::STRICT_LLROUND:
5835 case ISD::STRICT_LRINT:
5836 case ISD::STRICT_LLRINT: {
5837    assert(Op.getOperand(1).getValueType() == MVT::f16 &&
5838           "Expected custom lowering of rounding operations only for f16");
5839 SDLoc DL(Op);
5840 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
5841 {Op.getOperand(0), Op.getOperand(1)});
5842 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
5843 {Ext.getValue(1), Ext.getValue(0)});
5844 }
5845 }
5846}
5847
5848bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
5849 return !Subtarget->useSVEForFixedLengthVectors();
5850}
5851
5852bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
5853 EVT VT, bool OverrideNEON) const {
5854 if (!VT.isFixedLengthVector() || !VT.isSimple())
5855 return false;
5856
5857 // Don't use SVE for vectors we cannot scalarize if required.
5858 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
5859 // Fixed length predicates should be promoted to i8.
5860 // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
5861 case MVT::i1:
5862 default:
5863 return false;
5864 case MVT::i8:
5865 case MVT::i16:
5866 case MVT::i32:
5867 case MVT::i64:
5868 case MVT::f16:
5869 case MVT::f32:
5870 case MVT::f64:
5871 break;
5872 }
5873
5874 // All SVE implementations support NEON sized vectors.
5875 if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
5876 return Subtarget->hasSVE();
5877
5878 // Ensure NEON MVTs only belong to a single register class.
5879 if (VT.getFixedSizeInBits() <= 128)
5880 return false;
5881
5882 // Ensure wider than NEON code generation is enabled.
5883 if (!Subtarget->useSVEForFixedLengthVectors())
5884 return false;
5885
5886 // Don't use SVE for types that don't fit.
5887 if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
5888 return false;
5889
5890 // TODO: Perhaps an artificial restriction, but worth having whilst getting
5891 // the base fixed length SVE support in place.
5892 if (!VT.isPow2VectorType())
5893 return false;
5894
5895 return true;
5896}
5897
5898//===----------------------------------------------------------------------===//
5899// Calling Convention Implementation
5900//===----------------------------------------------------------------------===//
5901
5902static unsigned getIntrinsicID(const SDNode *N) {
5903 unsigned Opcode = N->getOpcode();
5904 switch (Opcode) {
5905 default:
5906 return Intrinsic::not_intrinsic;
5907 case ISD::INTRINSIC_WO_CHAIN: {
5908 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
5909 if (IID < Intrinsic::num_intrinsics)
5910 return IID;
5911 return Intrinsic::not_intrinsic;
5912 }
5913 }
5914}
5915
5916bool AArch64TargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
5917 SDValue N1) const {
5918 if (!N0.hasOneUse())
5919 return false;
5920
5921 unsigned IID = getIntrinsicID(N1.getNode());
5922 // Avoid reassociating expressions that can be lowered to smlal/umlal.
5923 if (IID == Intrinsic::aarch64_neon_umull ||
5924 N1.getOpcode() == AArch64ISD::UMULL ||
5925 IID == Intrinsic::aarch64_neon_smull ||
5926 N1.getOpcode() == AArch64ISD::SMULL)
5927 return N0.getOpcode() != ISD::ADD;
5928
5929 return true;
5930}
5931
5932/// Selects the correct CCAssignFn for a given CallingConvention value.
5933CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
5934 bool IsVarArg) const {
5935 switch (CC) {
5936 default:
5937 report_fatal_error("Unsupported calling convention.");
5938 case CallingConv::WebKit_JS:
5939 return CC_AArch64_WebKit_JS;
5940 case CallingConv::GHC:
5941 return CC_AArch64_GHC;
5942 case CallingConv::C:
5943 case CallingConv::Fast:
5944 case CallingConv::PreserveMost:
5945 case CallingConv::CXX_FAST_TLS:
5946 case CallingConv::Swift:
5947 case CallingConv::SwiftTail:
5948 case CallingConv::Tail:
5949 if (Subtarget->isTargetWindows() && IsVarArg) {
5950 if (Subtarget->isWindowsArm64EC())
5951 return CC_AArch64_Arm64EC_VarArg;
5952 return CC_AArch64_Win64_VarArg;
5953 }
5954 if (!Subtarget->isTargetDarwin())
5955 return CC_AArch64_AAPCS;
5956 if (!IsVarArg)
5957 return CC_AArch64_DarwinPCS;
5958 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
5959 : CC_AArch64_DarwinPCS_VarArg;
5960 case CallingConv::Win64:
5961 if (IsVarArg) {
5962 if (Subtarget->isWindowsArm64EC())
5963 return CC_AArch64_Arm64EC_VarArg;
5964 return CC_AArch64_Win64_VarArg;
5965 }
5966 return CC_AArch64_AAPCS;
5967 case CallingConv::CFGuard_Check:
5968 return CC_AArch64_Win64_CFGuard_Check;
5969 case CallingConv::AArch64_VectorCall:
5970 case CallingConv::AArch64_SVE_VectorCall:
5971 case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0:
5972 case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2:
5973 return CC_AArch64_AAPCS;
5974 }
5975}
5976
5977CCAssignFn *
5978AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
5979 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
5980 : RetCC_AArch64_AAPCS;
5981}
5982
5983
5984/// Returns true if the Function has ZA state and contains at least one call to
5985/// a function that requires setting up a lazy-save buffer.
5986static bool requiresBufferForLazySave(const Function &F) {
5987 SMEAttrs CallerAttrs(F);
5988 if (!CallerAttrs.hasZAState())
5989 return false;
5990
5991 for (const BasicBlock &BB : F)
5992 for (const Instruction &I : BB)
5993 if (const CallInst *Call = dyn_cast<CallInst>(&I))
5994 if (CallerAttrs.requiresLazySave(SMEAttrs(*Call)))
5995 return true;
5996 return false;
5997}
5998
5999unsigned
6000AArch64TargetLowering::allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
6001 SelectionDAG &DAG) const {
6002 MachineFunction &MF = DAG.getMachineFunction();
6003 MachineFrameInfo &MFI = MF.getFrameInfo();
6004
6005 // Allocate a lazy-save buffer object of size SVL.B * SVL.B (worst-case)
6006 SDValue N = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
6007 DAG.getConstant(1, DL, MVT::i32));
6008 SDValue NN = DAG.getNode(ISD::MUL, DL, MVT::i64, N, N);
6009 SDValue Ops[] = {Chain, NN, DAG.getConstant(1, DL, MVT::i64)};
6010 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);