Bug Summary

File: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1110, column 10
Called C++ object pointer is null
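
For context, the sketch below is a minimal, hypothetical C++ example of the pattern behind a "Called C++ object pointer is null" report: a member function is invoked through a pointer that can be null on at least one path the analyzer explores. The names (Node, lookup, opcodeOf) are illustrative only and are not the LLVM code at the flagged location in SelectionDAGNodes.h.

struct Node {
  unsigned getOpcode() const { return Opcode; }
  unsigned Opcode = 0;
};

// Hypothetical helper: may return a null pointer when nothing is found.
static Node *lookup(bool Found, Node *Candidate) {
  return Found ? Candidate : nullptr;
}

unsigned opcodeOf(bool Found, Node *Candidate) {
  Node *N = lookup(Found, Candidate);
  // Calling N->getOpcode() unconditionally is what the checker flags,
  // because N is null on the Found == false path. Guarding the call (or
  // asserting the invariant that rules out null) silences the report.
  return N ? N->getOpcode() : 0;
}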

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/AArch64 -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-08-28-193554-24367-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64ISelLowering.h"
14#include "AArch64CallingConvention.h"
15#include "AArch64ExpandImm.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/ObjCARCUtil.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/Analysis.h"
35#include "llvm/CodeGen/CallingConvLower.h"
36#include "llvm/CodeGen/MachineBasicBlock.h"
37#include "llvm/CodeGen/MachineFrameInfo.h"
38#include "llvm/CodeGen/MachineFunction.h"
39#include "llvm/CodeGen/MachineInstr.h"
40#include "llvm/CodeGen/MachineInstrBuilder.h"
41#include "llvm/CodeGen/MachineMemOperand.h"
42#include "llvm/CodeGen/MachineRegisterInfo.h"
43#include "llvm/CodeGen/RuntimeLibcalls.h"
44#include "llvm/CodeGen/SelectionDAG.h"
45#include "llvm/CodeGen/SelectionDAGNodes.h"
46#include "llvm/CodeGen/TargetCallingConv.h"
47#include "llvm/CodeGen/TargetInstrInfo.h"
48#include "llvm/CodeGen/ValueTypes.h"
49#include "llvm/IR/Attributes.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugLoc.h"
53#include "llvm/IR/DerivedTypes.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/GetElementPtrTypeIterator.h"
56#include "llvm/IR/GlobalValue.h"
57#include "llvm/IR/IRBuilder.h"
58#include "llvm/IR/Instruction.h"
59#include "llvm/IR/Instructions.h"
60#include "llvm/IR/IntrinsicInst.h"
61#include "llvm/IR/Intrinsics.h"
62#include "llvm/IR/IntrinsicsAArch64.h"
63#include "llvm/IR/Module.h"
64#include "llvm/IR/OperandTraits.h"
65#include "llvm/IR/PatternMatch.h"
66#include "llvm/IR/Type.h"
67#include "llvm/IR/Use.h"
68#include "llvm/IR/Value.h"
69#include "llvm/MC/MCRegisterInfo.h"
70#include "llvm/Support/Casting.h"
71#include "llvm/Support/CodeGen.h"
72#include "llvm/Support/CommandLine.h"
73#include "llvm/Support/Compiler.h"
74#include "llvm/Support/Debug.h"
75#include "llvm/Support/ErrorHandling.h"
76#include "llvm/Support/KnownBits.h"
77#include "llvm/Support/MachineValueType.h"
78#include "llvm/Support/MathExtras.h"
79#include "llvm/Support/raw_ostream.h"
80#include "llvm/Target/TargetMachine.h"
81#include "llvm/Target/TargetOptions.h"
82#include <algorithm>
83#include <bitset>
84#include <cassert>
85#include <cctype>
86#include <cstdint>
87#include <cstdlib>
88#include <iterator>
89#include <limits>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace llvm::PatternMatch;
96
97#define DEBUG_TYPE "aarch64-lower"
98
99STATISTIC(NumTailCalls, "Number of tail calls");
100STATISTIC(NumShiftInserts, "Number of vector shift inserts");
101STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
102
103// FIXME: The necessary dtprel relocations don't seem to be supported
104// well in the GNU bfd and gold linkers at the moment. Therefore, by
105// default, for now, fall back to GeneralDynamic code generation.
106cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
107 "aarch64-elf-ldtls-generation", cl::Hidden,
108 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
109 cl::init(false));
110
111static cl::opt<bool>
112EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
113 cl::desc("Enable AArch64 logical imm instruction "
114 "optimization"),
115 cl::init(true));
116
117// Temporary option added for the purpose of testing functionality added
118// to DAGCombiner.cpp in D92230. It is expected that this can be removed
119// in future when both implementations will be based off MGATHER rather
120// than the GLD1 nodes added for the SVE gather load intrinsics.
121static cl::opt<bool>
122EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
123 cl::desc("Combine extends of AArch64 masked "
124 "gather intrinsics"),
125 cl::init(true));
126
127/// Value type used for condition codes.
128static const MVT MVT_CC = MVT::i32;
129
130static inline EVT getPackedSVEVectorVT(EVT VT) {
131 switch (VT.getSimpleVT().SimpleTy) {
132 default:
133 llvm_unreachable("unexpected element type for vector");
134 case MVT::i8:
135 return MVT::nxv16i8;
136 case MVT::i16:
137 return MVT::nxv8i16;
138 case MVT::i32:
139 return MVT::nxv4i32;
140 case MVT::i64:
141 return MVT::nxv2i64;
142 case MVT::f16:
143 return MVT::nxv8f16;
144 case MVT::f32:
145 return MVT::nxv4f32;
146 case MVT::f64:
147 return MVT::nxv2f64;
148 case MVT::bf16:
149 return MVT::nxv8bf16;
150 }
151}
152
153// NOTE: Currently there's only a need to return integer vector types. If this
154// changes then just add an extra "type" parameter.
155static inline EVT getPackedSVEVectorVT(ElementCount EC) {
156 switch (EC.getKnownMinValue()) {
157 default:
158 llvm_unreachable("unexpected element count for vector");
159 case 16:
160 return MVT::nxv16i8;
161 case 8:
162 return MVT::nxv8i16;
163 case 4:
164 return MVT::nxv4i32;
165 case 2:
166 return MVT::nxv2i64;
167 }
168}
169
170static inline EVT getPromotedVTForPredicate(EVT VT) {
171 assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
172        "Expected scalable predicate vector type!");
173 switch (VT.getVectorMinNumElements()) {
174 default:
175 llvm_unreachable("unexpected element count for vector");
176 case 2:
177 return MVT::nxv2i64;
178 case 4:
179 return MVT::nxv4i32;
180 case 8:
181 return MVT::nxv8i16;
182 case 16:
183 return MVT::nxv16i8;
184 }
185}
186
187/// Returns true if VT's elements occupy the lowest bit positions of its
188/// associated register class without any intervening space.
189///
190/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
191/// same register class, but only nxv8f16 can be treated as a packed vector.
192static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
193 assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
194        "Expected legal vector type!");
195 return VT.isFixedLengthVector() ||
196 VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
197}
198
199// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
200// predicate and end with a passthru value matching the result type.
201static bool isMergePassthruOpcode(unsigned Opc) {
202 switch (Opc) {
203 default:
204 return false;
205 case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
206 case AArch64ISD::BSWAP_MERGE_PASSTHRU:
207 case AArch64ISD::CTLZ_MERGE_PASSTHRU:
208 case AArch64ISD::CTPOP_MERGE_PASSTHRU:
209 case AArch64ISD::DUP_MERGE_PASSTHRU:
210 case AArch64ISD::ABS_MERGE_PASSTHRU:
211 case AArch64ISD::NEG_MERGE_PASSTHRU:
212 case AArch64ISD::FNEG_MERGE_PASSTHRU:
213 case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
214 case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
215 case AArch64ISD::FCEIL_MERGE_PASSTHRU:
216 case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
217 case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
218 case AArch64ISD::FRINT_MERGE_PASSTHRU:
219 case AArch64ISD::FROUND_MERGE_PASSTHRU:
220 case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
221 case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
222 case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
223 case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
224 case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
225 case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
226 case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
227 case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
228 case AArch64ISD::FSQRT_MERGE_PASSTHRU:
229 case AArch64ISD::FRECPX_MERGE_PASSTHRU:
230 case AArch64ISD::FABS_MERGE_PASSTHRU:
231 return true;
232 }
233}
234
235AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
236 const AArch64Subtarget &STI)
237 : TargetLowering(TM), Subtarget(&STI) {
238 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
239 // we have to make something up. Arbitrarily, choose ZeroOrOne.
240 setBooleanContents(ZeroOrOneBooleanContent);
241 // When comparing vectors the result sets the different elements in the
242 // vector to all-one or all-zero.
243 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
244
245 // Set up the register classes.
246 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
247 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
248
249 if (Subtarget->hasLS64()) {
250 addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
251 setOperationAction(ISD::LOAD, MVT::i64x8, Custom);
252 setOperationAction(ISD::STORE, MVT::i64x8, Custom);
253 }
254
255 if (Subtarget->hasFPARMv8()) {
256 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
257 addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
258 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
259 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
260 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
261 }
262
263 if (Subtarget->hasNEON()) {
264 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
265 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
266 // Someone set us up the NEON.
267 addDRTypeForNEON(MVT::v2f32);
268 addDRTypeForNEON(MVT::v8i8);
269 addDRTypeForNEON(MVT::v4i16);
270 addDRTypeForNEON(MVT::v2i32);
271 addDRTypeForNEON(MVT::v1i64);
272 addDRTypeForNEON(MVT::v1f64);
273 addDRTypeForNEON(MVT::v4f16);
274 if (Subtarget->hasBF16())
275 addDRTypeForNEON(MVT::v4bf16);
276
277 addQRTypeForNEON(MVT::v4f32);
278 addQRTypeForNEON(MVT::v2f64);
279 addQRTypeForNEON(MVT::v16i8);
280 addQRTypeForNEON(MVT::v8i16);
281 addQRTypeForNEON(MVT::v4i32);
282 addQRTypeForNEON(MVT::v2i64);
283 addQRTypeForNEON(MVT::v8f16);
284 if (Subtarget->hasBF16())
285 addQRTypeForNEON(MVT::v8bf16);
286 }
287
288 if (Subtarget->hasSVE()) {
289 // Add legal sve predicate types
290 addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
291 addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
292 addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
293 addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
294
295 // Add legal sve data types
296 addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
297 addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
298 addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
299 addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
300
301 addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
302 addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
303 addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
304 addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
305 addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
306 addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
307
308 if (Subtarget->hasBF16()) {
309 addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
310 addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
311 addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
312 }
313
314 if (Subtarget->useSVEForFixedLengthVectors()) {
315 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
316 if (useSVEForFixedLengthVectorVT(VT))
317 addRegisterClass(VT, &AArch64::ZPRRegClass);
318
319 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
320 if (useSVEForFixedLengthVectorVT(VT))
321 addRegisterClass(VT, &AArch64::ZPRRegClass);
322 }
323
324 for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
325 setOperationAction(ISD::SADDSAT, VT, Legal);
326 setOperationAction(ISD::UADDSAT, VT, Legal);
327 setOperationAction(ISD::SSUBSAT, VT, Legal);
328 setOperationAction(ISD::USUBSAT, VT, Legal);
329 setOperationAction(ISD::UREM, VT, Expand);
330 setOperationAction(ISD::SREM, VT, Expand);
331 setOperationAction(ISD::SDIVREM, VT, Expand);
332 setOperationAction(ISD::UDIVREM, VT, Expand);
333 }
334
335 for (auto VT :
336 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
337 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
338 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
339
340 for (auto VT :
341 { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
342 MVT::nxv2f64 }) {
343 setCondCodeAction(ISD::SETO, VT, Expand);
344 setCondCodeAction(ISD::SETOLT, VT, Expand);
345 setCondCodeAction(ISD::SETLT, VT, Expand);
346 setCondCodeAction(ISD::SETOLE, VT, Expand);
347 setCondCodeAction(ISD::SETLE, VT, Expand);
348 setCondCodeAction(ISD::SETULT, VT, Expand);
349 setCondCodeAction(ISD::SETULE, VT, Expand);
350 setCondCodeAction(ISD::SETUGE, VT, Expand);
351 setCondCodeAction(ISD::SETUGT, VT, Expand);
352 setCondCodeAction(ISD::SETUEQ, VT, Expand);
353 setCondCodeAction(ISD::SETUNE, VT, Expand);
354
355 setOperationAction(ISD::FREM, VT, Expand);
356 setOperationAction(ISD::FPOW, VT, Expand);
357 setOperationAction(ISD::FPOWI, VT, Expand);
358 setOperationAction(ISD::FCOS, VT, Expand);
359 setOperationAction(ISD::FSIN, VT, Expand);
360 setOperationAction(ISD::FSINCOS, VT, Expand);
361 setOperationAction(ISD::FEXP, VT, Expand);
362 setOperationAction(ISD::FEXP2, VT, Expand);
363 setOperationAction(ISD::FLOG, VT, Expand);
364 setOperationAction(ISD::FLOG2, VT, Expand);
365 setOperationAction(ISD::FLOG10, VT, Expand);
366 }
367 }
368
369 // Compute derived properties from the register classes
370 computeRegisterProperties(Subtarget->getRegisterInfo());
371
372 // Provide all sorts of operation actions
373 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
374 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
375 setOperationAction(ISD::SETCC, MVT::i32, Custom);
376 setOperationAction(ISD::SETCC, MVT::i64, Custom);
377 setOperationAction(ISD::SETCC, MVT::f16, Custom);
378 setOperationAction(ISD::SETCC, MVT::f32, Custom);
379 setOperationAction(ISD::SETCC, MVT::f64, Custom);
380 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
381 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
382 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
383 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
384 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
385 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
386 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
387 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
388 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
389 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
390 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
391 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
392 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
393 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
394 setOperationAction(ISD::SELECT, MVT::i32, Custom);
395 setOperationAction(ISD::SELECT, MVT::i64, Custom);
396 setOperationAction(ISD::SELECT, MVT::f16, Custom);
397 setOperationAction(ISD::SELECT, MVT::f32, Custom);
398 setOperationAction(ISD::SELECT, MVT::f64, Custom);
399 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
400 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
401 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
402 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
403 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
404 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
405 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
406
407 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
408 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
409 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
410
411 setOperationAction(ISD::FREM, MVT::f32, Expand);
412 setOperationAction(ISD::FREM, MVT::f64, Expand);
413 setOperationAction(ISD::FREM, MVT::f80, Expand);
414
415 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
416
417 // Custom lowering hooks are needed for XOR
418 // to fold it into CSINC/CSINV.
419 setOperationAction(ISD::XOR, MVT::i32, Custom);
420 setOperationAction(ISD::XOR, MVT::i64, Custom);
421
422 // Virtually no operation on f128 is legal, but LLVM can't expand them when
423 // there's a valid register class, so we need custom operations in most cases.
424 setOperationAction(ISD::FABS, MVT::f128, Expand);
425 setOperationAction(ISD::FADD, MVT::f128, LibCall);
426 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
427 setOperationAction(ISD::FCOS, MVT::f128, Expand);
428 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
429 setOperationAction(ISD::FMA, MVT::f128, Expand);
430 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
431 setOperationAction(ISD::FNEG, MVT::f128, Expand);
432 setOperationAction(ISD::FPOW, MVT::f128, Expand);
433 setOperationAction(ISD::FREM, MVT::f128, Expand);
434 setOperationAction(ISD::FRINT, MVT::f128, Expand);
435 setOperationAction(ISD::FSIN, MVT::f128, Expand);
436 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
437 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
438 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
439 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
440 setOperationAction(ISD::SETCC, MVT::f128, Custom);
441 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
442 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
443 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
444 setOperationAction(ISD::SELECT, MVT::f128, Custom);
445 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
446 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
447
448 // Lowering for many of the conversions is actually specified by the non-f128
449 // type. The LowerXXX function will be trivial when f128 isn't involved.
450 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
451 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
452 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
453 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
454 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
455 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
456 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
457 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
458 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
459 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
460 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
461 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
462 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
463 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
464 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
465 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
466 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
467 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
468 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
469 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
470 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
471 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
472 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
473 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
474 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
475 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
476 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
477 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
478 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
479 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
480
481 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
482 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
483 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
484 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
485
486 // Variable arguments.
487 setOperationAction(ISD::VASTART, MVT::Other, Custom);
488 setOperationAction(ISD::VAARG, MVT::Other, Custom);
489 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
490 setOperationAction(ISD::VAEND, MVT::Other, Expand);
491
492 // Variable-sized objects.
493 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
494 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
495
496 if (Subtarget->isTargetWindows())
497 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
498 else
499 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
500
501 // Constant pool entries
502 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
503
504 // BlockAddress
505 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
506
507 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
508 setOperationAction(ISD::ADDC, MVT::i32, Custom);
509 setOperationAction(ISD::ADDE, MVT::i32, Custom);
510 setOperationAction(ISD::SUBC, MVT::i32, Custom);
511 setOperationAction(ISD::SUBE, MVT::i32, Custom);
512 setOperationAction(ISD::ADDC, MVT::i64, Custom);
513 setOperationAction(ISD::ADDE, MVT::i64, Custom);
514 setOperationAction(ISD::SUBC, MVT::i64, Custom);
515 setOperationAction(ISD::SUBE, MVT::i64, Custom);
516
517 // AArch64 lacks both left-rotate and popcount instructions.
518 setOperationAction(ISD::ROTL, MVT::i32, Expand);
519 setOperationAction(ISD::ROTL, MVT::i64, Expand);
520 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
521 setOperationAction(ISD::ROTL, VT, Expand);
522 setOperationAction(ISD::ROTR, VT, Expand);
523 }
524
525 // AArch64 doesn't have i32 MULH{S|U}.
526 setOperationAction(ISD::MULHU, MVT::i32, Expand);
527 setOperationAction(ISD::MULHS, MVT::i32, Expand);
528
529 // AArch64 doesn't have {U|S}MUL_LOHI.
530 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
531 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
532
533 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
534 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
535 setOperationAction(ISD::CTPOP, MVT::i128, Custom);
536
537 setOperationAction(ISD::ABS, MVT::i32, Custom);
538 setOperationAction(ISD::ABS, MVT::i64, Custom);
539
540 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
541 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
542 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
543 setOperationAction(ISD::SDIVREM, VT, Expand);
544 setOperationAction(ISD::UDIVREM, VT, Expand);
545 }
546 setOperationAction(ISD::SREM, MVT::i32, Expand);
547 setOperationAction(ISD::SREM, MVT::i64, Expand);
548 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
549 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
550 setOperationAction(ISD::UREM, MVT::i32, Expand);
551 setOperationAction(ISD::UREM, MVT::i64, Expand);
552
553 // Custom lower Add/Sub/Mul with overflow.
554 setOperationAction(ISD::SADDO, MVT::i32, Custom);
555 setOperationAction(ISD::SADDO, MVT::i64, Custom);
556 setOperationAction(ISD::UADDO, MVT::i32, Custom);
557 setOperationAction(ISD::UADDO, MVT::i64, Custom);
558 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
559 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
560 setOperationAction(ISD::USUBO, MVT::i32, Custom);
561 setOperationAction(ISD::USUBO, MVT::i64, Custom);
562 setOperationAction(ISD::SMULO, MVT::i32, Custom);
563 setOperationAction(ISD::SMULO, MVT::i64, Custom);
564 setOperationAction(ISD::UMULO, MVT::i32, Custom);
565 setOperationAction(ISD::UMULO, MVT::i64, Custom);
566
567 setOperationAction(ISD::FSIN, MVT::f32, Expand);
568 setOperationAction(ISD::FSIN, MVT::f64, Expand);
569 setOperationAction(ISD::FCOS, MVT::f32, Expand);
570 setOperationAction(ISD::FCOS, MVT::f64, Expand);
571 setOperationAction(ISD::FPOW, MVT::f32, Expand);
572 setOperationAction(ISD::FPOW, MVT::f64, Expand);
573 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
574 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
575 if (Subtarget->hasFullFP16())
576 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
577 else
578 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
579
580 setOperationAction(ISD::FREM, MVT::f16, Promote);
581 setOperationAction(ISD::FREM, MVT::v4f16, Expand);
582 setOperationAction(ISD::FREM, MVT::v8f16, Expand);
583 setOperationAction(ISD::FPOW, MVT::f16, Promote);
584 setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
585 setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
586 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
587 setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
588 setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
589 setOperationAction(ISD::FCOS, MVT::f16, Promote);
590 setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
591 setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
592 setOperationAction(ISD::FSIN, MVT::f16, Promote);
593 setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
594 setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
595 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
596 setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
597 setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
598 setOperationAction(ISD::FEXP, MVT::f16, Promote);
599 setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
600 setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
601 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
602 setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
603 setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
604 setOperationAction(ISD::FLOG, MVT::f16, Promote);
605 setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
606 setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
607 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
608 setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
609 setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
610 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
611 setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
612 setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
613
614 if (!Subtarget->hasFullFP16()) {
615 setOperationAction(ISD::SELECT, MVT::f16, Promote);
616 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
617 setOperationAction(ISD::SETCC, MVT::f16, Promote);
618 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
619 setOperationAction(ISD::FADD, MVT::f16, Promote);
620 setOperationAction(ISD::FSUB, MVT::f16, Promote);
621 setOperationAction(ISD::FMUL, MVT::f16, Promote);
622 setOperationAction(ISD::FDIV, MVT::f16, Promote);
623 setOperationAction(ISD::FMA, MVT::f16, Promote);
624 setOperationAction(ISD::FNEG, MVT::f16, Promote);
625 setOperationAction(ISD::FABS, MVT::f16, Promote);
626 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
627 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
628 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
629 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
630 setOperationAction(ISD::FRINT, MVT::f16, Promote);
631 setOperationAction(ISD::FROUND, MVT::f16, Promote);
632 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
633 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
634 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
635 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
636 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
637 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
638
639 // promote v4f16 to v4f32 when that is known to be safe.
640 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
641 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
642 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
643 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
644 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
645 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
646 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
647 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
648
649 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
650 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
651 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
652 setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
653 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
654 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
655 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
656 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
657 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
658 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
659 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
660 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
661 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
662 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
663 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
664 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
665
666 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
667 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
668 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
669 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
670 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
671 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
672 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
673 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
674 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
675 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
676 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
677 setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
678 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
679 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
680 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
681 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
682 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
683 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
684 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
685 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
686 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
687 }
688
689 // AArch64 has implementations of a lot of rounding-like FP operations.
690 for (MVT Ty : {MVT::f32, MVT::f64}) {
691 setOperationAction(ISD::FFLOOR, Ty, Legal);
692 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
693 setOperationAction(ISD::FCEIL, Ty, Legal);
694 setOperationAction(ISD::FRINT, Ty, Legal);
695 setOperationAction(ISD::FTRUNC, Ty, Legal);
696 setOperationAction(ISD::FROUND, Ty, Legal);
697 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
698 setOperationAction(ISD::FMINNUM, Ty, Legal);
699 setOperationAction(ISD::FMAXNUM, Ty, Legal);
700 setOperationAction(ISD::FMINIMUM, Ty, Legal);
701 setOperationAction(ISD::FMAXIMUM, Ty, Legal);
702 setOperationAction(ISD::LROUND, Ty, Legal);
703 setOperationAction(ISD::LLROUND, Ty, Legal);
704 setOperationAction(ISD::LRINT, Ty, Legal);
705 setOperationAction(ISD::LLRINT, Ty, Legal);
706 }
707
708 if (Subtarget->hasFullFP16()) {
709 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
710 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
711 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
712 setOperationAction(ISD::FRINT, MVT::f16, Legal);
713 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
714 setOperationAction(ISD::FROUND, MVT::f16, Legal);
715 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
716 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
717 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
718 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
719 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
720 }
721
722 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
723
724 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
725 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
726
727 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
728 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
729 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
730 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
731 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
732
733 // Generate outline atomics library calls only if LSE was not specified for
734 // subtarget
735 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
736 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
737 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
738 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
739 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
740 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
741 setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
742 setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
743 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
744 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
745 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
746 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
747 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
748 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
749 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
750 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
751 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
752 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
753 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
754 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
755 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
756 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
757 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
758 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
759 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
760 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
761#define LCALLNAMES(A, B, N) \
762 setLibcallName(A##N##_RELAX, #B #N "_relax"); \
763 setLibcallName(A##N##_ACQ, #B #N "_acq"); \
764 setLibcallName(A##N##_REL, #B #N "_rel"); \
765 setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
766#define LCALLNAME4(A, B) \
767 LCALLNAMES(A, B, 1) \
768 LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
769#define LCALLNAME5(A, B) \
770 LCALLNAMES(A, B, 1) \
771 LCALLNAMES(A, B, 2) \
772 LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
773 LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
774 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
775 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
776 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
777 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
778 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
779#undef LCALLNAMES
780#undef LCALLNAME4
781#undef LCALLNAME5
782 }
783
784 // 128-bit loads and stores can be done without expanding
785 setOperationAction(ISD::LOAD, MVT::i128, Custom);
786 setOperationAction(ISD::STORE, MVT::i128, Custom);
787
788 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
789 // custom lowering, as there are no un-paired non-temporal stores and
790 // legalization will break up 256 bit inputs.
791 setOperationAction(ISD::STORE, MVT::v32i8, Custom);
792 setOperationAction(ISD::STORE, MVT::v16i16, Custom);
793 setOperationAction(ISD::STORE, MVT::v16f16, Custom);
794 setOperationAction(ISD::STORE, MVT::v8i32, Custom);
795 setOperationAction(ISD::STORE, MVT::v8f32, Custom);
796 setOperationAction(ISD::STORE, MVT::v4f64, Custom);
797 setOperationAction(ISD::STORE, MVT::v4i64, Custom);
798
799 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
800 // This requires the Performance Monitors extension.
801 if (Subtarget->hasPerfMon())
802 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
803
804 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
805 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
806 // Issue __sincos_stret if available.
807 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
808 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
809 } else {
810 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
811 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
812 }
813
814 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
815 // MSVCRT doesn't have powi; fall back to pow
816 setLibcallName(RTLIB::POWI_F32, nullptr);
817 setLibcallName(RTLIB::POWI_F64, nullptr);
818 }
819
820 // Make floating-point constants legal for the large code model, so they don't
821 // become loads from the constant pool.
822 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
823 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
824 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
825 }
826
827 // AArch64 does not have floating-point extending loads, i1 sign-extending
828 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
829 for (MVT VT : MVT::fp_valuetypes()) {
830 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
831 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
832 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
833 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
834 }
835 for (MVT VT : MVT::integer_valuetypes())
836 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
837
838 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
839 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
840 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
841 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
842 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
843 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
844 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
845
846 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
847 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
848 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
849
850 // Indexed loads and stores are supported.
851 for (unsigned im = (unsigned)ISD::PRE_INC;
852 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
853 setIndexedLoadAction(im, MVT::i8, Legal);
854 setIndexedLoadAction(im, MVT::i16, Legal);
855 setIndexedLoadAction(im, MVT::i32, Legal);
856 setIndexedLoadAction(im, MVT::i64, Legal);
857 setIndexedLoadAction(im, MVT::f64, Legal);
858 setIndexedLoadAction(im, MVT::f32, Legal);
859 setIndexedLoadAction(im, MVT::f16, Legal);
860 setIndexedLoadAction(im, MVT::bf16, Legal);
861 setIndexedStoreAction(im, MVT::i8, Legal);
862 setIndexedStoreAction(im, MVT::i16, Legal);
863 setIndexedStoreAction(im, MVT::i32, Legal);
864 setIndexedStoreAction(im, MVT::i64, Legal);
865 setIndexedStoreAction(im, MVT::f64, Legal);
866 setIndexedStoreAction(im, MVT::f32, Legal);
867 setIndexedStoreAction(im, MVT::f16, Legal);
868 setIndexedStoreAction(im, MVT::bf16, Legal);
869 }
870
871 // Trap.
872 setOperationAction(ISD::TRAP, MVT::Other, Legal);
873 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
874 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
875
876 // We combine OR nodes for bitfield operations.
877 setTargetDAGCombine(ISD::OR);
878 // Try to create BICs for vector ANDs.
879 setTargetDAGCombine(ISD::AND);
880
881 // Vector add and sub nodes may conceal a high-half opportunity.
882 // Also, try to fold ADD into CSINC/CSINV..
883 setTargetDAGCombine(ISD::ADD);
884 setTargetDAGCombine(ISD::ABS);
885 setTargetDAGCombine(ISD::SUB);
886 setTargetDAGCombine(ISD::SRL);
887 setTargetDAGCombine(ISD::XOR);
888 setTargetDAGCombine(ISD::SINT_TO_FP);
889 setTargetDAGCombine(ISD::UINT_TO_FP);
890
891 // TODO: Do the same for FP_TO_*INT_SAT.
892 setTargetDAGCombine(ISD::FP_TO_SINT);
893 setTargetDAGCombine(ISD::FP_TO_UINT);
894 setTargetDAGCombine(ISD::FDIV);
895
896 // Try and combine setcc with csel
897 setTargetDAGCombine(ISD::SETCC);
898
899 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
900
901 setTargetDAGCombine(ISD::ANY_EXTEND);
902 setTargetDAGCombine(ISD::ZERO_EXTEND);
903 setTargetDAGCombine(ISD::SIGN_EXTEND);
904 setTargetDAGCombine(ISD::VECTOR_SPLICE);
905 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
906 setTargetDAGCombine(ISD::TRUNCATE);
907 setTargetDAGCombine(ISD::CONCAT_VECTORS);
908 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
909 setTargetDAGCombine(ISD::STORE);
910 if (Subtarget->supportsAddressTopByteIgnored())
911 setTargetDAGCombine(ISD::LOAD);
912
913 setTargetDAGCombine(ISD::MUL);
914
915 setTargetDAGCombine(ISD::SELECT);
916 setTargetDAGCombine(ISD::VSELECT);
917
918 setTargetDAGCombine(ISD::INTRINSIC_VOID);
919 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
920 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
921 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
922 setTargetDAGCombine(ISD::VECREDUCE_ADD);
923 setTargetDAGCombine(ISD::STEP_VECTOR);
924
925 setTargetDAGCombine(ISD::GlobalAddress);
926
927 // In case of strict alignment, avoid an excessive number of byte wide stores.
928 MaxStoresPerMemsetOptSize = 8;
929 MaxStoresPerMemset = Subtarget->requiresStrictAlign()
930 ? MaxStoresPerMemsetOptSize : 32;
931
932 MaxGluedStoresPerMemcpy = 4;
933 MaxStoresPerMemcpyOptSize = 4;
934 MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
935 ? MaxStoresPerMemcpyOptSize : 16;
936
937 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
938
939 MaxLoadsPerMemcmpOptSize = 4;
940 MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
941 ? MaxLoadsPerMemcmpOptSize : 8;
942
943 setStackPointerRegisterToSaveRestore(AArch64::SP);
944
945 setSchedulingPreference(Sched::Hybrid);
946
947 EnableExtLdPromotion = true;
948
949 // Set required alignment.
950 setMinFunctionAlignment(Align(4));
951 // Set preferred alignments.
952 setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
953 setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
954
955 // Only change the limit for entries in a jump table if specified by
956 // the sub target, but not at the command line.
957 unsigned MaxJT = STI.getMaximumJumpTableSize();
958 if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
959 setMaximumJumpTableSize(MaxJT);
960
961 setHasExtractBitsInsn(true);
962
963 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
964
965 if (Subtarget->hasNEON()) {
966 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
967 // silliness like this:
968 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
969 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
970 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
971 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
972 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
973 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
974 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
975 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
976 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
977 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
978 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
979 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
980 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
981 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
982 setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand);
983 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
984 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
985 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
986 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
987 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
988 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
989 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
990 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
991 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
992 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
993 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
994
995 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
996 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
997 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
998 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
999 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
1000
1001 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v1i64, Expand);
1002 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v1i64, Expand);
1003
1004 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
1005
1006 // AArch64 doesn't have a direct vector ->f32 conversion instructions for
1007 // elements smaller than i32, so promote the input to i32 first.
1008 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
1009 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
1010 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
1011 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
1012 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
1013 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
1014
1015 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
1016 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1017 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1018 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
1019 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
1020 // Or, direct i32 -> f16 vector conversion. Set it so custom, so the
1021 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
1022 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
1023 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1024
1025 if (Subtarget->hasFullFP16()) {
1026 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1027 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1028 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
1029 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1030 } else {
1031 // when AArch64 doesn't have fullfp16 support, promote the input
1032 // to i32 first.
1033 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
1034 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
1035 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
1036 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
1037 }
1038
1039 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
1040 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
1041 setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
1042 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
1043 setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
1044 setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
1045 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1046 setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
1047 for (auto VT : {MVT::v1i64, MVT::v2i64}) {
1048 setOperationAction(ISD::UMAX, VT, Custom);
1049 setOperationAction(ISD::SMAX, VT, Custom);
1050 setOperationAction(ISD::UMIN, VT, Custom);
1051 setOperationAction(ISD::SMIN, VT, Custom);
1052 }
1053
1054 // AArch64 doesn't have MUL.2d:
1055 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
1056 // Custom handling for some quad-vector types to detect MULL.
1057 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
1058 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1059 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1060
1061 // Saturates
1062 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1063 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1064 setOperationAction(ISD::SADDSAT, VT, Legal);
1065 setOperationAction(ISD::UADDSAT, VT, Legal);
1066 setOperationAction(ISD::SSUBSAT, VT, Legal);
1067 setOperationAction(ISD::USUBSAT, VT, Legal);
1068 }
1069
1070 for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1071 MVT::v4i32}) {
1072 setOperationAction(ISD::ABDS, VT, Legal);
1073 setOperationAction(ISD::ABDU, VT, Legal);
1074 }
1075
1076 // Vector reductions
1077 for (MVT VT : { MVT::v4f16, MVT::v2f32,
1078 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1079 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1080 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1081 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1082
1083 setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
1084 }
1085 }
1086 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1087 MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1088 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1089 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1090 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1091 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1092 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1093 }
1094 setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
1095
1096 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
1097 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
1098 // Likewise, narrowing and extending vector loads/stores aren't handled
1099 // directly.
1100 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1101 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
1102
1103 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
1104 setOperationAction(ISD::MULHS, VT, Legal);
1105 setOperationAction(ISD::MULHU, VT, Legal);
1106 } else {
1107 setOperationAction(ISD::MULHS, VT, Expand);
1108 setOperationAction(ISD::MULHU, VT, Expand);
1109 }
1110 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1111 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1112
1113 setOperationAction(ISD::BSWAP, VT, Expand);
1114 setOperationAction(ISD::CTTZ, VT, Expand);
1115
1116 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1117 setTruncStoreAction(VT, InnerVT, Expand);
1118 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1119 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1120 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1121 }
1122 }
1123
1124 // AArch64 has implementations of a lot of rounding-like FP operations.
1125 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
1126 setOperationAction(ISD::FFLOOR, Ty, Legal);
1127 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1128 setOperationAction(ISD::FCEIL, Ty, Legal);
1129 setOperationAction(ISD::FRINT, Ty, Legal);
1130 setOperationAction(ISD::FTRUNC, Ty, Legal);
1131 setOperationAction(ISD::FROUND, Ty, Legal);
1132 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
1133 }
1134
1135 if (Subtarget->hasFullFP16()) {
1136 for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
1137 setOperationAction(ISD::FFLOOR, Ty, Legal);
1138 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1139 setOperationAction(ISD::FCEIL, Ty, Legal);
1140 setOperationAction(ISD::FRINT, Ty, Legal);
1141 setOperationAction(ISD::FTRUNC, Ty, Legal);
1142 setOperationAction(ISD::FROUND, Ty, Legal);
1143 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
1144 }
1145 }
1146
1147 if (Subtarget->hasSVE())
1148 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
1149
1150 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
1151
1152 setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1153 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1154 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1155 setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1156 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1157 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1158 }
1159
1160 if (Subtarget->hasSVE()) {
1161 for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
1162 setOperationAction(ISD::BITREVERSE, VT, Custom);
1163 setOperationAction(ISD::BSWAP, VT, Custom);
1164 setOperationAction(ISD::CTLZ, VT, Custom);
1165 setOperationAction(ISD::CTPOP, VT, Custom);
1166 setOperationAction(ISD::CTTZ, VT, Custom);
1167 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1168 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1169 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1170 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1171 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1172 setOperationAction(ISD::MGATHER, VT, Custom);
1173 setOperationAction(ISD::MSCATTER, VT, Custom);
1174 setOperationAction(ISD::MLOAD, VT, Custom);
1175 setOperationAction(ISD::MUL, VT, Custom);
1176 setOperationAction(ISD::MULHS, VT, Custom);
1177 setOperationAction(ISD::MULHU, VT, Custom);
1178 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1179 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1180 setOperationAction(ISD::SELECT, VT, Custom);
1181 setOperationAction(ISD::SETCC, VT, Custom);
1182 setOperationAction(ISD::SDIV, VT, Custom);
1183 setOperationAction(ISD::UDIV, VT, Custom);
1184 setOperationAction(ISD::SMIN, VT, Custom);
1185 setOperationAction(ISD::UMIN, VT, Custom);
1186 setOperationAction(ISD::SMAX, VT, Custom);
1187 setOperationAction(ISD::UMAX, VT, Custom);
1188 setOperationAction(ISD::SHL, VT, Custom);
1189 setOperationAction(ISD::SRL, VT, Custom);
1190 setOperationAction(ISD::SRA, VT, Custom);
1191 setOperationAction(ISD::ABS, VT, Custom);
1192 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1193 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1194 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1195 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1196 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1197 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1198 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1199 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1200
1201 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1202 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1203 setOperationAction(ISD::SELECT_CC, VT, Expand);
1204 setOperationAction(ISD::ROTL, VT, Expand);
1205 setOperationAction(ISD::ROTR, VT, Expand);
1206 }
1207
1208 // Illegal unpacked integer vector types.
1209 for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
1210 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1211 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1212 }
1213
1214 // Legalize unpacked bitcasts to REINTERPRET_CAST.
1215 for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
1216 MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
1217 setOperationAction(ISD::BITCAST, VT, Custom);
1218
1219 for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
1220 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1221 setOperationAction(ISD::SELECT, VT, Custom);
1222 setOperationAction(ISD::SETCC, VT, Custom);
1223 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1224 setOperationAction(ISD::TRUNCATE, VT, Custom);
1225 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1226 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1227 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1228
1229 setOperationAction(ISD::SELECT_CC, VT, Expand);
1230 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1231 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1232
1233 // There are no legal MVT::nxv16f## based types.
1234 if (VT != MVT::nxv16i1) {
1235 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1236 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1237 }
1238 }
1239
1240 // NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
1241 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
1242 MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1243 MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1244 setOperationAction(ISD::MLOAD, VT, Custom);
1245 setOperationAction(ISD::MSTORE, VT, Custom);
1246 setOperationAction(ISD::MGATHER, VT, Custom);
1247 setOperationAction(ISD::MSCATTER, VT, Custom);
1248 }
1249
1250 for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
1251 for (MVT InnerVT : MVT::fp_scalable_vector_valuetypes()) {
1252 // Avoid marking truncating FP stores as legal to prevent the
1253 // DAGCombiner from creating unsupported truncating stores.
1254 setTruncStoreAction(VT, InnerVT, Expand);
1255 // SVE does not have floating-point extending loads.
1256 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1257 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1258 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1259 }
1260 }
1261
1262 // SVE supports truncating stores of 64- and 128-bit vectors.
1263 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
1264 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
1265 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
1266 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
1267 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
1268
1269 for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
1270 MVT::nxv4f32, MVT::nxv2f64}) {
1271 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1272 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1273 setOperationAction(ISD::MGATHER, VT, Custom);
1274 setOperationAction(ISD::MSCATTER, VT, Custom);
1275 setOperationAction(ISD::MLOAD, VT, Custom);
1276 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1277 setOperationAction(ISD::SELECT, VT, Custom);
1278 setOperationAction(ISD::FADD, VT, Custom);
1279 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1280 setOperationAction(ISD::FDIV, VT, Custom);
1281 setOperationAction(ISD::FMA, VT, Custom);
1282 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1283 setOperationAction(ISD::FMAXNUM, VT, Custom);
1284 setOperationAction(ISD::FMINIMUM, VT, Custom);
1285 setOperationAction(ISD::FMINNUM, VT, Custom);
1286 setOperationAction(ISD::FMUL, VT, Custom);
1287 setOperationAction(ISD::FNEG, VT, Custom);
1288 setOperationAction(ISD::FSUB, VT, Custom);
1289 setOperationAction(ISD::FCEIL, VT, Custom);
1290 setOperationAction(ISD::FFLOOR, VT, Custom);
1291 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1292 setOperationAction(ISD::FRINT, VT, Custom);
1293 setOperationAction(ISD::FROUND, VT, Custom);
1294 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1295 setOperationAction(ISD::FTRUNC, VT, Custom);
1296 setOperationAction(ISD::FSQRT, VT, Custom);
1297 setOperationAction(ISD::FABS, VT, Custom);
1298 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1299 setOperationAction(ISD::FP_ROUND, VT, Custom);
1300 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1301 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1302 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1303 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1304 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1305
1306 setOperationAction(ISD::SELECT_CC, VT, Expand);
1307 }
1308
1309 for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1310 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1311 setOperationAction(ISD::MGATHER, VT, Custom);
1312 setOperationAction(ISD::MSCATTER, VT, Custom);
1313 setOperationAction(ISD::MLOAD, VT, Custom);
1314 }
1315
1316 setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
1317
1318 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
1319 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
1320
1321 // NOTE: Currently this has to happen after computeRegisterProperties rather
1322 // than the preferred option of combining it with the addRegisterClass call.
1323 if (Subtarget->useSVEForFixedLengthVectors()) {
1324 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
1325 if (useSVEForFixedLengthVectorVT(VT))
1326 addTypeForFixedLengthSVE(VT);
1327 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
1328 if (useSVEForFixedLengthVectorVT(VT))
1329 addTypeForFixedLengthSVE(VT);
1330
1331 // 64-bit results can mean a bigger-than-NEON input.
1332 for (auto VT : {MVT::v8i8, MVT::v4i16})
1333 setOperationAction(ISD::TRUNCATE, VT, Custom);
1334 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
1335
1336 // 128-bit results imply a bigger-than-NEON input.
1337 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
1338 setOperationAction(ISD::TRUNCATE, VT, Custom);
1339 for (auto VT : {MVT::v8f16, MVT::v4f32})
1340 setOperationAction(ISD::FP_ROUND, VT, Custom);
1341
1342 // These operations are not supported on NEON but SVE can do them.
1343 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1344 setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
1345 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1346 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
1347 setOperationAction(ISD::MUL, MVT::v1i64, Custom);
1348 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1349 setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
1350 setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
1351 setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
1352 setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
1353 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
1354 setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
1355 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
1356 setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
1357 setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
1358 setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
1359 setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
1360 setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
1361 setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
1362 setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
1363 setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
1364 setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
1365 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
1366 setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
1367 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
1368 setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
1369 setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
1370 setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
1371 setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
1372 setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
1373 setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
1374 setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
1375 setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
1376 setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
1377 setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
1378 setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
1379 setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
1380 setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
1381
1382 // Int operations with no NEON support.
1383 for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1384 MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
1385 setOperationAction(ISD::BITREVERSE, VT, Custom);
1386 setOperationAction(ISD::CTTZ, VT, Custom);
1387 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1388 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1389 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1390 }
1391
1392 // FP operations with no NEON support.
1393 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
1394 MVT::v1f64, MVT::v2f64})
1395 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1396
1397 // Use SVE for vectors with more than 2 elements.
1398 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
1399 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1400 }
1401
1402 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64);
1403 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32);
1404 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16);
1405 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
1406 }
1407
1408 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
1409}
1410
1411void AArch64TargetLowering::addTypeForNEON(MVT VT) {
1412 assert(VT.isVector() && "VT should be a vector type");
1413
1414 if (VT.isFloatingPoint()) {
1415 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
1416 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
1417 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
1418 }
1419
1420 // Mark vector float intrinsics as expand.
1421 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
1422 setOperationAction(ISD::FSIN, VT, Expand);
1423 setOperationAction(ISD::FCOS, VT, Expand);
1424 setOperationAction(ISD::FPOW, VT, Expand);
1425 setOperationAction(ISD::FLOG, VT, Expand);
1426 setOperationAction(ISD::FLOG2, VT, Expand);
1427 setOperationAction(ISD::FLOG10, VT, Expand);
1428 setOperationAction(ISD::FEXP, VT, Expand);
1429 setOperationAction(ISD::FEXP2, VT, Expand);
1430 }
1431
1432 // But we do support custom-lowering for FCOPYSIGN.
1433 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
1434 ((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
1435 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1436
1437 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1438 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1439 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1440 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1441 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1442 setOperationAction(ISD::SRA, VT, Custom);
1443 setOperationAction(ISD::SRL, VT, Custom);
1444 setOperationAction(ISD::SHL, VT, Custom);
1445 setOperationAction(ISD::OR, VT, Custom);
1446 setOperationAction(ISD::SETCC, VT, Custom);
1447 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
1448
1449 setOperationAction(ISD::SELECT, VT, Expand);
1450 setOperationAction(ISD::SELECT_CC, VT, Expand);
1451 setOperationAction(ISD::VSELECT, VT, Expand);
1452 for (MVT InnerVT : MVT::all_valuetypes())
1453 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
1454
1455 // CNT supports only B element sizes; for wider elements, use UADDLP to widen.
1456 if (VT != MVT::v8i8 && VT != MVT::v16i8)
1457 setOperationAction(ISD::CTPOP, VT, Custom);
1458
1459 setOperationAction(ISD::UDIV, VT, Expand);
1460 setOperationAction(ISD::SDIV, VT, Expand);
1461 setOperationAction(ISD::UREM, VT, Expand);
1462 setOperationAction(ISD::SREM, VT, Expand);
1463 setOperationAction(ISD::FREM, VT, Expand);
1464
1465 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1466 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1467 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
1468 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
1469
1470 if (!VT.isFloatingPoint())
1471 setOperationAction(ISD::ABS, VT, Legal);
1472
1473 // [SU][MIN|MAX] are available for all NEON types apart from i64.
1474 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
1475 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
1476 setOperationAction(Opcode, VT, Legal);
1477
1478 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
1479 if (VT.isFloatingPoint() &&
1480 VT.getVectorElementType() != MVT::bf16 &&
1481 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
1482 for (unsigned Opcode :
1483 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
1484 setOperationAction(Opcode, VT, Legal);
1485
1486 if (Subtarget->isLittleEndian()) {
1487 for (unsigned im = (unsigned)ISD::PRE_INC;
1488 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1489 setIndexedLoadAction(im, VT, Legal);
1490 setIndexedStoreAction(im, VT, Legal);
1491 }
1492 }
1493}
1494
1495void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
1496 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
1497
1498 // By default everything must be expanded.
1499 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1500 setOperationAction(Op, VT, Expand);
1501
1502 // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
1503 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1504
1505 if (VT.isFloatingPoint()) {
1506 setCondCodeAction(ISD::SETO, VT, Expand);
1507 setCondCodeAction(ISD::SETOLT, VT, Expand);
1508 setCondCodeAction(ISD::SETLT, VT, Expand);
1509 setCondCodeAction(ISD::SETOLE, VT, Expand);
1510 setCondCodeAction(ISD::SETLE, VT, Expand);
1511 setCondCodeAction(ISD::SETULT, VT, Expand);
1512 setCondCodeAction(ISD::SETULE, VT, Expand);
1513 setCondCodeAction(ISD::SETUGE, VT, Expand);
1514 setCondCodeAction(ISD::SETUGT, VT, Expand);
1515 setCondCodeAction(ISD::SETUEQ, VT, Expand);
1516 setCondCodeAction(ISD::SETUNE, VT, Expand);
1517 }
1518
1519 // Mark integer truncating stores as having custom lowering
1520 if (VT.isInteger()) {
1521 MVT InnerVT = VT.changeVectorElementType(MVT::i8);
1522 while (InnerVT != VT) {
1523 setTruncStoreAction(VT, InnerVT, Custom);
1524 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom);
1525 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom);
1526 InnerVT = InnerVT.changeVectorElementType(
1527 MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
1528 }
1529 }
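// As an illustration of the loop above: if VT were v8i32, it would mark the
// v8i32->v8i8 and v8i32->v8i16 truncating stores, plus the matching
// SEXTLOAD/ZEXTLOAD combinations, as Custom, and stop once InnerVT reaches
// VT itself.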
1530
1531 // Lower fixed length vector operations to scalable equivalents.
1532 setOperationAction(ISD::ABS, VT, Custom);
1533 setOperationAction(ISD::ADD, VT, Custom);
1534 setOperationAction(ISD::AND, VT, Custom);
1535 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1536 setOperationAction(ISD::BITCAST, VT, Custom);
1537 setOperationAction(ISD::BITREVERSE, VT, Custom);
1538 setOperationAction(ISD::BSWAP, VT, Custom);
1539 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1540 setOperationAction(ISD::CTLZ, VT, Custom);
1541 setOperationAction(ISD::CTPOP, VT, Custom);
1542 setOperationAction(ISD::CTTZ, VT, Custom);
1543 setOperationAction(ISD::FABS, VT, Custom);
1544 setOperationAction(ISD::FADD, VT, Custom);
1545 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1546 setOperationAction(ISD::FCEIL, VT, Custom);
1547 setOperationAction(ISD::FDIV, VT, Custom);
1548 setOperationAction(ISD::FFLOOR, VT, Custom);
1549 setOperationAction(ISD::FMA, VT, Custom);
1550 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1551 setOperationAction(ISD::FMAXNUM, VT, Custom);
1552 setOperationAction(ISD::FMINIMUM, VT, Custom);
1553 setOperationAction(ISD::FMINNUM, VT, Custom);
1554 setOperationAction(ISD::FMUL, VT, Custom);
1555 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1556 setOperationAction(ISD::FNEG, VT, Custom);
1557 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1558 setOperationAction(ISD::FP_ROUND, VT, Custom);
1559 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1560 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1561 setOperationAction(ISD::FRINT, VT, Custom);
1562 setOperationAction(ISD::FROUND, VT, Custom);
1563 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1564 setOperationAction(ISD::FSQRT, VT, Custom);
1565 setOperationAction(ISD::FSUB, VT, Custom);
1566 setOperationAction(ISD::FTRUNC, VT, Custom);
1567 setOperationAction(ISD::LOAD, VT, Custom);
1568 setOperationAction(ISD::MGATHER, VT, Custom);
1569 setOperationAction(ISD::MLOAD, VT, Custom);
1570 setOperationAction(ISD::MSCATTER, VT, Custom);
1571 setOperationAction(ISD::MSTORE, VT, Custom);
1572 setOperationAction(ISD::MUL, VT, Custom);
1573 setOperationAction(ISD::MULHS, VT, Custom);
1574 setOperationAction(ISD::MULHU, VT, Custom);
1575 setOperationAction(ISD::OR, VT, Custom);
1576 setOperationAction(ISD::SDIV, VT, Custom);
1577 setOperationAction(ISD::SELECT, VT, Custom);
1578 setOperationAction(ISD::SETCC, VT, Custom);
1579 setOperationAction(ISD::SHL, VT, Custom);
1580 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1581 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1582 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1583 setOperationAction(ISD::SMAX, VT, Custom);
1584 setOperationAction(ISD::SMIN, VT, Custom);
1585 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1586 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1587 setOperationAction(ISD::SRA, VT, Custom);
1588 setOperationAction(ISD::SRL, VT, Custom);
1589 setOperationAction(ISD::STORE, VT, Custom);
1590 setOperationAction(ISD::SUB, VT, Custom);
1591 setOperationAction(ISD::TRUNCATE, VT, Custom);
1592 setOperationAction(ISD::UDIV, VT, Custom);
1593 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1594 setOperationAction(ISD::UMAX, VT, Custom);
1595 setOperationAction(ISD::UMIN, VT, Custom);
1596 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1597 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1598 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1599 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1600 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1601 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1602 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1603 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1604 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1605 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1606 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1607 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1608 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1609 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1610 setOperationAction(ISD::VSELECT, VT, Custom);
1611 setOperationAction(ISD::XOR, VT, Custom);
1612 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1613}
1614
1615void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
1616 addRegisterClass(VT, &AArch64::FPR64RegClass);
1617 addTypeForNEON(VT);
1618}
1619
1620void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
1621 addRegisterClass(VT, &AArch64::FPR128RegClass);
1622 addTypeForNEON(VT);
1623}
1624
1625EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
1626 LLVMContext &C, EVT VT) const {
1627 if (!VT.isVector())
1628 return MVT::i32;
1629 if (VT.isScalableVector())
1630 return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
1631 return VT.changeVectorElementTypeToInteger();
1632}
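// For example, following the rules above: a scalar i64 compare produces an
// i32 result, a NEON v4f32 compare produces v4i32, and a scalable nxv4i32
// compare produces the predicate type nxv4i1.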
1633
1634static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
1635 const APInt &Demanded,
1636 TargetLowering::TargetLoweringOpt &TLO,
1637 unsigned NewOpc) {
1638 uint64_t OldImm = Imm, NewImm, Enc;
1639 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
1640
1641 // Return if the immediate is already all zeros, all ones, a bimm32 or a
1642 // bimm64.
1643 if (Imm == 0 || Imm == Mask ||
1644 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
1645 return false;
1646
1647 unsigned EltSize = Size;
1648 uint64_t DemandedBits = Demanded.getZExtValue();
1649
1650 // Clear bits that are not demanded.
1651 Imm &= DemandedBits;
1652
1653 while (true) {
1654 // The goal here is to set the non-demanded bits in a way that minimizes
1655 // the number of switching between 0 and 1. In order to achieve this goal,
1656 // we set the non-demanded bits to the value of the preceding demanded bits.
1657 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
1658 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
1659 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
1660 // The final result is 0b11000011.
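    // A step-by-step illustration of the computation below, scaled down to an
    // 8-bit element for readability: with DemandedBits = 0b01100101 and
    // Imm = 0b01000001 (the x10xx0x1 example above),
    //   NonDemandedBits = 0b10011010
    //   InvertedImm     = 0b00100100
    //   RotatedImm      = 0b00001000
    //   Sum             = 0b10100010 (Carry = 0)
    //   Ones            = 0b10000010
    // giving NewImm = (Imm | Ones) & Mask = 0b11000011, as described above.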
1661 uint64_t NonDemandedBits = ~DemandedBits;
1662 uint64_t InvertedImm = ~Imm & DemandedBits;
1663 uint64_t RotatedImm =
1664 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
1665 NonDemandedBits;
1666 uint64_t Sum = RotatedImm + NonDemandedBits;
1667 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
1668 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
1669 NewImm = (Imm | Ones) & Mask;
1670
1671 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
1672 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
1673 // we halve the element size and continue the search.
1674 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
1675 break;
1676
1677 // We cannot shrink the element size any further if it is 2-bits.
1678 if (EltSize == 2)
1679 return false;
1680
1681 EltSize /= 2;
1682 Mask >>= EltSize;
1683 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
1684
1685 // Return if there is a mismatch in any of the demanded bits of Imm and Hi.
1686 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
1687 return false;
1688
1689 // Merge the upper and lower halves of Imm and DemandedBits.
1690 Imm |= Hi;
1691 DemandedBits |= DemandedBitsHi;
1692 }
1693
1694 ++NumOptimizedImms;
1695
1696 // Replicate the element across the register width.
1697 while (EltSize < Size) {
1698 NewImm |= NewImm << EltSize;
1699 EltSize *= 2;
1700 }
1701
1702 (void)OldImm;
1703 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
1704        "demanded bits should never be altered");
1705 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
1706
1707 // Create the new constant immediate node.
1708 EVT VT = Op.getValueType();
1709 SDLoc DL(Op);
1710 SDValue New;
1711
1712 // If the new constant immediate is all-zeros or all-ones, let the target
1713 // independent DAG combine optimize this node.
1714 if (NewImm == 0 || NewImm == OrigMask) {
1715 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
1716 TLO.DAG.getConstant(NewImm, DL, VT));
1717 // Otherwise, create a machine node so that target independent DAG combine
1718 // doesn't undo this optimization.
1719 } else {
1720 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
1721 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
1722 New = SDValue(
1723 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
1724 }
1725
1726 return TLO.CombineTo(Op, New);
1727}
1728
1729bool AArch64TargetLowering::targetShrinkDemandedConstant(
1730 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1731 TargetLoweringOpt &TLO) const {
1732 // Delay this optimization to as late as possible.
1733 if (!TLO.LegalOps)
1734 return false;
1735
1736 if (!EnableOptimizeLogicalImm)
1737 return false;
1738
1739 EVT VT = Op.getValueType();
1740 if (VT.isVector())
1741 return false;
1742
1743 unsigned Size = VT.getSizeInBits();
1744 assert((Size == 32 || Size == 64) &&
1745        "i32 or i64 is expected after legalization.");
1746
1747 // Exit early if we demand all bits.
1748 if (DemandedBits.countPopulation() == Size)
1749 return false;
1750
1751 unsigned NewOpc;
1752 switch (Op.getOpcode()) {
1753 default:
1754 return false;
1755 case ISD::AND:
1756 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
1757 break;
1758 case ISD::OR:
1759 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1760 break;
1761 case ISD::XOR:
1762 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1763 break;
1764 }
1765 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1766 if (!C)
1767 return false;
1768 uint64_t Imm = C->getZExtValue();
1769 return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
1770}
1771
1772/// computeKnownBitsForTargetNode - Determine which of the bits specified in
1773 /// Mask are known to be either zero or one and return them in Known.
1774void AArch64TargetLowering::computeKnownBitsForTargetNode(
1775 const SDValue Op, KnownBits &Known,
1776 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
1777 switch (Op.getOpcode()) {
1778 default:
1779 break;
1780 case AArch64ISD::CSEL: {
1781 KnownBits Known2;
1782 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1783 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1784 Known = KnownBits::commonBits(Known, Known2);
1785 break;
1786 }
1787 case AArch64ISD::LOADgot:
1788 case AArch64ISD::ADDlow: {
1789 if (!Subtarget->isTargetILP32())
1790 break;
1791 // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
1792 Known.Zero = APInt::getHighBitsSet(64, 32);
1793 break;
1794 }
1795 case ISD::INTRINSIC_W_CHAIN: {
1796 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1797 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1798 switch (IntID) {
1799 default: return;
1800 case Intrinsic::aarch64_ldaxr:
1801 case Intrinsic::aarch64_ldxr: {
1802 unsigned BitWidth = Known.getBitWidth();
1803 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1804 unsigned MemBits = VT.getScalarSizeInBits();
1805 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1806 return;
1807 }
1808 }
1809 break;
1810 }
1811 case ISD::INTRINSIC_WO_CHAIN:
1812 case ISD::INTRINSIC_VOID: {
1813 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1814 switch (IntNo) {
1815 default:
1816 break;
1817 case Intrinsic::aarch64_neon_umaxv:
1818 case Intrinsic::aarch64_neon_uminv: {
1819 // Figure out the datatype of the vector operand. The UMINV instruction
1820 // will zero extend the result, so we can mark as known zero all the
1821 // bits larger than the element datatype. 32-bit or larger doesn't need
1822 // this as those are legal types and will be handled by isel directly.
1823 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1824 unsigned BitWidth = Known.getBitWidth();
1825 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1826 assert(BitWidth >= 8 && "Unexpected width!");
1827 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1828 Known.Zero |= Mask;
1829 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1830 assert(BitWidth >= 16 && "Unexpected width!");
1831 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1832 Known.Zero |= Mask;
1833 }
1834 break;
1835 } break;
1836 }
1837 }
1838 }
1839}
1840
1841MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1842 EVT) const {
1843 return MVT::i64;
1844}
1845
1846bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1847 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1848 bool *Fast) const {
1849 if (Subtarget->requiresStrictAlign())
1850 return false;
1851
1852 if (Fast) {
1853 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1854 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1855 // See comments in performSTORECombine() for more details about
1856 // these conditions.
1857
1858 // Code that uses clang vector extensions can mark that it
1859 // wants unaligned accesses to be treated as fast by
1860 // underspecifying alignment to be 1 or 2.
1861 Alignment <= 2 ||
1862
1863 // Disregard v2i64. Memcpy lowering produces those and splitting
1864 // them regresses performance on micro-benchmarks and olden/bh.
1865 VT == MVT::v2i64;
1866 }
1867 return true;
1868}
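// In other words, with the logic above an access is reported as fast unless
// all of the following hold: the CPU is slow on misaligned 128-bit stores,
// the access is 16 bytes wide, its stated alignment is greater than 2, and
// the type is not v2i64.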
1869
1870// Same as above but handling LLTs instead.
1871bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1872 LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1873 bool *Fast) const {
1874 if (Subtarget->requiresStrictAlign())
1875 return false;
1876
1877 if (Fast) {
1878 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1879 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
1880 Ty.getSizeInBytes() != 16 ||
1881 // See comments in performSTORECombine() for more details about
1882 // these conditions.
1883
1884 // Code that uses clang vector extensions can mark that it
1885 // wants unaligned accesses to be treated as fast by
1886 // underspecifying alignment to be 1 or 2.
1887 Alignment <= 2 ||
1888
1889 // Disregard v2i64. Memcpy lowering produces those and splitting
1890 // them regresses performance on micro-benchmarks and olden/bh.
1891 Ty == LLT::fixed_vector(2, 64);
1892 }
1893 return true;
1894}
1895
1896FastISel *
1897AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1898 const TargetLibraryInfo *libInfo) const {
1899 return AArch64::createFastISel(funcInfo, libInfo);
1900}
1901
1902const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1903#define MAKE_CASE(V) \
1904 case V: \
1905 return #V;
1906 switch ((AArch64ISD::NodeType)Opcode) {
1907 case AArch64ISD::FIRST_NUMBER:
1908 break;
1909 MAKE_CASE(AArch64ISD::CALL)
1910 MAKE_CASE(AArch64ISD::ADRP)
1911 MAKE_CASE(AArch64ISD::ADR)
1912 MAKE_CASE(AArch64ISD::ADDlow)
1913 MAKE_CASE(AArch64ISD::LOADgot)
1914 MAKE_CASE(AArch64ISD::RET_FLAG)
1915 MAKE_CASE(AArch64ISD::BRCOND)
1916 MAKE_CASE(AArch64ISD::CSEL)
1917 MAKE_CASE(AArch64ISD::CSINV)
1918 MAKE_CASE(AArch64ISD::CSNEG)
1919 MAKE_CASE(AArch64ISD::CSINC)
1920 MAKE_CASE(AArch64ISD::THREAD_POINTER)
1921 MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
1922 MAKE_CASE(AArch64ISD::ADD_PRED)
1923 MAKE_CASE(AArch64ISD::MUL_PRED)
1924 MAKE_CASE(AArch64ISD::MULHS_PRED)
1925 MAKE_CASE(AArch64ISD::MULHU_PRED)
1926 MAKE_CASE(AArch64ISD::SDIV_PRED)
1927 MAKE_CASE(AArch64ISD::SHL_PRED)
1928 MAKE_CASE(AArch64ISD::SMAX_PRED)
1929 MAKE_CASE(AArch64ISD::SMIN_PRED)
1930 MAKE_CASE(AArch64ISD::SRA_PRED)
1931 MAKE_CASE(AArch64ISD::SRL_PRED)
1932 MAKE_CASE(AArch64ISD::SUB_PRED)
1933 MAKE_CASE(AArch64ISD::UDIV_PRED)
1934 MAKE_CASE(AArch64ISD::UMAX_PRED)
1935 MAKE_CASE(AArch64ISD::UMIN_PRED)
1936 MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
1937 MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
1938 MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
1939 MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
1940 MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
1941 MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
1942 MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
1943 MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
1944 MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
1945 MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
1946 MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
1947 MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
1948 MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
1949 MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
1950 MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
1951 MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
1952 MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
1953 MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
1954 MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
1955 MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
1956 MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
1957 MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
1958 MAKE_CASE(AArch64ISD::ADC)
1959 MAKE_CASE(AArch64ISD::SBC)
1960 MAKE_CASE(AArch64ISD::ADDS)
1961 MAKE_CASE(AArch64ISD::SUBS)
1962 MAKE_CASE(AArch64ISD::ADCS)
1963 MAKE_CASE(AArch64ISD::SBCS)
1964 MAKE_CASE(AArch64ISD::ANDS)
1965 MAKE_CASE(AArch64ISD::CCMP)
1966 MAKE_CASE(AArch64ISD::CCMN)
1967 MAKE_CASE(AArch64ISD::FCCMP)
1968 MAKE_CASE(AArch64ISD::FCMP)
1969 MAKE_CASE(AArch64ISD::STRICT_FCMP)
1970 MAKE_CASE(AArch64ISD::STRICT_FCMPE)
1971 MAKE_CASE(AArch64ISD::DUP)
1972 MAKE_CASE(AArch64ISD::DUPLANE8)
1973 MAKE_CASE(AArch64ISD::DUPLANE16)
1974 MAKE_CASE(AArch64ISD::DUPLANE32)
1975 MAKE_CASE(AArch64ISD::DUPLANE64)
1976 MAKE_CASE(AArch64ISD::MOVI)
1977 MAKE_CASE(AArch64ISD::MOVIshift)
1978 MAKE_CASE(AArch64ISD::MOVIedit)
1979 MAKE_CASE(AArch64ISD::MOVImsl)
1980 MAKE_CASE(AArch64ISD::FMOV)
1981 MAKE_CASE(AArch64ISD::MVNIshift)
1982 MAKE_CASE(AArch64ISD::MVNImsl)
1983 MAKE_CASE(AArch64ISD::BICi)
1984 MAKE_CASE(AArch64ISD::ORRi)
1985 MAKE_CASE(AArch64ISD::BSP)
1986 MAKE_CASE(AArch64ISD::EXTR)
1987 MAKE_CASE(AArch64ISD::ZIP1)
1988 MAKE_CASE(AArch64ISD::ZIP2)
1989 MAKE_CASE(AArch64ISD::UZP1)
1990 MAKE_CASE(AArch64ISD::UZP2)
1991 MAKE_CASE(AArch64ISD::TRN1)
1992 MAKE_CASE(AArch64ISD::TRN2)
1993 MAKE_CASE(AArch64ISD::REV16)
1994 MAKE_CASE(AArch64ISD::REV32)
1995 MAKE_CASE(AArch64ISD::REV64)
1996 MAKE_CASE(AArch64ISD::EXT)
1997 MAKE_CASE(AArch64ISD::SPLICE)
1998 MAKE_CASE(AArch64ISD::VSHL)
1999 MAKE_CASE(AArch64ISD::VLSHR)
2000 MAKE_CASE(AArch64ISD::VASHR)
2001 MAKE_CASE(AArch64ISD::VSLI)
2002 MAKE_CASE(AArch64ISD::VSRI)
2003 MAKE_CASE(AArch64ISD::CMEQ)
2004 MAKE_CASE(AArch64ISD::CMGE)
2005 MAKE_CASE(AArch64ISD::CMGT)
2006 MAKE_CASE(AArch64ISD::CMHI)
2007 MAKE_CASE(AArch64ISD::CMHS)
2008 MAKE_CASE(AArch64ISD::FCMEQ)
2009 MAKE_CASE(AArch64ISD::FCMGE)
2010 MAKE_CASE(AArch64ISD::FCMGT)
2011 MAKE_CASE(AArch64ISD::CMEQz)
2012 MAKE_CASE(AArch64ISD::CMGEz)
2013 MAKE_CASE(AArch64ISD::CMGTz)
2014 MAKE_CASE(AArch64ISD::CMLEz)
2015 MAKE_CASE(AArch64ISD::CMLTz)
2016 MAKE_CASE(AArch64ISD::FCMEQz)
2017 MAKE_CASE(AArch64ISD::FCMGEz)
2018 MAKE_CASE(AArch64ISD::FCMGTz)
2019 MAKE_CASE(AArch64ISD::FCMLEz)
2020 MAKE_CASE(AArch64ISD::FCMLTz)
2021 MAKE_CASE(AArch64ISD::SADDV)
2022 MAKE_CASE(AArch64ISD::UADDV)
2023 MAKE_CASE(AArch64ISD::SRHADD)
2024 MAKE_CASE(AArch64ISD::URHADD)
2025 MAKE_CASE(AArch64ISD::SHADD)
2026 MAKE_CASE(AArch64ISD::UHADD)
2027 MAKE_CASE(AArch64ISD::SDOT)
2028 MAKE_CASE(AArch64ISD::UDOT)
2029 MAKE_CASE(AArch64ISD::SMINV)
2030 MAKE_CASE(AArch64ISD::UMINV)
2031 MAKE_CASE(AArch64ISD::SMAXV)
2032 MAKE_CASE(AArch64ISD::UMAXV)
2033 MAKE_CASE(AArch64ISD::SADDV_PRED)
2034 MAKE_CASE(AArch64ISD::UADDV_PRED)
2035 MAKE_CASE(AArch64ISD::SMAXV_PRED)
2036 MAKE_CASE(AArch64ISD::UMAXV_PRED)
2037 MAKE_CASE(AArch64ISD::SMINV_PRED)
2038 MAKE_CASE(AArch64ISD::UMINV_PRED)
2039 MAKE_CASE(AArch64ISD::ORV_PRED)
2040 MAKE_CASE(AArch64ISD::EORV_PRED)
2041 MAKE_CASE(AArch64ISD::ANDV_PRED)
2042 MAKE_CASE(AArch64ISD::CLASTA_N)
2043 MAKE_CASE(AArch64ISD::CLASTB_N)
2044 MAKE_CASE(AArch64ISD::LASTA)
2045 MAKE_CASE(AArch64ISD::LASTB)
2046 MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
2047 MAKE_CASE(AArch64ISD::LS64_BUILD)
2048 MAKE_CASE(AArch64ISD::LS64_EXTRACT)
2049 MAKE_CASE(AArch64ISD::TBL)
2050 MAKE_CASE(AArch64ISD::FADD_PRED)
2051 MAKE_CASE(AArch64ISD::FADDA_PRED)
2052 MAKE_CASE(AArch64ISD::FADDV_PRED)
2053 MAKE_CASE(AArch64ISD::FDIV_PRED)
2054 MAKE_CASE(AArch64ISD::FMA_PRED)
2055 MAKE_CASE(AArch64ISD::FMAX_PRED)
2056 MAKE_CASE(AArch64ISD::FMAXV_PRED)
2057 MAKE_CASE(AArch64ISD::FMAXNM_PRED)
2058 MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
2059 MAKE_CASE(AArch64ISD::FMIN_PRED)
2060 MAKE_CASE(AArch64ISD::FMINV_PRED)
2061 MAKE_CASE(AArch64ISD::FMINNM_PRED)
2062 MAKE_CASE(AArch64ISD::FMINNMV_PRED)
2063 MAKE_CASE(AArch64ISD::FMUL_PRED)
2064 MAKE_CASE(AArch64ISD::FSUB_PRED)
2065 MAKE_CASE(AArch64ISD::BIC)
2066 MAKE_CASE(AArch64ISD::BIT)
2067 MAKE_CASE(AArch64ISD::CBZ)
2068 MAKE_CASE(AArch64ISD::CBNZ)
2069 MAKE_CASE(AArch64ISD::TBZ)
2070 MAKE_CASE(AArch64ISD::TBNZ)
2071 MAKE_CASE(AArch64ISD::TC_RETURN)
2072 MAKE_CASE(AArch64ISD::PREFETCH)
2073 MAKE_CASE(AArch64ISD::SITOF)
2074 MAKE_CASE(AArch64ISD::UITOF)
2075 MAKE_CASE(AArch64ISD::NVCAST)
2076 MAKE_CASE(AArch64ISD::MRS)
2077 MAKE_CASE(AArch64ISD::SQSHL_I)
2078 MAKE_CASE(AArch64ISD::UQSHL_I)
2079 MAKE_CASE(AArch64ISD::SRSHR_I)
2080 MAKE_CASE(AArch64ISD::URSHR_I)
2081 MAKE_CASE(AArch64ISD::SQSHLU_I)
2082 MAKE_CASE(AArch64ISD::WrapperLarge)
2083 MAKE_CASE(AArch64ISD::LD2post)
2084 MAKE_CASE(AArch64ISD::LD3post)
2085 MAKE_CASE(AArch64ISD::LD4post)
2086 MAKE_CASE(AArch64ISD::ST2post)
2087 MAKE_CASE(AArch64ISD::ST3post)
2088 MAKE_CASE(AArch64ISD::ST4post)
2089 MAKE_CASE(AArch64ISD::LD1x2post)
2090 MAKE_CASE(AArch64ISD::LD1x3post)
2091 MAKE_CASE(AArch64ISD::LD1x4post)
2092 MAKE_CASE(AArch64ISD::ST1x2post)
2093 MAKE_CASE(AArch64ISD::ST1x3post)
2094 MAKE_CASE(AArch64ISD::ST1x4post)
2095 MAKE_CASE(AArch64ISD::LD1DUPpost)
2096 MAKE_CASE(AArch64ISD::LD2DUPpost)
2097 MAKE_CASE(AArch64ISD::LD3DUPpost)
2098 MAKE_CASE(AArch64ISD::LD4DUPpost)
2099 MAKE_CASE(AArch64ISD::LD1LANEpost)
2100 MAKE_CASE(AArch64ISD::LD2LANEpost)
2101 MAKE_CASE(AArch64ISD::LD3LANEpost)
2102 MAKE_CASE(AArch64ISD::LD4LANEpost)
2103 MAKE_CASE(AArch64ISD::ST2LANEpost)
2104 MAKE_CASE(AArch64ISD::ST3LANEpost)
2105 MAKE_CASE(AArch64ISD::ST4LANEpost)
2106 MAKE_CASE(AArch64ISD::SMULL)
2107 MAKE_CASE(AArch64ISD::UMULL)
2108 MAKE_CASE(AArch64ISD::FRECPE)
2109 MAKE_CASE(AArch64ISD::FRECPS)
2110 MAKE_CASE(AArch64ISD::FRSQRTE)
2111 MAKE_CASE(AArch64ISD::FRSQRTS)
2112 MAKE_CASE(AArch64ISD::STG)
2113 MAKE_CASE(AArch64ISD::STZG)
2114 MAKE_CASE(AArch64ISD::ST2G)
2115 MAKE_CASE(AArch64ISD::STZ2G)
2116 MAKE_CASE(AArch64ISD::SUNPKHI)
2117 MAKE_CASE(AArch64ISD::SUNPKLO)
2118 MAKE_CASE(AArch64ISD::UUNPKHI)
2119 MAKE_CASE(AArch64ISD::UUNPKLO)
2120 MAKE_CASE(AArch64ISD::INSR)
2121 MAKE_CASE(AArch64ISD::PTEST)
2122 MAKE_CASE(AArch64ISD::PTRUE)
2123 MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
2124 MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
2125 MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
2126 MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
2127 MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
2128 MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
2129 MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
2130 MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
2131 MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
2132 MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
2133 MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
2134 MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
2135 MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
2136 MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
2137 MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
2138 MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
2139 MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
2140 MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
2141 MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
2142 MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
2143 MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
2144 MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
2145 MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
2146 MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
2147 MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
2148 MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
2149 MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
2150 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
2151 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
2152 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
2153 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
2154 MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
2155 MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
2156 MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
2157 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
2158 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
2159 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
2160 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
2161 MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
2162 MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
2163 MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
2164 MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
2165 MAKE_CASE(AArch64ISD::ST1_PRED)
2166 MAKE_CASE(AArch64ISD::SST1_PRED)
2167 MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
2168 MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
2169 MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
2170 MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
2171 MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
2172 MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
2173 MAKE_CASE(AArch64ISD::SSTNT1_PRED)
2174 MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
2175 MAKE_CASE(AArch64ISD::LDP)
2176 MAKE_CASE(AArch64ISD::STP)
2177 MAKE_CASE(AArch64ISD::STNP)
2178 MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
2179 MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
2180 MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
2181 MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
2182 MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
2183 MAKE_CASE(AArch64ISD::INDEX_VECTOR)
2184 MAKE_CASE(AArch64ISD::UADDLP)
2185 MAKE_CASE(AArch64ISD::CALL_RVMARKER)
2186 }
2187#undef MAKE_CASE
2188 return nullptr;
2189}
2190
2191MachineBasicBlock *
2192AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
2193 MachineBasicBlock *MBB) const {
2194 // We materialise the F128CSEL pseudo-instruction as some control flow and a
2195 // phi node:
2196
2197 // OrigBB:
2198 // [... previous instrs leading to comparison ...]
2199 // b.ne TrueBB
2200 // b EndBB
2201 // TrueBB:
2202 // ; Fallthrough
2203 // EndBB:
2204 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
2205
2206 MachineFunction *MF = MBB->getParent();
2207 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2208 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2209 DebugLoc DL = MI.getDebugLoc();
2210 MachineFunction::iterator It = ++MBB->getIterator();
2211
2212 Register DestReg = MI.getOperand(0).getReg();
2213 Register IfTrueReg = MI.getOperand(1).getReg();
2214 Register IfFalseReg = MI.getOperand(2).getReg();
2215 unsigned CondCode = MI.getOperand(3).getImm();
2216 bool NZCVKilled = MI.getOperand(4).isKill();
2217
2218 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
2219 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
2220 MF->insert(It, TrueBB);
2221 MF->insert(It, EndBB);
2222
2223 // Transfer the rest of the current basic block to EndBB.
2224 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
2225 MBB->end());
2226 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
2227
2228 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
2229 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
2230 MBB->addSuccessor(TrueBB);
2231 MBB->addSuccessor(EndBB);
2232
2233 // TrueBB falls through to the end.
2234 TrueBB->addSuccessor(EndBB);
2235
2236 if (!NZCVKilled) {
2237 TrueBB->addLiveIn(AArch64::NZCV);
2238 EndBB->addLiveIn(AArch64::NZCV);
2239 }
2240
2241 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
2242 .addReg(IfTrueReg)
2243 .addMBB(TrueBB)
2244 .addReg(IfFalseReg)
2245 .addMBB(MBB);
2246
2247 MI.eraseFromParent();
2248 return EndBB;
2249}
2250
2251MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
2252 MachineInstr &MI, MachineBasicBlock *BB) const {
2253 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
2254            BB->getParent()->getFunction().getPersonalityFn())) &&
2255        "SEH does not use catchret!");
2256 return BB;
2257}
2258
2259MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
2260 MachineInstr &MI, MachineBasicBlock *BB) const {
2261 switch (MI.getOpcode()) {
2262 default:
2263#ifndef NDEBUG
2264 MI.dump();
2265#endif
2266 llvm_unreachable("Unexpected instruction for custom inserter!")::llvm::llvm_unreachable_internal("Unexpected instruction for custom inserter!"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2266)
;
2267
2268 case AArch64::F128CSEL:
2269 return EmitF128CSEL(MI, BB);
2270
2271 case TargetOpcode::STACKMAP:
2272 case TargetOpcode::PATCHPOINT:
2273 case TargetOpcode::STATEPOINT:
2274 return emitPatchPoint(MI, BB);
2275
2276 case AArch64::CATCHRET:
2277 return EmitLoweredCatchRet(MI, BB);
2278 }
2279}
2280
2281//===----------------------------------------------------------------------===//
2282// AArch64 Lowering private implementation.
2283//===----------------------------------------------------------------------===//
2284
2285//===----------------------------------------------------------------------===//
2286// Lowering Code
2287//===----------------------------------------------------------------------===//
2288
2289// Forward declarations of SVE fixed length lowering helpers
2290static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT);
2291static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
2292static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
2293static SDValue convertFixedMaskToScalableVector(SDValue Mask,
2294 SelectionDAG &DAG);
2295
2296/// isZerosVector - Check whether SDNode N is a zero-filled vector.
2297static bool isZerosVector(const SDNode *N) {
2298 // Look through a bit convert.
2299 while (N->getOpcode() == ISD::BITCAST)
2300 N = N->getOperand(0).getNode();
2301
2302 if (ISD::isConstantSplatVectorAllZeros(N))
2303 return true;
2304
2305 if (N->getOpcode() != AArch64ISD::DUP)
2306 return false;
2307
2308 auto Opnd0 = N->getOperand(0);
2309 auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
2310 auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
2311 return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero());
2312}
2313
2314/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
2315/// CC
2316static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
2317 switch (CC) {
2318 default:
2319 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2319)
;
2320 case ISD::SETNE:
2321 return AArch64CC::NE;
2322 case ISD::SETEQ:
2323 return AArch64CC::EQ;
2324 case ISD::SETGT:
2325 return AArch64CC::GT;
2326 case ISD::SETGE:
2327 return AArch64CC::GE;
2328 case ISD::SETLT:
2329 return AArch64CC::LT;
2330 case ISD::SETLE:
2331 return AArch64CC::LE;
2332 case ISD::SETUGT:
2333 return AArch64CC::HI;
2334 case ISD::SETUGE:
2335 return AArch64CC::HS;
2336 case ISD::SETULT:
2337 return AArch64CC::LO;
2338 case ISD::SETULE:
2339 return AArch64CC::LS;
2340 }
2341}
2342
2343/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
2344static void changeFPCCToAArch64CC(ISD::CondCode CC,
2345 AArch64CC::CondCode &CondCode,
2346 AArch64CC::CondCode &CondCode2) {
2347 CondCode2 = AArch64CC::AL;
2348 switch (CC) {
2349 default:
2350 llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2350)
;
2351 case ISD::SETEQ:
2352 case ISD::SETOEQ:
2353 CondCode = AArch64CC::EQ;
2354 break;
2355 case ISD::SETGT:
2356 case ISD::SETOGT:
2357 CondCode = AArch64CC::GT;
2358 break;
2359 case ISD::SETGE:
2360 case ISD::SETOGE:
2361 CondCode = AArch64CC::GE;
2362 break;
2363 case ISD::SETOLT:
2364 CondCode = AArch64CC::MI;
2365 break;
2366 case ISD::SETOLE:
2367 CondCode = AArch64CC::LS;
2368 break;
2369 case ISD::SETONE:
2370 CondCode = AArch64CC::MI;
2371 CondCode2 = AArch64CC::GT;
2372 break;
2373 case ISD::SETO:
2374 CondCode = AArch64CC::VC;
2375 break;
2376 case ISD::SETUO:
2377 CondCode = AArch64CC::VS;
2378 break;
2379 case ISD::SETUEQ:
2380 CondCode = AArch64CC::EQ;
2381 CondCode2 = AArch64CC::VS;
2382 break;
2383 case ISD::SETUGT:
2384 CondCode = AArch64CC::HI;
2385 break;
2386 case ISD::SETUGE:
2387 CondCode = AArch64CC::PL;
2388 break;
2389 case ISD::SETLT:
2390 case ISD::SETULT:
2391 CondCode = AArch64CC::LT;
2392 break;
2393 case ISD::SETLE:
2394 case ISD::SETULE:
2395 CondCode = AArch64CC::LE;
2396 break;
2397 case ISD::SETNE:
2398 case ISD::SETUNE:
2399 CondCode = AArch64CC::NE;
2400 break;
2401 }
2402}
2403
2404/// Convert a DAG fp condition code to an AArch64 CC.
2405/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
2406/// should be AND'ed instead of OR'ed.
2407static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
2408 AArch64CC::CondCode &CondCode,
2409 AArch64CC::CondCode &CondCode2) {
2410 CondCode2 = AArch64CC::AL;
2411 switch (CC) {
2412 default:
2413 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2414 assert(CondCode2 == AArch64CC::AL);
2415 break;
2416 case ISD::SETONE:
2417 // (a one b)
2418 // == ((a olt b) || (a ogt b))
2419 // == ((a ord b) && (a une b))
2420 CondCode = AArch64CC::VC;
2421 CondCode2 = AArch64CC::NE;
2422 break;
2423 case ISD::SETUEQ:
2424 // (a ueq b)
2425 // == ((a uno b) || (a oeq b))
2426 // == ((a ule b) && (a uge b))
2427 CondCode = AArch64CC::PL;
2428 CondCode2 = AArch64CC::LE;
2429 break;
2430 }
2431}
2432
2433/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
2434/// CC usable with the vector instructions. Fewer operations are available
2435/// without a real NZCV register, so we have to use less efficient combinations
2436/// to get the same effect.
2437static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
2438 AArch64CC::CondCode &CondCode,
2439 AArch64CC::CondCode &CondCode2,
2440 bool &Invert) {
2441 Invert = false;
2442 switch (CC) {
2443 default:
2444 // Mostly the scalar mappings work fine.
2445 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2446 break;
2447 case ISD::SETUO:
2448 Invert = true;
2449    LLVM_FALLTHROUGH;
2450 case ISD::SETO:
2451 CondCode = AArch64CC::MI;
2452 CondCode2 = AArch64CC::GE;
2453 break;
2454 case ISD::SETUEQ:
2455 case ISD::SETULT:
2456 case ISD::SETULE:
2457 case ISD::SETUGT:
2458 case ISD::SETUGE:
2459 // All of the compare-mask comparisons are ordered, but we can switch
2460 // between the two by a double inversion. E.g. ULE == !OGT.
2461 Invert = true;
2462 changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
2463 CondCode, CondCode2);
2464 break;
2465 }
2466}
2467
2468static bool isLegalArithImmed(uint64_t C) {
2469 // Matches AArch64DAGToDAGISel::SelectArithImmed().
2470 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
2471  LLVM_DEBUG(dbgs() << "Is imm " << C
2472                    << " legal: " << (IsLegal ? "yes\n" : "no\n"));
2473 return IsLegal;
2474}
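For reference, the same immediate test can be exercised on its own; a minimal sketch assuming nothing beyond the expression above:

#include <cassert>
#include <cstdint>

// Same check as above: a 12-bit immediate, or a 12-bit immediate shifted left by 12.
static bool isLegalArithImmedSketch(uint64_t C) {
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}

int main() {
  assert(isLegalArithImmedSketch(4095));      // #0xfff
  assert(isLegalArithImmedSketch(0x123000));  // #0x123, LSL #12
  assert(!isLegalArithImmedSketch(0x123456)); // needs both chunks -> not encodable
  return 0;
}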
2475
2476// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
2477// the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
2478// can be set differently by this operation. It comes down to whether
2479// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are, then
2480// everything is fine; if not, the optimization is wrong. Thus general
2481// comparisons are only valid if op2 != 0.
2482//
2483// So, finally, the only LLVM-native comparisons that don't mention C and V
2484// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
2485// the absence of information about op2.
2486static bool isCMN(SDValue Op, ISD::CondCode CC) {
2487 return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
2488 (CC == ISD::SETEQ || CC == ISD::SETNE);
2489}
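To see why only SETEQ/SETNE are safe here, a self-contained sketch (the flag formulas are the standard AArch64 definitions; this is illustrative, not LLVM code) showing that CMP x, #0 and CMN x, #0 agree on N and Z but not on C:

#include <cassert>
#include <cstdint>

struct NZCV { bool N, Z, C, V; };

// Flags of SUBS a, b (i.e. CMP a, b) for 32-bit operands.
static NZCV subs(uint32_t a, uint32_t b) {
  uint32_t r = a - b;
  return {int32_t(r) < 0, r == 0, a >= b,
          ((a ^ b) & (a ^ r) & 0x80000000u) != 0};
}

// Flags of ADDS a, b (i.e. CMN a, b) for 32-bit operands.
static NZCV adds(uint32_t a, uint32_t b) {
  uint32_t r = a + b;
  return {int32_t(r) < 0, r == 0, r < a,
          (~(a ^ b) & (a ^ r) & 0x80000000u) != 0};
}

int main() {
  // With op2 == 0, CMP op1, -op2 and CMN op1, op2 compute the same sum,
  // so N and Z agree ...
  NZCV Cmp = subs(5, 0), Cmn = adds(5, 0);
  assert(Cmp.N == Cmn.N && Cmp.Z == Cmn.Z);
  // ... but C does not: subtracting 0 always sets C, adding 0 never does.
  // Hence only EQ/NE (which read just Z) are safe without knowing op2 != 0.
  assert(Cmp.C && !Cmn.C);
  return 0;
}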
2490
2491static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
2492 SelectionDAG &DAG, SDValue Chain,
2493 bool IsSignaling) {
2494 EVT VT = LHS.getValueType();
2495  assert(VT != MVT::f128);
2496  assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
2497 unsigned Opcode =
2498 IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
2499 return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
2500}
2501
2502static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2503 const SDLoc &dl, SelectionDAG &DAG) {
2504 EVT VT = LHS.getValueType();
2505 const bool FullFP16 =
2506 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2507
2508 if (VT.isFloatingPoint()) {
2509    assert(VT != MVT::f128);
2510 if (VT == MVT::f16 && !FullFP16) {
2511 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
2512 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
2513 VT = MVT::f32;
2514 }
2515 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
2516 }
2517
2518 // The CMP instruction is just an alias for SUBS, and representing it as
2519 // SUBS means that it's possible to get CSE with subtract operations.
2520 // A later phase can perform the optimization of setting the destination
2521 // register to WZR/XZR if it ends up being unused.
2522 unsigned Opcode = AArch64ISD::SUBS;
2523
2524 if (isCMN(RHS, CC)) {
2525    // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
2526 Opcode = AArch64ISD::ADDS;
2527 RHS = RHS.getOperand(1);
2528 } else if (isCMN(LHS, CC)) {
2529    // As we are looking for EQ/NE compares, the operands can be commuted; can
2530    // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
2531 Opcode = AArch64ISD::ADDS;
2532 LHS = LHS.getOperand(1);
2533 } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
2534 if (LHS.getOpcode() == ISD::AND) {
2535 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
2536 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
2537 // of the signed comparisons.
2538 const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
2539 DAG.getVTList(VT, MVT_CC),
2540 LHS.getOperand(0),
2541 LHS.getOperand(1));
2542 // Replace all users of (and X, Y) with newly generated (ands X, Y)
2543 DAG.ReplaceAllUsesWith(LHS, ANDSNode);
2544 return ANDSNode.getValue(1);
2545 } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
2546 // Use result of ANDS
2547 return LHS.getValue(1);
2548 }
2549 }
2550
2551 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
2552 .getValue(1);
2553}
2554
2555/// \defgroup AArch64CCMP CMP;CCMP matching
2556///
2557/// These functions deal with the formation of CMP;CCMP;... sequences.
2558/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
2559/// a comparison. They set the NZCV flags to a predefined value if their
2560/// predicate is false. This allows expressing arbitrary conjunctions, for
2561/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
2562/// expressed as:
2563/// cmp A
2564/// ccmp B, inv(CB), CA
2565/// check for CB flags
2566///
2567/// This naturally lets us implement chains of AND operations with SETCC
2568/// operands. And we can even implement some other situations by transforming
2569/// them:
2570/// - We can implement (NEG SETCC) i.e. negating a single comparison by
2571///   negating the flags used in a CCMP/FCCMP operation.
2572/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
2573/// by negating the flags we test for afterwards. i.e.
2574/// NEG (CMP CCMP CCCMP ...) can be implemented.
2575/// - Note that we can only ever negate all previously processed results.
2576///   What we cannot implement by flipping the flags to test is a negation
2577/// of two sub-trees (because the negation affects all sub-trees emitted so
2578/// far, so the 2nd sub-tree we emit would also affect the first).
2579/// With those tools we can implement some OR operations:
2580/// - (OR (SETCC A) (SETCC B)) can be implemented via:
2581/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
2582/// - After transforming OR to NEG/AND combinations we may be able to use NEG
2583/// elimination rules from earlier to implement the whole thing as a
2584/// CCMP/FCCMP chain.
2585///
2586/// As a complete example:
2587///     or (or (setCA (cmp A)) (setCB (cmp B)))
2588///        (and (setCC (cmp C)) (setCD (cmp D)))
2589/// can be reassociated to:
2590///     or (and (setCC (cmp C)) (setCD (cmp D)))
2591///        (or (setCA (cmp A)) (setCB (cmp B)))
2592/// can be transformed to:
2593///     not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
2594///              (and (not (setCA (cmp A))) (not (setCB (cmp B)))))
2595/// which can be implemented as:
2596/// cmp C
2597/// ccmp D, inv(CD), CC
2598/// ccmp A, CA, inv(CD)
2599/// ccmp B, CB, inv(CA)
2600/// check for CB flags
2601///
2602/// A counterexample is "or (and A B) (and C D)" which translates to
2603/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))); we
2604/// can only implement one of the inner (not) operations, but not both!
2605/// @{
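To make these sequences concrete, a hand-derived pair of examples (a sketch based on emitConjunctionRec/emitConditionalComparison below; actual register allocation and NZCV immediates may differ in real output):

int both_zero(int a, int b) {    //   cmp  w1, #0           ; RHS leaf first
  return a == 0 && b == 0;       //   ccmp w0, #0, #0, eq   ; else force "ne"
}                                //   cset w0, eq
int either_zero(int a, int b) {  //   cmp  w1, #0
  return a == 0 || b == 0;       //   ccmp w0, #0, #4, ne   ; else force Z=1
}                                //   cset w0, eq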
2606
2607/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
2608static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
2609 ISD::CondCode CC, SDValue CCOp,
2610 AArch64CC::CondCode Predicate,
2611 AArch64CC::CondCode OutCC,
2612 const SDLoc &DL, SelectionDAG &DAG) {
2613 unsigned Opcode = 0;
2614 const bool FullFP16 =
2615 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2616
2617 if (LHS.getValueType().isFloatingPoint()) {
2618    assert(LHS.getValueType() != MVT::f128);
2619 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
2620 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
2621 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
2622 }
2623 Opcode = AArch64ISD::FCCMP;
2624 } else if (RHS.getOpcode() == ISD::SUB) {
2625 SDValue SubOp0 = RHS.getOperand(0);
2626 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2627 // See emitComparison() on why we can only do this for SETEQ and SETNE.
2628 Opcode = AArch64ISD::CCMN;
2629 RHS = RHS.getOperand(1);
2630 }
2631 }
2632 if (Opcode == 0)
2633 Opcode = AArch64ISD::CCMP;
2634
2635 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
2636 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
2637 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
2638 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
2639 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
2640}
2641
2642/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
2643/// expressed as a conjunction. See \ref AArch64CCMP.
2644/// \param CanNegate Set to true if we can negate the whole sub-tree just by
2645/// changing the conditions on the SETCC tests.
2646/// (this means we can call emitConjunctionRec() with
2647/// Negate==true on this sub-tree)
2648/// \param MustBeFirst Set to true if this subtree needs to be negated and we
2649/// cannot do the negation naturally. We are required to
2650/// emit the subtree first in this case.
2651/// \param WillNegate Is true if we are called when the result of this
2652/// subexpression must be negated. This happens when the
2653/// outer expression is an OR. We can use this fact to know
2654/// that we have a double negation (or (or ...) ...) that
2655/// can be implemented for free.
2656static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
2657 bool &MustBeFirst, bool WillNegate,
2658 unsigned Depth = 0) {
2659 if (!Val.hasOneUse())
2660 return false;
2661 unsigned Opcode = Val->getOpcode();
2662 if (Opcode == ISD::SETCC) {
2663 if (Val->getOperand(0).getValueType() == MVT::f128)
2664 return false;
2665 CanNegate = true;
2666 MustBeFirst = false;
2667 return true;
2668 }
2669 // Protect against exponential runtime and stack overflow.
2670 if (Depth > 6)
2671 return false;
2672 if (Opcode == ISD::AND || Opcode == ISD::OR) {
2673 bool IsOR = Opcode == ISD::OR;
2674 SDValue O0 = Val->getOperand(0);
2675 SDValue O1 = Val->getOperand(1);
2676 bool CanNegateL;
2677 bool MustBeFirstL;
2678 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
2679 return false;
2680 bool CanNegateR;
2681 bool MustBeFirstR;
2682 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
2683 return false;
2684
2685 if (MustBeFirstL && MustBeFirstR)
2686 return false;
2687
2688 if (IsOR) {
2689 // For an OR expression we need to be able to naturally negate at least
2690 // one side or we cannot do the transformation at all.
2691 if (!CanNegateL && !CanNegateR)
2692 return false;
2693      // If the result of the OR will be negated and we can naturally negate
2694      // the leaves, then this sub-tree as a whole negates naturally.
2695 CanNegate = WillNegate && CanNegateL && CanNegateR;
2696 // If we cannot naturally negate the whole sub-tree, then this must be
2697 // emitted first.
2698 MustBeFirst = !CanNegate;
2699 } else {
2700      assert(Opcode == ISD::AND && "Must be OR or AND");
2701 // We cannot naturally negate an AND operation.
2702 CanNegate = false;
2703 MustBeFirst = MustBeFirstL || MustBeFirstR;
2704 }
2705 return true;
2706 }
2707 return false;
2708}
2709
2710/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
2711/// of CCMP/FCCMP ops. See @ref AArch64CCMP.
2712/// Tries to transform the given i1 producing node @p Val to a series of compare
2713/// and conditional compare operations. @returns an NZCV flags producing node
2714/// and sets @p OutCC to the flags that should be tested, or returns SDValue() if
2715/// the transformation was not possible.
2716/// \p Negate is true if we want this sub-tree to be negated just by changing
2717/// SETCC conditions.
2718static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
2719 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
2720 AArch64CC::CondCode Predicate) {
2721  // If we're at a tree leaf, produce a conditional comparison operation.
2722 unsigned Opcode = Val->getOpcode();
2723 if (Opcode == ISD::SETCC) {
2724 SDValue LHS = Val->getOperand(0);
2725 SDValue RHS = Val->getOperand(1);
2726 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
2727 bool isInteger = LHS.getValueType().isInteger();
2728 if (Negate)
2729 CC = getSetCCInverse(CC, LHS.getValueType());
2730 SDLoc DL(Val);
2731 // Determine OutCC and handle FP special case.
2732 if (isInteger) {
2733 OutCC = changeIntCCToAArch64CC(CC);
2734 } else {
2735      assert(LHS.getValueType().isFloatingPoint());
2736 AArch64CC::CondCode ExtraCC;
2737 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
2738 // Some floating point conditions can't be tested with a single condition
2739 // code. Construct an additional comparison in this case.
2740 if (ExtraCC != AArch64CC::AL) {
2741 SDValue ExtraCmp;
2742 if (!CCOp.getNode())
2743 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
2744 else
2745 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
2746 ExtraCC, DL, DAG);
2747 CCOp = ExtraCmp;
2748 Predicate = ExtraCC;
2749 }
2750 }
2751
2752 // Produce a normal comparison if we are first in the chain
2753 if (!CCOp)
2754 return emitComparison(LHS, RHS, CC, DL, DAG);
2755 // Otherwise produce a ccmp.
2756 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
2757 DAG);
2758 }
2759  assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
2760
2761 bool IsOR = Opcode == ISD::OR;
2762
2763 SDValue LHS = Val->getOperand(0);
2764 bool CanNegateL;
2765 bool MustBeFirstL;
2766 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
2767  assert(ValidL && "Valid conjunction/disjunction tree");
2768 (void)ValidL;
2769
2770 SDValue RHS = Val->getOperand(1);
2771 bool CanNegateR;
2772 bool MustBeFirstR;
2773 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
2774  assert(ValidR && "Valid conjunction/disjunction tree");
2775 (void)ValidR;
2776
2777 // Swap sub-tree that must come first to the right side.
2778 if (MustBeFirstL) {
2779    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
2780 std::swap(LHS, RHS);
2781 std::swap(CanNegateL, CanNegateR);
2782 std::swap(MustBeFirstL, MustBeFirstR);
2783 }
2784
2785 bool NegateR;
2786 bool NegateAfterR;
2787 bool NegateL;
2788 bool NegateAfterAll;
2789 if (Opcode == ISD::OR) {
2790 // Swap the sub-tree that we can negate naturally to the left.
2791 if (!CanNegateL) {
2792      assert(CanNegateR && "at least one side must be negatable");
2793      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
2794      assert(!Negate);
2795 std::swap(LHS, RHS);
2796 NegateR = false;
2797 NegateAfterR = true;
2798 } else {
2799 // Negate the left sub-tree if possible, otherwise negate the result.
2800 NegateR = CanNegateR;
2801 NegateAfterR = !CanNegateR;
2802 }
2803 NegateL = true;
2804 NegateAfterAll = !Negate;
2805 } else {
2806    assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
2807    assert(!Negate && "Valid conjunction/disjunction tree");
2808
2809 NegateL = false;
2810 NegateR = false;
2811 NegateAfterR = false;
2812 NegateAfterAll = false;
2813 }
2814
2815 // Emit sub-trees.
2816 AArch64CC::CondCode RHSCC;
2817 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
2818 if (NegateAfterR)
2819 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
2820 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
2821 if (NegateAfterAll)
2822 OutCC = AArch64CC::getInvertedCondCode(OutCC);
2823 return CmpL;
2824}
2825
2826/// Emit expression as a conjunction (a series of CCMP/FCCMP ops).
2827/// In some cases this is even possible with OR operations in the expression.
2828/// See \ref AArch64CCMP.
2829/// \see emitConjunctionRec().
2830static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
2831 AArch64CC::CondCode &OutCC) {
2832 bool DummyCanNegate;
2833 bool DummyMustBeFirst;
2834 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
2835 return SDValue();
2836
2837 return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
2838}
2839
2840/// @}
2841
2842/// Returns how profitable it is to fold a comparison's operand's shift and/or
2843/// extension operations.
2844static unsigned getCmpOperandFoldingProfit(SDValue Op) {
2845 auto isSupportedExtend = [&](SDValue V) {
2846 if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
2847 return true;
2848
2849 if (V.getOpcode() == ISD::AND)
2850 if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
2851 uint64_t Mask = MaskCst->getZExtValue();
2852 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
2853 }
2854
2855 return false;
2856 };
2857
2858 if (!Op.hasOneUse())
2859 return 0;
2860
2861 if (isSupportedExtend(Op))
2862 return 1;
2863
2864 unsigned Opc = Op.getOpcode();
2865 if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
2866 if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2867 uint64_t Shift = ShiftCst->getZExtValue();
2868 if (isSupportedExtend(Op.getOperand(0)))
2869 return (Shift <= 4) ? 2 : 1;
2870 EVT VT = Op.getValueType();
2871 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
2872 return 1;
2873 }
2874
2875 return 0;
2876}
2877
2878static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2879 SDValue &AArch64cc, SelectionDAG &DAG,
2880 const SDLoc &dl) {
2881 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2882 EVT VT = RHS.getValueType();
2883 uint64_t C = RHSC->getZExtValue();
2884 if (!isLegalArithImmed(C)) {
2885 // Constant does not fit, try adjusting it by one?
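      // Illustrative example (not in the original source): for SETLT with
      // C == 0x1001 the immediate cannot be encoded, but SETLE with
      // C - 1 == 0x1000 (#1, LSL #12) can, so the condition and the constant
      // are adjusted together.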
2886 switch (CC) {
2887 default:
2888 break;
2889 case ISD::SETLT:
2890 case ISD::SETGE:
2891 if ((VT == MVT::i32 && C != 0x80000000 &&
2892 isLegalArithImmed((uint32_t)(C - 1))) ||
2893 (VT == MVT::i64 && C != 0x80000000ULL &&
2894 isLegalArithImmed(C - 1ULL))) {
2895 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2896 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2897 RHS = DAG.getConstant(C, dl, VT);
2898 }
2899 break;
2900 case ISD::SETULT:
2901 case ISD::SETUGE:
2902 if ((VT == MVT::i32 && C != 0 &&
2903 isLegalArithImmed((uint32_t)(C - 1))) ||
2904 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
2905 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2906 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2907 RHS = DAG.getConstant(C, dl, VT);
2908 }
2909 break;
2910 case ISD::SETLE:
2911 case ISD::SETGT:
2912        if ((VT == MVT::i32 && C != INT32_MAX &&
2913 isLegalArithImmed((uint32_t)(C + 1))) ||
2914            (VT == MVT::i64 && C != INT64_MAX &&
2915 isLegalArithImmed(C + 1ULL))) {
2916 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2917 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2918 RHS = DAG.getConstant(C, dl, VT);
2919 }
2920 break;
2921 case ISD::SETULE:
2922 case ISD::SETUGT:
2923        if ((VT == MVT::i32 && C != UINT32_MAX &&
2924 isLegalArithImmed((uint32_t)(C + 1))) ||
2925            (VT == MVT::i64 && C != UINT64_MAX &&
2926 isLegalArithImmed(C + 1ULL))) {
2927 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2928 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2929 RHS = DAG.getConstant(C, dl, VT);
2930 }
2931 break;
2932 }
2933 }
2934 }
2935
2936 // Comparisons are canonicalized so that the RHS operand is simpler than the
2937 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
2938 // can fold some shift+extend operations on the RHS operand, so swap the
2939 // operands if that can be done.
2940 //
2941 // For example:
2942 // lsl w13, w11, #1
2943 // cmp w13, w12
2944 // can be turned into:
2945 // cmp w12, w11, lsl #1
2946 if (!isa<ConstantSDNode>(RHS) ||
2947 !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
2948 SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
2949
2950 if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
2951 std::swap(LHS, RHS);
2952 CC = ISD::getSetCCSwappedOperands(CC);
2953 }
2954 }
2955
2956 SDValue Cmp;
2957 AArch64CC::CondCode AArch64CC;
2958 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
2959 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
2960
2961 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
2962 // For the i8 operand, the largest immediate is 255, so this can be easily
2963 // encoded in the compare instruction. For the i16 operand, however, the
2964 // largest immediate cannot be encoded in the compare.
2965 // Therefore, use a sign extending load and cmn to avoid materializing the
2966 // -1 constant. For example,
2967 // movz w1, #65535
2968 // ldrh w0, [x0, #0]
2969 // cmp w0, w1
2970 // >
2971 // ldrsh w0, [x0, #0]
2972 // cmn w0, #1
2973    // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
2974 // if and only if (sext LHS) == (sext RHS). The checks are in place to
2975 // ensure both the LHS and RHS are truly zero extended and to make sure the
2976 // transformation is profitable.
2977 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
2978 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
2979 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
2980 LHS.getNode()->hasNUsesOfValue(1, 0)) {
2981 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
2982 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
2983 SDValue SExt =
2984 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
2985 DAG.getValueType(MVT::i16));
2986 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
2987 RHS.getValueType()),
2988 CC, dl, DAG);
2989 AArch64CC = changeIntCCToAArch64CC(CC);
2990 }
2991 }
2992
2993 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
2994 if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
2995 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
2996 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
2997 }
2998 }
2999 }
3000
3001 if (!Cmp) {
3002 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
3003 AArch64CC = changeIntCCToAArch64CC(CC);
3004 }
3005 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
3006 return Cmp;
3007}
3008
3009static std::pair<SDValue, SDValue>
3010getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
3011  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
3012         "Unsupported value type");
3013 SDValue Value, Overflow;
3014 SDLoc DL(Op);
3015 SDValue LHS = Op.getOperand(0);
3016 SDValue RHS = Op.getOperand(1);
3017 unsigned Opc = 0;
3018 switch (Op.getOpcode()) {
3019 default:
3020    llvm_unreachable("Unknown overflow instruction!");
3021 case ISD::SADDO:
3022 Opc = AArch64ISD::ADDS;
3023 CC = AArch64CC::VS;
3024 break;
3025 case ISD::UADDO:
3026 Opc = AArch64ISD::ADDS;
3027 CC = AArch64CC::HS;
3028 break;
3029 case ISD::SSUBO:
3030 Opc = AArch64ISD::SUBS;
3031 CC = AArch64CC::VS;
3032 break;
3033 case ISD::USUBO:
3034 Opc = AArch64ISD::SUBS;
3035 CC = AArch64CC::LO;
3036 break;
3037  // Multiply needs a little bit of extra work.
3038 case ISD::SMULO:
3039 case ISD::UMULO: {
3040 CC = AArch64CC::NE;
3041 bool IsSigned = Op.getOpcode() == ISD::SMULO;
3042 if (Op.getValueType() == MVT::i32) {
3043 // Extend to 64-bits, then perform a 64-bit multiply.
3044 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3045 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
3046 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
3047 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3048 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3049
3050 // Check that the result fits into a 32-bit integer.
3051 SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
3052 if (IsSigned) {
3053 // cmp xreg, wreg, sxtw
3054 SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
3055 Overflow =
3056 DAG.getNode(AArch64ISD::SUBS, DL, VTs, Mul, SExtMul).getValue(1);
3057 } else {
3058 // tst xreg, #0xffffffff00000000
3059 SDValue UpperBits = DAG.getConstant(0xFFFFFFFF00000000, DL, MVT::i64);
3060 Overflow =
3061 DAG.getNode(AArch64ISD::ANDS, DL, VTs, Mul, UpperBits).getValue(1);
3062 }
3063 break;
3064 }
3065    assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
3066    // For the 64-bit multiply:
3067 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3068 if (IsSigned) {
3069 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
3070 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
3071 DAG.getConstant(63, DL, MVT::i64));
3072 // It is important that LowerBits is last, otherwise the arithmetic
3073 // shift will not be folded into the compare (SUBS).
3074 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3075 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
3076 .getValue(1);
3077 } else {
3078 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
3079 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3080 Overflow =
3081 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
3082 DAG.getConstant(0, DL, MVT::i64),
3083 UpperBits).getValue(1);
3084 }
3085 break;
3086 }
3087 } // switch (...)
3088
3089 if (Opc) {
3090 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
3091
3092 // Emit the AArch64 operation with overflow check.
3093 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
3094 Overflow = Value.getValue(1);
3095 }
3096 return std::make_pair(Value, Overflow);
3097}
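The i32 SMULO path above can be mirrored in plain C++; a minimal sketch (illustrative only) of the widen-multiply-and-check idea it implements:

#include <cassert>
#include <cstdint>

// Widen to 64 bits, multiply, and flag overflow when the product does not
// survive a round trip through i32 (the same check the SUBS/sxtw does above).
static bool smulo32(int32_t a, int32_t b, int32_t &res) {
  int64_t wide = int64_t(a) * int64_t(b);
  res = int32_t(wide);
  return int64_t(res) != wide;
}

int main() {
  int32_t r;
  assert(!smulo32(46340, 46340, r));  // 2147395600 fits in i32
  assert(smulo32(46341, 46341, r));   // 2147488281 does not
  return 0;
}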
3098
3099SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
3100 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
3101 return LowerToScalableOp(Op, DAG);
3102
3103 SDValue Sel = Op.getOperand(0);
3104 SDValue Other = Op.getOperand(1);
3105 SDLoc dl(Sel);
3106
3107 // If the operand is an overflow checking operation, invert the condition
3108 // code and kill the Not operation. I.e., transform:
3109 // (xor (overflow_op_bool, 1))
3110 // -->
3111 // (csel 1, 0, invert(cc), overflow_op_bool)
3112 // ... which later gets transformed to just a cset instruction with an
3113 // inverted condition code, rather than a cset + eor sequence.
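  // Illustrative example (not from the original source): for
  //   bool ok = !__builtin_sadd_overflow(a, b, &sum);
  // this folding produces "adds; cset wN, vc" rather than
  // "adds; cset wN, vs; eor wN, wN, #1".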
3114 if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
3115 // Only lower legal XALUO ops.
3116 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
3117 return SDValue();
3118
3119 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3120 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3121 AArch64CC::CondCode CC;
3122 SDValue Value, Overflow;
3123 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
3124 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3125 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
3126 CCVal, Overflow);
3127 }
3128 // If neither operand is a SELECT_CC, give up.
3129 if (Sel.getOpcode() != ISD::SELECT_CC)
3130 std::swap(Sel, Other);
3131 if (Sel.getOpcode() != ISD::SELECT_CC)
3132 return Op;
3133
3134 // The folding we want to perform is:
3135 // (xor x, (select_cc a, b, cc, 0, -1) )
3136 // -->
3137 // (csel x, (xor x, -1), cc ...)
3138 //
3139 // The latter will get matched to a CSINV instruction.
3140
3141 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
3142 SDValue LHS = Sel.getOperand(0);
3143 SDValue RHS = Sel.getOperand(1);
3144 SDValue TVal = Sel.getOperand(2);
3145 SDValue FVal = Sel.getOperand(3);
3146
3147 // FIXME: This could be generalized to non-integer comparisons.
3148 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
3149 return Op;
3150
3151 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
3152 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
3153
3154  // The values aren't constants, so this isn't the pattern we're looking for.
3155 if (!CFVal || !CTVal)
3156 return Op;
3157
3158 // We can commute the SELECT_CC by inverting the condition. This
3159 // might be needed to make this fit into a CSINV pattern.
3160 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
3161 std::swap(TVal, FVal);
3162 std::swap(CTVal, CFVal);
3163 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
3164 }
3165
3166 // If the constants line up, perform the transform!
3167 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
3168 SDValue CCVal;
3169 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
3170
3171 FVal = Other;
3172 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
3173 DAG.getConstant(-1ULL, dl, Other.getValueType()));
3174
3175 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
3176 CCVal, Cmp);
3177 }
3178
3179 return Op;
3180}
3181
3182static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
3183 EVT VT = Op.getValueType();
3184
3185 // Let legalize expand this if it isn't a legal type yet.
3186 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3187 return SDValue();
3188
3189 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3190
3191 unsigned Opc;
3192 bool ExtraOp = false;
3193 switch (Op.getOpcode()) {
3194 default:
3195    llvm_unreachable("Invalid code");
3196 case ISD::ADDC:
3197 Opc = AArch64ISD::ADDS;
3198 break;
3199 case ISD::SUBC:
3200 Opc = AArch64ISD::SUBS;
3201 break;
3202 case ISD::ADDE:
3203 Opc = AArch64ISD::ADCS;
3204 ExtraOp = true;
3205 break;
3206 case ISD::SUBE:
3207 Opc = AArch64ISD::SBCS;
3208 ExtraOp = true;
3209 break;
3210 }
3211
3212 if (!ExtraOp)
3213 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
3214 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
3215 Op.getOperand(2));
3216}
3217
3218static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
3219 // Let legalize expand this if it isn't a legal type yet.
3220 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3221 return SDValue();
3222
3223 SDLoc dl(Op);
3224 AArch64CC::CondCode CC;
3225 // The actual operation that sets the overflow or carry flag.
3226 SDValue Value, Overflow;
3227 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
3228
3229 // We use 0 and 1 as false and true values.
3230 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3231 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3232
3233 // We use an inverted condition, because the conditional select is inverted
3234 // too. This will allow it to be selected to a single instruction:
3235 // CSINC Wd, WZR, WZR, invert(cond).
3236 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3237 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
3238 CCVal, Overflow);
3239
3240 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3241 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3242}
3243
3244// Prefetch operands are:
3245// 1: Address to prefetch
3246// 2: bool isWrite
3247// 3: int locality (0 = no locality ... 3 = extreme locality)
3248// 4: bool isDataCache
3249static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
3250 SDLoc DL(Op);
3251 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3252 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
3253 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3254
3255 bool IsStream = !Locality;
3256 // When the locality number is set
3257 if (Locality) {
3258 // The front-end should have filtered out the out-of-range values
3259    assert(Locality <= 3 && "Prefetch locality out-of-range");
3260    // The locality degree is the opposite of the cache level, so put the
3261    // number the other way around.
3262    // The encoding starts at 0 for level 1.
3263 Locality = 3 - Locality;
3264 }
3265
3266  // Build the mask value encoding the expected behavior.
3267 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
3268 (!IsData << 3) | // IsDataCache bit
3269 (Locality << 1) | // Cache level bits
3270 (unsigned)IsStream; // Stream bit
3271 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
3272 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
3273}
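A standalone sketch of the PrfOp encoding built above (illustrative only; the prfop mnemonics are the architectural names):

#include <cassert>

// Mirror of the encoding above: bit 4 = store, bit 3 = instruction cache,
// bits 2:1 = target cache level minus one, bit 0 = streaming policy.
static unsigned encodePrfOp(unsigned IsWrite, unsigned Locality, unsigned IsData) {
  bool IsStream = !Locality;
  if (Locality)
    Locality = 3 - Locality;  // locality 3 (keep closest) -> level 1
  return (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | (unsigned)IsStream;
}

int main() {
  assert(encodePrfOp(0, 3, 1) == 0);   // read, max locality, data  -> PLDL1KEEP
  assert(encodePrfOp(0, 0, 1) == 1);   // read, no locality, data   -> PLDL1STRM
  assert(encodePrfOp(1, 3, 1) == 16);  // write, max locality, data -> PSTL1KEEP
  return 0;
}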
3274
3275SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
3276 SelectionDAG &DAG) const {
3277 EVT VT = Op.getValueType();
3278 if (VT.isScalableVector())
3279 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
3280
3281 if (useSVEForFixedLengthVectorVT(VT))
3282 return LowerFixedLengthFPExtendToSVE(Op, DAG);
3283
3284  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
3285 return SDValue();
3286}
3287
3288SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
3289 SelectionDAG &DAG) const {
3290 if (Op.getValueType().isScalableVector())
3291 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
3292
3293 bool IsStrict = Op->isStrictFPOpcode();
3294 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3295 EVT SrcVT = SrcVal.getValueType();
3296
3297 if (useSVEForFixedLengthVectorVT(SrcVT))
3298 return LowerFixedLengthFPRoundToSVE(Op, DAG);
3299
3300 if (SrcVT != MVT::f128) {
3301 // Expand cases where the input is a vector bigger than NEON.
3302 if (useSVEForFixedLengthVectorVT(SrcVT))
3303 return SDValue();
3304
3305 // It's legal except when f128 is involved
3306 return Op;
3307 }
3308
3309 return SDValue();
3310}
3311
3312SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
3313 SelectionDAG &DAG) const {
3314 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3315 // Any additional optimization in this function should be recorded
3316 // in the cost tables.
3317 EVT InVT = Op.getOperand(0).getValueType();
3318 EVT VT = Op.getValueType();
3319
3320 if (VT.isScalableVector()) {
3321 unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
3322 ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
3323 : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
3324 return LowerToPredicatedOp(Op, DAG, Opcode);
3325 }
3326
3327 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3328 return LowerFixedLengthFPToIntToSVE(Op, DAG);
3329
3330 unsigned NumElts = InVT.getVectorNumElements();
3331
3332 // f16 conversions are promoted to f32 when full fp16 is not supported.
3333 if (InVT.getVectorElementType() == MVT::f16 &&
3334 !Subtarget->hasFullFP16()) {
3335 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
3336 SDLoc dl(Op);
3337 return DAG.getNode(
3338 Op.getOpcode(), dl, Op.getValueType(),
3339 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
3340 }
3341
3342 uint64_t VTSize = VT.getFixedSizeInBits();
3343 uint64_t InVTSize = InVT.getFixedSizeInBits();
3344 if (VTSize < InVTSize) {
3345 SDLoc dl(Op);
3346 SDValue Cv =
3347 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
3348 Op.getOperand(0));
3349 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
3350 }
3351
3352 if (VTSize > InVTSize) {
3353 SDLoc dl(Op);
3354 MVT ExtVT =
3355 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
3356 VT.getVectorNumElements());
3357 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
3358 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
3359 }
3360
3361 // Type changing conversions are illegal.
3362 return Op;
3363}
3364
3365SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
3366 SelectionDAG &DAG) const {
3367 bool IsStrict = Op->isStrictFPOpcode();
3368 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3369
3370 if (SrcVal.getValueType().isVector())
3371 return LowerVectorFP_TO_INT(Op, DAG);
3372
3373 // f16 conversions are promoted to f32 when full fp16 is not supported.
3374 if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
3375    assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3376 SDLoc dl(Op);
3377 return DAG.getNode(
3378 Op.getOpcode(), dl, Op.getValueType(),
3379 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
3380 }
3381
3382 if (SrcVal.getValueType() != MVT::f128) {
3383 // It's legal except when f128 is involved
3384 return Op;
3385 }
3386
3387 return SDValue();
3388}
3389
3390SDValue
3391AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
3392 SelectionDAG &DAG) const {
3393 // AArch64 FP-to-int conversions saturate to the destination element size, so
3394 // we can lower common saturating conversions to simple instructions.
3395 SDValue SrcVal = Op.getOperand(0);
3396 EVT SrcVT = SrcVal.getValueType();
3397 EVT DstVT = Op.getValueType();
3398 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3399
3400 uint64_t SrcElementWidth = SrcVT.getScalarSizeInBits();
3401 uint64_t DstElementWidth = DstVT.getScalarSizeInBits();
3402 uint64_t SatWidth = SatVT.getScalarSizeInBits();
3403  assert(SatWidth <= DstElementWidth &&
3404         "Saturation width cannot exceed result width");
3405
3406 // TODO: Consider lowering to SVE operations, as in LowerVectorFP_TO_INT.
3407  // Currently, the `llvm.fpto[su]i.sat.*` intrinsics don't accept scalable
3408 // types, so this is hard to reach.
3409 if (DstVT.isScalableVector())
3410 return SDValue();
3411
3412 // TODO: Saturate to SatWidth explicitly.
3413 if (SatWidth != DstElementWidth)
3414 return SDValue();
3415
3416 EVT SrcElementVT = SrcVT.getVectorElementType();
3417
3418 // In the absence of FP16 support, promote f16 to f32, like
3419 // LowerVectorFP_TO_INT().
3420 if (SrcElementVT == MVT::f16 && !Subtarget->hasFullFP16()) {
3421 MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
3422 return DAG.getNode(Op.getOpcode(), SDLoc(Op), DstVT,
3423 DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal),
3424 Op.getOperand(1));
3425 }
3426
3427 // Cases that we can emit directly.
3428 if ((SrcElementWidth == DstElementWidth) &&
3429 (SrcElementVT == MVT::f64 || SrcElementVT == MVT::f32 ||
3430 (SrcElementVT == MVT::f16 && Subtarget->hasFullFP16()))) {
3431 return Op;
3432 }
3433
3434 // For all other cases, fall back on the expanded form.
3435 return SDValue();
3436}
3437
3438SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
3439 SelectionDAG &DAG) const {
3440 // AArch64 FP-to-int conversions saturate to the destination register size, so
3441 // we can lower common saturating conversions to simple instructions.
3442 SDValue SrcVal = Op.getOperand(0);
3443 EVT SrcVT = SrcVal.getValueType();
3444
3445 if (SrcVT.isVector())
3446 return LowerVectorFP_TO_INT_SAT(Op, DAG);
3447
3448 EVT DstVT = Op.getValueType();
3449 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3450 uint64_t SatWidth = SatVT.getScalarSizeInBits();
3451 uint64_t DstWidth = DstVT.getScalarSizeInBits();
3452  assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
3453
3454 // TODO: Saturate to SatWidth explicitly.
3455 if (SatWidth != DstWidth)
3456 return SDValue();
3457
3458 // In the absence of FP16 support, promote f16 to f32, like LowerFP_TO_INT().
3459 if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16())
3460 return DAG.getNode(Op.getOpcode(), SDLoc(Op), DstVT,
3461 DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal),
3462 Op.getOperand(1));
3463
3464 // Cases that we can emit directly.
3465 if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
3466 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
3467 (DstVT == MVT::i64 || DstVT == MVT::i32))
3468 return Op;
3469
3470 // For all other cases, fall back on the expanded form.
3471 return SDValue();
3472}
3473
3474SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
3475 SelectionDAG &DAG) const {
3476 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3477 // Any additional optimization in this function should be recorded
3478 // in the cost tables.
3479 EVT VT = Op.getValueType();
3480 SDLoc dl(Op);
3481 SDValue In = Op.getOperand(0);
3482 EVT InVT = In.getValueType();
3483 unsigned Opc = Op.getOpcode();
3484 bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
3485
3486 if (VT.isScalableVector()) {
3487 if (InVT.getVectorElementType() == MVT::i1) {
3488 // We can't directly extend an SVE predicate; extend it first.
3489 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3490 EVT CastVT = getPromotedVTForPredicate(InVT);
3491 In = DAG.getNode(CastOpc, dl, CastVT, In);
3492 return DAG.getNode(Opc, dl, VT, In);
3493 }
3494
3495 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
3496 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
3497 return LowerToPredicatedOp(Op, DAG, Opcode);
3498 }
3499
3500 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3501 return LowerFixedLengthIntToFPToSVE(Op, DAG);
3502
3503 uint64_t VTSize = VT.getFixedSizeInBits();
3504 uint64_t InVTSize = InVT.getFixedSizeInBits();
3505 if (VTSize < InVTSize) {
3506 MVT CastVT =
3507 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
3508 InVT.getVectorNumElements());
3509 In = DAG.getNode(Opc, dl, CastVT, In);
3510 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
3511 }
3512
3513 if (VTSize > InVTSize) {
3514 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3515 EVT CastVT = VT.changeVectorElementTypeToInteger();
3516 In = DAG.getNode(CastOpc, dl, CastVT, In);
3517 return DAG.getNode(Opc, dl, VT, In);
3518 }
3519
3520 return Op;
3521}
3522
3523SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
3524 SelectionDAG &DAG) const {
3525 if (Op.getValueType().isVector())
3526 return LowerVectorINT_TO_FP(Op, DAG);
3527
3528 bool IsStrict = Op->isStrictFPOpcode();
3529 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3530
3531 // f16 conversions are promoted to f32 when full fp16 is not supported.
3532 if (Op.getValueType() == MVT::f16 &&
3533 !Subtarget->hasFullFP16()) {
3534    assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3535 SDLoc dl(Op);
3536 return DAG.getNode(
3537 ISD::FP_ROUND, dl, MVT::f16,
3538 DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
3539 DAG.getIntPtrConstant(0, dl));
3540 }
3541
3542 // i128 conversions are libcalls.
3543 if (SrcVal.getValueType() == MVT::i128)
3544 return SDValue();
3545
3546 // Other conversions are legal, unless it's to the completely software-based
3547 // fp128.
3548 if (Op.getValueType() != MVT::f128)
3549 return Op;
3550 return SDValue();
3551}
3552
3553SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
3554 SelectionDAG &DAG) const {
3555 // For iOS, we want to call an alternative entry point: __sincos_stret,
3556 // which returns the values in two S / D registers.
3557 SDLoc dl(Op);
3558 SDValue Arg = Op.getOperand(0);
3559 EVT ArgVT = Arg.getValueType();
3560 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
3561
3562 ArgListTy Args;
3563 ArgListEntry Entry;
3564
3565 Entry.Node = Arg;
3566 Entry.Ty = ArgTy;
3567 Entry.IsSExt = false;
3568 Entry.IsZExt = false;
3569 Args.push_back(Entry);
3570
3571 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
3572 : RTLIB::SINCOS_STRET_F32;
3573 const char *LibcallName = getLibcallName(LC);
3574 SDValue Callee =
3575 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
3576
3577 StructType *RetTy = StructType::get(ArgTy, ArgTy);
3578 TargetLowering::CallLoweringInfo CLI(DAG);
3579 CLI.setDebugLoc(dl)
3580 .setChain(DAG.getEntryNode())
3581 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
3582
3583 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3584 return CallResult.first;
3585}
3586
3587static MVT getSVEContainerType(EVT ContentTy);
3588
3589SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
3590 SelectionDAG &DAG) const {
3591 EVT OpVT = Op.getValueType();
3592 EVT ArgVT = Op.getOperand(0).getValueType();
3593
3594 if (useSVEForFixedLengthVectorVT(OpVT))
3595 return LowerFixedLengthBitcastToSVE(Op, DAG);
3596
3597 if (OpVT.isScalableVector()) {
3598 if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
3599      assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
3600             "Expected int->fp bitcast!");
3601 SDValue ExtResult =
3602 DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
3603 Op.getOperand(0));
3604 return getSVESafeBitCast(OpVT, ExtResult, DAG);
3605 }
3606 return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
3607 }
3608
3609 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
3610 return SDValue();
3611
3612  assert(ArgVT == MVT::i16);
3613 SDLoc DL(Op);
3614
3615 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
3616 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
3617 return SDValue(
3618 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
3619 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
3620 0);
3621}
3622
3623static EVT getExtensionTo64Bits(const EVT &OrigVT) {
3624 if (OrigVT.getSizeInBits() >= 64)
3625 return OrigVT;
3626
3627  assert(OrigVT.isSimple() && "Expecting a simple value type");
3628
3629 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
3630 switch (OrigSimpleTy) {
3631  default: llvm_unreachable("Unexpected Vector Type");
3632 case MVT::v2i8:
3633 case MVT::v2i16:
3634 return MVT::v2i32;
3635 case MVT::v4i8:
3636 return MVT::v4i16;
3637 }
3638}
3639
3640static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
3641 const EVT &OrigTy,
3642 const EVT &ExtTy,
3643 unsigned ExtOpcode) {
3644 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
3645 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
3646 // 64-bits we need to insert a new extension so that it will be 64-bits.
3647  assert(ExtTy.is128BitVector() && "Unexpected extension size");
3648 if (OrigTy.getSizeInBits() >= 64)
3649 return N;
3650
3651 // Must extend size to at least 64 bits to be used as an operand for VMULL.
3652 EVT NewVT = getExtensionTo64Bits(OrigTy);
3653
3654 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
3655}
3656
3657static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
3658 bool isSigned) {
3659 EVT VT = N->getValueType(0);
3660
3661 if (N->getOpcode() != ISD::BUILD_VECTOR)
3662 return false;
3663
3664 for (const SDValue &Elt : N->op_values()) {
3665 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
3666 unsigned EltSize = VT.getScalarSizeInBits();
3667 unsigned HalfSize = EltSize / 2;
3668 if (isSigned) {
3669 if (!isIntN(HalfSize, C->getSExtValue()))
3670 return false;
3671 } else {
3672 if (!isUIntN(HalfSize, C->getZExtValue()))
3673 return false;
3674 }
3675 continue;
3676 }
3677 return false;
3678 }
3679
3680 return true;
3681}
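
As a standalone illustration (not part of this file), the sketch below checks whether example constants fit in half of a 16-bit element, which is the condition isExtendedBUILD_VECTOR requires before treating a constant BUILD_VECTOR as implicitly sign- or zero-extended. The helper names and values are invented for the example.

#include <cassert>
#include <cstdint>

// "Fits in half the element width" test, mirroring the EltSize/HalfSize logic
// above for EltSize = 16, i.e. HalfSize = 8.
static bool fitsUnsignedHalf(uint64_t V, unsigned HalfSize) {
  return V < (1ull << HalfSize);
}

static bool fitsSignedHalf(int64_t V, unsigned HalfSize) {
  int64_t Lo = -(1ll << (HalfSize - 1)), Hi = (1ll << (HalfSize - 1)) - 1;
  return V >= Lo && V <= Hi;
}

int main() {
  // 200 fits in u8 but not in s8; 300 fits in neither, so a BUILD_VECTOR
  // containing it would not be considered zero- or sign-extended from i8.
  assert(fitsUnsignedHalf(200, 8) && !fitsSignedHalf(200, 8));
  assert(!fitsUnsignedHalf(300, 8) && !fitsSignedHalf(300, 8));
  assert(fitsSignedHalf(-100, 8));
  return 0;
}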
3682
3683static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
3684 if (N->getOpcode() == ISD::SIGN_EXTEND ||
3685 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
3686 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
3687 N->getOperand(0)->getValueType(0),
3688 N->getValueType(0),
3689 N->getOpcode());
3690
3691 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
3692 EVT VT = N->getValueType(0);
3693 SDLoc dl(N);
3694 unsigned EltSize = VT.getScalarSizeInBits() / 2;
3695 unsigned NumElts = VT.getVectorNumElements();
3696 MVT TruncVT = MVT::getIntegerVT(EltSize);
3697 SmallVector<SDValue, 8> Ops;
3698 for (unsigned i = 0; i != NumElts; ++i) {
3699 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
3700 const APInt &CInt = C->getAPIntValue();
3701 // Element types smaller than 32 bits are not legal, so use i32 elements.
3702 // The values are implicitly truncated so sext vs. zext doesn't matter.
3703 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
3704 }
3705 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
3706}
3707
3708static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
3709 return N->getOpcode() == ISD::SIGN_EXTEND ||
3710 N->getOpcode() == ISD::ANY_EXTEND ||
3711 isExtendedBUILD_VECTOR(N, DAG, true);
3712}
3713
3714static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
3715 return N->getOpcode() == ISD::ZERO_EXTEND ||
3716 N->getOpcode() == ISD::ANY_EXTEND ||
3717 isExtendedBUILD_VECTOR(N, DAG, false);
3718}
3719
3720static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
3721 unsigned Opcode = N->getOpcode();
3722 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3723 SDNode *N0 = N->getOperand(0).getNode();
3724 SDNode *N1 = N->getOperand(1).getNode();
3725 return N0->hasOneUse() && N1->hasOneUse() &&
3726 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
3727 }
3728 return false;
3729}
3730
3731static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
3732 unsigned Opcode = N->getOpcode();
3733 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3734 SDNode *N0 = N->getOperand(0).getNode();
3735 SDNode *N1 = N->getOperand(1).getNode();
3736 return N0->hasOneUse() && N1->hasOneUse() &&
3737 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
3738 }
3739 return false;
3740}
3741
3742SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3743 SelectionDAG &DAG) const {
3744 // The rounding mode is in bits 23:22 of the FPCR.
3745 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
3746 // The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3)
3747 // so that the shift and the AND get folded into a bitfield extract.
3748 SDLoc dl(Op);
3749
3750 SDValue Chain = Op.getOperand(0);
3751 SDValue FPCR_64 = DAG.getNode(
3752 ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
3753 {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
3754 Chain = FPCR_64.getValue(1);
3755 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
3756 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
3757 DAG.getConstant(1U << 22, dl, MVT::i32));
3758 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
3759 DAG.getConstant(22, dl, MVT::i32));
3760 SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
3761 DAG.getConstant(3, dl, MVT::i32));
3762 return DAG.getMergeValues({AND, Chain}, dl);
3763}
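
A minimal standalone sketch, assuming plain 32-bit arithmetic, of the bitfield formula used above: adding 1 << 22 to the FPCR image and extracting bits 23:22 maps the hardware rounding field 0,1,2,3 to the FLT_ROUNDS values 1,2,3,0. The fpcrWithRM helper is hypothetical and only builds a test value.

#include <cassert>
#include <cstdint>

// Hypothetical helper: place a rounding-mode field RM into bits 23:22 of an
// otherwise-zero FPCR image.
static uint32_t fpcrWithRM(uint32_t RM) { return RM << 22; }

// Mirrors the lowering above: add 1 at bit 22, shift the field down, mask.
static uint32_t fltRoundsFromFPCR(uint32_t FPCR) {
  return ((FPCR + (1u << 22)) >> 22) & 3;
}

int main() {
  // FPCR rounding field 0 -> 1, 1 -> 2, 2 -> 3, 3 -> 0.
  assert(fltRoundsFromFPCR(fpcrWithRM(0)) == 1);
  assert(fltRoundsFromFPCR(fpcrWithRM(1)) == 2);
  assert(fltRoundsFromFPCR(fpcrWithRM(2)) == 3);
  assert(fltRoundsFromFPCR(fpcrWithRM(3)) == 0);
  return 0;
}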
3764
3765SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
3766 SelectionDAG &DAG) const {
3767 SDLoc DL(Op);
3768 SDValue Chain = Op->getOperand(0);
3769 SDValue RMValue = Op->getOperand(1);
3770
3771 // The rounding mode is in bits 23:22 of the FPCR.
3772 // The llvm.set.rounding argument value to the rounding mode in FPCR mapping
3773 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
3774 // (((arg - 1) & 3) << 22).
3775 //
3776 // The argument of llvm.set.rounding must be in the range [0, 3], so
3777 // NearestTiesToAway (4) is not handled here. It is the responsibility of the
3778 // code that generates llvm.set.rounding to ensure this condition.
3779
3780 // Calculate new value of FPCR[23:22].
3781 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
3782 DAG.getConstant(1, DL, MVT::i32));
3783 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
3784 DAG.getConstant(0x3, DL, MVT::i32));
3785 RMValue =
3786 DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
3787 DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
3788 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
3789
3790 // Get current value of FPCR.
3791 SDValue Ops[] = {
3792 Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
3793 SDValue FPCR =
3794 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
3795 Chain = FPCR.getValue(1);
3796 FPCR = FPCR.getValue(0);
3797
3798 // Put the new rounding mode into FPCR[23:22].
3799 const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
3800 FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
3801 DAG.getConstant(RMMask, DL, MVT::i64));
3802 FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
3803 SDValue Ops2[] = {
3804 Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
3805 FPCR};
3806 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
3807}
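
A matching standalone sketch of the inverse mapping used above: ((arg - 1) & 3) turns the llvm.set.rounding argument 0,1,2,3 into the FPCR RMode field 3,0,1,2, as the comment states.

#include <cassert>
#include <cstdint>

// Mirrors the lowering above: the llvm.set.rounding argument is mapped to the
// FPCR RMode field with ((arg - 1) & 3) before being shifted to bits 23:22.
static uint32_t fpcrRModeFromArg(uint32_t Arg) { return (Arg - 1) & 3; }

int main() {
  // 0 -> 3, 1 -> 0, 2 -> 1, 3 -> 2, matching the comment in LowerSET_ROUNDING.
  assert(fpcrRModeFromArg(0) == 3);
  assert(fpcrRModeFromArg(1) == 0);
  assert(fpcrRModeFromArg(2) == 1);
  assert(fpcrRModeFromArg(3) == 2);
  return 0;
}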
3808
3809SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
3810 EVT VT = Op.getValueType();
3811
3812 // If SVE is available then i64 vector multiplications can also be made legal.
3813 bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
3814
3815 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
3816 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
3817
3818 // Multiplications are only custom-lowered for 128-bit vectors so that
3819 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
3820 assert(VT.is128BitVector() && VT.isInteger() &&
3821        "unexpected type for custom-lowering ISD::MUL");
3822 SDNode *N0 = Op.getOperand(0).getNode();
3823 SDNode *N1 = Op.getOperand(1).getNode();
3824 unsigned NewOpc = 0;
3825 bool isMLA = false;
3826 bool isN0SExt = isSignExtended(N0, DAG);
3827 bool isN1SExt = isSignExtended(N1, DAG);
3828 if (isN0SExt && isN1SExt)
3829 NewOpc = AArch64ISD::SMULL;
3830 else {
3831 bool isN0ZExt = isZeroExtended(N0, DAG);
3832 bool isN1ZExt = isZeroExtended(N1, DAG);
3833 if (isN0ZExt && isN1ZExt)
3834 NewOpc = AArch64ISD::UMULL;
3835 else if (isN1SExt || isN1ZExt) {
3836 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
3837 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
3838 if (isN1SExt && isAddSubSExt(N0, DAG)) {
3839 NewOpc = AArch64ISD::SMULL;
3840 isMLA = true;
3841 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
3842 NewOpc = AArch64ISD::UMULL;
3843 isMLA = true;
3844 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
3845 std::swap(N0, N1);
3846 NewOpc = AArch64ISD::UMULL;
3847 isMLA = true;
3848 }
3849 }
3850
3851 if (!NewOpc) {
3852 if (VT == MVT::v2i64)
3853 // Fall through to expand this. It is not legal.
3854 return SDValue();
3855 else
3856 // Other vector multiplications are legal.
3857 return Op;
3858 }
3859 }
3860
3861 // Legalize to a S/UMULL instruction
3862 SDLoc DL(Op);
3863 SDValue Op0;
3864 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
3865 if (!isMLA) {
3866 Op0 = skipExtensionForVectorMULL(N0, DAG);
3867 assert(Op0.getValueType().is64BitVector() &&
3868        Op1.getValueType().is64BitVector() &&
3869        "unexpected types for extended operands to VMULL");
3870 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
3871 }
3872 // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
3873 // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
3874 // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57.
3875 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
3876 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
3877 EVT Op1VT = Op1.getValueType();
3878 return DAG.getNode(N0->getOpcode(), DL, VT,
3879 DAG.getNode(NewOpc, DL, VT,
3880 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
3881 DAG.getNode(NewOpc, DL, VT,
3882 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
3883}
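
A per-lane arithmetic sketch of the rewrite performed for the MLA case above: distributing a widening multiply over the add is safe because both sides agree modulo the widened element width. The lane values and the 8-to-16-bit widths are assumptions chosen for the example.

#include <cassert>
#include <cstdint>

// (zext a + zext b) * zext c  ==  (zext a * zext c) + (zext b * zext c)
// in the widened element type (here i8 lanes widened to i16, wrapping mod 2^16).
int main() {
  uint8_t a = 200, b = 100, c = 50;
  uint16_t lhs = static_cast<uint16_t>(
      (static_cast<uint16_t>(a) + static_cast<uint16_t>(b)) * c);
  uint16_t rhs = static_cast<uint16_t>(static_cast<uint16_t>(a) * c +
                                       static_cast<uint16_t>(b) * c);
  assert(lhs == rhs); // distribution holds lane-wise modulo 2^16
  return 0;
}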
3884
3885static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
3886 int Pattern) {
3887 return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
3888 DAG.getTargetConstant(Pattern, DL, MVT::i32));
3889}
3890
3891static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
3892 SDLoc DL(Op);
3893 EVT OutVT = Op.getValueType();
3894 SDValue InOp = Op.getOperand(1);
3895 EVT InVT = InOp.getValueType();
3896
3897 // Return the operand if the cast isn't changing type,
3898 // i.e. <n x 16 x i1> -> <n x 16 x i1>
3899 if (InVT == OutVT)
3900 return InOp;
3901
3902 SDValue Reinterpret =
3903 DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, InOp);
3904
3905 // If the argument converted to an svbool is a ptrue or a comparison, the
3906 // lanes introduced by the widening are zero by construction.
3907 switch (InOp.getOpcode()) {
3908 case AArch64ISD::SETCC_MERGE_ZERO:
3909 return Reinterpret;
3910 case ISD::INTRINSIC_WO_CHAIN:
3911 if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue)
3912 return Reinterpret;
3913 }
3914
3915 // Otherwise, zero the newly introduced lanes.
3916 SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
3917 SDValue MaskReinterpret =
3918 DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, Mask);
3919 return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
3920}
3921
3922SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3923 SelectionDAG &DAG) const {
3924 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3925 SDLoc dl(Op);
3926 switch (IntNo) {
3927 default: return SDValue(); // Don't custom lower most intrinsics.
3928 case Intrinsic::thread_pointer: {
3929 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3930 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
3931 }
3932 case Intrinsic::aarch64_neon_abs: {
3933 EVT Ty = Op.getValueType();
3934 if (Ty == MVT::i64) {
3935 SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
3936 Op.getOperand(1));
3937 Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
3938 return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
3939 } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
3940 return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
3941 } else {
3942 report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
3943 }
3944 }
3945 case Intrinsic::aarch64_neon_smax:
3946 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
3947 Op.getOperand(1), Op.getOperand(2));
3948 case Intrinsic::aarch64_neon_umax:
3949 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
3950 Op.getOperand(1), Op.getOperand(2));
3951 case Intrinsic::aarch64_neon_smin:
3952 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
3953 Op.getOperand(1), Op.getOperand(2));
3954 case Intrinsic::aarch64_neon_umin:
3955 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
3956 Op.getOperand(1), Op.getOperand(2));
3957
3958 case Intrinsic::aarch64_sve_sunpkhi:
3959 return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
3960 Op.getOperand(1));
3961 case Intrinsic::aarch64_sve_sunpklo:
3962 return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
3963 Op.getOperand(1));
3964 case Intrinsic::aarch64_sve_uunpkhi:
3965 return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
3966 Op.getOperand(1));
3967 case Intrinsic::aarch64_sve_uunpklo:
3968 return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
3969 Op.getOperand(1));
3970 case Intrinsic::aarch64_sve_clasta_n:
3971 return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
3972 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3973 case Intrinsic::aarch64_sve_clastb_n:
3974 return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
3975 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3976 case Intrinsic::aarch64_sve_lasta:
3977 return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
3978 Op.getOperand(1), Op.getOperand(2));
3979 case Intrinsic::aarch64_sve_lastb:
3980 return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
3981 Op.getOperand(1), Op.getOperand(2));
3982 case Intrinsic::aarch64_sve_rev:
3983 return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
3984 Op.getOperand(1));
3985 case Intrinsic::aarch64_sve_tbl:
3986 return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
3987 Op.getOperand(1), Op.getOperand(2));
3988 case Intrinsic::aarch64_sve_trn1:
3989 return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
3990 Op.getOperand(1), Op.getOperand(2));
3991 case Intrinsic::aarch64_sve_trn2:
3992 return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
3993 Op.getOperand(1), Op.getOperand(2));
3994 case Intrinsic::aarch64_sve_uzp1:
3995 return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
3996 Op.getOperand(1), Op.getOperand(2));
3997 case Intrinsic::aarch64_sve_uzp2:
3998 return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
3999 Op.getOperand(1), Op.getOperand(2));
4000 case Intrinsic::aarch64_sve_zip1:
4001 return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
4002 Op.getOperand(1), Op.getOperand(2));
4003 case Intrinsic::aarch64_sve_zip2:
4004 return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
4005 Op.getOperand(1), Op.getOperand(2));
4006 case Intrinsic::aarch64_sve_splice:
4007 return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
4008 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4009 case Intrinsic::aarch64_sve_ptrue:
4010 return getPTrue(DAG, dl, Op.getValueType(),
4011 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
4012 case Intrinsic::aarch64_sve_clz:
4013 return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
4014 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4015 case Intrinsic::aarch64_sve_cnt: {
4016 SDValue Data = Op.getOperand(3);
4017 // CTPOP only supports integer operands.
4018 if (Data.getValueType().isFloatingPoint())
4019 Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
4020 return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
4021 Op.getOperand(2), Data, Op.getOperand(1));
4022 }
4023 case Intrinsic::aarch64_sve_dupq_lane:
4024 return LowerDUPQLane(Op, DAG);
4025 case Intrinsic::aarch64_sve_convert_from_svbool:
4026 return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
4027 Op.getOperand(1));
4028 case Intrinsic::aarch64_sve_convert_to_svbool:
4029 return lowerConvertToSVBool(Op, DAG);
4030 case Intrinsic::aarch64_sve_fneg:
4031 return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
4032 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4033 case Intrinsic::aarch64_sve_frintp:
4034 return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
4035 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4036 case Intrinsic::aarch64_sve_frintm:
4037 return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
4038 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4039 case Intrinsic::aarch64_sve_frinti:
4040 return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
4041 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4042 case Intrinsic::aarch64_sve_frintx:
4043 return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
4044 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4045 case Intrinsic::aarch64_sve_frinta:
4046 return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
4047 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4048 case Intrinsic::aarch64_sve_frintn:
4049 return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
4050 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4051 case Intrinsic::aarch64_sve_frintz:
4052 return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
4053 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4054 case Intrinsic::aarch64_sve_ucvtf:
4055 return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
4056 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4057 Op.getOperand(1));
4058 case Intrinsic::aarch64_sve_scvtf:
4059 return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
4060 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4061 Op.getOperand(1));
4062 case Intrinsic::aarch64_sve_fcvtzu:
4063 return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
4064 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4065 Op.getOperand(1));
4066 case Intrinsic::aarch64_sve_fcvtzs:
4067 return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
4068 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4069 Op.getOperand(1));
4070 case Intrinsic::aarch64_sve_fsqrt:
4071 return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
4072 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4073 case Intrinsic::aarch64_sve_frecpx:
4074 return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
4075 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4076 case Intrinsic::aarch64_sve_fabs:
4077 return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4078 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4079 case Intrinsic::aarch64_sve_abs:
4080 return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4081 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4082 case Intrinsic::aarch64_sve_neg:
4083 return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
4084 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4085 case Intrinsic::aarch64_sve_insr: {
4086 SDValue Scalar = Op.getOperand(2);
4087 EVT ScalarTy = Scalar.getValueType();
4088 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
4089 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
4090
4091 return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
4092 Op.getOperand(1), Scalar);
4093 }
4094 case Intrinsic::aarch64_sve_rbit:
4095 return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
4096 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4097 Op.getOperand(1));
4098 case Intrinsic::aarch64_sve_revb:
4099 return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
4100 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4101 case Intrinsic::aarch64_sve_sxtb:
4102 return DAG.getNode(
4103 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4104 Op.getOperand(2), Op.getOperand(3),
4105 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4106 Op.getOperand(1));
4107 case Intrinsic::aarch64_sve_sxth:
4108 return DAG.getNode(
4109 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4110 Op.getOperand(2), Op.getOperand(3),
4111 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4112 Op.getOperand(1));
4113 case Intrinsic::aarch64_sve_sxtw:
4114 return DAG.getNode(
4115 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4116 Op.getOperand(2), Op.getOperand(3),
4117 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4118 Op.getOperand(1));
4119 case Intrinsic::aarch64_sve_uxtb:
4120 return DAG.getNode(
4121 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4122 Op.getOperand(2), Op.getOperand(3),
4123 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4124 Op.getOperand(1));
4125 case Intrinsic::aarch64_sve_uxth:
4126 return DAG.getNode(
4127 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4128 Op.getOperand(2), Op.getOperand(3),
4129 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4130 Op.getOperand(1));
4131 case Intrinsic::aarch64_sve_uxtw:
4132 return DAG.getNode(
4133 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4134 Op.getOperand(2), Op.getOperand(3),
4135 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4136 Op.getOperand(1));
4137
4138 case Intrinsic::localaddress: {
4139 const auto &MF = DAG.getMachineFunction();
4140 const auto *RegInfo = Subtarget->getRegisterInfo();
4141 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
4142 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
4143 Op.getSimpleValueType());
4144 }
4145
4146 case Intrinsic::eh_recoverfp: {
4147 // FIXME: This needs to be implemented to correctly handle highly aligned
4148 // stack objects. For now we simply return the incoming FP. Refer to D53541
4149 // for more details.
4150 SDValue FnOp = Op.getOperand(1);
4151 SDValue IncomingFPOp = Op.getOperand(2);
4152 GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
4153 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
4154 if (!Fn)
4155 report_fatal_error(
4156 "llvm.eh.recoverfp must take a function as the first argument");
4157 return IncomingFPOp;
4158 }
4159
4160 case Intrinsic::aarch64_neon_vsri:
4161 case Intrinsic::aarch64_neon_vsli: {
4162 EVT Ty = Op.getValueType();
4163
4164 if (!Ty.isVector())
4165 report_fatal_error("Unexpected type for aarch64_neon_vsli");
4166
4167 assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
4168
4169 bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
4170 unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
4171 return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
4172 Op.getOperand(3));
4173 }
4174
4175 case Intrinsic::aarch64_neon_srhadd:
4176 case Intrinsic::aarch64_neon_urhadd:
4177 case Intrinsic::aarch64_neon_shadd:
4178 case Intrinsic::aarch64_neon_uhadd: {
4179 bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4180 IntNo == Intrinsic::aarch64_neon_shadd);
4181 bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4182 IntNo == Intrinsic::aarch64_neon_urhadd);
4183 unsigned Opcode =
4184 IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
4185 : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
4186 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4187 Op.getOperand(2));
4188 }
4189 case Intrinsic::aarch64_neon_sabd:
4190 case Intrinsic::aarch64_neon_uabd: {
4191 unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU
4192 : ISD::ABDS;
4193 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4194 Op.getOperand(2));
4195 }
4196 case Intrinsic::aarch64_neon_uaddlp: {
4197 unsigned Opcode = AArch64ISD::UADDLP;
4198 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
4199 }
4200 case Intrinsic::aarch64_neon_sdot:
4201 case Intrinsic::aarch64_neon_udot:
4202 case Intrinsic::aarch64_sve_sdot:
4203 case Intrinsic::aarch64_sve_udot: {
4204 unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
4205 IntNo == Intrinsic::aarch64_sve_udot)
4206 ? AArch64ISD::UDOT
4207 : AArch64ISD::SDOT;
4208 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4209 Op.getOperand(2), Op.getOperand(3));
4210 }
4211 }
4212}
4213
4214bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
4215 if (VT.getVectorElementType() == MVT::i8 ||
4216 VT.getVectorElementType() == MVT::i16) {
4217 EltTy = MVT::i32;
4218 return true;
4219 }
4220 return false;
4221}
4222
4223bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
4224 if (VT.getVectorElementType() == MVT::i32 &&
4225 VT.getVectorElementCount().getKnownMinValue() >= 4)
4226 return true;
4227
4228 return false;
4229}
4230
4231bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
4232 return ExtVal.getValueType().isScalableVector() ||
4233 useSVEForFixedLengthVectorVT(ExtVal.getValueType(),
4234 /*OverrideNEON=*/true);
4235}
4236
4237unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
4238 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
4239 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
4240 AArch64ISD::GLD1_MERGE_ZERO},
4241 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
4242 AArch64ISD::GLD1_UXTW_MERGE_ZERO},
4243 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
4244 AArch64ISD::GLD1_MERGE_ZERO},
4245 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
4246 AArch64ISD::GLD1_SXTW_MERGE_ZERO},
4247 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
4248 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
4249 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
4250 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
4251 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4252 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
4253 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4254 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
4255 };
4256 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4257 return AddrModes.find(Key)->second;
4258}
4259
4260unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
4261 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
4262 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
4263 AArch64ISD::SST1_PRED},
4264 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
4265 AArch64ISD::SST1_UXTW_PRED},
4266 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
4267 AArch64ISD::SST1_PRED},
4268 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
4269 AArch64ISD::SST1_SXTW_PRED},
4270 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
4271 AArch64ISD::SST1_SCALED_PRED},
4272 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
4273 AArch64ISD::SST1_UXTW_SCALED_PRED},
4274 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4275 AArch64ISD::SST1_SCALED_PRED},
4276 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4277 AArch64ISD::SST1_SXTW_SCALED_PRED},
4278 };
4279 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4280 return AddrModes.find(Key)->second;
4281}
4282
4283unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
4284 switch (Opcode) {
4285 default:
4286 llvm_unreachable("unimplemented opcode");
4287 return Opcode;
4288 case AArch64ISD::GLD1_MERGE_ZERO:
4289 return AArch64ISD::GLD1S_MERGE_ZERO;
4290 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
4291 return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
4292 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
4293 return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
4294 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
4295 return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
4296 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
4297 return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
4298 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
4299 return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
4300 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
4301 return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
4302 }
4303}
4304
4305bool getGatherScatterIndexIsExtended(SDValue Index) {
4306 unsigned Opcode = Index.getOpcode();
4307 if (Opcode == ISD::SIGN_EXTEND_INREG)
4308 return true;
4309
4310 if (Opcode == ISD::AND) {
4311 SDValue Splat = Index.getOperand(1);
4312 if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
4313 return false;
4314 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
4315 if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
4316 return false;
4317 return true;
4318 }
4319
4320 return false;
4321}
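
A scalar analogue of the AND pattern recognised above: masking a 64-bit index with 0xFFFFFFFF is equivalent to zero-extending its low 32 bits, which is what lets the gather/scatter use a 32-bit index form. The example value is arbitrary.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Index = 0xDEADBEEF12345678ull; // arbitrary example index
  uint64_t Masked = Index & 0xFFFFFFFFull;
  uint64_t ZExt = static_cast<uint64_t>(static_cast<uint32_t>(Index));
  assert(Masked == ZExt); // AND with a 0xFFFFFFFF splat acts as a zero-extend
  return 0;
}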
4322
4323// If the base pointer of a masked gather or scatter is null, we
4324// may be able to swap BasePtr & Index and use the vector + register
4325// or vector + immediate addressing mode, e.g.
4326// VECTOR + REGISTER:
4327 // getelementptr nullptr, <vscale x N x T> (splat(%offset) + %indices)
4328 // -> getelementptr %offset, <vscale x N x T> %indices
4329 // VECTOR + IMMEDIATE:
4330 // getelementptr nullptr, <vscale x N x T> (splat(#x) + %indices)
4331// -> getelementptr #x, <vscale x N x T> %indices
4332void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
4333 unsigned &Opcode, bool IsGather,
4334 SelectionDAG &DAG) {
4335 if (!isNullConstant(BasePtr))
4336 return;
4337
4338 // FIXME: This will not match for fixed vector type codegen as the nodes in
4339 // question will have fixed<->scalable conversions around them. This should be
4340 // moved to a DAG combine or complex pattern so that it executes after all of
4341 // the fixed vector inserts and extracts have been removed. This deficiency
4342 // will result in a sub-optimal addressing mode being used, i.e. an ADD not
4343 // being folded into the scatter/gather.
4344 ConstantSDNode *Offset = nullptr;
4345 if (Index.getOpcode() == ISD::ADD)
4346 if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
4347 if (isa<ConstantSDNode>(SplatVal))
4348 Offset = cast<ConstantSDNode>(SplatVal);
4349 else {
4350 BasePtr = SplatVal;
4351 Index = Index->getOperand(0);
4352 return;
4353 }
4354 }
4355
4356 unsigned NewOp =
4357 IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
4358
4359 if (!Offset) {
4360 std::swap(BasePtr, Index);
4361 Opcode = NewOp;
4362 return;
4363 }
4364
4365 uint64_t OffsetVal = Offset->getZExtValue();
4366 unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
4367 auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
4368
4369 if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
4370 // Index is out of range for the immediate addressing mode
4371 BasePtr = ConstOffset;
4372 Index = Index->getOperand(0);
4373 return;
4374 }
4375
4376 // Immediate is in range
4377 Opcode = NewOp;
4378 BasePtr = Index->getOperand(0);
4379 Index = ConstOffset;
4380}
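
A standalone sketch of the immediate-range check used above when the splatted offset is a constant: the offset must be a multiple of the element size and must index at most 31 elements, otherwise the vector + register form is used instead. The element sizes below are example inputs.

#include <cassert>
#include <cstdint>

// In-range means the offset can become the SVE gather/scatter immediate.
static bool fitsGatherScatterImm(uint64_t OffsetVal, unsigned ScalarSizeInBytes) {
  return OffsetVal % ScalarSizeInBytes == 0 &&
         OffsetVal / ScalarSizeInBytes <= 31;
}

int main() {
  assert(fitsGatherScatterImm(124, 4));  // 31 * 4 bytes: in range for i32 elements
  assert(!fitsGatherScatterImm(128, 4)); // 32 * 4 bytes: one element too far
  assert(!fitsGatherScatterImm(6, 4));   // not a multiple of the element size
  return 0;
}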
4381
4382SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
4383 SelectionDAG &DAG) const {
4384 SDLoc DL(Op);
4385 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
4386 assert(MGT && "Can only custom lower gather load nodes");
4387
4388 bool IsFixedLength = MGT->getMemoryVT().isFixedLengthVector();
4389
4390 SDValue Index = MGT->getIndex();
4391 SDValue Chain = MGT->getChain();
4392 SDValue PassThru = MGT->getPassThru();
4393 SDValue Mask = MGT->getMask();
4394 SDValue BasePtr = MGT->getBasePtr();
4395 ISD::LoadExtType ExtTy = MGT->getExtensionType();
4396
4397 ISD::MemIndexType IndexType = MGT->getIndexType();
4398 bool IsScaled =
4399 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4400 bool IsSigned =
4401 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4402 bool IdxNeedsExtend =
4403 getGatherScatterIndexIsExtended(Index) ||
4404 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4405 bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
4406
4407 EVT VT = PassThru.getSimpleValueType();
4408 EVT IndexVT = Index.getSimpleValueType();
4409 EVT MemVT = MGT->getMemoryVT();
4410 SDValue InputVT = DAG.getValueType(MemVT);
4411
4412 if (VT.getVectorElementType() == MVT::bf16 &&
4413 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4414 return SDValue();
4415
4416 if (IsFixedLength) {
4417 assert(Subtarget->useSVEForFixedLengthVectors() &&
4418        "Cannot lower when not using SVE for fixed vectors");
4419 if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
4420 IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
4421 MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
4422 } else {
4423 MemVT = getContainerForFixedLengthVector(DAG, MemVT);
4424 IndexVT = MemVT.changeTypeToInteger();
4425 }
4426 InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
4427 Mask = DAG.getNode(
4428 ISD::ZERO_EXTEND, DL,
4429 VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
4430 }
4431
4432 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
4433 PassThru = SDValue();
4434
4435 if (VT.isFloatingPoint() && !IsFixedLength) {
4436 // Handle FP data by using an integer gather and casting the result.
4437 if (PassThru) {
4438 EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4439 PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
4440 }
4441 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4442 }
4443
4444 SDVTList VTs = DAG.getVTList(IndexVT, MVT::Other);
4445
4446 if (getGatherScatterIndexIsExtended(Index))
4447 Index = Index.getOperand(0);
4448
4449 unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
4450 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4451 /*isGather=*/true, DAG);
4452
4453 if (ResNeedsSignExtend)
4454 Opcode = getSignExtendedGatherOpcode(Opcode);
4455
4456 if (IsFixedLength) {
4457 if (Index.getSimpleValueType().isFixedLengthVector())
4458 Index = convertToScalableVector(DAG, IndexVT, Index);
4459 if (BasePtr.getSimpleValueType().isFixedLengthVector())
4460 BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
4461 Mask = convertFixedMaskToScalableVector(Mask, DAG);
4462 }
4463
4464 SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT};
4465 SDValue Result = DAG.getNode(Opcode, DL, VTs, Ops);
4466 Chain = Result.getValue(1);
4467
4468 if (IsFixedLength) {
4469 Result = convertFromScalableVector(
4470 DAG, VT.changeVectorElementType(IndexVT.getVectorElementType()),
4471 Result);
4472 Result = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Result);
4473 Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
4474
4475 if (PassThru)
4476 Result = DAG.getSelect(DL, VT, MGT->getMask(), Result, PassThru);
4477 } else {
4478 if (PassThru)
4479 Result = DAG.getSelect(DL, IndexVT, Mask, Result, PassThru);
4480
4481 if (VT.isFloatingPoint())
4482 Result = getSVESafeBitCast(VT, Result, DAG);
4483 }
4484
4485 return DAG.getMergeValues({Result, Chain}, DL);
4486}
4487
4488SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
4489 SelectionDAG &DAG) const {
4490 SDLoc DL(Op);
4491 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
4492 assert(MSC && "Can only custom lower scatter store nodes");
4493
4494 bool IsFixedLength = MSC->getMemoryVT().isFixedLengthVector();
4495
4496 SDValue Index = MSC->getIndex();
4497 SDValue Chain = MSC->getChain();
4498 SDValue StoreVal = MSC->getValue();
4499 SDValue Mask = MSC->getMask();
4500 SDValue BasePtr = MSC->getBasePtr();
4501
4502 ISD::MemIndexType IndexType = MSC->getIndexType();
4503 bool IsScaled =
4504 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4505 bool IsSigned =
4506 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4507 bool NeedsExtend =
4508 getGatherScatterIndexIsExtended(Index) ||
4509 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4510
4511 EVT VT = StoreVal.getSimpleValueType();
4512 EVT IndexVT = Index.getSimpleValueType();
4513 SDVTList VTs = DAG.getVTList(MVT::Other);
4514 EVT MemVT = MSC->getMemoryVT();
4515 SDValue InputVT = DAG.getValueType(MemVT);
4516
4517 if (VT.getVectorElementType() == MVT::bf16 &&
4518 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4519 return SDValue();
4520
4521 if (IsFixedLength) {
4522 assert(Subtarget->useSVEForFixedLengthVectors() &&
4523        "Cannot lower when not using SVE for fixed vectors");
4524 if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
4525 IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
4526 MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
4527 } else {
4528 MemVT = getContainerForFixedLengthVector(DAG, MemVT);
4529 IndexVT = MemVT.changeTypeToInteger();
4530 }
4531 InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
4532
4533 StoreVal =
4534 DAG.getNode(ISD::BITCAST, DL, VT.changeTypeToInteger(), StoreVal);
4535 StoreVal = DAG.getNode(
4536 ISD::ANY_EXTEND, DL,
4537 VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
4538 StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
4539 Mask = DAG.getNode(
4540 ISD::ZERO_EXTEND, DL,
4541 VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
4542 } else if (VT.isFloatingPoint()) {
4543 // Handle FP data by casting the data so an integer scatter can be used.
4544 EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4545 StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
4546 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4547 }
4548
4549 if (getGatherScatterIndexIsExtended(Index))
4550 Index = Index.getOperand(0);
4551
4552 unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
4553 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4554 /*isGather=*/false, DAG);
4555
4556 if (IsFixedLength) {
4557 if (Index.getSimpleValueType().isFixedLengthVector())
4558 Index = convertToScalableVector(DAG, IndexVT, Index);
4559 if (BasePtr.getSimpleValueType().isFixedLengthVector())
4560 BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
4561 Mask = convertFixedMaskToScalableVector(Mask, DAG);
4562 }
4563
4564 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
4565 return DAG.getNode(Opcode, DL, VTs, Ops);
4566}
4567
4568SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
4569 SDLoc DL(Op);
4570 MaskedLoadSDNode *LoadNode = cast<MaskedLoadSDNode>(Op);
4571 assert(LoadNode && "Expected custom lowering of a masked load node");
4572 EVT VT = Op->getValueType(0);
4573
4574 if (useSVEForFixedLengthVectorVT(VT, true))
4575 return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
4576
4577 SDValue PassThru = LoadNode->getPassThru();
4578 SDValue Mask = LoadNode->getMask();
4579
4580 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
4581 return Op;
4582
4583 SDValue Load = DAG.getMaskedLoad(
4584 VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
4585 LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
4586 LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
4587 LoadNode->getExtensionType());
4588
4589 SDValue Result = DAG.getSelect(DL, VT, Mask, Load, PassThru);
4590
4591 return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
4592}
4593
4594// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
4595static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
4596 EVT VT, EVT MemVT,
4597 SelectionDAG &DAG) {
4598 assert(VT.isVector() && "VT should be a vector type");
4599 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
4600
4601 SDValue Value = ST->getValue();
4602
4603 // It first extends the promoted v4i16 to v8i16, truncates to v8i8, and extracts
4604 // the word lane which represents the v4i8 subvector. It optimizes the store
4605 // to:
4606 //
4607 // xtn v0.8b, v0.8h
4608 // str s0, [x0]
4609
4610 SDValue Undef = DAG.getUNDEF(MVT::i16);
4611 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
4612 {Undef, Undef, Undef, Undef});
4613
4614 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
4615 Value, UndefVec);
4616 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
4617
4618 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
4619 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4620 Trunc, DAG.getConstant(0, DL, MVT::i64));
4621
4622 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
4623 ST->getBasePtr(), ST->getMemOperand());
4624}
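
A scalar model of what the xtn + str s0 sequence above stores: the low byte of each of the four i16 lanes, packed into one 32-bit word. The lane values and the little-endian packing are assumptions for the illustration.

#include <cassert>
#include <cstdint>

int main() {
  uint16_t Lanes[4] = {0x1234, 0x00FF, 0xABCD, 0x0001}; // example v4i16 value
  uint32_t Packed = 0;
  for (int i = 0; i < 4; ++i) // keep the low byte of each lane, lane 0 lowest
    Packed |= static_cast<uint32_t>(static_cast<uint8_t>(Lanes[i])) << (8 * i);
  assert(Packed == 0x01CDFF34u); // the 32-bit word that str s0 would write
  return 0;
}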
4625
4626 // Custom lowering for any store, vector or scalar, and/or default or with a
4627 // truncate operation. Currently we only custom lower truncating stores from
4628 // vector v4i16 to v4i8 and volatile stores of i128.
4629SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
4630 SelectionDAG &DAG) const {
4631 SDLoc Dl(Op);
4632 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
4633 assert (StoreNode && "Can only custom lower store nodes");
4634
4635 SDValue Value = StoreNode->getValue();
4636
4637 EVT VT = Value.getValueType();
4638 EVT MemVT = StoreNode->getMemoryVT();
4639
4640 if (VT.isVector()) {
4641 if (useSVEForFixedLengthVectorVT(VT, true))
4642 return LowerFixedLengthVectorStoreToSVE(Op, DAG);
4643
4644 unsigned AS = StoreNode->getAddressSpace();
4645 Align Alignment = StoreNode->getAlign();
4646 if (Alignment < MemVT.getStoreSize() &&
4647 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
4648 StoreNode->getMemOperand()->getFlags(),
4649 nullptr)) {
4650 return scalarizeVectorStore(StoreNode, DAG);
4651 }
4652
4653 if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
4654 MemVT == MVT::v4i8) {
4655 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
4656 }
4657 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
4658 // the custom lowering, as there are no un-paired non-temporal stores and
4659 // legalization will break up 256 bit inputs.
4660 ElementCount EC = MemVT.getVectorElementCount();
4661 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
4662 EC.isKnownEven() &&
4663 ((MemVT.getScalarSizeInBits() == 8u ||
4664 MemVT.getScalarSizeInBits() == 16u ||
4665 MemVT.getScalarSizeInBits() == 32u ||
4666 MemVT.getScalarSizeInBits() == 64u))) {
4667 SDValue Lo =
4668 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4669 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4670 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
4671 SDValue Hi =
4672 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4673 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4674 StoreNode->getValue(),
4675 DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
4676 SDValue Result = DAG.getMemIntrinsicNode(
4677 AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
4678 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4679 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4680 return Result;
4681 }
4682 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
4683 assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
4684 SDValue Lo =
4685 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4686 DAG.getConstant(0, Dl, MVT::i64));
4687 SDValue Hi =
4688 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4689 DAG.getConstant(1, Dl, MVT::i64));
4690 SDValue Result = DAG.getMemIntrinsicNode(
4691 AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
4692 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4693 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4694 return Result;
4695 } else if (MemVT == MVT::i64x8) {
4696 SDValue Value = StoreNode->getValue();
4697 assert(Value->getValueType(0) == MVT::i64x8);
4698 SDValue Chain = StoreNode->getChain();
4699 SDValue Base = StoreNode->getBasePtr();
4700 EVT PtrVT = Base.getValueType();
4701 for (unsigned i = 0; i < 8; i++) {
4702 SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
4703 Value, DAG.getConstant(i, Dl, MVT::i32));
4704 SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
4705 DAG.getConstant(i * 8, Dl, PtrVT));
4706 Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
4707 StoreNode->getOriginalAlign());
4708 }
4709 return Chain;
4710 }
4711
4712 return SDValue();
4713}
4714
4715SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
4716 SelectionDAG &DAG) const {
4717 SDLoc DL(Op);
4718 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
4719 assert(LoadNode && "Expected custom lowering of a load node");
4720
4721 if (LoadNode->getMemoryVT() == MVT::i64x8) {
4722 SmallVector<SDValue, 8> Ops;
4723 SDValue Base = LoadNode->getBasePtr();
4724 SDValue Chain = LoadNode->getChain();
4725 EVT PtrVT = Base.getValueType();
4726 for (unsigned i = 0; i < 8; i++) {
4727 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
4728 DAG.getConstant(i * 8, DL, PtrVT));
4729 SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
4730 LoadNode->getPointerInfo(),
4731 LoadNode->getOriginalAlign());
4732 Ops.push_back(Part);
4733 Chain = SDValue(Part.getNode(), 1);
4734 }
4735 SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
4736 return DAG.getMergeValues({Loaded, Chain}, DL);
4737 }
4738
4739 // Custom lowering for extending v4i8 vector loads.
4740 EVT VT = Op->getValueType(0);
4741 assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
4742
4743 if (LoadNode->getMemoryVT() != MVT::v4i8)
4744 return SDValue();
4745
4746 unsigned ExtType;
4747 if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
4748 ExtType = ISD::SIGN_EXTEND;
4749 else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
4750 LoadNode->getExtensionType() == ISD::EXTLOAD)
4751 ExtType = ISD::ZERO_EXTEND;
4752 else
4753 return SDValue();
4754
4755 SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
4756 LoadNode->getBasePtr(), MachinePointerInfo());
4757 SDValue Chain = Load.getValue(1);
4758 SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
4759 SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
4760 SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
4761 Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
4762 DAG.getConstant(0, DL, MVT::i64));
4763 if (VT == MVT::v4i32)
4764 Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
4765 return DAG.getMergeValues({Ext, Chain}, DL);
4766}
4767
4768// Generate SUBS and CSEL for integer abs.
4769SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
4770 MVT VT = Op.getSimpleValueType();
4771
4772 if (VT.isVector())
4773 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
4774
4775 SDLoc DL(Op);
4776 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4777 Op.getOperand(0));
4778 // Generate SUBS & CSEL.
4779 SDValue Cmp =
4780 DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
4781 Op.getOperand(0), DAG.getConstant(0, DL, VT));
4782 return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
4783 DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
4784 Cmp.getValue(1));
4785}
4786
4787SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
4788 SelectionDAG &DAG) const {
4789 LLVM_DEBUG(dbgs() << "Custom lowering: ");
4790 LLVM_DEBUG(Op.dump());
4791
4792 switch (Op.getOpcode()) {
4793 default:
4794 llvm_unreachable("unimplemented operand");
4795 return SDValue();
4796 case ISD::BITCAST:
4797 return LowerBITCAST(Op, DAG);
4798 case ISD::GlobalAddress:
4799 return LowerGlobalAddress(Op, DAG);
4800 case ISD::GlobalTLSAddress:
4801 return LowerGlobalTLSAddress(Op, DAG);
4802 case ISD::SETCC:
4803 case ISD::STRICT_FSETCC:
4804 case ISD::STRICT_FSETCCS:
4805 return LowerSETCC(Op, DAG);
4806 case ISD::BR_CC:
4807 return LowerBR_CC(Op, DAG);
4808 case ISD::SELECT:
4809 return LowerSELECT(Op, DAG);
4810 case ISD::SELECT_CC:
4811 return LowerSELECT_CC(Op, DAG);
4812 case ISD::JumpTable:
4813 return LowerJumpTable(Op, DAG);
4814 case ISD::BR_JT:
4815 return LowerBR_JT(Op, DAG);
4816 case ISD::ConstantPool:
4817 return LowerConstantPool(Op, DAG);
4818 case ISD::BlockAddress:
4819 return LowerBlockAddress(Op, DAG);
4820 case ISD::VASTART:
4821 return LowerVASTART(Op, DAG);
4822 case ISD::VACOPY:
4823 return LowerVACOPY(Op, DAG);
4824 case ISD::VAARG:
4825 return LowerVAARG(Op, DAG);
4826 case ISD::ADDC:
4827 case ISD::ADDE:
4828 case ISD::SUBC:
4829 case ISD::SUBE:
4830 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
4831 case ISD::SADDO:
4832 case ISD::UADDO:
4833 case ISD::SSUBO:
4834 case ISD::USUBO:
4835 case ISD::SMULO:
4836 case ISD::UMULO:
4837 return LowerXALUO(Op, DAG);
4838 case ISD::FADD:
4839 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
4840 case ISD::FSUB:
4841 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
4842 case ISD::FMUL:
4843 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
4844 case ISD::FMA:
4845 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
4846 case ISD::FDIV:
4847 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
4848 case ISD::FNEG:
4849 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
4850 case ISD::FCEIL:
4851 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
4852 case ISD::FFLOOR:
4853 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
4854 case ISD::FNEARBYINT:
4855 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
4856 case ISD::FRINT:
4857 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
4858 case ISD::FROUND:
4859 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
4860 case ISD::FROUNDEVEN:
4861 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
4862 case ISD::FTRUNC:
4863 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
4864 case ISD::FSQRT:
4865 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
4866 case ISD::FABS:
4867 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
4868 case ISD::FP_ROUND:
4869 case ISD::STRICT_FP_ROUND:
4870 return LowerFP_ROUND(Op, DAG);
4871 case ISD::FP_EXTEND:
4872 return LowerFP_EXTEND(Op, DAG);
4873 case ISD::FRAMEADDR:
4874 return LowerFRAMEADDR(Op, DAG);
4875 case ISD::SPONENTRY:
4876 return LowerSPONENTRY(Op, DAG);
4877 case ISD::RETURNADDR:
4878 return LowerRETURNADDR(Op, DAG);
4879 case ISD::ADDROFRETURNADDR:
4880 return LowerADDROFRETURNADDR(Op, DAG);
4881 case ISD::CONCAT_VECTORS:
4882 return LowerCONCAT_VECTORS(Op, DAG);
4883 case ISD::INSERT_VECTOR_ELT:
4884 return LowerINSERT_VECTOR_ELT(Op, DAG);
4885 case ISD::EXTRACT_VECTOR_ELT:
4886 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4887 case ISD::BUILD_VECTOR:
4888 return LowerBUILD_VECTOR(Op, DAG);
4889 case ISD::VECTOR_SHUFFLE:
4890 return LowerVECTOR_SHUFFLE(Op, DAG);
4891 case ISD::SPLAT_VECTOR:
4892 return LowerSPLAT_VECTOR(Op, DAG);
4893 case ISD::EXTRACT_SUBVECTOR:
4894 return LowerEXTRACT_SUBVECTOR(Op, DAG);
4895 case ISD::INSERT_SUBVECTOR:
4896 return LowerINSERT_SUBVECTOR(Op, DAG);
4897 case ISD::SDIV:
4898 case ISD::UDIV:
4899 return LowerDIV(Op, DAG);
4900 case ISD::SMIN:
4901 case ISD::UMIN:
4902 case ISD::SMAX:
4903 case ISD::UMAX:
4904 return LowerMinMax(Op, DAG);
4905 case ISD::SRA:
4906 case ISD::SRL:
4907 case ISD::SHL:
4908 return LowerVectorSRA_SRL_SHL(Op, DAG);
4909 case ISD::SHL_PARTS:
4910 case ISD::SRL_PARTS:
4911 case ISD::SRA_PARTS:
4912 return LowerShiftParts(Op, DAG);
4913 case ISD::CTPOP:
4914 return LowerCTPOP(Op, DAG);
4915 case ISD::FCOPYSIGN:
4916 return LowerFCOPYSIGN(Op, DAG);
4917 case ISD::OR:
4918 return LowerVectorOR(Op, DAG);
4919 case ISD::XOR:
4920 return LowerXOR(Op, DAG);
4921 case ISD::PREFETCH:
4922 return LowerPREFETCH(Op, DAG);
4923 case ISD::SINT_TO_FP:
4924 case ISD::UINT_TO_FP:
4925 case ISD::STRICT_SINT_TO_FP:
4926 case ISD::STRICT_UINT_TO_FP:
4927 return LowerINT_TO_FP(Op, DAG);
4928 case ISD::FP_TO_SINT:
4929 case ISD::FP_TO_UINT:
4930 case ISD::STRICT_FP_TO_SINT:
4931 case ISD::STRICT_FP_TO_UINT:
4932 return LowerFP_TO_INT(Op, DAG);
4933 case ISD::FP_TO_SINT_SAT:
4934 case ISD::FP_TO_UINT_SAT:
4935 return LowerFP_TO_INT_SAT(Op, DAG);
4936 case ISD::FSINCOS:
4937 return LowerFSINCOS(Op, DAG);
4938 case ISD::FLT_ROUNDS_:
4939 return LowerFLT_ROUNDS_(Op, DAG);
4940 case ISD::SET_ROUNDING:
4941 return LowerSET_ROUNDING(Op, DAG);
4942 case ISD::MUL:
4943 return LowerMUL(Op, DAG);
4944 case ISD::MULHS:
4945 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED,
4946 /*OverrideNEON=*/true);
4947 case ISD::MULHU:
4948 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
4949 /*OverrideNEON=*/true);
4950 case ISD::INTRINSIC_WO_CHAIN:
4951 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4952 case ISD::STORE:
4953 return LowerSTORE(Op, DAG);
4954 case ISD::MSTORE:
4955 return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
4956 case ISD::MGATHER:
4957 return LowerMGATHER(Op, DAG);
4958 case ISD::MSCATTER:
4959 return LowerMSCATTER(Op, DAG);
4960 case ISD::VECREDUCE_SEQ_FADD:
4961 return LowerVECREDUCE_SEQ_FADD(Op, DAG);
4962 case ISD::VECREDUCE_ADD:
4963 case ISD::VECREDUCE_AND:
4964 case ISD::VECREDUCE_OR:
4965 case ISD::VECREDUCE_XOR:
4966 case ISD::VECREDUCE_SMAX:
4967 case ISD::VECREDUCE_SMIN:
4968 case ISD::VECREDUCE_UMAX:
4969 case ISD::VECREDUCE_UMIN:
4970 case ISD::VECREDUCE_FADD:
4971 case ISD::VECREDUCE_FMAX:
4972 case ISD::VECREDUCE_FMIN:
4973 return LowerVECREDUCE(Op, DAG);
4974 case ISD::ATOMIC_LOAD_SUB:
4975 return LowerATOMIC_LOAD_SUB(Op, DAG);
4976 case ISD::ATOMIC_LOAD_AND:
4977 return LowerATOMIC_LOAD_AND(Op, DAG);
4978 case ISD::DYNAMIC_STACKALLOC:
4979 return LowerDYNAMIC_STACKALLOC(Op, DAG);
4980 case ISD::VSCALE:
4981 return LowerVSCALE(Op, DAG);
4982 case ISD::ANY_EXTEND:
4983 case ISD::SIGN_EXTEND:
4984 case ISD::ZERO_EXTEND:
4985 return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
4986 case ISD::SIGN_EXTEND_INREG: {
4987 // Only custom lower when ExtraVT has a legal byte based element type.
4988 EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4989 EVT ExtraEltVT = ExtraVT.getVectorElementType();
4990 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
4991 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
4992 return SDValue();
4993
4994 return LowerToPredicatedOp(Op, DAG,
4995 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
4996 }
4997 case ISD::TRUNCATE:
4998 return LowerTRUNCATE(Op, DAG);
4999 case ISD::MLOAD:
5000 return LowerMLOAD(Op, DAG);
5001 case ISD::LOAD:
5002 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
5003 return LowerFixedLengthVectorLoadToSVE(Op, DAG);
5004 return LowerLOAD(Op, DAG);
5005 case ISD::ADD:
5006 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
5007 case ISD::AND:
5008 return LowerToScalableOp(Op, DAG);
5009 case ISD::SUB:
5010 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
5011 case ISD::FMAXIMUM:
5012 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
5013 case ISD::FMAXNUM:
5014 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
5015 case ISD::FMINIMUM:
5016 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
5017 case ISD::FMINNUM:
5018 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
5019 case ISD::VSELECT:
5020 return LowerFixedLengthVectorSelectToSVE(Op, DAG);
5021 case ISD::ABS:
5022 return LowerABS(Op, DAG);
5023 case ISD::BITREVERSE:
5024 return LowerBitreverse(Op, DAG);
5025 case ISD::BSWAP:
5026 return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
5027 case ISD::CTLZ:
5028 return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
5029 /*OverrideNEON=*/true);
5030 case ISD::CTTZ:
5031 return LowerCTTZ(Op, DAG);
5032 case ISD::VECTOR_SPLICE:
5033 return LowerVECTOR_SPLICE(Op, DAG);
5034 }
5035}
5036
5037 bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
5038 return !Subtarget->useSVEForFixedLengthVectors();
5039}
5040
5041 bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
5042 EVT VT, bool OverrideNEON) const {
5043 if (!Subtarget->useSVEForFixedLengthVectors())
5044 return false;
5045
5046 if (!VT.isFixedLengthVector())
5047 return false;
5048
5049 // Don't use SVE for vectors we cannot scalarize if required.
5050 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
5051 // Fixed length predicates should be promoted to i8.
5052 // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
5053 case MVT::i1:
5054 default:
5055 return false;
5056 case MVT::i8:
5057 case MVT::i16:
5058 case MVT::i32:
5059 case MVT::i64:
5060 case MVT::f16:
5061 case MVT::f32:
5062 case MVT::f64:
5063 break;
5064 }
5065
5066 // All SVE implementations support NEON sized vectors.
5067 if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
5068 return true;
5069
5070 // Ensure NEON MVTs only belong to a single register class.
5071 if (VT.getFixedSizeInBits() <= 128)
5072 return false;
5073
5074 // Don't use SVE for types that don't fit.
5075 if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
5076 return false;
5077
5078 // TODO: Perhaps an artificial restriction, but worth having whilst getting
5079 // the base fixed length SVE support in place.
5080 if (!VT.isPow2VectorType())
5081 return false;
5082
5083 return true;
5084}
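
The predicate above reduces to a handful of size checks once the element type is known. A minimal standalone sketch of that decision, assuming fixed-length SVE codegen is enabled and the element type has already been reduced to a bit width plus a predicate flag (modelUseSVEForFixedLengthVT is a hypothetical name, not LLVM API):

#include <cassert>

// Mirrors the checks above: reject predicate (i1) elements, accept NEON-sized
// vectors only when OverrideNEON is set, keep anything that already fits a
// NEON register out of SVE, reject what does not fit the minimum SVE register,
// and require a power-of-two size (equivalent to a power-of-two element count
// here, since every accepted element width is itself a power of two).
static bool modelUseSVEForFixedLengthVT(unsigned EltBits, bool IsPredicate,
                                        unsigned TotalBits, unsigned MinSVEBits,
                                        bool OverrideNEON) {
  if (IsPredicate) // the MVT::i1 case: fixed-length predicates are promoted
    return false;
  if (EltBits != 8 && EltBits != 16 && EltBits != 32 && EltBits != 64)
    return false;
  if (OverrideNEON && (TotalBits == 64 || TotalBits == 128))
    return true;
  if (TotalBits <= 128) // NEON MVTs must belong to a single register class
    return false;
  if (TotalBits > MinSVEBits) // does not fit in the guaranteed SVE width
    return false;
  return (TotalBits & (TotalBits - 1)) == 0;
}

int main() {
  assert(!modelUseSVEForFixedLengthVT(32, false, 128, 512, false));  // v4i32 stays on NEON
  assert(modelUseSVEForFixedLengthVT(32, false, 256, 512, false));   // v8i32 goes to SVE
  assert(!modelUseSVEForFixedLengthVT(32, false, 1024, 512, false)); // too wide for 512-bit SVE
}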
5085
5086 //===----------------------------------------------------------------------===//
5087 // Calling Convention Implementation
5088 //===----------------------------------------------------------------------===//
5089
5090 /// Selects the correct CCAssignFn for a given CallingConvention value.
5091 CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
5092 bool IsVarArg) const {
5093 switch (CC) {
5094 default:
5095 report_fatal_error("Unsupported calling convention.");
5096 case CallingConv::WebKit_JS:
5097 return CC_AArch64_WebKit_JS;
5098 case CallingConv::GHC:
5099 return CC_AArch64_GHC;
5100 case CallingConv::C:
5101 case CallingConv::Fast:
5102 case CallingConv::PreserveMost:
5103 case CallingConv::CXX_FAST_TLS:
5104 case CallingConv::Swift:
5105 case CallingConv::SwiftTail:
5106 case CallingConv::Tail:
5107 if (Subtarget->isTargetWindows() && IsVarArg)
5108 return CC_AArch64_Win64_VarArg;
5109 if (!Subtarget->isTargetDarwin())
5110 return CC_AArch64_AAPCS;
5111 if (!IsVarArg)
5112 return CC_AArch64_DarwinPCS;
5113 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
5114 : CC_AArch64_DarwinPCS_VarArg;
5115 case CallingConv::Win64:
5116 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
5117 case CallingConv::CFGuard_Check:
5118 return CC_AArch64_Win64_CFGuard_Check;
5119 case CallingConv::AArch64_VectorCall:
5120 case CallingConv::AArch64_SVE_VectorCall:
5121 return CC_AArch64_AAPCS;
5122 }
5123}
5124
5125 CCAssignFn *
5126 AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
5127 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
5128 : RetCC_AArch64_AAPCS;
5129}
5130
5131 SDValue AArch64TargetLowering::LowerFormalArguments(
5132 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
5133 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5134 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5135 MachineFunction &MF = DAG.getMachineFunction();
5136 MachineFrameInfo &MFI = MF.getFrameInfo();
5137 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
5138
5139 // Assign locations to all of the incoming arguments.
5140 SmallVector<CCValAssign, 16> ArgLocs;
5141 DenseMap<unsigned, SDValue> CopiedRegs;
5142 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
5143 *DAG.getContext());
5144
5145 // At this point, Ins[].VT may already be promoted to i32. To correctly
5146 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5147 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5148 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
5149 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
5150 // LocVT.
5151 unsigned NumArgs = Ins.size();
5152 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
5153 unsigned CurArgIdx = 0;
5154 for (unsigned i = 0; i != NumArgs; ++i) {
5155 MVT ValVT = Ins[i].VT;
5156 if (Ins[i].isOrigArg()) {
5157 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
5158 CurArgIdx = Ins[i].getOrigArgIndex();
5159
5160 // Get type of the original argument.
5161 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
5162 /*AllowUnknown*/ true);
5163 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
5164 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5165 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5166 ValVT = MVT::i8;
5167 else if (ActualMVT == MVT::i16)
5168 ValVT = MVT::i16;
5169 }
5170 bool UseVarArgCC = false;
5171 if (IsWin64)
5172 UseVarArgCC = isVarArg;
5173 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
5174 bool Res =
5175 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
5176 assert(!Res && "Call operand has unhandled type");
5177 (void)Res;
5178 }
5179 SmallVector<SDValue, 16> ArgValues;
5180 unsigned ExtraArgLocs = 0;
5181 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5182 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5183
5184 if (Ins[i].Flags.isByVal()) {
5185 // Byval is used for HFAs in the PCS, but the system should work in a
5186 // non-compliant manner for larger structs.
5187 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5188 int Size = Ins[i].Flags.getByValSize();
5189 unsigned NumRegs = (Size + 7) / 8;
5190
5191 // FIXME: This works on big-endian for composite byvals, which are the common
5192 // case. It should also work for fundamental types too.
5193 unsigned FrameIdx =
5194 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
5195 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
5196 InVals.push_back(FrameIdxN);
5197
5198 continue;
5199 }
5200
5201 if (Ins[i].Flags.isSwiftAsync())
5202 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5203
5204 SDValue ArgValue;
5205 if (VA.isRegLoc()) {
5206 // Arguments stored in registers.
5207 EVT RegVT = VA.getLocVT();
5208 const TargetRegisterClass *RC;
5209
5210 if (RegVT == MVT::i32)
5211 RC = &AArch64::GPR32RegClass;
5212 else if (RegVT == MVT::i64)
5213 RC = &AArch64::GPR64RegClass;
5214 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
5215 RC = &AArch64::FPR16RegClass;
5216 else if (RegVT == MVT::f32)
5217 RC = &AArch64::FPR32RegClass;
5218 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
5219 RC = &AArch64::FPR64RegClass;
5220 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
5221 RC = &AArch64::FPR128RegClass;
5222 else if (RegVT.isScalableVector() &&
5223 RegVT.getVectorElementType() == MVT::i1)
5224 RC = &AArch64::PPRRegClass;
5225 else if (RegVT.isScalableVector())
5226 RC = &AArch64::ZPRRegClass;
5227 else
5228 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
5229
5230 // Transform the arguments in physical registers into virtual ones.
5231 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
5232 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
5233
5234 // If this is an 8, 16 or 32-bit value, it is really passed promoted
5235 // to 64 bits. Insert an assert[sz]ext to capture this, then
5236 // truncate to the right size.
5237 switch (VA.getLocInfo()) {
5238 default:
5239 llvm_unreachable("Unknown loc info!");
5240 case CCValAssign::Full:
5241 break;
5242 case CCValAssign::Indirect:
5243 assert(VA.getValVT().isScalableVector() &&
5244 "Only scalable vectors can be passed indirectly");
5245 break;
5246 case CCValAssign::BCvt:
5247 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
5248 break;
5249 case CCValAssign::AExt:
5250 case CCValAssign::SExt:
5251 case CCValAssign::ZExt:
5252 break;
5253 case CCValAssign::AExtUpper:
5254 ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
5255 DAG.getConstant(32, DL, RegVT));
5256 ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
5257 break;
5258 }
5259 } else { // VA.isRegLoc()
5260 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
5261 unsigned ArgOffset = VA.getLocMemOffset();
5262 unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
5263 ? VA.getLocVT().getSizeInBits()
5264 : VA.getValVT().getSizeInBits()) / 8;
5265
5266 uint32_t BEAlign = 0;
5267 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
5268 !Ins[i].Flags.isInConsecutiveRegs())
5269 BEAlign = 8 - ArgSize;
5270
5271 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
5272
5273 // Create load nodes to retrieve arguments from the stack.
5274 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5275
5276 // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
5277 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
5278 MVT MemVT = VA.getValVT();
5279
5280 switch (VA.getLocInfo()) {
5281 default:
5282 break;
5283 case CCValAssign::Trunc:
5284 case CCValAssign::BCvt:
5285 MemVT = VA.getLocVT();
5286 break;
5287 case CCValAssign::Indirect:
5288 assert(VA.getValVT().isScalableVector() &&
5289 "Only scalable vectors can be passed indirectly");
5290 MemVT = VA.getLocVT();
5291 break;
5292 case CCValAssign::SExt:
5293 ExtType = ISD::SEXTLOAD;
5294 break;
5295 case CCValAssign::ZExt:
5296 ExtType = ISD::ZEXTLOAD;
5297 break;
5298 case CCValAssign::AExt:
5299 ExtType = ISD::EXTLOAD;
5300 break;
5301 }
5302
5303 ArgValue = DAG.getExtLoad(
5304 ExtType, DL, VA.getLocVT(), Chain, FIN,
5305 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
5306 MemVT);
5307 }
5308
5309 if (VA.getLocInfo() == CCValAssign::Indirect) {
5310 assert(VA.getValVT().isScalableVector() &&
5311 "Only scalable vectors can be passed indirectly");
5312
5313 uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
5314 unsigned NumParts = 1;
5315 if (Ins[i].Flags.isInConsecutiveRegs()) {
5316 assert(!Ins[i].Flags.isInConsecutiveRegsLast());
5317 while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
5318 ++NumParts;
5319 }
5320
5321 MVT PartLoad = VA.getValVT();
5322 SDValue Ptr = ArgValue;
5323
5324 // Ensure we generate all loads for each tuple part, whilst updating the
5325 // pointer after each load correctly using vscale.
5326 while (NumParts > 0) {
5327 ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
5328 InVals.push_back(ArgValue);
5329 NumParts--;
5330 if (NumParts > 0) {
5331 SDValue BytesIncrement = DAG.getVScale(
5332 DL, Ptr.getValueType(),
5333 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
5334 SDNodeFlags Flags;
5335 Flags.setNoUnsignedWrap(true);
5336 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5337 BytesIncrement, Flags);
5338 ExtraArgLocs++;
5339 i++;
5340 }
5341 }
5342 } else {
5343 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
5344 ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
5345 ArgValue, DAG.getValueType(MVT::i32));
5346 InVals.push_back(ArgValue);
5347 }
5348 }
5349 assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
5350
5351 // varargs
5352 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5353 if (isVarArg) {
5354 if (!Subtarget->isTargetDarwin() || IsWin64) {
5355 // The AAPCS variadic function ABI is identical to the non-variadic
5356 // one. As a result there may be more arguments in registers and we should
5357 // save them for future reference.
5358 // Win64 variadic functions also pass arguments in registers, but all float
5359 // arguments are passed in integer registers.
5360 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
5361 }
5362
5363 // This will point to the next argument passed via stack.
5364 unsigned StackOffset = CCInfo.getNextStackOffset();
5365 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
5366 StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
5367 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
5368
5369 if (MFI.hasMustTailInVarArgFunc()) {
5370 SmallVector<MVT, 2> RegParmTypes;
5371 RegParmTypes.push_back(MVT::i64);
5372 RegParmTypes.push_back(MVT::f128);
5373 // Compute the set of forwarded registers. The rest are scratch.
5374 SmallVectorImpl<ForwardedRegister> &Forwards =
5375 FuncInfo->getForwardedMustTailRegParms();
5376 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
5377 CC_AArch64_AAPCS);
5378
5379 // Conservatively forward X8, since it might be used for aggregate return.
5380 if (!CCInfo.isAllocated(AArch64::X8)) {
5381 unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
5382 Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
5383 }
5384 }
5385 }
5386
5387 // On Windows, InReg pointers must be returned, so record the pointer in a
5388 // virtual register at the start of the function so it can be returned in the
5389 // epilogue.
5390 if (IsWin64) {
5391 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
5392 if (Ins[I].Flags.isInReg()) {
5393 assert(!FuncInfo->getSRetReturnReg());
5394
5395 MVT PtrTy = getPointerTy(DAG.getDataLayout());
5396 Register Reg =
5397 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
5398 FuncInfo->setSRetReturnReg(Reg);
5399
5400 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
5401 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
5402 break;
5403 }
5404 }
5405 }
5406
5407 unsigned StackArgSize = CCInfo.getNextStackOffset();
5408 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5409 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
5410 // This is a non-standard ABI so by fiat I say we're allowed to make full
5411 // use of the stack area to be popped, which must be aligned to 16 bytes in
5412 // any case:
5413 StackArgSize = alignTo(StackArgSize, 16);
5414
5415 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
5416 // a multiple of 16.
5417 FuncInfo->setArgumentStackToRestore(StackArgSize);
5418
5419 // This realignment carries over to the available bytes below. Our own
5420 // callers will guarantee the space is free by giving an aligned value to
5421 // CALLSEQ_START.
5422 }
5423 // Even if we're not expected to free up the space, it's useful to know how
5424 // much is there while considering tail calls (because we can reuse it).
5425 FuncInfo->setBytesInStackArgArea(StackArgSize);
5426
5427 if (Subtarget->hasCustomCallingConv())
5428 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
5429
5430 return Chain;
5431}
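
Two bits of arithmetic in the argument-lowering body above are easy to misread: the byval path rounds the byte size up to whole 8-byte registers (NumRegs = (Size + 7) / 8), and on big-endian targets small stack arguments get a pad (BEAlign = 8 - ArgSize) so the value is read from the correct end of its slot. A hedged standalone sketch, with hypothetical helper names:

#include <cstdio>

// NumRegs = (Size + 7) / 8 : round a byval size up to whole 8-byte registers.
static unsigned byvalNumRegs(unsigned SizeInBytes) { return (SizeInBytes + 7) / 8; }

// BEAlign = 8 - ArgSize : pad added on big-endian so a small stack argument is
// loaded from the correct end of its 8-byte slot.
static unsigned bigEndianPad(unsigned ArgSizeInBytes, bool IsLittleEndian) {
  return (!IsLittleEndian && ArgSizeInBytes < 8) ? 8 - ArgSizeInBytes : 0;
}

int main() {
  // 20 byval bytes occupy three X registers; a 2-byte big-endian stack
  // argument sits 6 bytes into its slot.
  std::printf("%u %u\n", byvalNumRegs(20),
              bigEndianPad(2, /*IsLittleEndian=*/false)); // prints "3 6"
}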
5432
5433 void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
5434 SelectionDAG &DAG,
5435 const SDLoc &DL,
5436 SDValue &Chain) const {
5437 MachineFunction &MF = DAG.getMachineFunction();
5438 MachineFrameInfo &MFI = MF.getFrameInfo();
5439 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5440 auto PtrVT = getPointerTy(DAG.getDataLayout());
5441 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
5442
5443 SmallVector<SDValue, 8> MemOps;
5444
5445 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
5446 AArch64::X3, AArch64::X4, AArch64::X5,
5447 AArch64::X6, AArch64::X7 };
5448 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
5449 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
5450
5451 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
5452 int GPRIdx = 0;
5453 if (GPRSaveSize != 0) {
5454 if (IsWin64) {
5455 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
5456 if (GPRSaveSize & 15)
5457 // The extra size here, if triggered, will always be 8.
5458 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
5459 } else
5460 GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
5461
5462 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
5463
5464 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
5465 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
5466 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
5467 SDValue Store = DAG.getStore(
5468 Val.getValue(1), DL, Val, FIN,
5469 IsWin64
5470 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
5471 GPRIdx,
5472 (i - FirstVariadicGPR) * 8)
5473 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
5474 MemOps.push_back(Store);
5475 FIN =
5476 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
5477 }
5478 }
5479 FuncInfo->setVarArgsGPRIndex(GPRIdx);
5480 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
5481
5482 if (Subtarget->hasFPARMv8() && !IsWin64) {
5483 static const MCPhysReg FPRArgRegs[] = {
5484 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
5485 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
5486 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
5487 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
5488
5489 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
5490 int FPRIdx = 0;
5491 if (FPRSaveSize != 0) {
5492 FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
5493
5494 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
5495
5496 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
5497 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
5498 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
5499
5500 SDValue Store = DAG.getStore(
5501 Val.getValue(1), DL, Val, FIN,
5502 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
5503 MemOps.push_back(Store);
5504 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
5505 DAG.getConstant(16, DL, PtrVT));
5506 }
5507 }
5508 FuncInfo->setVarArgsFPRIndex(FPRIdx);
5509 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
5510 }
5511
5512 if (!MemOps.empty()) {
5513 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
5514 }
5515}
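
The save-area sizing above is plain arithmetic over the argument registers left unallocated after the fixed arguments. A minimal sketch under the same assumptions as the code (8 GPRs x0-x7 at 8 bytes each, 8 FPRs q0-q7 at 16 bytes each, no FPR area on Win64); modelVarArgSaveSizes is a hypothetical helper, not LLVM API:

#include <cstdio>

// GPRSaveSize = 8 * (8 - FirstVariadicGPR) and, outside Win64 and given FP
// support, FPRSaveSize = 16 * (8 - FirstVariadicFPR). On Win64 an extra pad
// object of 16 - (GPRSaveSize & 15) bytes keeps the save area 16-byte aligned.
static void modelVarArgSaveSizes(unsigned FirstVariadicGPR,
                                 unsigned FirstVariadicFPR, bool IsWin64,
                                 unsigned &GPRSaveSize, unsigned &GPRPad,
                                 unsigned &FPRSaveSize) {
  GPRSaveSize = 8 * (8 - FirstVariadicGPR);
  GPRPad = (IsWin64 && (GPRSaveSize & 15)) ? 16 - (GPRSaveSize & 15) : 0;
  FPRSaveSize = IsWin64 ? 0 : 16 * (8 - FirstVariadicFPR);
}

int main() {
  unsigned GPR, Pad, FPR;
  modelVarArgSaveSizes(/*FirstVariadicGPR=*/3, /*FirstVariadicFPR=*/1,
                       /*IsWin64=*/false, GPR, Pad, FPR);
  std::printf("%u %u %u\n", GPR, Pad, FPR); // 40 0 112: x3-x7 and q1-q7 saved
}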
5516
5517 /// LowerCallResult - Lower the result values of a call into the
5518 /// appropriate copies out of appropriate physical registers.
5519 SDValue AArch64TargetLowering::LowerCallResult(
5520 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5521 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5522 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
5523 SDValue ThisVal) const {
5524 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
5525 // Assign locations to each value returned by this call.
5526 SmallVector<CCValAssign, 16> RVLocs;
5527 DenseMap<unsigned, SDValue> CopiedRegs;
5528 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5529 *DAG.getContext());
5530 CCInfo.AnalyzeCallResult(Ins, RetCC);
5531
5532 // Copy all of the result registers out of their specified physreg.
5533 for (unsigned i = 0; i != RVLocs.size(); ++i) {
5534 CCValAssign VA = RVLocs[i];
5535
5536 // Pass 'this' value directly from the argument to return value, to avoid
5537 // reg unit interference
5538 if (i == 0 && isThisReturn) {
5539 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
5540 "unexpected return calling convention register assignment");
5541 InVals.push_back(ThisVal);
5542 continue;
5543 }
5544
5545 // Avoid copying a physreg twice since RegAllocFast is incompetent and only
5546 // allows one use of a physreg per block.
5547 SDValue Val = CopiedRegs.lookup(VA.getLocReg());
5548 if (!Val) {
5549 Val =
5550 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
5551 Chain = Val.getValue(1);
5552 InFlag = Val.getValue(2);
5553 CopiedRegs[VA.getLocReg()] = Val;
5554 }
5555
5556 switch (VA.getLocInfo()) {
5557 default:
5558 llvm_unreachable("Unknown loc info!");
5559 case CCValAssign::Full:
5560 break;
5561 case CCValAssign::BCvt:
5562 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5563 break;
5564 case CCValAssign::AExtUpper:
5565 Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
5566 DAG.getConstant(32, DL, VA.getLocVT()));
5567 LLVM_FALLTHROUGH;
5568 case CCValAssign::AExt:
5569 LLVM_FALLTHROUGH;
5570 case CCValAssign::ZExt:
5571 Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
5572 break;
5573 }
5574
5575 InVals.push_back(Val);
5576 }
5577
5578 return Chain;
5579}
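
The CCValAssign::AExtUpper case above recovers an i32 result that was packed into the top half of a 64-bit location register: shift right by 32, then zero-extend or truncate to the value type. A sketch of just that bit manipulation (extractUpperHalf is a hypothetical name):

#include <cassert>
#include <cstdint>

// Models CCValAssign::AExtUpper on the result path: shift the 64-bit location
// value right by 32, then truncate to the 32-bit value type.
static uint32_t extractUpperHalf(uint64_t LocVal) {
  return static_cast<uint32_t>(LocVal >> 32);
}

int main() {
  // Two i32 halves packed into one X register.
  uint64_t Packed = (uint64_t{0xAABBCCDD} << 32) | 0x11223344u;
  assert(extractUpperHalf(Packed) == 0xAABBCCDDu);
}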
5580
5581 /// Return true if the calling convention is one that we can guarantee TCO for.
5582 static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
5583 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
5584 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
5585}
5586
5587 /// Return true if we might ever do TCO for calls with this calling convention.
5588 static bool mayTailCallThisCC(CallingConv::ID CC) {
5589 switch (CC) {
5590 case CallingConv::C:
5591 case CallingConv::AArch64_SVE_VectorCall:
5592 case CallingConv::PreserveMost:
5593 case CallingConv::Swift:
5594 case CallingConv::SwiftTail:
5595 case CallingConv::Tail:
5596 case CallingConv::Fast:
5597 return true;
5598 default:
5599 return false;
5600 }
5601}
5602
5603 bool AArch64TargetLowering::isEligibleForTailCallOptimization(
5604 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
5605 const SmallVectorImpl<ISD::OutputArg> &Outs,
5606 const SmallVectorImpl<SDValue> &OutVals,
5607 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
5608 if (!mayTailCallThisCC(CalleeCC))
5609 return false;
5610
5611 MachineFunction &MF = DAG.getMachineFunction();
5612 const Function &CallerF = MF.getFunction();
5613 CallingConv::ID CallerCC = CallerF.getCallingConv();
5614
5615 // Functions using the C or Fast calling convention that have an SVE signature
5616 // preserve more registers and should assume the SVE_VectorCall CC.
5617 // The check for matching callee-saved regs will determine whether it is
5618 // eligible for TCO.
5619 if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
5620 AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
5621 CallerCC = CallingConv::AArch64_SVE_VectorCall;
5622
5623 bool CCMatch = CallerCC == CalleeCC;
5624
5625 // When using the Windows calling convention on a non-windows OS, we want
5626 // to back up and restore X18 in such functions; we can't do a tail call
5627 // from those functions.
5628 if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
5629 CalleeCC != CallingConv::Win64)
5630 return false;
5631
5632 // Byval parameters hand the function a pointer directly into the stack area
5633 // we want to reuse during a tail call. Working around this *is* possible (see
5634 // X86) but less efficient and uglier in LowerCall.
5635 for (Function::const_arg_iterator i = CallerF.arg_begin(),
5636 e = CallerF.arg_end();
5637 i != e; ++i) {
5638 if (i->hasByValAttr())
5639 return false;
5640
5641 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
5642 // In this case, it is necessary to save/restore X0 in the callee. Tail
5643 // call opt interferes with this. So we disable tail call opt when the
5644 // caller has an argument with "inreg" attribute.
5645
5646 // FIXME: Check whether the callee also has an "inreg" argument.
5647 if (i->hasInRegAttr())
5648 return false;
5649 }
5650
5651 if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
5652 return CCMatch;
5653
5654 // Externally-defined functions with weak linkage should not be
5655 // tail-called on AArch64 when the OS does not support dynamic
5656 // pre-emption of symbols, as the AAELF spec requires normal calls
5657 // to undefined weak functions to be replaced with a NOP or jump to the
5658 // next instruction. The behaviour of branch instructions in this
5659 // situation (as used for tail calls) is implementation-defined, so we
5660 // cannot rely on the linker replacing the tail call with a return.
5661 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5662 const GlobalValue *GV = G->getGlobal();
5663 const Triple &TT = getTargetMachine().getTargetTriple();
5664 if (GV->hasExternalWeakLinkage() &&
5665 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
5666 return false;
5667 }
5668
5669 // Now we search for cases where we can use a tail call without changing the
5670 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
5671 // concept.
5672
5673 // I want anyone implementing a new calling convention to think long and hard
5674 // about this assert.
5675 assert((!isVarArg || CalleeCC == CallingConv::C) &&
5676 "Unexpected variadic calling convention");
5677
5678 LLVMContext &C = *DAG.getContext();
5679 if (isVarArg && !Outs.empty()) {
5680 // At least two cases here: if caller is fastcc then we can't have any
5681 // memory arguments (we'd be expected to clean up the stack afterwards). If
5682 // caller is C then we could potentially use its argument area.
5683
5684 // FIXME: for now we take the most conservative of these in both cases:
5685 // disallow all variadic memory operands.
5686 SmallVector<CCValAssign, 16> ArgLocs;
5687 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5688
5689 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
5690 for (const CCValAssign &ArgLoc : ArgLocs)
5691 if (!ArgLoc.isRegLoc())
5692 return false;
5693 }
5694
5695 // Check that the call results are passed in the same way.
5696 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
5697 CCAssignFnForCall(CalleeCC, isVarArg),
5698 CCAssignFnForCall(CallerCC, isVarArg)))
5699 return false;
5700 // The callee has to preserve all registers the caller needs to preserve.
5701 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5702 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5703 if (!CCMatch) {
5704 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5705 if (Subtarget->hasCustomCallingConv()) {
5706 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
5707 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
5708 }
5709 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5710 return false;
5711 }
5712
5713 // Nothing more to check if the callee is taking no arguments
5714 if (Outs.empty())
5715 return true;
5716
5717 SmallVector<CCValAssign, 16> ArgLocs;
5718 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5719
5720 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
5721
5722 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5723
5724 // If any of the arguments is passed indirectly, it must be SVE, so the
5725 // 'getBytesInStackArgArea' is not sufficient to determine whether we need to
5726 // allocate space on the stack. That is why we explicitly determine here
5727 // that the call cannot be a tailcall.
5728 if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
5729 assert((A.getLocInfo() != CCValAssign::Indirect ||
5730 A.getValVT().isScalableVector()) &&
5731 "Expected value to be scalable");
5732 return A.getLocInfo() == CCValAssign::Indirect;
5733 }))
5734 return false;
5735
5736 // If the stack arguments for this call do not fit into our own save area then
5737 // the call cannot be made tail.
5738 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
5739 return false;
5740
5741 const MachineRegisterInfo &MRI = MF.getRegInfo();
5742 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5743 return false;
5744
5745 return true;
5746}
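
When the calling conventions differ, the decision above ultimately hinges on a subset check: the callee must preserve every register the caller needs preserved. A toy model over a flat 64-register bitmask (preservedIsSuperset is hypothetical and stands in for TRI->regmaskSubsetEqual; real regmasks are arrays of 32-bit words):

#include <cassert>
#include <cstdint>

// A tail call is only allowed when the callee preserves at least every
// register the caller relies on, i.e. the caller's preserved set is a subset
// of the callee's.
static bool preservedIsSuperset(uint64_t CallerPreserved,
                                uint64_t CalleePreserved) {
  return (CallerPreserved & ~CalleePreserved) == 0;
}

int main() {
  uint64_t Caller = 0xF0;    // caller needs r4-r7 preserved
  uint64_t CalleeOK = 0xFC;  // callee preserves r2-r7
  uint64_t CalleeBad = 0x70; // callee clobbers r7
  assert(preservedIsSuperset(Caller, CalleeOK));
  assert(!preservedIsSuperset(Caller, CalleeBad));
}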
5747
5748 SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
5749 SelectionDAG &DAG,
5750 MachineFrameInfo &MFI,
5751 int ClobberedFI) const {
5752 SmallVector<SDValue, 8> ArgChains;
5753 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
5754 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
5755
5756 // Include the original chain at the beginning of the list. When this is
5757 // used by target LowerCall hooks, this helps legalize find the
5758 // CALLSEQ_BEGIN node.
5759 ArgChains.push_back(Chain);
5760
5761 // Add a chain value for each stack-argument load that overlaps the clobbered object.
5762 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
5763 UE = DAG.getEntryNode().getNode()->use_end();
5764 U != UE; ++U)
5765 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
5766 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
5767 if (FI->getIndex() < 0) {
5768 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
5769 int64_t InLastByte = InFirstByte;
5770 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
5771
5772 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
5773 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
5774 ArgChains.push_back(SDValue(L, 1));
5775 }
5776
5777 // Build a tokenfactor for all the chains.
5778 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
5779}
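
The condition inside the loop above is the standard closed-interval overlap test between the clobbered frame object [FirstByte, LastByte] and each incoming stack-argument load. Stated standalone (byteRangesOverlap is a hypothetical name):

#include <cassert>
#include <cstdint>

// True when the closed byte ranges [AFirst, ALast] and [BFirst, BLast] share
// at least one byte; this is exactly the disjunction used above to decide
// whether a stack-argument load must be chained before the clobbering store.
static bool byteRangesOverlap(int64_t AFirst, int64_t ALast, int64_t BFirst,
                              int64_t BLast) {
  return (BFirst <= AFirst && AFirst <= BLast) ||
         (AFirst <= BFirst && BFirst <= ALast);
}

int main() {
  assert(byteRangesOverlap(0, 7, 4, 11));  // partial overlap
  assert(!byteRangesOverlap(0, 7, 8, 15)); // adjacent slots do not overlap
}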
5780
5781 bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
5782 bool TailCallOpt) const {
5783 return (CallCC == CallingConv::Fast && TailCallOpt) ||
5784 CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
5785}
5786
5787 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
5788 /// and add input and output parameter nodes.
5789 SDValue
5790 AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
5791 SmallVectorImpl<SDValue> &InVals) const {
5792 SelectionDAG &DAG = CLI.DAG;
5793 SDLoc &DL = CLI.DL;
5794 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
5795 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
5796 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
5797 SDValue Chain = CLI.Chain;
5798 SDValue Callee = CLI.Callee;
5799 bool &IsTailCall = CLI.IsTailCall;
5800 CallingConv::ID CallConv = CLI.CallConv;
5801 bool IsVarArg = CLI.IsVarArg;
5802
5803 MachineFunction &MF = DAG.getMachineFunction();
5804 MachineFunction::CallSiteInfo CSInfo;
5805 bool IsThisReturn = false;
5806
5807 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5808 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5809 bool IsSibCall = false;
5810 bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CallConv);
5811
5812 // Check callee args/returns for SVE registers and set calling convention
5813 // accordingly.
5814 if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
5815 bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
5816 return Out.VT.isScalableVector();
5817 });
5818 bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
5819 return In.VT.isScalableVector();
5820 });
5821
5822 if (CalleeInSVE || CalleeOutSVE)
5823 CallConv = CallingConv::AArch64_SVE_VectorCall;
5824 }
5825
5826 if (IsTailCall) {
5827 // Check if it's really possible to do a tail call.
5828 IsTailCall = isEligibleForTailCallOptimization(
5829 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
5830
5831 // A sibling call is one where we're under the usual C ABI and not planning
5832 // to change that but can still do a tail call:
5833 if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail &&
5834 CallConv != CallingConv::SwiftTail)
5835 IsSibCall = true;
5836
5837 if (IsTailCall)
5838 ++NumTailCalls;
5839 }
5840
5841 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
5842 report_fatal_error("failed to perform tail call elimination on a call "
5843 "site marked musttail");
5844
5845 // Analyze operands of the call, assigning locations to each operand.
5846 SmallVector<CCValAssign, 16> ArgLocs;
5847 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
5848 *DAG.getContext());
5849
5850 if (IsVarArg) {
5851 // Handle fixed and variable vector arguments differently.
5852 // Variable vector arguments always go into memory.
5853 unsigned NumArgs = Outs.size();
5854
5855 for (unsigned i = 0; i != NumArgs; ++i) {
5856 MVT ArgVT = Outs[i].VT;
5857 if (!Outs[i].IsFixed && ArgVT.isScalableVector())
5858 report_fatal_error("Passing SVE types to variadic functions is "
5859 "currently not supported");
5860
5861 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5862 bool UseVarArgCC = !Outs[i].IsFixed;
5863 // On Windows, the fixed arguments in a vararg call are passed in GPRs
5864 // too, so use the vararg CC to force them to integer registers.
5865 if (IsCalleeWin64)
5866 UseVarArgCC = true;
5867 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
5868 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
5869 assert(!Res && "Call operand has unhandled type");
5870 (void)Res;
5871 }
5872 } else {
5873 // At this point, Outs[].VT may already be promoted to i32. To correctly
5874 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5875 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5876 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
5877 // we use a special version of AnalyzeCallOperands to pass in ValVT and
5878 // LocVT.
5879 unsigned NumArgs = Outs.size();
5880 for (unsigned i = 0; i != NumArgs; ++i) {
5881 MVT ValVT = Outs[i].VT;
5882 // Get type of the original argument.
5883 EVT ActualVT = getValueType(DAG.getDataLayout(),
5884 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
5885 /*AllowUnknown*/ true);
5886 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
5887 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5888 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5889 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5890 ValVT = MVT::i8;
5891 else if (ActualMVT == MVT::i16)
5892 ValVT = MVT::i16;
5893
5894 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
5895 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
5896 assert(!Res && "Call operand has unhandled type");
5897 (void)Res;
5898 }
5899 }
5900
5901 // Get a count of how many bytes are to be pushed on the stack.
5902 unsigned NumBytes = CCInfo.getNextStackOffset();
5903
5904 if (IsSibCall) {
5905 // Since we're not changing the ABI to make this a tail call, the memory
5906 // operands are already available in the caller's incoming argument space.
5907 NumBytes = 0;
5908 }
5909
5910 // FPDiff is the byte offset of the call's argument area from the callee's.
5911 // Stores to callee stack arguments will be placed in FixedStackSlots offset
5912 // by this amount for a tail call. In a sibling call it must be 0 because the
5913 // caller will deallocate the entire stack and the callee still expects its
5914 // arguments to begin at SP+0. Completely unused for non-tail calls.
5915 int FPDiff = 0;
5916
5917 if (IsTailCall && !IsSibCall) {
5918 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
5919
5920 // Since callee will pop argument stack as a tail call, we must keep the
5921 // popped size 16-byte aligned.
5922 NumBytes = alignTo(NumBytes, 16);
5923
5924 // FPDiff will be negative if this tail call requires more space than we
5925 // would automatically have in our incoming argument space. Positive if we
5926 // can actually shrink the stack.
5927 FPDiff = NumReusableBytes - NumBytes;
5928
5929 // Update the required reserved area if this is the tail call requiring the
5930 // most argument stack space.
5931 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
5932 FuncInfo->setTailCallReservedStack(-FPDiff);
5933
5934 // The stack pointer must be 16-byte aligned at all times it's used for a
5935 // memory operation, which in practice means at *all* times and in
5936 // particular across call boundaries. Therefore our own arguments started at
5937 // a 16-byte aligned SP and the delta applied for the tail call should
5938 // satisfy the same constraint.
5939 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
5940 }
5941
5942 // Adjust the stack pointer for the new arguments...
5943 // These operations are automatically eliminated by the prolog/epilog pass
5944 if (!IsSibCall)
5945 Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL);
5946
5947 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
5948 getPointerTy(DAG.getDataLayout()));
5949
5950 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5951 SmallSet<unsigned, 8> RegsUsed;
5952 SmallVector<SDValue, 8> MemOpChains;
5953 auto PtrVT = getPointerTy(DAG.getDataLayout());
5954
5955 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
5956 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
5957 for (const auto &F : Forwards) {
5958 SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
5959 RegsToPass.emplace_back(F.PReg, Val);
5960 }
5961 }
5962
5963 // Walk the register/memloc assignments, inserting copies/loads.
5964 unsigned ExtraArgLocs = 0;
5965 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5966 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5967 SDValue Arg = OutVals[i];
5968 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5969
5970 // Promote the value if needed.
5971 switch (VA.getLocInfo()) {
5972 default:
5973 llvm_unreachable("Unknown loc info!");
5974 case CCValAssign::Full:
5975 break;
5976 case CCValAssign::SExt:
5977 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
5978 break;
5979 case CCValAssign::ZExt:
5980 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
5981 break;
5982 case CCValAssign::AExt:
5983 if (Outs[i].ArgVT == MVT::i1) {
5984 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
5985 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
5986 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
5987 }
5988 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5989 break;
5990 case CCValAssign::AExtUpper:
5991 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
5992 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5993 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
5994 DAG.getConstant(32, DL, VA.getLocVT()));
5995 break;
5996 case CCValAssign::BCvt:
5997 Arg = DAG.getBitcast(VA.getLocVT(), Arg);
5998 break;
5999 case CCValAssign::Trunc:
6000 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
6001 break;
6002 case CCValAssign::FPExt:
6003 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
6004 break;
6005 case CCValAssign::Indirect:
6006 assert(VA.getValVT().isScalableVector() &&
6007 "Only scalable vectors can be passed indirectly");
6008
6009 uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
6010 uint64_t PartSize = StoreSize;
6011 unsigned NumParts = 1;
6012 if (Outs[i].Flags.isInConsecutiveRegs()) {
6013 assert(!Outs[i].Flags.isInConsecutiveRegsLast());
6014 while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
6015 ++NumParts;
6016 StoreSize *= NumParts;
6017 }
6018
6019 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
6020 Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
6021 Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
6022 int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
6023 MFI.setStackID(FI, TargetStackID::ScalableVector);
6024
6025 MachinePointerInfo MPI =
6026 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
6027 SDValue Ptr = DAG.getFrameIndex(
6028 FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
6029 SDValue SpillSlot = Ptr;
6030
6031 // Ensure we generate all stores for each tuple part, whilst updating the
6032 // pointer after each store correctly using vscale.
6033 while (NumParts) {
6034 Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
6035 NumParts--;
6036 if (NumParts > 0) {
6037 SDValue BytesIncrement = DAG.getVScale(
6038 DL, Ptr.getValueType(),
6039 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
6040 SDNodeFlags Flags;
6041 Flags.setNoUnsignedWrap(true);
6042
6043 MPI = MachinePointerInfo(MPI.getAddrSpace());
6044 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
6045 BytesIncrement, Flags);
6046 ExtraArgLocs++;
6047 i++;
6048 }
6049 }
6050
6051 Arg = SpillSlot;
6052 break;
6053 }
6054
6055 if (VA.isRegLoc()) {
6056 if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
6057 Outs[0].VT == MVT::i64) {
6058 assert(VA.getLocVT() == MVT::i64 &&
6059 "unexpected calling convention register assignment");
6060 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
6061 "unexpected use of 'returned'");
6062 IsThisReturn = true;
6063 }
6064 if (RegsUsed.count(VA.getLocReg())) {
6065 // If this register has already been used then we're trying to pack
6066 // parts of an [N x i32] into an X-register. The extension type will
6067 // take care of putting the two halves in the right place but we have to
6068 // combine them.
6069 SDValue &Bits =
6070 llvm::find_if(RegsToPass,
6071 [=](const std::pair<unsigned, SDValue> &Elt) {
6072 return Elt.first == VA.getLocReg();
6073 })
6074 ->second;
6075 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
6076 // Call site info is used for the function's parameter entry value
6077 // tracking. For now we track only simple cases, where the parameter
6078 // is transferred through a whole register.
6079 llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
6080 return ArgReg.Reg == VA.getLocReg();
6081 });
6082 } else {
6083 RegsToPass.emplace_back(VA.getLocReg(), Arg);
6084 RegsUsed.insert(VA.getLocReg());
6085 const TargetOptions &Options = DAG.getTarget().Options;
6086 if (Options.EmitCallSiteInfo)
6087 CSInfo.emplace_back(VA.getLocReg(), i);
6088 }
6089 } else {
6090 assert(VA.isMemLoc());
6091
6092 SDValue DstAddr;
6093 MachinePointerInfo DstInfo;
6094
6095 // FIXME: This works on big-endian for composite byvals, which are the
6096 // common case. It should also work for fundamental types.
6097 uint32_t BEAlign = 0;
6098 unsigned OpSize;
6099 if (VA.getLocInfo() == CCValAssign::Indirect ||
6100 VA.getLocInfo() == CCValAssign::Trunc)
6101 OpSize = VA.getLocVT().getFixedSizeInBits();
6102 else
6103 OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
6104 : VA.getValVT().getSizeInBits();
6105 OpSize = (OpSize + 7) / 8;
6106 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
6107 !Flags.isInConsecutiveRegs()) {
6108 if (OpSize < 8)
6109 BEAlign = 8 - OpSize;
6110 }
6111 unsigned LocMemOffset = VA.getLocMemOffset();
6112 int32_t Offset = LocMemOffset + BEAlign;
6113 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
6114 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
6115
6116 if (IsTailCall) {
6117 Offset = Offset + FPDiff;
6118 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
6119
6120 DstAddr = DAG.getFrameIndex(FI, PtrVT);
6121 DstInfo =
6122 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
6123
6124 // Make sure any stack arguments overlapping with where we're storing
6125 // are loaded before this eventual operation. Otherwise they'll be
6126 // clobbered.
6127 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
6128 } else {
6129 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
6130
6131 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
6132 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
6133 LocMemOffset);
6134 }
6135
6136 if (Outs[i].Flags.isByVal()) {
6137 SDValue SizeNode =
6138 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
6139 SDValue Cpy = DAG.getMemcpy(
6140 Chain, DL, DstAddr, Arg, SizeNode,
6141 Outs[i].Flags.getNonZeroByValAlign(),
6142 /*isVol = */ false, /*AlwaysInline = */ false,
6143 /*isTailCall = */ false, DstInfo, MachinePointerInfo());
6144
6145 MemOpChains.push_back(Cpy);
6146 } else {
6147 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
6148 // promoted to a legal register type i32, we should truncate Arg back to
6149 // i1/i8/i16.
6150 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
6151 VA.getValVT() == MVT::i16)
6152 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
6153
6154 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
6155 MemOpChains.push_back(Store);
6156 }
6157 }
6158 }
6159
6160 if (!MemOpChains.empty())
6161 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
6162
6163 // Build a sequence of copy-to-reg nodes chained together with token chain
6164 // and flag operands which copy the outgoing args into the appropriate regs.
6165 SDValue InFlag;
6166 for (auto &RegToPass : RegsToPass) {
6167 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
6168 RegToPass.second, InFlag);
6169 InFlag = Chain.getValue(1);
6170 }
6171
6172 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
6173 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
6174 // node so that legalize doesn't hack it.
6175 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
6176 auto GV = G->getGlobal();
6177 unsigned OpFlags =
6178 Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
6179 if (OpFlags & AArch64II::MO_GOT) {
6180 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
6181 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
6182 } else {
6183 const GlobalValue *GV = G->getGlobal();
6184 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
6185 }
6186 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
6187 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
6188 Subtarget->isTargetMachO()) {
6189 const char *Sym = S->getSymbol();
6190 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
6191 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
6192 } else {
6193 const char *Sym = S->getSymbol();
6194 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
6195 }
6196 }
6197
6198 // We don't usually want to end the call-sequence here because we would tidy
6199 // the frame up *after* the call; however, in the ABI-changing tail-call case
6200 // we've carefully laid out the parameters so that when sp is reset they'll be
6201 // in the correct location.
6202 if (IsTailCall && !IsSibCall) {
6203 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
6204 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
6205 InFlag = Chain.getValue(1);
6206 }
6207
6208 std::vector<SDValue> Ops;
6209 Ops.push_back(Chain);
6210 Ops.push_back(Callee);
6211
6212 if (IsTailCall) {
6213 // Each tail call may have to adjust the stack by a different amount, so
6214 // this information must travel along with the operation for eventual
6215 // consumption by emitEpilogue.
6216 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
6217 }
6218
6219 // Add argument registers to the end of the list so that they are known live
6220 // into the call.
6221 for (auto &RegToPass : RegsToPass)
6222 Ops.push_back(DAG.getRegister(RegToPass.first,
6223 RegToPass.second.getValueType()));
6224
6225 // Add a register mask operand representing the call-preserved registers.
6226 const uint32_t *Mask;
6227 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
6228 if (IsThisReturn) {
6229 // For 'this' returns, use the X0-preserving mask if applicable
6230 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
6231 if (!Mask) {
6232 IsThisReturn = false;
6233 Mask = TRI->getCallPreservedMask(MF, CallConv);
6234 }
6235 } else
6236 Mask = TRI->getCallPreservedMask(MF, CallConv);
6237
6238 if (Subtarget->hasCustomCallingConv())
6239 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
6240
6241 if (TRI->isAnyArgRegReserved(MF))
6242 TRI->emitReservedArgRegCallError(MF);
6243
6244 assert(Mask && "Missing call preserved mask for calling convention");
6245 Ops.push_back(DAG.getRegisterMask(Mask));
6246
6247 if (InFlag.getNode())
6248 Ops.push_back(InFlag);
6249
6250 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6251
6252 // If we're doing a tail call, use a TC_RETURN here rather than an
6253 // actual call instruction.
6254 if (IsTailCall) {
6255 MF.getFrameInfo().setHasTailCall();
6256 SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
6257 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
6258 return Ret;
6259 }
6260
6261 unsigned CallOpc = AArch64ISD::CALL;
6262 // Calls with operand bundle "clang.arc.attachedcall" are special. They should
6263 // be expanded to the call, directly followed by a special marker sequence.
6264 // Use the CALL_RVMARKER to do that.
6265 if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
6266 assert(!IsTailCall &&
6267        "tail calls cannot be marked with clang.arc.attachedcall");
6268 CallOpc = AArch64ISD::CALL_RVMARKER;
6269 }
6270
6271 // Returns a chain and a flag for retval copy to use.
6272 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
6273 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
6274 InFlag = Chain.getValue(1);
6275 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
6276
6277 uint64_t CalleePopBytes =
6278 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
6279
6280 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
6281 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
6282 InFlag, DL);
6283 if (!Ins.empty())
6284 InFlag = Chain.getValue(1);
6285
6286 // Handle result values, copying them out of physregs into vregs that we
6287 // return.
6288 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
6289 InVals, IsThisReturn,
6290 IsThisReturn ? OutVals[0] : SDValue());
6291}
6292
6293bool AArch64TargetLowering::CanLowerReturn(
6294 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
6295 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
6296 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
6297 SmallVector<CCValAssign, 16> RVLocs;
6298 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6299 return CCInfo.CheckReturn(Outs, RetCC);
6300}
6301
6302SDValue
6303AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6304 bool isVarArg,
6305 const SmallVectorImpl<ISD::OutputArg> &Outs,
6306 const SmallVectorImpl<SDValue> &OutVals,
6307 const SDLoc &DL, SelectionDAG &DAG) const {
6308 auto &MF = DAG.getMachineFunction();
6309 auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
6310
6311 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
6312 SmallVector<CCValAssign, 16> RVLocs;
6313 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
6314 *DAG.getContext());
6315 CCInfo.AnalyzeReturn(Outs, RetCC);
6316
6317 // Copy the result values into the output registers.
6318 SDValue Flag;
6319 SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
6320 SmallSet<unsigned, 4> RegsUsed;
6321 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
6322 ++i, ++realRVLocIdx) {
6323 CCValAssign &VA = RVLocs[i];
6324 assert(VA.isRegLoc() && "Can only return in registers!");
6325 SDValue Arg = OutVals[realRVLocIdx];
6326
6327 switch (VA.getLocInfo()) {
6328 default:
6329 llvm_unreachable("Unknown loc info!");
6330 case CCValAssign::Full:
6331 if (Outs[i].ArgVT == MVT::i1) {
6332 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
6333 // value. This is strictly redundant on Darwin (which uses "zeroext
6334 // i1"), but will be optimised out before ISel.
6335 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
6336 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
6337 }
6338 break;
6339 case CCValAssign::BCvt:
6340 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
6341 break;
6342 case CCValAssign::AExt:
6343 case CCValAssign::ZExt:
6344 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
6345 break;
6346 case CCValAssign::AExtUpper:
6347 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
6348 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
6349 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
6350 DAG.getConstant(32, DL, VA.getLocVT()));
6351 break;
6352 }
6353
6354 if (RegsUsed.count(VA.getLocReg())) {
6355 SDValue &Bits =
6356 llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
6357 return Elt.first == VA.getLocReg();
6358 })->second;
6359 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
6360 } else {
6361 RetVals.emplace_back(VA.getLocReg(), Arg);
6362 RegsUsed.insert(VA.getLocReg());
6363 }
6364 }
6365
6366 SmallVector<SDValue, 4> RetOps(1, Chain);
6367 for (auto &RetVal : RetVals) {
6368 Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
6369 Flag = Chain.getValue(1);
6370 RetOps.push_back(
6371 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
6372 }
6373
6374 // Windows AArch64 ABIs require that for returning structs by value we copy
6375 // the sret argument into X0 for the return.
6376 // We saved the argument into a virtual register in the entry block,
6377 // so now we copy the value out and into X0.
6378 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
6379 SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
6380 getPointerTy(MF.getDataLayout()));
6381
6382 unsigned RetValReg = AArch64::X0;
6383 Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
6384 Flag = Chain.getValue(1);
6385
6386 RetOps.push_back(
6387 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
6388 }
6389
6390 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
6391 const MCPhysReg *I =
6392 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
6393 if (I) {
6394 for (; *I; ++I) {
6395 if (AArch64::GPR64RegClass.contains(*I))
6396 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6397 else if (AArch64::FPR64RegClass.contains(*I))
6398 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6399 else
6400 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6401 }
6402 }
6403
6404 RetOps[0] = Chain; // Update chain.
6405
6406 // Add the flag if we have it.
6407 if (Flag.getNode())
6408 RetOps.push_back(Flag);
6409
6410 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
6411}
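// A scalar sketch of the register packing performed above when two i32
// pieces of an [N x i32] share one X register: the ZExt/AExt case leaves the
// low half in place, the AExtUpper case shifts the other half up by 32, and
// the ISD::OR merges them into the single outgoing register value. The
// helper below is illustrative only and does not appear in this file.
static uint64_t packI32PairSketch(uint32_t Lo, uint32_t Hi) {
  uint64_t LoBits = Lo;                               // CCValAssign::ZExt/AExt
  uint64_t HiBits = static_cast<uint64_t>(Hi) << 32;  // CCValAssign::AExtUpper
  return LoBits | HiBits;                             // the OR into 'Bits'
}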
6412
6413//===----------------------------------------------------------------------===//
6414// Other Lowering Code
6415//===----------------------------------------------------------------------===//
6416
6417SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
6418 SelectionDAG &DAG,
6419 unsigned Flag) const {
6420 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
6421 N->getOffset(), Flag);
6422}
6423
6424SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
6425 SelectionDAG &DAG,
6426 unsigned Flag) const {
6427 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
6428}
6429
6430SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
6431 SelectionDAG &DAG,
6432 unsigned Flag) const {
6433 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
6434 N->getOffset(), Flag);
6435}
6436
6437SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
6438 SelectionDAG &DAG,
6439 unsigned Flag) const {
6440 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
6441}
6442
6443// (loadGOT sym)
6444template <class NodeTy>
6445SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
6446 unsigned Flags) const {
6447 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
6448 SDLoc DL(N);
6449 EVT Ty = getPointerTy(DAG.getDataLayout());
6450 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
6451 // FIXME: Once remat is capable of dealing with instructions with register
6452 // operands, expand this into two nodes instead of using a wrapper node.
6453 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
6454}
6455
6456// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
6457template <class NodeTy>
6458SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
6459 unsigned Flags) const {
6460 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
6461 SDLoc DL(N);
6462 EVT Ty = getPointerTy(DAG.getDataLayout());
6463 const unsigned char MO_NC = AArch64II::MO_NC;
6464 return DAG.getNode(
6465 AArch64ISD::WrapperLarge, DL, Ty,
6466 getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
6467 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
6468 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
6469 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
6470}
6471
6472// (addlow (adrp %hi(sym)) %lo(sym))
6473template <class NodeTy>
6474SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
6475 unsigned Flags) const {
6476 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
6477 SDLoc DL(N);
6478 EVT Ty = getPointerTy(DAG.getDataLayout());
6479 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
6480 SDValue Lo = getTargetNode(N, Ty, DAG,
6481 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
6482 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
6483 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
6484}
6485
6486// (adr sym)
6487template <class NodeTy>
6488SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
6489 unsigned Flags) const {
6490 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
6491 SDLoc DL(N);
6492 EVT Ty = getPointerTy(DAG.getDataLayout());
6493 SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
6494 return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
6495}
6496
6497SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
6498 SelectionDAG &DAG) const {
6499 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
6500 const GlobalValue *GV = GN->getGlobal();
6501 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
6502
6503 if (OpFlags != AArch64II::MO_NO_FLAG)
6504 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
6505        "unexpected offset in global node");
6506
6507 // This also catches the large code model case for Darwin, and tiny code
6508 // model with got relocations.
6509 if ((OpFlags & AArch64II::MO_GOT) != 0) {
6510 return getGOT(GN, DAG, OpFlags);
6511 }
6512
6513 SDValue Result;
6514 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
6515 Result = getAddrLarge(GN, DAG, OpFlags);
6516 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
6517 Result = getAddrTiny(GN, DAG, OpFlags);
6518 } else {
6519 Result = getAddr(GN, DAG, OpFlags);
6520 }
6521 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6522 SDLoc DL(GN);
6523 if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
6524 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
6525 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
6526 return Result;
6527}
6528
6529/// Convert a TLS address reference into the correct sequence of loads
6530/// and calls to compute the variable's address (for Darwin, currently) and
6531/// return an SDValue containing the final node.
6532
6533/// Darwin only has one TLS scheme which must be capable of dealing with the
6534/// fully general situation, in the worst case. This means:
6535/// + "extern __thread" declaration.
6536/// + Defined in a possibly unknown dynamic library.
6537///
6538/// The general system is that each __thread variable has a [3 x i64] descriptor
6539/// which contains information used by the runtime to calculate the address. The
6540/// only part of this the compiler needs to know about is the first xword, which
6541/// contains a function pointer that must be called with the address of the
6542/// entire descriptor in "x0".
6543///
6544/// Since this descriptor may be in a different unit, in general even the
6545/// descriptor must be accessed via an indirect load. The "ideal" code sequence
6546/// is:
6547/// adrp x0, _var@TLVPPAGE
6548/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
6549/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
6550/// ; the function pointer
6551/// blr x1 ; Uses descriptor address in x0
6552/// ; Address of _var is now in x0.
6553///
6554/// If the address of _var's descriptor *is* known to the linker, then it can
6555/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
6556/// a slight efficiency gain.
6557SDValue
6558AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
6559 SelectionDAG &DAG) const {
6560 assert(Subtarget->isTargetDarwin() &&
6561        "This function expects a Darwin target");
6562
6563 SDLoc DL(Op);
6564 MVT PtrVT = getPointerTy(DAG.getDataLayout());
6565 MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
6566 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
6567
6568 SDValue TLVPAddr =
6569 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
6570 SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
6571
6572 // The first entry in the descriptor is a function pointer that we must call
6573 // to obtain the address of the variable.
6574 SDValue Chain = DAG.getEntryNode();
6575 SDValue FuncTLVGet = DAG.getLoad(
6576 PtrMemVT, DL, Chain, DescAddr,
6577 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
6578 Align(PtrMemVT.getSizeInBits() / 8),
6579 MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
6580 Chain = FuncTLVGet.getValue(1);
6581
6582 // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
6583 FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
6584
6585 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
6586 MFI.setAdjustsStack(true);
6587
6588 // TLS calls preserve all registers except those that absolutely must be
6589 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
6590 // silly).
6591 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
6592 const uint32_t *Mask = TRI->getTLSCallPreservedMask();
6593 if (Subtarget->hasCustomCallingConv())
6594 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
6595
6596 // Finally, we can make the call. This is just a degenerate version of a
6597 // normal AArch64 call node: x0 takes the address of the descriptor, and
6598 // returns the address of the variable in this thread.
6599 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
6600 Chain =
6601 DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
6602 Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
6603 DAG.getRegisterMask(Mask), Chain.getValue(1));
6604 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
6605}
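// A sketch of the descriptor protocol described in the comment above: the
// compiler only relies on the first xword of the [3 x i64] descriptor being
// a resolver that is called with the descriptor's own address (in x0) and
// that returns the variable's address. The struct and helper below are
// illustrative only and do not appear in this file.
struct TLVDescriptorSketch {
  void *(*Thunk)(TLVDescriptorSketch *); // first entry: resolver function
  uint64_t Key, Offset;                  // remaining entries: runtime-private
};
static void *darwinTLSAddressSketch(TLVDescriptorSketch *Desc) {
  return Desc->Thunk(Desc); // models "blr x1" with x0 = descriptor address
}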
6606
6607/// Convert a thread-local variable reference into a sequence of instructions to
6608/// compute the variable's address for the local exec TLS model of ELF targets.
6609/// The sequence depends on the maximum TLS area size.
6610SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
6611 SDValue ThreadBase,
6612 const SDLoc &DL,
6613 SelectionDAG &DAG) const {
6614 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6615 SDValue TPOff, Addr;
6616
6617 switch (DAG.getTarget().Options.TLSSize) {
6618 default:
6619 llvm_unreachable("Unexpected TLS size");
6620
6621 case 12: {
6622 // mrs x0, TPIDR_EL0
6623 // add x0, x0, :tprel_lo12:a
6624 SDValue Var = DAG.getTargetGlobalAddress(
6625 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
6626 return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
6627 Var,
6628 DAG.getTargetConstant(0, DL, MVT::i32)),
6629 0);
6630 }
6631
6632 case 24: {
6633 // mrs x0, TPIDR_EL0
6634 // add x0, x0, :tprel_hi12:a
6635 // add x0, x0, :tprel_lo12_nc:a
6636 SDValue HiVar = DAG.getTargetGlobalAddress(
6637 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6638 SDValue LoVar = DAG.getTargetGlobalAddress(
6639 GV, DL, PtrVT, 0,
6640 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6641 Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
6642 HiVar,
6643 DAG.getTargetConstant(0, DL, MVT::i32)),
6644 0);
6645 return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
6646 LoVar,
6647 DAG.getTargetConstant(0, DL, MVT::i32)),
6648 0);
6649 }
6650
6651 case 32: {
6652 // mrs x1, TPIDR_EL0
6653 // movz x0, #:tprel_g1:a
6654 // movk x0, #:tprel_g0_nc:a
6655 // add x0, x1, x0
6656 SDValue HiVar = DAG.getTargetGlobalAddress(
6657 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
6658 SDValue LoVar = DAG.getTargetGlobalAddress(
6659 GV, DL, PtrVT, 0,
6660 AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
6661 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
6662 DAG.getTargetConstant(16, DL, MVT::i32)),
6663 0);
6664 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
6665 DAG.getTargetConstant(0, DL, MVT::i32)),
6666 0);
6667 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
6668 }
6669
6670 case 48: {
6671 // mrs x1, TPIDR_EL0
6672 // movz x0, #:tprel_g2:a
6673 // movk x0, #:tprel_g1_nc:a
6674 // movk x0, #:tprel_g0_nc:a
6675 // add x0, x1, x0
6676 SDValue HiVar = DAG.getTargetGlobalAddress(
6677 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
6678 SDValue MiVar = DAG.getTargetGlobalAddress(
6679 GV, DL, PtrVT, 0,
6680 AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
6681 SDValue LoVar = DAG.getTargetGlobalAddress(
6682 GV, DL, PtrVT, 0,
6683 AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
6684 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
6685 DAG.getTargetConstant(32, DL, MVT::i32)),
6686 0);
6687 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
6688 DAG.getTargetConstant(16, DL, MVT::i32)),
6689 0);
6690 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
6691 DAG.getTargetConstant(0, DL, MVT::i32)),
6692 0);
6693 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
6694 }
6695 }
6696}
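// Every case above ultimately computes the same thing: the thread pointer
// plus the link-time tprel offset of GV. The TLSSize option only changes how
// that constant offset is materialized (a single 12-bit add, hi12+lo12 adds,
// or a movz/movk chain). A trivial sketch, with illustrative parameters:
static uint64_t localExecAddressSketch(uint64_t ThreadBase, uint64_t TPOff) {
  return ThreadBase + TPOff; // mrs TPIDR_EL0, then add the :tprel: offset
}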
6697
6698/// When accessing thread-local variables under either the general-dynamic or
6699/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
6700/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
6701/// is a function pointer to carry out the resolution.
6702///
6703/// The sequence is:
6704/// adrp x0, :tlsdesc:var
6705/// ldr x1, [x0, #:tlsdesc_lo12:var]
6706/// add x0, x0, #:tlsdesc_lo12:var
6707/// .tlsdesccall var
6708/// blr x1
6709/// (TPIDR_EL0 offset now in x0)
6710///
6711/// The above sequence must be produced unscheduled, to enable the linker to
6712/// optimize/relax this sequence.
6713/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
6714/// above sequence, and expanded really late in the compilation flow, to ensure
6715/// the sequence is produced as per above.
6716SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
6717 const SDLoc &DL,
6718 SelectionDAG &DAG) const {
6719 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6720
6721 SDValue Chain = DAG.getEntryNode();
6722 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6723
6724 Chain =
6725 DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
6726 SDValue Glue = Chain.getValue(1);
6727
6728 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
6729}
6730
6731SDValue
6732AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
6733 SelectionDAG &DAG) const {
6734 assert(Subtarget->isTargetELF() && "This function expects an ELF target");
6735
6736 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
6737
6738 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
6739
6740 if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
6741 if (Model == TLSModel::LocalDynamic)
6742 Model = TLSModel::GeneralDynamic;
6743 }
6744
6745 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
6746 Model != TLSModel::LocalExec)
6747 report_fatal_error("ELF TLS only supported in small memory model or "
6748 "in local exec TLS model");
6749 // Different choices can be made for the maximum size of the TLS area for a
6750 // module. For the small address model, the default TLS size is 16MiB and the
6751 // maximum TLS size is 4GiB.
6752 // FIXME: add tiny and large code model support for TLS access models other
6753 // than local exec. We currently generate the same code as small for tiny,
6754 // which may be larger than needed.
6755
6756 SDValue TPOff;
6757 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6758 SDLoc DL(Op);
6759 const GlobalValue *GV = GA->getGlobal();
6760
6761 SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
6762
6763 if (Model == TLSModel::LocalExec) {
6764 return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
6765 } else if (Model == TLSModel::InitialExec) {
6766 TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
6767 TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
6768 } else if (Model == TLSModel::LocalDynamic) {
6769 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
6770 // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
6771 // the beginning of the module's TLS region, followed by a DTPREL offset
6772 // calculation.
6773
6774 // These accesses will need deduplicating if there's more than one.
6775 AArch64FunctionInfo *MFI =
6776 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
6777 MFI->incNumLocalDynamicTLSAccesses();
6778
6779 // The call needs a relocation too for linker relaxation. It doesn't make
6780 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
6781 // the address.
6782 SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
6783 AArch64II::MO_TLS);
6784
6785 // Now we can calculate the offset from TPIDR_EL0 to this module's
6786 // thread-local area.
6787 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
6788
6789 // Now use :dtprel_whatever: operations to calculate this variable's offset
6790 // in its thread-storage area.
6791 SDValue HiVar = DAG.getTargetGlobalAddress(
6792 GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6793 SDValue LoVar = DAG.getTargetGlobalAddress(
6794 GV, DL, MVT::i64, 0,
6795 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6796
6797 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
6798 DAG.getTargetConstant(0, DL, MVT::i32)),
6799 0);
6800 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
6801 DAG.getTargetConstant(0, DL, MVT::i32)),
6802 0);
6803 } else if (Model == TLSModel::GeneralDynamic) {
6804 // The call needs a relocation too for linker relaxation. It doesn't make
6805 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
6806 // the address.
6807 SDValue SymAddr =
6808 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
6809
6810 // Finally we can make a call to calculate the offset from tpidr_el0.
6811 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
6812 } else
6813 llvm_unreachable("Unsupported ELF TLS access model");
6814
6815 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
6816}
6817
6818SDValue
6819AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
6820 SelectionDAG &DAG) const {
6821 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
6822
6823 SDValue Chain = DAG.getEntryNode();
6824 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6825 SDLoc DL(Op);
6826
6827 SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
6828
6829 // Load the ThreadLocalStoragePointer from the TEB
6830 // A pointer to the TLS array is located at offset 0x58 from the TEB.
6831 SDValue TLSArray =
6832 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
6833 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
6834 Chain = TLSArray.getValue(1);
6835
6836 // Load the TLS index from the C runtime;
6837 // This does the same as getAddr(), but without having a GlobalAddressSDNode.
6838 // This also does the same as LOADgot, but using a generic i32 load,
6839 // while LOADgot only loads i64.
6840 SDValue TLSIndexHi =
6841 DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
6842 SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
6843 "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6844 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
6845 SDValue TLSIndex =
6846 DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
6847 TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
6848 Chain = TLSIndex.getValue(1);
6849
6850 // The pointer to the thread's TLS data area is at offset TLSIndex * 8
6851 // into the TLSArray.
6852 TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
6853 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
6854 DAG.getConstant(3, DL, PtrVT));
6855 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
6856 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
6857 MachinePointerInfo());
6858 Chain = TLS.getValue(1);
6859
6860 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
6861 const GlobalValue *GV = GA->getGlobal();
6862 SDValue TGAHi = DAG.getTargetGlobalAddress(
6863 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6864 SDValue TGALo = DAG.getTargetGlobalAddress(
6865 GV, DL, PtrVT, 0,
6866 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6867
6868 // Add the offset from the start of the .tls section (section base).
6869 SDValue Addr =
6870 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
6871 DAG.getTargetConstant(0, DL, MVT::i32)),
6872 0);
6873 Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
6874 return Addr;
6875}
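// A pointer-arithmetic sketch of the address computation built above,
// assuming the TEB layout described in the comments (TLS array pointer at
// offset 0x58). TEB, TlsIndex and SectionOffset are illustrative inputs, not
// symbols from this file.
static char *windowsTLSAddressSketch(char *TEB, uint32_t TlsIndex,
                                     uint64_t SectionOffset) {
  // Load ThreadLocalStoragePointer from the TEB.
  char **TlsArray = *reinterpret_cast<char ***>(TEB + 0x58);
  // Index the TLS array: the TLS index scaled by 8 (one pointer per module).
  char *TlsBase = TlsArray[TlsIndex];
  // Add the variable's offset within the module's .tls section (TGAHi/TGALo).
  return TlsBase + SectionOffset;
}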
6876
6877SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
6878 SelectionDAG &DAG) const {
6879 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
6880 if (DAG.getTarget().useEmulatedTLS())
6881 return LowerToTLSEmulatedModel(GA, DAG);
6882
6883 if (Subtarget->isTargetDarwin())
6884 return LowerDarwinGlobalTLSAddress(Op, DAG);
6885 if (Subtarget->isTargetELF())
6886 return LowerELFGlobalTLSAddress(Op, DAG);
6887 if (Subtarget->isTargetWindows())
6888 return LowerWindowsGlobalTLSAddress(Op, DAG);
6889
6890 llvm_unreachable("Unexpected platform trying to use TLS");
6891}
6892
6893// Looks through \param Val to determine the bit that can be used to
6894// check the sign of the value. It returns the unextended value and
6895// the sign bit position.
6896std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) {
6897 if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG)
6898 return {Val.getOperand(0),
6899 cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
6900 1};
6901
6902 if (Val.getOpcode() == ISD::SIGN_EXTEND)
6903 return {Val.getOperand(0),
6904 Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
6905
6906 return {Val, Val.getValueSizeInBits() - 1};
6907}
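// Worked examples of the mapping above (illustrative values only):
//   sign_extend_inreg(X, i8)   -> {X, 7}   (sign bit of the original i8)
//   sign_extend(i32 X) to i64  -> {X, 31}  (sign bit of the unextended i32)
//   plain i64 value V          -> {V, 63}  (its own top bit)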
6908
6909SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
6910 SDValue Chain = Op.getOperand(0);
6911 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
6912 SDValue LHS = Op.getOperand(2);
6913 SDValue RHS = Op.getOperand(3);
6914 SDValue Dest = Op.getOperand(4);
6915 SDLoc dl(Op);
6916
6917 MachineFunction &MF = DAG.getMachineFunction();
6918 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
6919 // will not be produced, as they are conditional branch instructions that do
6920 // not set flags.
6921 bool ProduceNonFlagSettingCondBr =
6922 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
6923
6924 // Handle f128 first, since lowering it will result in comparing the return
6925 // value of a libcall against zero, which is just what the rest of LowerBR_CC
6926 // is expecting to deal with.
6927 if (LHS.getValueType() == MVT::f128) {
6928 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
6929
6930 // If softenSetCCOperands returned a scalar, we need to compare the result
6931 // against zero to select between true and false values.
6932 if (!RHS.getNode()) {
6933 RHS = DAG.getConstant(0, dl, LHS.getValueType());
6934 CC = ISD::SETNE;
6935 }
6936 }
6937
6938 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
6939 // instruction.
6940 if (ISD::isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
6941 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
6942 // Only lower legal XALUO ops.
6943 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
6944 return SDValue();
6945
6946 // The actual operation with overflow check.
6947 AArch64CC::CondCode OFCC;
6948 SDValue Value, Overflow;
6949 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
6950
6951 if (CC == ISD::SETNE)
6952 OFCC = getInvertedCondCode(OFCC);
6953 SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
6954
6955 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
6956 Overflow);
6957 }
6958
6959 if (LHS.getValueType().isInteger()) {
6960 assert((LHS.getValueType() == RHS.getValueType()) &&
6961        (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
6962
6963 // If the RHS of the comparison is zero, we can potentially fold this
6964 // to a specialized branch.
6965 const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
6966 if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
6967 if (CC == ISD::SETEQ) {
6968 // See if we can use a TBZ to fold in an AND as well.
6969 // TBZ has a smaller branch displacement than CBZ. If the offset is
6970 // out of bounds, a late MI-layer pass rewrites branches.
6971 // 403.gcc is an example that hits this case.
6972 if (LHS.getOpcode() == ISD::AND &&
6973 isa<ConstantSDNode>(LHS.getOperand(1)) &&
6974 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
6975 SDValue Test = LHS.getOperand(0);
6976 uint64_t Mask = LHS.getConstantOperandVal(1);
6977 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
6978 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
6979 Dest);
6980 }
6981
6982 return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
6983 } else if (CC == ISD::SETNE) {
6984 // See if we can use a TBZ to fold in an AND as well.
6985 // TBZ has a smaller branch displacement than CBZ. If the offset is
6986 // out of bounds, a late MI-layer pass rewrites branches.
6987 // 403.gcc is an example that hits this case.
6988 if (LHS.getOpcode() == ISD::AND &&
6989 isa<ConstantSDNode>(LHS.getOperand(1)) &&
6990 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
6991 SDValue Test = LHS.getOperand(0);
6992 uint64_t Mask = LHS.getConstantOperandVal(1);
6993 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
6994 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
6995 Dest);
6996 }
6997
6998 return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
6999 } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
7000 // Don't combine AND since emitComparison converts the AND to an ANDS
7001 // (a.k.a. TST) and the test in the test bit and branch instruction
7002 // becomes redundant. This would also increase register pressure.
7003 uint64_t SignBitPos;
7004 std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
7005 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
7006 DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
7007 }
7008 }
7009 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
7010 LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
7011 // Don't combine AND since emitComparison converts the AND to an ANDS
7012 // (a.k.a. TST) and the test in the test bit and branch instruction
7013 // becomes redundant. This would also increase register pressure.
7014 uint64_t SignBitPos;
7015 std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
7016 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
7017 DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
7018 }
7019
7020 SDValue CCVal;
7021 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
7022 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
7023 Cmp);
7024 }
7025
7026 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 ||
7027        LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
7028
7029 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
7030 // clean. Some of them require two branches to implement.
7031 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
7032 AArch64CC::CondCode CC1, CC2;
7033 changeFPCCToAArch64CC(CC, CC1, CC2);
7034 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
7035 SDValue BR1 =
7036 DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
7037 if (CC2 != AArch64CC::AL) {
7038 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
7039 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
7040 Cmp);
7041 }
7042
7043 return BR1;
7044}
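// A scalar sketch of the TBZ/TBNZ fold above: when the branch condition is
// (X & Mask) == 0 (or != 0) and Mask is a power of two, only the bit index
// Log2_64(Mask) is kept in the node. The helper below is illustrative only.
static bool tbzTakesBranchSketch(uint64_t X, uint64_t Mask) {
  unsigned BitIdx = 0; // Log2_64(Mask) for a power-of-two mask
  while (((Mask >> BitIdx) & 1) == 0)
    ++BitIdx;
  return ((X >> BitIdx) & 1) == 0; // TBZ branches when the tested bit is clear
}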
7045
7046SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
7047 SelectionDAG &DAG) const {
7048 EVT VT = Op.getValueType();
7049 SDLoc DL(Op);
7050
7051 SDValue In1 = Op.getOperand(0);
7052 SDValue In2 = Op.getOperand(1);
7053 EVT SrcVT = In2.getValueType();
7054
7055 if (VT.isScalableVector()) {
7056 if (VT != SrcVT)
7057 return SDValue();
7058
7059 // copysign(x,y) -> (y & SIGN_MASK) | (x & ~SIGN_MASK)
7060 //
7061 // A possible alternative sequence involves using FNEG_MERGE_PASSTHRU;
7062 // maybe useful for copysign operations with mismatched VTs.
7063 //
7064 // IntVT here is chosen so it's a legal type with the same element width
7065 // as the input.
7066 EVT IntVT =
7067 getPackedSVEVectorVT(VT.getVectorElementType().changeTypeToInteger());
7068 unsigned NumBits = VT.getScalarSizeInBits();
7069 SDValue SignMask = DAG.getConstant(APInt::getSignMask(NumBits), DL, IntVT);
7070 SDValue InvSignMask = DAG.getNOT(DL, SignMask, IntVT);
7071 SDValue Sign = DAG.getNode(ISD::AND, DL, IntVT, SignMask,
7072 getSVESafeBitCast(IntVT, In2, DAG));
7073 SDValue Magnitude = DAG.getNode(ISD::AND, DL, IntVT, InvSignMask,
7074 getSVESafeBitCast(IntVT, In1, DAG));
7075 SDValue IntResult = DAG.getNode(ISD::OR, DL, IntVT, Sign, Magnitude);
7076 return getSVESafeBitCast(VT, IntResult, DAG);
7077 }
7078
7079 if (SrcVT.bitsLT(VT))
7080 In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
7081 else if (SrcVT.bitsGT(VT))
7082 In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
7083
7084 EVT VecVT;
7085 uint64_t EltMask;
7086 SDValue VecVal1, VecVal2;
7087
7088 auto setVecVal = [&] (int Idx) {
7089 if (!VT.isVector()) {
7090 VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
7091 DAG.getUNDEF(VecVT), In1);
7092 VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
7093 DAG.getUNDEF(VecVT), In2);
7094 } else {
7095 VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
7096 VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
7097 }
7098 };
7099
7100 if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
7101 VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
7102 EltMask = 0x80000000ULL;
7103 setVecVal(AArch64::ssub);
7104 } else if (VT == MVT::f64 || VT == MVT::v2f64) {
7105 VecVT = MVT::v2i64;
7106
7107 // We want to materialize a mask with the high bit set, but the AdvSIMD
7108 // immediate moves cannot materialize that in a single instruction for
7109 // 64-bit elements. Instead, materialize zero and then negate it.
7110 EltMask = 0;
7111
7112 setVecVal(AArch64::dsub);
7113 } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
7114 VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
7115 EltMask = 0x8000ULL;
7116 setVecVal(AArch64::hsub);
7117 } else {
7118 llvm_unreachable("Invalid type for copysign!");
7119 }
7120
7121 SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
7122
7123 // If we couldn't materialize the mask above, then the mask vector will be
7124 // the zero vector, and we need to negate it here.
7125 if (VT == MVT::f64 || VT == MVT::v2f64) {
7126 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
7127 BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
7128 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
7129 }
7130
7131 SDValue Sel =
7132 DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
7133
7134 if (VT == MVT::f16)
7135 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
7136 if (VT == MVT::f32)
7137 return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
7138 else if (VT == MVT::f64)
7139 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
7140 else
7141 return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
7142}
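// A scalar sketch of the bit manipulation used above, operating on the
// integer representation of the values, as in the comment
// "copysign(x,y) -> (y & SIGN_MASK) | (x & ~SIGN_MASK)". The helper name is
// illustrative only and does not appear in this file.
static uint64_t copysignBitsSketch(uint64_t MagnitudeBits, uint64_t SignBits) {
  const uint64_t SignMask = 1ULL << 63; // APInt::getSignMask(64)
  return (SignBits & SignMask) | (MagnitudeBits & ~SignMask);
}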
7143
7144SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
7145 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
7146 Attribute::NoImplicitFloat))
7147 return SDValue();
7148
7149 if (!Subtarget->hasNEON())
7150 return SDValue();
7151
7152 // While there is no integer popcount instruction, it can
7153 // be more efficiently lowered to the following sequence that uses
7154 // AdvSIMD registers/instructions as long as the copies to/from
7155 // the AdvSIMD registers are cheap.
7156 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
7157 // CNT V0.8B, V0.8B // 8xbyte pop-counts
7158 // ADDV B0, V0.8B // sum 8xbyte pop-counts
7159 // UMOV X0, V0.B[0] // copy byte result back to integer reg
7160 SDValue Val = Op.getOperand(0);
7161 SDLoc DL(Op);
7162 EVT VT = Op.getValueType();
7163
7164 if (VT == MVT::i32 || VT == MVT::i64) {
7165 if (VT == MVT::i32)
7166 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
7167 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
7168
7169 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
7170 SDValue UaddLV = DAG.getNode(
7171 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
7172 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
7173
7174 if (VT == MVT::i64)
7175 UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
7176 return UaddLV;
7177 } else if (VT == MVT::i128) {
7178 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Val);
7179
7180 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
7181 SDValue UaddLV = DAG.getNode(
7182 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
7183 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
7184
7185 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
7186 }
7187
7188 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
7189 return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
7190
7191 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
7192         VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
7193        "Unexpected type for custom ctpop lowering");
7194
7195 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
7196 Val = DAG.getBitcast(VT8Bit, Val);
7197 Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
7198
7199 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
7200 unsigned EltSize = 8;
7201 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
7202 while (EltSize != VT.getScalarSizeInBits()) {
7203 EltSize *= 2;
7204 NumElts /= 2;
7205 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
7206 Val = DAG.getNode(
7207 ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
7208 DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
7209 }
7210
7211 return Val;
7212}
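// A standalone scalar sketch of the widening loop above (my own illustration,
// not part of the LLVM source): for a v4i32 input the per-byte pop-counts are
// pairwise-summed v16i8 -> v8i16 -> v4i32, mirroring the EltSize loop. The
// pairwiseAdd helper below is an assumed stand-in for aarch64.neon.uaddlp.
#include <bitset>
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<uint64_t> pairwiseAdd(const std::vector<uint64_t> &Lanes) {
  std::vector<uint64_t> Out;
  for (size_t I = 0; I < Lanes.size(); I += 2)
    Out.push_back(Lanes[I] + Lanes[I + 1]); // models one UADDLP step
  return Out;
}

int main() {
  const uint32_t In[4] = {0xF0F0F0F0u, 0x1u, 0xFFFFFFFFu, 0x0u};
  // Per-byte pop-counts, i.e. CTPOP on the v16i8 bitcast of the input.
  std::vector<uint64_t> Lanes;
  for (uint32_t Word : In)
    for (int B = 0; B < 4; ++B)
      Lanes.push_back(std::bitset<8>((Word >> (8 * B)) & 0xFF).count());
  // Widen 16 x i8 -> 8 x i16 -> 4 x i32, as the loop above does via uaddlp.
  while (Lanes.size() > 4)
    Lanes = pairwiseAdd(Lanes);
  for (int I = 0; I < 4; ++I)
    assert(Lanes[I] == std::bitset<32>(In[I]).count());
  return 0;
}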
7213
7214SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
7215 EVT VT = Op.getValueType();
7216  assert(VT.isScalableVector() ||
7217         useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true));
7218
7219 SDLoc DL(Op);
7220 SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
7221 return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
7222}
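// A quick self-contained check of the identity LowerCTTZ relies on (my own
// sketch, assuming GCC/Clang builtins; not LLVM code): for non-zero x,
// cttz(x) == ctlz(bitreverse(x)).
#include <cassert>
#include <cstdint>

static uint32_t bitreverse32(uint32_t X) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((X >> I) & 1u) << (31 - I);
  return R;
}

int main() {
  for (uint32_t X : {1u, 8u, 0x00F00000u, 0x80000000u})
    assert(__builtin_ctz(X) == __builtin_clz(bitreverse32(X)));
  return 0;
}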
7223
7224SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
7225 SelectionDAG &DAG) const {
7226
7227 EVT VT = Op.getValueType();
7228 SDLoc DL(Op);
7229 unsigned Opcode = Op.getOpcode();
7230 ISD::CondCode CC;
7231 switch (Opcode) {
7232 default:
7233 llvm_unreachable("Wrong instruction")::llvm::llvm_unreachable_internal("Wrong instruction", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 7233)
;
7234 case ISD::SMAX:
7235 CC = ISD::SETGT;
7236 break;
7237 case ISD::SMIN:
7238 CC = ISD::SETLT;
7239 break;
7240 case ISD::UMAX:
7241 CC = ISD::SETUGT;
7242 break;
7243 case ISD::UMIN:
7244 CC = ISD::SETULT;
7245 break;
7246 }
7247
7248 if (VT.isScalableVector() ||
7249 useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) {
7250 switch (Opcode) {
7251 default:
7252 llvm_unreachable("Wrong instruction")::llvm::llvm_unreachable_internal("Wrong instruction", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 7252)
;
7253 case ISD::SMAX:
7254 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
7255 /*OverrideNEON=*/true);
7256 case ISD::SMIN:
7257 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
7258 /*OverrideNEON=*/true);
7259 case ISD::UMAX:
7260 return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
7261 /*OverrideNEON=*/true);
7262 case ISD::UMIN:
7263 return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
7264 /*OverrideNEON=*/true);
7265 }
7266 }
7267
7268 SDValue Op0 = Op.getOperand(0);
7269 SDValue Op1 = Op.getOperand(1);
7270 SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
7271 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
7272}
7273
7274SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
7275 SelectionDAG &DAG) const {
7276 EVT VT = Op.getValueType();
7277
7278 if (VT.isScalableVector() ||
7279 useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
7280 return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
7281 true);
7282
7283 SDLoc DL(Op);
7284 SDValue REVB;
7285 MVT VST;
7286
7287 switch (VT.getSimpleVT().SimpleTy) {
7288 default:
7289 llvm_unreachable("Invalid type for bitreverse!")::llvm::llvm_unreachable_internal("Invalid type for bitreverse!"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 7289)
;
7290
7291 case MVT::v2i32: {
7292 VST = MVT::v8i8;
7293 REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
7294
7295 break;
7296 }
7297
7298 case MVT::v4i32: {
7299 VST = MVT::v16i8;
7300 REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
7301
7302 break;
7303 }
7304
7305 case MVT::v1i64: {
7306 VST = MVT::v8i8;
7307 REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
7308
7309 break;
7310 }
7311
7312 case MVT::v2i64: {
7313 VST = MVT::v16i8;
7314 REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
7315
7316 break;
7317 }
7318 }
7319
7320 return DAG.getNode(AArch64ISD::NVCAST, DL, VT,
7321 DAG.getNode(ISD::BITREVERSE, DL, VST, REVB));
7322}
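// A self-contained sanity check of the decomposition used above for the i32
// element case (my own sketch, assuming GCC/Clang's __builtin_bswap32; not
// LLVM code): reversing the bits of a 32-bit lane equals a byte swap within
// the lane (REV32) followed by reversing the bits inside each byte (the
// byte-wise ISD::BITREVERSE on the v16i8/v8i8 view).
#include <cassert>
#include <cstdint>

static uint8_t bitrev8(uint8_t B) {
  uint8_t R = 0;
  for (int I = 0; I < 8; ++I)
    R |= ((B >> I) & 1u) << (7 - I);
  return R;
}

static uint32_t bitrev32(uint32_t X) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((X >> I) & 1u) << (31 - I);
  return R;
}

int main() {
  const uint32_t X = 0x12345678u;
  const uint32_t Swapped = __builtin_bswap32(X); // models REV32 on one lane
  uint32_t PerByte = 0;
  for (int I = 0; I < 4; ++I)                    // models byte-wise BITREVERSE
    PerByte |= uint32_t(bitrev8((Swapped >> (8 * I)) & 0xFF)) << (8 * I);
  assert(PerByte == bitrev32(X));
  return 0;
}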
7323
7324SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
7325
7326 if (Op.getValueType().isVector())
7327 return LowerVSETCC(Op, DAG);
7328
7329 bool IsStrict = Op->isStrictFPOpcode();
7330 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
7331 unsigned OpNo = IsStrict ? 1 : 0;
7332 SDValue Chain;
7333 if (IsStrict)
7334 Chain = Op.getOperand(0);
7335 SDValue LHS = Op.getOperand(OpNo + 0);
7336 SDValue RHS = Op.getOperand(OpNo + 1);
7337 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
7338 SDLoc dl(Op);
7339
7340 // We chose ZeroOrOneBooleanContents, so use zero and one.
7341 EVT VT = Op.getValueType();
7342 SDValue TVal = DAG.getConstant(1, dl, VT);
7343 SDValue FVal = DAG.getConstant(0, dl, VT);
7344
7345 // Handle f128 first, since one possible outcome is a normal integer
7346 // comparison which gets picked up by the next if statement.
7347 if (LHS.getValueType() == MVT::f128) {
7348 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
7349 IsSignaling);
7350
7351 // If softenSetCCOperands returned a scalar, use it.
7352 if (!RHS.getNode()) {
7353      assert(LHS.getValueType() == Op.getValueType() &&
7354             "Unexpected setcc expansion!");
7355 return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
7356 }
7357 }
7358
7359 if (LHS.getValueType().isInteger()) {
7360 SDValue CCVal;
7361 SDValue Cmp = getAArch64Cmp(
7362 LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
7363
7364 // Note that we inverted the condition above, so we reverse the order of
7365 // the true and false operands here. This will allow the setcc to be
7366 // matched to a single CSINC instruction.
7367 SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
7368 return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
7369 }
7370
7371 // Now we know we're dealing with FP values.
7372  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
7373         LHS.getValueType() == MVT::f64);
7374
7375 // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
7376 // and do the comparison.
7377 SDValue Cmp;
7378 if (IsStrict)
7379 Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
7380 else
7381 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
7382
7383 AArch64CC::CondCode CC1, CC2;
7384 changeFPCCToAArch64CC(CC, CC1, CC2);
7385 SDValue Res;
7386 if (CC2 == AArch64CC::AL) {
7387 changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
7388 CC2);
7389 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
7390
7391 // Note that we inverted the condition above, so we reverse the order of
7392 // the true and false operands here. This will allow the setcc to be
7393 // matched to a single CSINC instruction.
7394 Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
7395 } else {
7396 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
7397    // totally clean. Some of them require two CSELs to implement. As in
7398 // this case, we emit the first CSEL and then emit a second using the output
7399 // of the first as the RHS. We're effectively OR'ing the two CC's together.
7400
7401 // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
7402 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
7403 SDValue CS1 =
7404 DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
7405
7406 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
7407 Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
7408 }
7409 return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
7410}
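// A small model of why the inverted-operand CSEL emitted above can be matched
// to a single CSINC (my own sketch, based on my reading of the CSINC
// semantics; not LLVM code): CSINC Xd, Xn, Xm, cond computes cond ? Xn : Xm+1,
// so CSINC Xd, XZR, XZR, !cc is exactly the 0/1 boolean the setcc needs.
#include <cassert>
#include <cstdint>

static uint64_t csinc(uint64_t Xn, uint64_t Xm, bool Cond) {
  return Cond ? Xn : Xm + 1; // CSINC semantics as modelled here
}

int main() {
  for (bool CC : {false, true}) {
    // CSEL with swapped true/false values on the inverted condition...
    const uint64_t Sel = !CC ? /*FVal*/ 0 : /*TVal*/ 1;
    // ...equals CSINC XZR, XZR on the inverted condition, i.e. the setcc bit.
    assert(Sel == csinc(/*Xn=XZR*/ 0, /*Xm=XZR*/ 0, !CC));
    assert(Sel == uint64_t(CC));
  }
  return 0;
}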
7411
7412SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
7413 SDValue RHS, SDValue TVal,
7414 SDValue FVal, const SDLoc &dl,
7415 SelectionDAG &DAG) const {
7416 // Handle f128 first, because it will result in a comparison of some RTLIB
7417 // call result against zero.
7418 if (LHS.getValueType() == MVT::f128) {
1) Taking true branch
7419 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
7420
7421 // If softenSetCCOperands returned a scalar, we need to compare the result
7422 // against zero to select between true and false values.
7423 if (!RHS.getNode()) {
2) Assuming the condition is false
7424 RHS = DAG.getConstant(0, dl, LHS.getValueType());
7425 CC = ISD::SETNE;
7426 }
7427 }
7428
7429 // Also handle f16, for which we need to do a f32 comparison.
7430 if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
3) Assuming the condition is true
4) Taking true branch
7431 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
7432 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
7433 }
7434
7435 // Next, handle integers.
7436 if (LHS.getValueType().isInteger()) {
7437    assert((LHS.getValueType() == RHS.getValueType()) &&
7438           (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
5) Taking true branch
6) '?' condition is true
7439
7440 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
7441 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
7) Calling 'dyn_cast<llvm::ConstantSDNode, llvm::SDValue>'
22) Returning from 'dyn_cast<llvm::ConstantSDNode, llvm::SDValue>'
7442 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
7443 // Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
7444 // into (OR (ASR lhs, N-1), 1), which requires less instructions for the
7445 // supported types.
7446 if (CC == ISD::SETGT && RHSC && RHSC->isAllOnesValue() && CTVal && CFVal &&
23) Assuming 'CC' is not equal to SETGT
24) Taking false branch
7447 CTVal->isOne() && CFVal->isAllOnesValue() &&
7448 LHS.getValueType() == TVal.getValueType()) {
7449 EVT VT = LHS.getValueType();
7450 SDValue Shift =
7451 DAG.getNode(ISD::SRA, dl, VT, LHS,
7452 DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
7453 return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
7454 }
7455
7456 unsigned Opcode = AArch64ISD::CSEL;
7457
7458 // If both the TVal and the FVal are constants, see if we can swap them in
7459    // order to form a CSINV or CSINC out of them.
7460 if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
25) Assuming 'CTVal' is null
7461 std::swap(TVal, FVal);
7462 std::swap(CTVal, CFVal);
7463 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7464    } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
25.1) 'CTVal' is null
7465 std::swap(TVal, FVal);
7466 std::swap(CTVal, CFVal);
7467 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7468 } else if (TVal.getOpcode() == ISD::XOR) {
26) Calling 'SDValue::getOpcode'
7469 // If TVal is a NOT we want to swap TVal and FVal so that we can match
7470 // with a CSINV rather than a CSEL.
7471 if (isAllOnesConstant(TVal.getOperand(1))) {
7472 std::swap(TVal, FVal);
7473 std::swap(CTVal, CFVal);
7474 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7475 }
7476 } else if (TVal.getOpcode() == ISD::SUB) {
7477 // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
7478 // that we can match with a CSNEG rather than a CSEL.
7479 if (isNullConstant(TVal.getOperand(0))) {
7480 std::swap(TVal, FVal);
7481 std::swap(CTVal, CFVal);
7482 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7483 }
7484 } else if (CTVal && CFVal) {
7485 const int64_t TrueVal = CTVal->getSExtValue();
7486 const int64_t FalseVal = CFVal->getSExtValue();
7487 bool Swap = false;
7488
7489 // If both TVal and FVal are constants, see if FVal is the
7490 // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
7491 // instead of a CSEL in that case.
7492 if (TrueVal == ~FalseVal) {
7493 Opcode = AArch64ISD::CSINV;
7494 } else if (FalseVal > std::numeric_limits<int64_t>::min() &&
7495 TrueVal == -FalseVal) {
7496 Opcode = AArch64ISD::CSNEG;
7497 } else if (TVal.getValueType() == MVT::i32) {
7498 // If our operands are only 32-bit wide, make sure we use 32-bit
7499 // arithmetic for the check whether we can use CSINC. This ensures that
7500 // the addition in the check will wrap around properly in case there is
7501 // an overflow (which would not be the case if we do the check with
7502 // 64-bit arithmetic).
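      // Illustration (not in the original source): with i32 operands TVal = 0
      // and FVal = -1, FalseVal32 + 1 wraps around to 0 == TrueVal32, so the
      // CSINC form is chosen; the same pair checked in 64-bit arithmetic
      // would not satisfy TrueVal == FalseVal + 1.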
7503 const uint32_t TrueVal32 = CTVal->getZExtValue();
7504 const uint32_t FalseVal32 = CFVal->getZExtValue();
7505
7506 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
7507 Opcode = AArch64ISD::CSINC;
7508
7509 if (TrueVal32 > FalseVal32) {
7510 Swap = true;
7511 }
7512 }
7513 // 64-bit check whether we can use CSINC.
7514 } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
7515 Opcode = AArch64ISD::CSINC;
7516
7517 if (TrueVal > FalseVal) {
7518 Swap = true;
7519 }
7520 }
7521
7522 // Swap TVal and FVal if necessary.
7523 if (Swap) {
7524 std::swap(TVal, FVal);
7525 std::swap(CTVal, CFVal);
7526 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7527 }
7528
7529 if (Opcode != AArch64ISD::CSEL) {
7530 // Drop FVal since we can get its value by simply inverting/negating
7531 // TVal.
7532 FVal = TVal;
7533 }
7534 }
7535
7536 // Avoid materializing a constant when possible by reusing a known value in
7537 // a register. However, don't perform this optimization if the known value
7538 // is one, zero or negative one in the case of a CSEL. We can always
7539 // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
7540 // FVal, respectively.
7541 ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
7542 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
7543 !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
7544 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
7545 // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
7546 // "a != C ? x : a" to avoid materializing C.
7547 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
7548 TVal = LHS;
7549 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
7550 FVal = LHS;
7551 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
7552    assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
7553 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
7554 // avoid materializing C.
7555 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
7556 if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
7557 Opcode = AArch64ISD::CSINV;
7558 TVal = LHS;
7559 FVal = DAG.getConstant(0, dl, FVal.getValueType());
7560 }
7561 }
7562
7563 SDValue CCVal;
7564 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
7565 EVT VT = TVal.getValueType();
7566 return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
7567 }
7568
7569 // Now we know we're dealing with FP values.
7570  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
7571         LHS.getValueType() == MVT::f64);
7572  assert(LHS.getValueType() == RHS.getValueType());
7573 EVT VT = TVal.getValueType();
7574 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
7575
7576 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
7577 // clean. Some of them require two CSELs to implement.
7578 AArch64CC::CondCode CC1, CC2;
7579 changeFPCCToAArch64CC(CC, CC1, CC2);
7580
7581 if (DAG.getTarget().Options.UnsafeFPMath) {
7582 // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
7583 // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
7584 ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
7585 if (RHSVal && RHSVal->isZero()) {
7586 ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
7587 ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
7588
7589 if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
7590 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
7591 TVal = LHS;
7592 else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
7593 CFVal && CFVal->isZero() &&
7594 FVal.getValueType() == LHS.getValueType())
7595 FVal = LHS;
7596 }
7597 }
7598
7599 // Emit first, and possibly only, CSEL.
7600 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
7601 SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
7602
7603 // If we need a second CSEL, emit it, using the output of the first as the
7604 // RHS. We're effectively OR'ing the two CC's together.
7605 if (CC2 != AArch64CC::AL) {
7606 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
7607 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
7608 }
7609
7610 // Otherwise, return the output of the first CSEL.
7611 return CS1;
7612}
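// A standalone check of the "sign pattern" transform performed above (my own
// sketch, not LLVM code): (lhs > -1 ? 1 : -1) becomes (lhs >> (N-1)) | 1,
// where the arithmetic shift smears the sign bit into 0 or -1 before the OR.
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t LHS : {INT32_MIN, -7, -1, 0, 1, 42, INT32_MAX}) {
    const int32_t Select = LHS > -1 ? 1 : -1;
    const int32_t Smear = LHS >> 31; // arithmetic shift: 0 or -1 (ASR lhs, 31)
    assert(Select == (Smear | 1));
  }
  return 0;
}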
7613
7614SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
7615 SelectionDAG &DAG) const {
7616
7617 EVT Ty = Op.getValueType();
7618 auto Idx = Op.getConstantOperandAPInt(2);
7619 if (Idx.sge(-1) && Idx.slt(Ty.getVectorMinNumElements()))
7620 return Op;
7621 return SDValue();
7622}
7623
7624SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
7625 SelectionDAG &DAG) const {
7626 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7627 SDValue LHS = Op.getOperand(0);
7628 SDValue RHS = Op.getOperand(1);
7629 SDValue TVal = Op.getOperand(2);
7630 SDValue FVal = Op.getOperand(3);
7631 SDLoc DL(Op);
7632 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
7633}
7634
7635SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
7636 SelectionDAG &DAG) const {
7637 SDValue CCVal = Op->getOperand(0);
7638 SDValue TVal = Op->getOperand(1);
7639 SDValue FVal = Op->getOperand(2);
7640 SDLoc DL(Op);
7641
7642 EVT Ty = Op.getValueType();
7643 if (Ty.isScalableVector()) {
7644 SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);
7645 MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount());
7646 SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);
7647 return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
7648 }
7649
7650 if (useSVEForFixedLengthVectorVT(Ty)) {
7651 // FIXME: Ideally this would be the same as above using i1 types, however
7652 // for the moment we can't deal with fixed i1 vector types properly, so
7653 // instead extend the predicate to a result type sized integer vector.
7654 MVT SplatValVT = MVT::getIntegerVT(Ty.getScalarSizeInBits());
7655 MVT PredVT = MVT::getVectorVT(SplatValVT, Ty.getVectorElementCount());
7656 SDValue SplatVal = DAG.getSExtOrTrunc(CCVal, DL, SplatValVT);
7657 SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, SplatVal);
7658 return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
7659 }
7660
7661 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
7662 // instruction.
7663 if (ISD::isOverflowIntrOpRes(CCVal)) {
7664 // Only lower legal XALUO ops.
7665 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
7666 return SDValue();
7667
7668 AArch64CC::CondCode OFCC;
7669 SDValue Value, Overflow;
7670 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
7671 SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
7672
7673 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
7674 CCVal, Overflow);
7675 }
7676
7677 // Lower it the same way as we would lower a SELECT_CC node.
7678 ISD::CondCode CC;
7679 SDValue LHS, RHS;
7680 if (CCVal.getOpcode() == ISD::SETCC) {
7681 LHS = CCVal.getOperand(0);
7682 RHS = CCVal.getOperand(1);
7683 CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
7684 } else {
7685 LHS = CCVal;
7686 RHS = DAG.getConstant(0, DL, CCVal.getValueType());
7687 CC = ISD::SETNE;
7688 }
7689 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
7690}
7691
7692SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
7693 SelectionDAG &DAG) const {
7694 // Jump table entries as PC relative offsets. No additional tweaking
7695 // is necessary here. Just get the address of the jump table.
7696 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
7697
7698 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
7699 !Subtarget->isTargetMachO()) {
7700 return getAddrLarge(JT, DAG);
7701 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
7702 return getAddrTiny(JT, DAG);
7703 }
7704 return getAddr(JT, DAG);
7705}
7706
7707SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
7708 SelectionDAG &DAG) const {
7709 // Jump table entries as PC relative offsets. No additional tweaking
7710 // is necessary here. Just get the address of the jump table.
7711 SDLoc DL(Op);
7712 SDValue JT = Op.getOperand(1);
7713 SDValue Entry = Op.getOperand(2);
7714 int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
7715
7716 auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
7717 AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
7718
7719 SDNode *Dest =
7720 DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
7721 Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
7722 return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
7723 SDValue(Dest, 0));
7724}
7725
7726SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
7727 SelectionDAG &DAG) const {
7728 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
7729
7730 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
7731 // Use the GOT for the large code model on iOS.
7732 if (Subtarget->isTargetMachO()) {
7733 return getGOT(CP, DAG);
7734 }
7735 return getAddrLarge(CP, DAG);
7736 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
7737 return getAddrTiny(CP, DAG);
7738 } else {
7739 return getAddr(CP, DAG);
7740 }
7741}
7742
7743SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
7744 SelectionDAG &DAG) const {
7745 BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
7746 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
7747 !Subtarget->isTargetMachO()) {
7748 return getAddrLarge(BA, DAG);
7749 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
7750 return getAddrTiny(BA, DAG);
7751 }
7752 return getAddr(BA, DAG);
7753}
7754
7755SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
7756 SelectionDAG &DAG) const {
7757 AArch64FunctionInfo *FuncInfo =
7758 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
7759
7760 SDLoc DL(Op);
7761 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
7762 getPointerTy(DAG.getDataLayout()));
7763 FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
7764 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7765 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
7766 MachinePointerInfo(SV));
7767}
7768
7769SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
7770 SelectionDAG &DAG) const {
7771 AArch64FunctionInfo *FuncInfo =
7772 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
7773
7774 SDLoc DL(Op);
7775 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
7776 ? FuncInfo->getVarArgsGPRIndex()
7777 : FuncInfo->getVarArgsStackIndex(),
7778 getPointerTy(DAG.getDataLayout()));
7779 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7780 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
7781 MachinePointerInfo(SV));
7782}
7783
7784SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
7785 SelectionDAG &DAG) const {
7786 // The layout of the va_list struct is specified in the AArch64 Procedure Call
7787 // Standard, section B.3.
7788 MachineFunction &MF = DAG.getMachineFunction();
7789 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
7790 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
7791 auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
7792 auto PtrVT = getPointerTy(DAG.getDataLayout());
7793 SDLoc DL(Op);
7794
7795 SDValue Chain = Op.getOperand(0);
7796 SDValue VAList = Op.getOperand(1);
7797 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7798 SmallVector<SDValue, 4> MemOps;
7799
7800 // void *__stack at offset 0
7801 unsigned Offset = 0;
7802 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
7803 Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT);
7804 MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
7805 MachinePointerInfo(SV), Align(PtrSize)));
7806
7807 // void *__gr_top at offset 8 (4 on ILP32)
7808 Offset += PtrSize;
7809 int GPRSize = FuncInfo->getVarArgsGPRSize();
7810 if (GPRSize > 0) {
7811 SDValue GRTop, GRTopAddr;
7812
7813 GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7814 DAG.getConstant(Offset, DL, PtrVT));
7815
7816 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
7817 GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
7818 DAG.getConstant(GPRSize, DL, PtrVT));
7819 GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT);
7820
7821 MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
7822 MachinePointerInfo(SV, Offset),
7823 Align(PtrSize)));
7824 }
7825
7826 // void *__vr_top at offset 16 (8 on ILP32)
7827 Offset += PtrSize;
7828 int FPRSize = FuncInfo->getVarArgsFPRSize();
7829 if (FPRSize > 0) {
7830 SDValue VRTop, VRTopAddr;
7831 VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7832 DAG.getConstant(Offset, DL, PtrVT));
7833
7834 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
7835 VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
7836 DAG.getConstant(FPRSize, DL, PtrVT));
7837 VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT);
7838
7839 MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
7840 MachinePointerInfo(SV, Offset),
7841 Align(PtrSize)));
7842 }
7843
7844 // int __gr_offs at offset 24 (12 on ILP32)
7845 Offset += PtrSize;
7846 SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7847 DAG.getConstant(Offset, DL, PtrVT));
7848 MemOps.push_back(
7849 DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
7850 GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
7851
7852 // int __vr_offs at offset 28 (16 on ILP32)
7853 Offset += 4;
7854 SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7855 DAG.getConstant(Offset, DL, PtrVT));
7856 MemOps.push_back(
7857 DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
7858 VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
7859
7860 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
7861}
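// For reference, the five stores built above populate a record with the
// layout from AAPCS64 section B.3. A sketch of the LP64 form (my own
// illustration; on ILP32 the pointers are 4 bytes, giving the 0/4/8/12/16
// offsets noted in the comments above):
struct AAPCS64VaList {
  void *__stack;   // offset  0: next stacked argument
  void *__gr_top;  // offset  8: end of the general-purpose register save area
  void *__vr_top;  // offset 16: end of the FP/SIMD register save area
  int __gr_offs;   // offset 24: negative offset from __gr_top (-GPRSize)
  int __vr_offs;   // offset 28: negative offset from __vr_top (-FPRSize)
};
static_assert(sizeof(AAPCS64VaList) == 32,
              "expected a 32-byte va_list on an LP64 target");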
7862
7863SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
7864 SelectionDAG &DAG) const {
7865 MachineFunction &MF = DAG.getMachineFunction();
7866
7867 if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
7868 return LowerWin64_VASTART(Op, DAG);
7869 else if (Subtarget->isTargetDarwin())
7870 return LowerDarwin_VASTART(Op, DAG);
7871 else
7872 return LowerAAPCS_VASTART(Op, DAG);
7873}
7874
7875SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
7876 SelectionDAG &DAG) const {
7877  // AAPCS has three pointers and two ints (= 32 bytes), Darwin has a single
7878 // pointer.
7879 SDLoc DL(Op);
7880 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
7881 unsigned VaListSize =
7882 (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
7883 ? PtrSize
7884 : Subtarget->isTargetILP32() ? 20 : 32;
7885 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
7886 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
7887
7888 return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
7889 DAG.getConstant(VaListSize, DL, MVT::i32),
7890 Align(PtrSize), false, false, false,
7891 MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
7892}
7893
7894SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
7895  assert(Subtarget->isTargetDarwin() &&
7896         "automatic va_arg instruction only works on Darwin");
7897
7898 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7899 EVT VT = Op.getValueType();
7900 SDLoc DL(Op);
7901 SDValue Chain = Op.getOperand(0);
7902 SDValue Addr = Op.getOperand(1);
7903 MaybeAlign Align(Op.getConstantOperandVal(3));
7904 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
7905 auto PtrVT = getPointerTy(DAG.getDataLayout());
7906 auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
7907 SDValue VAList =
7908 DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
7909 Chain = VAList.getValue(1);
7910 VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
7911
7912 if (VT.isScalableVector())
7913 report_fatal_error("Passing SVE types to variadic functions is "
7914 "currently not supported");
7915
7916 if (Align && *Align > MinSlotSize) {
7917 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7918 DAG.getConstant(Align->value() - 1, DL, PtrVT));
7919 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
7920 DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
7921 }
7922
7923 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
7924 unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
7925
7926 // Scalar integer and FP values smaller than 64 bits are implicitly extended
7927 // up to 64 bits. At the very least, we have to increase the striding of the
7928 // vaargs list to match this, and for FP values we need to introduce
7929 // FP_ROUND nodes as well.
7930 if (VT.isInteger() && !VT.isVector())
7931 ArgSize = std::max(ArgSize, MinSlotSize);
7932 bool NeedFPTrunc = false;
7933 if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
7934 ArgSize = 8;
7935 NeedFPTrunc = true;
7936 }
7937
7938 // Increment the pointer, VAList, to the next vaarg
7939 SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7940 DAG.getConstant(ArgSize, DL, PtrVT));
7941 VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);
7942
7943 // Store the incremented VAList to the legalized pointer
7944 SDValue APStore =
7945 DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
7946
7947 // Load the actual argument out of the pointer VAList
7948 if (NeedFPTrunc) {
7949 // Load the value as an f64.
7950 SDValue WideFP =
7951 DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
7952 // Round the value down to an f32.
7953 SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
7954 DAG.getIntPtrConstant(1, DL));
7955 SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
7956 // Merge the rounded value with the chain output of the load.
7957 return DAG.getMergeValues(Ops, DL);
7958 }
7959
7960 return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
7961}
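// A quick check of the over-alignment step above (my own sketch, not LLVM
// code): the pointer is rounded up to a power-of-two alignment A via
// (P + A - 1) & -A, which LowerVAARG builds with an ADD and an AND node.
#include <cassert>
#include <cstdint>

static uint64_t alignUp(uint64_t P, uint64_t A) {
  return (P + A - 1) & ~(A - 1); // same mask as & -(int64_t)A for powers of two
}

int main() {
  assert(alignUp(0x1001, 16) == 0x1010);
  assert(alignUp(0x1010, 16) == 0x1010); // already aligned: unchanged
  assert(alignUp(0x1001, 8) == 0x1008);
  return 0;
}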
7962
7963SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
7964 SelectionDAG &DAG) const {
7965 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7966 MFI.setFrameAddressIsTaken(true);
7967
7968 EVT VT = Op.getValueType();
7969 SDLoc DL(Op);
7970 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7971 SDValue FrameAddr =
7972 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
7973 while (Depth--)
7974 FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
7975 MachinePointerInfo());
7976
7977 if (Subtarget->isTargetILP32())
7978 FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
7979 DAG.getValueType(VT));
7980
7981 return FrameAddr;
7982}
7983
7984SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
7985 SelectionDAG &DAG) const {
7986 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7987
7988 EVT VT = getPointerTy(DAG.getDataLayout());
7989 SDLoc DL(Op);
7990 int FI = MFI.CreateFixedObject(4, 0, false);
7991 return DAG.getFrameIndex(FI, VT);
7992}
7993
7994#define GET_REGISTER_MATCHER
7995#include "AArch64GenAsmMatcher.inc"
7996
7997// FIXME? Maybe this could be a TableGen attribute on some registers and
7998// this table could be generated automatically from RegInfo.
7999Register AArch64TargetLowering::
8000getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
8001 Register Reg = MatchRegisterName(RegName);
8002 if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
8003 const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
8004 unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
8005 if (!Subtarget->isXRegisterReserved(DwarfRegNum))
8006 Reg = 0;
8007 }
8008 if (Reg)
8009 return Reg;
8010 report_fatal_error(Twine("Invalid register name \""
8011 + StringRef(RegName) + "\"."));
8012}
8013
8014SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
8015 SelectionDAG &DAG) const {
8016 DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
8017
8018 EVT VT = Op.getValueType();
8019 SDLoc DL(Op);
8020
8021 SDValue FrameAddr =
8022 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
8023 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
8024
8025 return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
8026}
8027
8028SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
8029 SelectionDAG &DAG) const {
8030 MachineFunction &MF = DAG.getMachineFunction();
8031 MachineFrameInfo &MFI = MF.getFrameInfo();
8032 MFI.setReturnAddressIsTaken(true);
8033
8034 EVT VT = Op.getValueType();
8035 SDLoc DL(Op);
8036 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
8037 SDValue ReturnAddress;
8038 if (Depth) {
8039 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
8040 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
8041 ReturnAddress = DAG.getLoad(
8042 VT, DL, DAG.getEntryNode(),
8043 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo());
8044 } else {
8045 // Return LR, which contains the return address. Mark it an implicit
8046 // live-in.
8047 unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
8048 ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
8049 }
8050
8051  // The XPACLRI instruction assembles to a hint-space instruction before
8052  // Armv8.3-A, therefore it can be safely used on any pre-Armv8.3-A
8053  // architecture. On Armv8.3-A and onwards XPACI is available, so use
8054  // that instead.
8055 SDNode *St;
8056 if (Subtarget->hasPAuth()) {
8057 St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
8058 } else {
8059 // XPACLRI operates on LR therefore we must move the operand accordingly.
8060 SDValue Chain =
8061 DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
8062 St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
8063 }
8064 return SDValue(St, 0);
8065}
8066
8067/// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which returns two
8068 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
8069SDValue AArch64TargetLowering::LowerShiftParts(SDValue Op,
8070 SelectionDAG &DAG) const {
8071 SDValue Lo, Hi;
8072 expandShiftParts(Op.getNode(), Lo, Hi, DAG);
8073 return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
8074}
8075
8076bool AArch64TargetLowering::isOffsetFoldingLegal(
8077 const GlobalAddressSDNode *GA) const {
8078 // Offsets are folded in the DAG combine rather than here so that we can
8079 // intelligently choose an offset based on the uses.
8080 return false;
8081}
8082
8083bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8084 bool OptForSize) const {
8085 bool IsLegal = false;
8086 // We can materialize #0.0 as fmov $Rd, XZR for 64-bit, 32-bit cases, and
8087 // 16-bit case when target has full fp16 support.
8088 // FIXME: We should be able to handle f128 as well with a clever lowering.
8089 const APInt ImmInt = Imm.bitcastToAPInt();
8090 if (VT == MVT::f64)
8091 IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
8092 else if (VT == MVT::f32)
8093 IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
8094 else if (VT == MVT::f16 && Subtarget->hasFullFP16())
8095 IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
8096  // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
8097 // generate that fmov.
8098
8099  // If we cannot materialize the value in an fmov immediate field, check if
8100 // value can be encoded as the immediate operand of a logical instruction.
8101 // The immediate value will be created with either MOVZ, MOVN, or ORR.
8102 if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
8103 // The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
8104 // however the mov+fmov sequence is always better because of the reduced
8105 // cache pressure. The timings are still the same if you consider
8106 // movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
8107  // movw+movk is fused). So we limit to at most 2 instructions.
8108 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
8109 AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
8110 Insn);
8111 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
8112 IsLegal = Insn.size() <= Limit;
8113 }
8114
8115  LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
8116                    << " imm value: "; Imm.dump(););
8117 return IsLegal;
8118}
8119
8120//===----------------------------------------------------------------------===//
8121// AArch64 Optimization Hooks
8122//===----------------------------------------------------------------------===//
8123
8124static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
8125 SDValue Operand, SelectionDAG &DAG,
8126 int &ExtraSteps) {
8127 EVT VT = Operand.getValueType();
8128 if (ST->hasNEON() &&
8129 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
8130 VT == MVT::f32 || VT == MVT::v1f32 ||
8131 VT == MVT::v2f32 || VT == MVT::v4f32)) {
8132 if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
8133 // For the reciprocal estimates, convergence is quadratic, so the number
8134 // of digits is doubled after each iteration. In ARMv8, the accuracy of
8135 // the initial estimate is 2^-8. Thus the number of extra steps to refine
8136 // the result for float (23 mantissa bits) is 2 and for double (52
8137 // mantissa bits) is 3.
8138 ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
8139
8140 return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
8141 }
8142
8143 return SDValue();
8144}
8145
8146SDValue
8147AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
8148 const DenormalMode &Mode) const {
8149 SDLoc DL(Op);
8150 EVT VT = Op.getValueType();
8151 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8152 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
8153 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
8154}
8155
8156SDValue
8157AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
8158 SelectionDAG &DAG) const {
8159 return Op;
8160}
8161
8162SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
8163 SelectionDAG &DAG, int Enabled,
8164 int &ExtraSteps,
8165 bool &UseOneConst,
8166 bool Reciprocal) const {
8167 if (Enabled == ReciprocalEstimate::Enabled ||
8168 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
8169 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
8170 DAG, ExtraSteps)) {
8171 SDLoc DL(Operand);
8172 EVT VT = Operand.getValueType();
8173
8174 SDNodeFlags Flags;
8175 Flags.setAllowReassociation(true);
8176
8177 // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
8178 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
8179 for (int i = ExtraSteps; i > 0; --i) {
8180 SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
8181 Flags);
8182 Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
8183 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
8184 }
8185 if (!Reciprocal)
8186 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
8187
8188 ExtraSteps = 0;
8189 return Estimate;
8190 }
8191
8192 return SDValue();
8193}
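// A scalar sketch of the Newton iteration used above (my own illustration,
// not LLVM code): starting from a rough estimate E of 1/sqrt(x) with about
// 2^-8 accuracy, each step E' = E * 0.5 * (3 - x*E*E) roughly doubles the
// number of correct bits, which is why ExtraSteps is 2 for f32 and 3 for f64.
// The starting value below is a made-up stand-in for the FRSQRTE result.
#include <cmath>
#include <cstdio>

int main() {
  const double X = 2.0;
  double E = 0.70; // pretend FRSQRTE output; the true value is ~0.7071
  for (int Step = 1; Step <= 3; ++Step) {
    E = E * 0.5 * (3.0 - X * E * E); // one FRSQRTS-style refinement
    std::printf("step %d: |error| = %.3g\n", Step,
                std::fabs(E - 1.0 / std::sqrt(X)));
  }
  return 0;
}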
8194
8195SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
8196 SelectionDAG &DAG, int Enabled,
8197 int &ExtraSteps) const {
8198 if (Enabled == ReciprocalEstimate::Enabled)
8199 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
8200 DAG, ExtraSteps)) {
8201 SDLoc DL(Operand);
8202 EVT VT = Operand.getValueType();
8203
8204 SDNodeFlags Flags;
8205 Flags.setAllowReassociation(true);
8206
8207 // Newton reciprocal iteration: E * (2 - X * E)
8208 // AArch64 reciprocal iteration instruction: (2 - M * N)
8209 for (int i = ExtraSteps; i > 0; --i) {
8210 SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
8211 Estimate, Flags);
8212 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
8213 }
8214
8215 ExtraSteps = 0;
8216 return Estimate;
8217 }
8218
8219 return SDValue();
8220}
8221
8222//===----------------------------------------------------------------------===//
8223// AArch64 Inline Assembly Support
8224//===----------------------------------------------------------------------===//
8225
8226// Table of Constraints
8227// TODO: This is the current set of constraints supported by ARM for the
8228 // compiler; not all of them may make sense.
8229//
8230// r - A general register
8231// w - An FP/SIMD register of some size in the range v0-v31
8232// x - An FP/SIMD register of some size in the range v0-v15
8233// I - Constant that can be used with an ADD instruction
8234// J - Constant that can be used with a SUB instruction
8235// K - Constant that can be used with a 32-bit logical instruction
8236// L - Constant that can be used with a 64-bit logical instruction
8237// M - Constant that can be used as a 32-bit MOV immediate
8238// N - Constant that can be used as a 64-bit MOV immediate
8239// Q - A memory reference with base register and no offset
8240// S - A symbolic address
8241// Y - Floating point constant zero
8242// Z - Integer constant zero
8243//
8244// Note that general register operands will be output using their 64-bit x
8245// register name, whatever the size of the variable, unless the asm operand
8246// is prefixed by the %w modifier. Floating-point and SIMD register operands
8247// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
8248// %q modifier.
8249const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
8250 // At this point, we have to lower this constraint to something else, so we
8251 // lower it to an "r" or "w". However, by doing this we will force the result
8252 // to be in register, while the X constraint is much more permissive.
8253 //
8254 // Although we are correct (we are free to emit anything, without
8255 // constraints), we might break use cases that would expect us to be more
8256 // efficient and emit something else.
8257 if (!Subtarget->hasFPARMv8())
8258 return "r";
8259
8260 if (ConstraintVT.isFloatingPoint())
8261 return "w";
8262
8263 if (ConstraintVT.isVector() &&
8264 (ConstraintVT.getSizeInBits() == 64 ||
8265 ConstraintVT.getSizeInBits() == 128))
8266 return "w";
8267
8268 return "r";
8269}
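// An illustrative use of a few of the constraints documented above (my own
// example, not taken from LLVM; it assumes compiling for AArch64 with the
// GCC/Clang extended-asm syntax): "w" places operands in FP/SIMD registers
// (printed as d-registers via the %d modifier), "r" uses a general register,
// and "I" requires an ADD-style immediate in the range 0-4095.
#include <cstdint>

double scaled_add(double A, double B, uint64_t &Counter) {
  double Res;
  asm("fadd %d0, %d1, %d2" : "=w"(Res) : "w"(A), "w"(B));
  asm("add %0, %1, %2" : "=r"(Counter) : "r"(Counter), "I"(1));
  return Res;
}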
8270
8271enum PredicateConstraint {
8272 Upl,
8273 Upa,
8274 Invalid
8275};
8276
8277static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
8278 PredicateConstraint P = PredicateConstraint::Invalid;
8279 if (Constraint == "Upa")
8280 P = PredicateConstraint::Upa;
8281 if (Constraint == "Upl")
8282 P = PredicateConstraint::Upl;
8283 return P;
8284}
8285
8286/// getConstraintType - Given a constraint letter, return the type of
8287/// constraint it is for this target.
8288AArch64TargetLowering::ConstraintType
8289AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
8290 if (Constraint.size() == 1) {
8291 switch (Constraint[0]) {
8292 default:
8293 break;
8294 case 'x':
8295 case 'w':
8296 case 'y':
8297 return C_RegisterClass;
8298 // An address with a single base register. Due to the way we
8299 // currently handle addresses it is the same as 'r'.
8300 case 'Q':
8301 return C_Memory;
8302 case 'I':
8303 case 'J':
8304 case 'K':
8305 case 'L':
8306 case 'M':
8307 case 'N':
8308 case 'Y':
8309 case 'Z':
8310 return C_Immediate;
8311 case 'z':
8312 case 'S': // A symbolic address
8313 return C_Other;
8314 }
8315 } else if (parsePredicateConstraint(Constraint) !=
8316 PredicateConstraint::Invalid)
8317 return C_RegisterClass;
8318 return TargetLowering::getConstraintType(Constraint);
8319}
8320
8321/// Examine constraint type and operand type and determine a weight value.
8322/// This object must already have been set up with the operand type
8323/// and the current alternative constraint selected.
8324TargetLowering::ConstraintWeight
8325AArch64TargetLowering::getSingleConstraintMatchWeight(
8326 AsmOperandInfo &info, const char *constraint) const {
8327 ConstraintWeight weight = CW_Invalid;
8328 Value *CallOperandVal = info.CallOperandVal;
8329 // If we don't have a value, we can't do a match,
8330 // but allow it at the lowest weight.
8331 if (!CallOperandVal)
8332 return CW_Default;
8333 Type *type = CallOperandVal->getType();
8334 // Look at the constraint type.
8335 switch (*constraint) {
8336 default:
8337 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
8338 break;
8339 case 'x':
8340 case 'w':
8341 case 'y':
8342 if (type->isFloatingPointTy() || type->isVectorTy())
8343 weight = CW_Register;
8344 break;
8345 case 'z':
8346 weight = CW_Constant;
8347 break;
8348 case 'U':
8349 if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid)
8350 weight = CW_Register;
8351 break;
8352 }
8353 return weight;
8354}
8355
8356std::pair<unsigned, const TargetRegisterClass *>
8357AArch64TargetLowering::getRegForInlineAsmConstraint(
8358 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8359 if (Constraint.size() == 1) {
8360 switch (Constraint[0]) {
8361 case 'r':
8362 if (VT.isScalableVector())
8363 return std::make_pair(0U, nullptr);
8364 if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
8365 return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
8366 if (VT.getFixedSizeInBits() == 64)
8367 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
8368 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
8369 case 'w': {
8370 if (!Subtarget->hasFPARMv8())
8371 break;
8372 if (VT.isScalableVector()) {
8373 if (VT.getVectorElementType() != MVT::i1)
8374 return std::make_pair(0U, &AArch64::ZPRRegClass);
8375 return std::make_pair(0U, nullptr);
8376 }
8377 uint64_t VTSize = VT.getFixedSizeInBits();
8378 if (VTSize == 16)
8379 return std::make_pair(0U, &AArch64::FPR16RegClass);
8380 if (VTSize == 32)
8381 return std::make_pair(0U, &AArch64::FPR32RegClass);
8382 if (VTSize == 64)
8383 return std::make_pair(0U, &AArch64::FPR64RegClass);
8384 if (VTSize == 128)
8385 return std::make_pair(0U, &AArch64::FPR128RegClass);
8386 break;
8387 }
8388 // The instructions that this constraint is designed for can
8389 // only take 128-bit registers so just use that regclass.
8390 case 'x':
8391 if (!Subtarget->hasFPARMv8())
8392 break;
8393 if (VT.isScalableVector())
8394 return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
8395 if (VT.getSizeInBits() == 128)
8396 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
8397 break;
8398 case 'y':
8399 if (!Subtarget->hasFPARMv8())
8400 break;
8401 if (VT.isScalableVector())
8402 return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
8403 break;
8404 }
8405 } else {
8406 PredicateConstraint PC = parsePredicateConstraint(Constraint);
8407 if (PC != PredicateConstraint::Invalid) {
8408 if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
8409 return std::make_pair(0U, nullptr);
8410 bool restricted = (PC == PredicateConstraint::Upl);
8411 return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
8412 : std::make_pair(0U, &AArch64::PPRRegClass);
8413 }
8414 }
8415 if (StringRef("{cc}").equals_insensitive(Constraint))
8416 return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
8417
8418 // Use the default implementation in TargetLowering to convert the register
8419 // constraint into a member of a register class.
8420 std::pair<unsigned, const TargetRegisterClass *> Res;
8421 Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8422
8423 // Not found as a standard register?
8424 if (!Res.second) {
8425 unsigned Size = Constraint.size();
8426 if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
8427 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
8428 int RegNo;
8429 bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
8430 if (!Failed && RegNo >= 0 && RegNo <= 31) {
8431 // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
8432 // By default we'll emit v0-v31 for this unless there's a modifier where
8433 // we'll emit the correct register as well.
8434 if (VT != MVT::Other && VT.getSizeInBits() == 64) {
8435 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
8436 Res.second = &AArch64::FPR64RegClass;
8437 } else {
8438 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
8439 Res.second = &AArch64::FPR128RegClass;
8440 }
8441 }
8442 }
8443 }
8444
8445 if (Res.second && !Subtarget->hasFPARMv8() &&
8446 !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
8447 !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
8448 return std::make_pair(0U, nullptr);
8449
8450 return Res;
8451}
8452
8453EVT AArch64TargetLowering::getAsmOperandValueType(const DataLayout &DL,
8454 llvm::Type *Ty,
8455 bool AllowUnknown) const {
8456 if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
8457 return EVT(MVT::i64x8);
8458
8459 return TargetLowering::getAsmOperandValueType(DL, Ty, AllowUnknown);
8460}
8461
8462/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
8463/// vector. If it is invalid, don't add anything to Ops.
8464void AArch64TargetLowering::LowerAsmOperandForConstraint(
8465 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
8466 SelectionDAG &DAG) const {
8467 SDValue Result;
8468
8469 // Currently only support length 1 constraints.
8470 if (Constraint.length() != 1)
8471 return;
8472
8473 char ConstraintLetter = Constraint[0];
8474 switch (ConstraintLetter) {
8475 default:
8476 break;
8477
8478 // This set of constraints deal with valid constants for various instructions.
8479 // Validate and return a target constant for them if we can.
8480 case 'z': {
8481 // 'z' maps to xzr or wzr so it needs an input of 0.
8482 if (!isNullConstant(Op))
8483 return;
8484
8485 if (Op.getValueType() == MVT::i64)
8486 Result = DAG.getRegister(AArch64::XZR, MVT::i64);
8487 else
8488 Result = DAG.getRegister(AArch64::WZR, MVT::i32);
8489 break;
8490 }
8491 case 'S': {
8492 // An absolute symbolic address or label reference.
8493 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
8494 Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
8495 GA->getValueType(0));
8496 } else if (const BlockAddressSDNode *BA =
8497 dyn_cast<BlockAddressSDNode>(Op)) {
8498 Result =
8499 DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
8500 } else
8501 return;
8502 break;
8503 }
8504
8505 case 'I':
8506 case 'J':
8507 case 'K':
8508 case 'L':
8509 case 'M':
8510 case 'N':
8511 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
8512 if (!C)
8513 return;
8514
8515 // Grab the value and do some validation.
8516 uint64_t CVal = C->getZExtValue();
8517 switch (ConstraintLetter) {
8518 // The I constraint applies only to simple ADD or SUB immediate operands:
8519 // i.e. 0 to 4095 with optional shift by 12
8520 // The J constraint applies only to ADD or SUB immediates that would be
8521 // valid when negated, i.e. if [an add pattern] were to be output as a SUB
8522 // instruction [or vice versa], in other words -1 to -4095 with optional
8523 // left shift by 12.
8524 case 'I':
8525 if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
8526 break;
8527 return;
8528 case 'J': {
8529 uint64_t NVal = -C->getSExtValue();
8530 if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
8531 CVal = C->getSExtValue();
8532 break;
8533 }
8534 return;
8535 }
8536 // The K and L constraints apply *only* to logical immediates, including
8537 // what used to be the MOVI alias for ORR (though the MOVI alias has now
8538 // been removed and MOV should be used). So these constraints have to
8539 // distinguish between bit patterns that are valid 32-bit or 64-bit
8540 // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
8541 // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
8542 // versa.
8543 case 'K':
8544 if (AArch64_AM::isLogicalImmediate(CVal, 32))
8545 break;
8546 return;
8547 case 'L':
8548 if (AArch64_AM::isLogicalImmediate(CVal, 64))
8549 break;
8550 return;
8551 // The M and N constraints are a superset of K and L respectively, for use
8552 // with the MOV (immediate) alias. As well as the logical immediates they
8553 // also match 32 or 64-bit immediates that can be loaded either using a
8554  // *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
8555 // (M) or 64-bit 0x1234000000000000 (N) etc.
8556  // As a note, some of this code is liberally stolen from the asm parser.
8557 case 'M': {
8558 if (!isUInt<32>(CVal))
8559 return;
8560 if (AArch64_AM::isLogicalImmediate(CVal, 32))
8561 break;
8562 if ((CVal & 0xFFFF) == CVal)
8563 break;
8564 if ((CVal & 0xFFFF0000ULL) == CVal)
8565 break;
8566 uint64_t NCVal = ~(uint32_t)CVal;
8567 if ((NCVal & 0xFFFFULL) == NCVal)
8568 break;
8569 if ((NCVal & 0xFFFF0000ULL) == NCVal)
8570 break;
8571 return;
8572 }
8573 case 'N': {
8574 if (AArch64_AM::isLogicalImmediate(CVal, 64))
8575 break;
8576 if ((CVal & 0xFFFFULL) == CVal)
8577 break;
8578 if ((CVal & 0xFFFF0000ULL) == CVal)
8579 break;
8580 if ((CVal & 0xFFFF00000000ULL) == CVal)
8581 break;
8582 if ((CVal & 0xFFFF000000000000ULL) == CVal)
8583 break;
8584 uint64_t NCVal = ~CVal;
8585 if ((NCVal & 0xFFFFULL) == NCVal)
8586 break;
8587 if ((NCVal & 0xFFFF0000ULL) == NCVal)
8588 break;
8589 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
8590 break;
8591 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
8592 break;
8593 return;
8594 }
8595 default:
8596 return;
8597 }
8598
8599 // All assembler immediates are 64-bit integers.
8600 Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
8601 break;
8602 }
8603
8604 if (Result.getNode()) {
8605 Ops.push_back(Result);
8606 return;
8607 }
8608
8609 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8610}
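The 'M' and 'N' cases above combine a logical-immediate test with a check for values that a single MOVZ or MOVN can materialize. As a minimal standalone sketch (plain C++, not LLVM code), here is just the MOVZ/MOVN half of the 32-bit 'M' check; the logical-immediate branch is omitted because it depends on AArch64_AM::isLogicalImmediate, and the helper name is made up for illustration.

#include <cstdint>

// Returns true if a 32-bit value is a single 16-bit chunk at bit 0 or bit 16,
// or if its 32-bit bitwise NOT is (MOVZ and MOVN respectively).
static bool fitsSingleMovzOrMovn32(uint64_t CVal) {
  if (CVal > 0xFFFFFFFFULL)        // the 'M' constraint only takes 32-bit values
    return false;
  if ((CVal & 0xFFFFULL) == CVal || (CVal & 0xFFFF0000ULL) == CVal)
    return true;                   // MOVZ
  uint64_t NCVal = ~(uint32_t)CVal;
  return (NCVal & 0xFFFFULL) == NCVal || (NCVal & 0xFFFF0000ULL) == NCVal; // MOVN
}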
8611
8612//===----------------------------------------------------------------------===//
8613// AArch64 Advanced SIMD Support
8614//===----------------------------------------------------------------------===//
8615
8616/// WidenVector - Given a value in the V64 register class, produce the
8617/// equivalent value in the V128 register class.
8618static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
8619 EVT VT = V64Reg.getValueType();
8620 unsigned NarrowSize = VT.getVectorNumElements();
8621 MVT EltTy = VT.getVectorElementType().getSimpleVT();
8622 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
8623 SDLoc DL(V64Reg);
8624
8625 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
8626 V64Reg, DAG.getConstant(0, DL, MVT::i64));
8627}
8628
8629/// getExtFactor - Determine the adjustment factor for the position when
8630/// generating an "extract from vector registers" instruction.
8631static unsigned getExtFactor(SDValue &V) {
8632 EVT EltType = V.getValueType().getVectorElementType();
8633 return EltType.getSizeInBits() / 8;
8634}
8635
8636/// NarrowVector - Given a value in the V128 register class, produce the
8637/// equivalent value in the V64 register class.
8638static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
8639 EVT VT = V128Reg.getValueType();
8640 unsigned WideSize = VT.getVectorNumElements();
8641 MVT EltTy = VT.getVectorElementType().getSimpleVT();
8642 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
8643 SDLoc DL(V128Reg);
8644
8645 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
8646}
8647
8648// Gather data to see if the operation can be modelled as a
8649// shuffle in combination with VEXTs.
8650SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
8651 SelectionDAG &DAG) const {
8652  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8653  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
8654 SDLoc dl(Op);
8655 EVT VT = Op.getValueType();
8656  assert(!VT.isScalableVector() &&
8657         "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
8658 unsigned NumElts = VT.getVectorNumElements();
8659
8660 struct ShuffleSourceInfo {
8661 SDValue Vec;
8662 unsigned MinElt;
8663 unsigned MaxElt;
8664
8665 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8666 // be compatible with the shuffle we intend to construct. As a result
8667 // ShuffleVec will be some sliding window into the original Vec.
8668 SDValue ShuffleVec;
8669
8670 // Code should guarantee that element i in Vec starts at element "WindowBase
8671 // + i * WindowScale in ShuffleVec".
8672 int WindowBase;
8673 int WindowScale;
8674
8675 ShuffleSourceInfo(SDValue Vec)
8676 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
8677 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
8678
8679 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8680 };
8681
8682 // First gather all vectors used as an immediate source for this BUILD_VECTOR
8683 // node.
8684 SmallVector<ShuffleSourceInfo, 2> Sources;
8685 for (unsigned i = 0; i < NumElts; ++i) {
8686 SDValue V = Op.getOperand(i);
8687 if (V.isUndef())
8688 continue;
8689 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
8690 !isa<ConstantSDNode>(V.getOperand(1))) {
8691      LLVM_DEBUG(
8692          dbgs() << "Reshuffle failed: "
8693                    "a shuffle can only come from building a vector from "
8694                    "various elements of other vectors, provided their "
8695                    "indices are constant\n");
8696 return SDValue();
8697 }
8698
8699 // Add this element source to the list if it's not already there.
8700 SDValue SourceVec = V.getOperand(0);
8701 auto Source = find(Sources, SourceVec);
8702 if (Source == Sources.end())
8703 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8704
8705 // Update the minimum and maximum lane number seen.
8706 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
8707 Source->MinElt = std::min(Source->MinElt, EltNo);
8708 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8709 }
8710
8711 if (Sources.size() > 2) {
8712    LLVM_DEBUG(
8713        dbgs() << "Reshuffle failed: currently only do something sane when at "
8714                  "most two source vectors are involved\n");
8715 return SDValue();
8716 }
8717
8718 // Find out the smallest element size among result and two sources, and use
8719 // it as element size to build the shuffle_vector.
8720 EVT SmallestEltTy = VT.getVectorElementType();
8721 for (auto &Source : Sources) {
8722 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8723 if (SrcEltTy.bitsLT(SmallestEltTy)) {
8724 SmallestEltTy = SrcEltTy;
8725 }
8726 }
8727 unsigned ResMultiplier =
8728 VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits();
8729 uint64_t VTSize = VT.getFixedSizeInBits();
8730 NumElts = VTSize / SmallestEltTy.getFixedSizeInBits();
8731 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8732
8733 // If the source vector is too wide or too narrow, we may nevertheless be able
8734 // to construct a compatible shuffle either by concatenating it with UNDEF or
8735 // extracting a suitable range of elements.
8736 for (auto &Src : Sources) {
8737 EVT SrcVT = Src.ShuffleVec.getValueType();
8738
8739 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8740 if (SrcVTSize == VTSize)
8741 continue;
8742
8743 // This stage of the search produces a source with the same element type as
8744 // the original, but with a total width matching the BUILD_VECTOR output.
8745 EVT EltVT = SrcVT.getVectorElementType();
8746 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8747 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8748
8749 if (SrcVTSize < VTSize) {
8750      assert(2 * SrcVTSize == VTSize);
8751 // We can pad out the smaller vector for free, so if it's part of a
8752 // shuffle...
8753 Src.ShuffleVec =
8754 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8755 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8756 continue;
8757 }
8758
8759 if (SrcVTSize != 2 * VTSize) {
8760      LLVM_DEBUG(
8761          dbgs() << "Reshuffle failed: result vector too small to extract\n");
8762 return SDValue();
8763 }
8764
8765 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8766      LLVM_DEBUG(
8767          dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
8768 return SDValue();
8769 }
8770
8771 if (Src.MinElt >= NumSrcElts) {
8772 // The extraction can just take the second half
8773 Src.ShuffleVec =
8774 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8775 DAG.getConstant(NumSrcElts, dl, MVT::i64));
8776 Src.WindowBase = -NumSrcElts;
8777 } else if (Src.MaxElt < NumSrcElts) {
8778 // The extraction can just take the first half
8779 Src.ShuffleVec =
8780 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8781 DAG.getConstant(0, dl, MVT::i64));
8782 } else {
8783 // An actual VEXT is needed
8784 SDValue VEXTSrc1 =
8785 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8786 DAG.getConstant(0, dl, MVT::i64));
8787 SDValue VEXTSrc2 =
8788 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8789 DAG.getConstant(NumSrcElts, dl, MVT::i64));
8790 unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
8791
8792 if (!SrcVT.is64BitVector()) {
8793        LLVM_DEBUG(
8794            dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
8795                      "for SVE vectors.");
8796 return SDValue();
8797 }
8798
8799 Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
8800 VEXTSrc2,
8801 DAG.getConstant(Imm, dl, MVT::i32));
8802 Src.WindowBase = -Src.MinElt;
8803 }
8804 }
8805
8806 // Another possible incompatibility occurs from the vector element types. We
8807 // can fix this by bitcasting the source vectors to the same type we intend
8808 // for the shuffle.
8809 for (auto &Src : Sources) {
8810 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8811 if (SrcEltTy == SmallestEltTy)
8812 continue;
8813    assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8814 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
8815 Src.WindowScale =
8816 SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
8817 Src.WindowBase *= Src.WindowScale;
8818 }
8819
8820 // Final sanity check before we try to actually produce a shuffle.
8821  LLVM_DEBUG(for (auto Src
8822                  : Sources)
8823                 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8824
8825 // The stars all align, our next step is to produce the mask for the shuffle.
8826 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8827 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8828 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8829 SDValue Entry = Op.getOperand(i);
8830 if (Entry.isUndef())
8831 continue;
8832
8833 auto Src = find(Sources, Entry.getOperand(0));
8834 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8835
8836 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8837 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8838 // segment.
8839 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8840 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8841 VT.getScalarSizeInBits());
8842 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8843
8844 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8845 // starting at the appropriate offset.
8846 int *LaneMask = &Mask[i * ResMultiplier];
8847
8848 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8849 ExtractBase += NumElts * (Src - Sources.begin());
8850 for (int j = 0; j < LanesDefined; ++j)
8851 LaneMask[j] = ExtractBase + j;
8852 }
8853
8854 // Final check before we try to produce nonsense...
8855 if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
8856    LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
8857 return SDValue();
8858 }
8859
8860 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8861 for (unsigned i = 0; i < Sources.size(); ++i)
8862 ShuffleOps[i] = Sources[i].ShuffleVec;
8863
8864 SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8865 ShuffleOps[1], Mask);
8866 SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
8867
8868  LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
8869             dbgs() << "Reshuffle, creating node: "; V.dump(););
8870
8871 return V;
8872}
8873
8874// check if an EXT instruction can handle the shuffle mask when the
8875// vector sources of the shuffle are the same.
8876static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
8877 unsigned NumElts = VT.getVectorNumElements();
8878
8879 // Assume that the first shuffle index is not UNDEF. Fail if it is.
8880 if (M[0] < 0)
8881 return false;
8882
8883 Imm = M[0];
8884
8885 // If this is a VEXT shuffle, the immediate value is the index of the first
8886 // element. The other shuffle indices must be the successive elements after
8887 // the first one.
8888 unsigned ExpectedElt = Imm;
8889 for (unsigned i = 1; i < NumElts; ++i) {
8890 // Increment the expected index. If it wraps around, just follow it
8891 // back to index zero and keep going.
8892 ++ExpectedElt;
8893 if (ExpectedElt == NumElts)
8894 ExpectedElt = 0;
8895
8896 if (M[i] < 0)
8897 continue; // ignore UNDEF indices
8898 if (ExpectedElt != static_cast<unsigned>(M[i]))
8899 return false;
8900 }
8901
8902 return true;
8903}
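A minimal standalone restatement (plain C++ on std::vector<int>, not LLVM code) of the rotation test isSingletonEXTMask performs; the function name is hypothetical.

#include <vector>

static bool isRotatedIdentityMask(const std::vector<int> &M, unsigned NumElts,
                                  unsigned &Imm) {
  if (M[0] < 0)
    return false;                  // the first index must not be UNDEF
  Imm = M[0];
  unsigned Expected = Imm;
  for (unsigned i = 1; i < NumElts; ++i) {
    if (++Expected == NumElts)
      Expected = 0;                // wrap back to lane 0
    if (M[i] >= 0 && (unsigned)M[i] != Expected)
      return false;                // UNDEF (-1) lanes are ignored
  }
  return true;                     // e.g. <2, 3, 0, 1> yields Imm = 2
}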
8904
8905/// Check if a vector shuffle corresponds to a DUP instruction with a larger
8906/// element width than the vector lane type. If that is the case, the function
8907/// returns true and writes the value of the DUP instruction's lane operand into
8908/// DupLaneOp.
8909static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize,
8910 unsigned &DupLaneOp) {
8911  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
8912         "Only possible block sizes for wide DUP are: 16, 32, 64");
8913
8914 if (BlockSize <= VT.getScalarSizeInBits())
8915 return false;
8916 if (BlockSize % VT.getScalarSizeInBits() != 0)
8917 return false;
8918 if (VT.getSizeInBits() % BlockSize != 0)
8919 return false;
8920
8921 size_t SingleVecNumElements = VT.getVectorNumElements();
8922 size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits();
8923 size_t NumBlocks = VT.getSizeInBits() / BlockSize;
8924
8925 // We are looking for masks like
8926 // [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element
8927 // might be replaced by 'undefined'. BlockIndices will eventually contain
8928 // lane indices of the duplicated block (i.e. [0, 1], [2, 3] and [4, 5, 6, 7]
8929 // for the above examples)
8930 SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
8931 for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
8932 for (size_t I = 0; I < NumEltsPerBlock; I++) {
8933 int Elt = M[BlockIndex * NumEltsPerBlock + I];
8934 if (Elt < 0)
8935 continue;
8936 // For now we don't support shuffles that use the second operand
8937 if ((unsigned)Elt >= SingleVecNumElements)
8938 return false;
8939 if (BlockElts[I] < 0)
8940 BlockElts[I] = Elt;
8941 else if (BlockElts[I] != Elt)
8942 return false;
8943 }
8944
8945 // We found a candidate block (possibly with some undefs). It must be a
8946 // sequence of consecutive integers starting with a value divisible by
8947 // NumEltsPerBlock with some values possibly replaced by undef-s.
8948
8949 // Find first non-undef element
8950 auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
8951  assert(FirstRealEltIter != BlockElts.end() &&
8952         "Shuffle with all-undefs must have been caught by previous cases, "
8953         "e.g. isSplat()");
8954 if (FirstRealEltIter == BlockElts.end()) {
8955 DupLaneOp = 0;
8956 return true;
8957 }
8958
8959 // Index of FirstRealElt in BlockElts
8960 size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
8961
8962 if ((unsigned)*FirstRealEltIter < FirstRealIndex)
8963 return false;
8964 // BlockElts[0] must have the following value if it isn't undef:
8965 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
8966
8967 // Check the first element
8968 if (Elt0 % NumEltsPerBlock != 0)
8969 return false;
8970 // Check that the sequence indeed consists of consecutive integers (modulo
8971 // undefs)
8972 for (size_t I = 0; I < NumEltsPerBlock; I++)
8973 if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
8974 return false;
8975
8976 DupLaneOp = Elt0 / NumEltsPerBlock;
8977 return true;
8978}
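A short standalone worked example (not LLVM code) of the block bookkeeping above, assuming a hypothetical v8i16 shuffle with mask <2, 3, 2, 3, 2, 3, 2, 3> and BlockSize = 32.

#include <cassert>

int main() {
  const unsigned ScalarBits = 16, VectorBits = 128, BlockSize = 32;
  const unsigned NumEltsPerBlock = BlockSize / ScalarBits; // 2 i16 lanes per block
  const unsigned NumBlocks = VectorBits / BlockSize;       // 4 blocks to compare
  // Every block repeats lanes {2, 3}, so BlockElts == {2, 3} and Elt0 == 2,
  // which is divisible by NumEltsPerBlock; the duplicated 32-bit lane is:
  const unsigned DupLaneOp = 2 / NumEltsPerBlock;
  assert(NumBlocks == 4 && DupLaneOp == 1); // i.e. a DUP of 32-bit lane 1
  return 0;
}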
8979
8980// check if an EXT instruction can handle the shuffle mask when the
8981// vector sources of the shuffle are different.
8982static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
8983 unsigned &Imm) {
8984 // Look for the first non-undef element.
8985 const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
8986
8987  // Benefit from APInt to handle overflow when calculating the expected element.
8988 unsigned NumElts = VT.getVectorNumElements();
8989 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
8990 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
8991 // The following shuffle indices must be the successive elements after the
8992 // first real element.
8993 const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
8994 [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
8995 if (FirstWrongElt != M.end())
8996 return false;
8997
8998 // The index of an EXT is the first element if it is not UNDEF.
8999 // Watch out for the beginning UNDEFs. The EXT index should be the expected
9000 // value of the first element. E.g.
9001 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
9002 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
9003 // ExpectedElt is the last mask index plus 1.
9004 Imm = ExpectedElt.getZExtValue();
9005
9006  // There are two different cases that require reversing the input vectors.
9007 // For example, for vector <4 x i32> we have the following cases,
9008 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
9009 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
9010 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
9011 // to reverse two input vectors.
9012 if (Imm < NumElts)
9013 ReverseEXT = true;
9014 else
9015 Imm -= NumElts;
9016
9017 return true;
9018}
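A standalone worked example (plain C++, not LLVM code) of the second case in the comment above, the v4i32 mask <-1, -1, 7, 0>; ordinary modulo arithmetic stands in for the APInt wrap-around.

#include <cassert>

int main() {
  const unsigned NumElts = 4;                          // indices wrap mod 2 * NumElts = 8
  const int M[4] = {-1, -1, 7, 0};
  unsigned Expected = (M[2] + 1) % (2 * NumElts);      // first real element is M[2] = 7
  assert(M[3] >= 0 && (unsigned)M[3] == Expected);     // 0 matches, so the mask fits
  const unsigned Imm = (Expected + 1) % (2 * NumElts); // "last mask index plus 1" = 1
  const bool ReverseEXT = Imm < NumElts;               // true: V1 and V2 get swapped
  assert(ReverseEXT && Imm == 1);                      // EXT(V2, V1, #1) -> lanes 5,6,7,0
  return 0;
}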
9019
9020/// isREVMask - Check if a vector shuffle corresponds to a REV
9021/// instruction with the specified blocksize. (The order of the elements
9022/// within each block of the vector is reversed.)
9023static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
9024  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
9025         "Only possible block sizes for REV are: 16, 32, 64");
9026
9027 unsigned EltSz = VT.getScalarSizeInBits();
9028 if (EltSz == 64)
9029 return false;
9030
9031 unsigned NumElts = VT.getVectorNumElements();
9032 unsigned BlockElts = M[0] + 1;
9033 // If the first shuffle index is UNDEF, be optimistic.
9034 if (M[0] < 0)
9035 BlockElts = BlockSize / EltSz;
9036
9037 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
9038 return false;
9039
9040 for (unsigned i = 0; i < NumElts; ++i) {
9041 if (M[i] < 0)
9042 continue; // ignore UNDEF indices
9043 if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
9044 return false;
9045 }
9046
9047 return true;
9048}
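A standalone sketch (plain C++) of the per-index formula checked in the loop above, evaluated for a hypothetical v8i16 REV64 query (EltSz = 16, so BlockElts = 64 / 16 = 4).

#include <cstdio>

int main() {
  const unsigned NumElts = 8, BlockElts = 4;  // 64-bit blocks of 16-bit lanes
  for (unsigned i = 0; i < NumElts; ++i) {
    unsigned Expected = (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts);
    std::printf("%u ", Expected);             // prints: 3 2 1 0 7 6 5 4
  }
  std::printf("\n");
  return 0;
}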
9049
9050static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9051 unsigned NumElts = VT.getVectorNumElements();
9052 if (NumElts % 2 != 0)
9053 return false;
9054 WhichResult = (M[0] == 0 ? 0 : 1);
9055 unsigned Idx = WhichResult * NumElts / 2;
9056 for (unsigned i = 0; i != NumElts; i += 2) {
9057 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
9058 (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
9059 return false;
9060 Idx += 1;
9061 }
9062
9063 return true;
9064}
9065
9066static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9067 unsigned NumElts = VT.getVectorNumElements();
9068 WhichResult = (M[0] == 0 ? 0 : 1);
9069 for (unsigned i = 0; i != NumElts; ++i) {
9070 if (M[i] < 0)
9071 continue; // ignore UNDEF indices
9072 if ((unsigned)M[i] != 2 * i + WhichResult)
9073 return false;
9074 }
9075
9076 return true;
9077}
9078
9079static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9080 unsigned NumElts = VT.getVectorNumElements();
9081 if (NumElts % 2 != 0)
9082 return false;
9083 WhichResult = (M[0] == 0 ? 0 : 1);
9084 for (unsigned i = 0; i < NumElts; i += 2) {
9085 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
9086 (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
9087 return false;
9088 }
9089 return true;
9090}
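A standalone sketch (plain C++) that generates the mask shapes the three predicates above accept, for a hypothetical v4i32 shuffle with WhichResult = 0, i.e. the ZIP1/UZP1/TRN1 results.

#include <cstdio>

int main() {
  const unsigned NumElts = 4, WhichResult = 0;
  unsigned Zip[4], Uzp[4], Trn[4];
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    Zip[i] = Idx;                              // ZIP1: <0, 4, 1, 5>
    Zip[i + 1] = Idx + NumElts;
    ++Idx;
    Trn[i] = i + WhichResult;                  // TRN1: <0, 4, 2, 6>
    Trn[i + 1] = i + NumElts + WhichResult;
  }
  for (unsigned i = 0; i != NumElts; ++i)
    Uzp[i] = 2 * i + WhichResult;              // UZP1: <0, 2, 4, 6>
  for (unsigned i = 0; i != NumElts; ++i)
    std::printf("zip=%u uzp=%u trn=%u\n", Zip[i], Uzp[i], Trn[i]);
  return 0;
}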
9091
9092/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
9093/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
9094/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
9095static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9096 unsigned NumElts = VT.getVectorNumElements();
9097 if (NumElts % 2 != 0)
9098 return false;
9099 WhichResult = (M[0] == 0 ? 0 : 1);
9100 unsigned Idx = WhichResult * NumElts / 2;
9101 for (unsigned i = 0; i != NumElts; i += 2) {
9102 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
9103 (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
9104 return false;
9105 Idx += 1;
9106 }
9107
9108 return true;
9109}
9110
9111/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
9112/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
9113/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
9114static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9115 unsigned Half = VT.getVectorNumElements() / 2;
9116 WhichResult = (M[0] == 0 ? 0 : 1);
9117 for (unsigned j = 0; j != 2; ++j) {
9118 unsigned Idx = WhichResult;
9119 for (unsigned i = 0; i != Half; ++i) {
9120 int MIdx = M[i + j * Half];
9121 if (MIdx >= 0 && (unsigned)MIdx != Idx)
9122 return false;
9123 Idx += 2;
9124 }
9125 }
9126
9127 return true;
9128}
9129
9130/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
9131/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
9132/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
9133static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9134 unsigned NumElts = VT.getVectorNumElements();
9135 if (NumElts % 2 != 0)
9136 return false;
9137 WhichResult = (M[0] == 0 ? 0 : 1);
9138 for (unsigned i = 0; i < NumElts; i += 2) {
9139 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
9140 (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
9141 return false;
9142 }
9143 return true;
9144}
9145
9146static bool isINSMask(ArrayRef<int> M, int NumInputElements,
9147 bool &DstIsLeft, int &Anomaly) {
9148 if (M.size() != static_cast<size_t>(NumInputElements))
9149 return false;
9150
9151 int NumLHSMatch = 0, NumRHSMatch = 0;
9152 int LastLHSMismatch = -1, LastRHSMismatch = -1;
9153
9154 for (int i = 0; i < NumInputElements; ++i) {
9155 if (M[i] == -1) {
9156 ++NumLHSMatch;
9157 ++NumRHSMatch;
9158 continue;
9159 }
9160
9161 if (M[i] == i)
9162 ++NumLHSMatch;
9163 else
9164 LastLHSMismatch = i;
9165
9166 if (M[i] == i + NumInputElements)
9167 ++NumRHSMatch;
9168 else
9169 LastRHSMismatch = i;
9170 }
9171
9172 if (NumLHSMatch == NumInputElements - 1) {
9173 DstIsLeft = true;
9174 Anomaly = LastLHSMismatch;
9175 return true;
9176 } else if (NumRHSMatch == NumInputElements - 1) {
9177 DstIsLeft = false;
9178 Anomaly = LastRHSMismatch;
9179 return true;
9180 }
9181
9182 return false;
9183}
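A standalone worked example (plain C++, not LLVM code) of the matching above: for a hypothetical v4i32 mask <0, 1, 6, 3>, every lane matches the left input except lane 2, which takes lane 2 of the right input, so the shuffle can become a single INS.

#include <cassert>

int main() {
  const int NumElts = 4;
  const int M[4] = {0, 1, 6, 3};
  int NumLHSMatch = 0, LastLHSMismatch = -1;
  for (int i = 0; i < NumElts; ++i) {
    if (M[i] == -1 || M[i] == i)
      ++NumLHSMatch;                       // UNDEF counts as a match
    else
      LastLHSMismatch = i;
  }
  assert(NumLHSMatch == NumElts - 1 && LastLHSMismatch == 2); // DstIsLeft, Anomaly = 2
  const int SrcLane = M[LastLHSMismatch] - NumElts;           // lane 2 of the right input
  assert(SrcLane == 2);
  return 0;
}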
9184
9185static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
9186 if (VT.getSizeInBits() != 128)
9187 return false;
9188
9189 unsigned NumElts = VT.getVectorNumElements();
9190
9191 for (int I = 0, E = NumElts / 2; I != E; I++) {
9192 if (Mask[I] != I)
9193 return false;
9194 }
9195
9196 int Offset = NumElts / 2;
9197 for (int I = NumElts / 2, E = NumElts; I != E; I++) {
9198 if (Mask[I] != I + SplitLHS * Offset)
9199 return false;
9200 }
9201
9202 return true;
9203}
9204
9205static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
9206 SDLoc DL(Op);
9207 EVT VT = Op.getValueType();
9208 SDValue V0 = Op.getOperand(0);
9209 SDValue V1 = Op.getOperand(1);
9210 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
9211
9212 if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
9213 VT.getVectorElementType() != V1.getValueType().getVectorElementType())
9214 return SDValue();
9215
9216 bool SplitV0 = V0.getValueSizeInBits() == 128;
9217
9218 if (!isConcatMask(Mask, VT, SplitV0))
9219 return SDValue();
9220
9221 EVT CastVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
9222 if (SplitV0) {
9223 V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
9224 DAG.getConstant(0, DL, MVT::i64));
9225 }
9226 if (V1.getValueSizeInBits() == 128) {
9227 V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
9228 DAG.getConstant(0, DL, MVT::i64));
9229 }
9230 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
9231}
9232
9233/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9234/// the specified operations to build the shuffle.
9235static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9236 SDValue RHS, SelectionDAG &DAG,
9237 const SDLoc &dl) {
9238 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9239 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
9240 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
9241
9242 enum {
9243 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9244 OP_VREV,
9245 OP_VDUP0,
9246 OP_VDUP1,
9247 OP_VDUP2,
9248 OP_VDUP3,
9249 OP_VEXT1,
9250 OP_VEXT2,
9251 OP_VEXT3,
9252 OP_VUZPL, // VUZP, left result
9253 OP_VUZPR, // VUZP, right result
9254 OP_VZIPL, // VZIP, left result
9255 OP_VZIPR, // VZIP, right result
9256 OP_VTRNL, // VTRN, left result
9257 OP_VTRNR // VTRN, right result
9258 };
9259
9260 if (OpNum == OP_COPY) {
9261 if (LHSID == (1 * 9 + 2) * 9 + 3)
9262 return LHS;
9263    assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
9264 return RHS;
9265 }
9266
9267 SDValue OpLHS, OpRHS;
9268 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9269 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9270 EVT VT = OpLHS.getValueType();
9271
9272 switch (OpNum) {
9273 default:
9274    llvm_unreachable("Unknown shuffle opcode!");
9275 case OP_VREV:
9276 // VREV divides the vector in half and swaps within the half.
9277 if (VT.getVectorElementType() == MVT::i32 ||
9278 VT.getVectorElementType() == MVT::f32)
9279 return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
9280 // vrev <4 x i16> -> REV32
9281 if (VT.getVectorElementType() == MVT::i16 ||
9282 VT.getVectorElementType() == MVT::f16 ||
9283 VT.getVectorElementType() == MVT::bf16)
9284 return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
9285 // vrev <4 x i8> -> REV16
9286    assert(VT.getVectorElementType() == MVT::i8);
9287 return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
9288 case OP_VDUP0:
9289 case OP_VDUP1:
9290 case OP_VDUP2:
9291 case OP_VDUP3: {
9292 EVT EltTy = VT.getVectorElementType();
9293 unsigned Opcode;
9294 if (EltTy == MVT::i8)
9295 Opcode = AArch64ISD::DUPLANE8;
9296 else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
9297 Opcode = AArch64ISD::DUPLANE16;
9298 else if (EltTy == MVT::i32 || EltTy == MVT::f32)
9299 Opcode = AArch64ISD::DUPLANE32;
9300 else if (EltTy == MVT::i64 || EltTy == MVT::f64)
9301 Opcode = AArch64ISD::DUPLANE64;
9302 else
9303      llvm_unreachable("Invalid vector element type?");
9304
9305 if (VT.getSizeInBits() == 64)
9306 OpLHS = WidenVector(OpLHS, DAG);
9307 SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
9308 return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
9309 }
9310 case OP_VEXT1:
9311 case OP_VEXT2:
9312 case OP_VEXT3: {
9313 unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
9314 return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
9315 DAG.getConstant(Imm, dl, MVT::i32));
9316 }
9317 case OP_VUZPL:
9318 return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
9319 OpRHS);
9320 case OP_VUZPR:
9321 return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
9322 OpRHS);
9323 case OP_VZIPL:
9324 return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
9325 OpRHS);
9326 case OP_VZIPR:
9327 return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
9328 OpRHS);
9329 case OP_VTRNL:
9330 return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
9331 OpRHS);
9332 case OP_VTRNR:
9333 return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
9334 OpRHS);
9335 }
9336}
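A standalone sketch (plain C++) of how a perfect-shuffle table entry packs its fields, mirroring the shifts at the top of GeneratePerfectShuffle and the cost extraction later in LowerVECTOR_SHUFFLE; the struct and function names are hypothetical.

#include <cstdint>

struct PFFields {
  unsigned Cost;   // bits [31:30]: instruction count; entries costing > 4 fall back to TBL
  unsigned OpNum;  // bits [29:26]: which OP_* to emit (OP_COPY, OP_VREV, ...)
  unsigned LHSID;  // bits [25:13]: table index describing the left operand
  unsigned RHSID;  // bits [12:0]:  table index describing the right operand
};

static PFFields decodePFEntry(uint32_t PFEntry) {
  PFFields F;
  F.Cost = PFEntry >> 30;
  F.OpNum = (PFEntry >> 26) & 0x0F;
  F.LHSID = (PFEntry >> 13) & ((1u << 13) - 1);
  F.RHSID = (PFEntry >> 0) & ((1u << 13) - 1);
  return F;
}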
9337
9338static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
9339 SelectionDAG &DAG) {
9340 // Check to see if we can use the TBL instruction.
9341 SDValue V1 = Op.getOperand(0);
9342 SDValue V2 = Op.getOperand(1);
9343 SDLoc DL(Op);
9344
9345 EVT EltVT = Op.getValueType().getVectorElementType();
9346 unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
9347
9348 SmallVector<SDValue, 8> TBLMask;
9349 for (int Val : ShuffleMask) {
9350 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
9351 unsigned Offset = Byte + Val * BytesPerElt;
9352 TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
9353 }
9354 }
9355
9356 MVT IndexVT = MVT::v8i8;
9357 unsigned IndexLen = 8;
9358 if (Op.getValueSizeInBits() == 128) {
9359 IndexVT = MVT::v16i8;
9360 IndexLen = 16;
9361 }
9362
9363 SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
9364 SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
9365
9366 SDValue Shuffle;
9367 if (V2.getNode()->isUndef()) {
9368 if (IndexLen == 8)
9369 V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
9370 Shuffle = DAG.getNode(
9371 ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
9372 DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
9373 DAG.getBuildVector(IndexVT, DL,
9374 makeArrayRef(TBLMask.data(), IndexLen)));
9375 } else {
9376 if (IndexLen == 8) {
9377 V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
9378 Shuffle = DAG.getNode(
9379 ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
9380 DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
9381 DAG.getBuildVector(IndexVT, DL,
9382 makeArrayRef(TBLMask.data(), IndexLen)));
9383 } else {
9384 // FIXME: We cannot, for the moment, emit a TBL2 instruction because we
9385 // cannot currently represent the register constraints on the input
9386 // table registers.
9387 // Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
9388 // DAG.getBuildVector(IndexVT, DL, &TBLMask[0],
9389 // IndexLen));
9390 Shuffle = DAG.getNode(
9391 ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
9392 DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
9393 V2Cst, DAG.getBuildVector(IndexVT, DL,
9394 makeArrayRef(TBLMask.data(), IndexLen)));
9395 }
9396 }
9397 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
9398}
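A standalone sketch (plain C++, not LLVM code) of the lane-to-byte expansion performed for TBLMask above; the helper name is hypothetical. Undef lanes (value -1) turn into out-of-range byte indices, which a TBL lookup maps to zero.

#include <vector>

static std::vector<unsigned> expandToTBLBytes(const std::vector<int> &ShuffleMask,
                                              unsigned BytesPerElt) {
  std::vector<unsigned> TBLMask;
  for (int Val : ShuffleMask)
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte)
      TBLMask.push_back(Byte + Val * BytesPerElt);  // lane index -> byte index
  return TBLMask;  // e.g. lanes <1, 0> of v2i32 become bytes <4,5,6,7,0,1,2,3>
}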
9399
9400static unsigned getDUPLANEOp(EVT EltType) {
9401 if (EltType == MVT::i8)
9402 return AArch64ISD::DUPLANE8;
9403 if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
9404 return AArch64ISD::DUPLANE16;
9405 if (EltType == MVT::i32 || EltType == MVT::f32)
9406 return AArch64ISD::DUPLANE32;
9407 if (EltType == MVT::i64 || EltType == MVT::f64)
9408 return AArch64ISD::DUPLANE64;
9409
9410  llvm_unreachable("Invalid vector element type?");
9412
9413static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
9414 unsigned Opcode, SelectionDAG &DAG) {
9415 // Try to eliminate a bitcasted extract subvector before a DUPLANE.
9416 auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
9417 // Match: dup (bitcast (extract_subv X, C)), LaneC
9418 if (BitCast.getOpcode() != ISD::BITCAST ||
9419 BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
9420 return false;
9421
9422 // The extract index must align in the destination type. That may not
9423 // happen if the bitcast is from narrow to wide type.
9424 SDValue Extract = BitCast.getOperand(0);
9425 unsigned ExtIdx = Extract.getConstantOperandVal(1);
9426 unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
9427 unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
9428 unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
9429 if (ExtIdxInBits % CastedEltBitWidth != 0)
9430 return false;
9431
9432 // Update the lane value by offsetting with the scaled extract index.
9433 LaneC += ExtIdxInBits / CastedEltBitWidth;
9434
9435 // Determine the casted vector type of the wide vector input.
9436 // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
9437 // Examples:
9438 // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
9439 // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
9440 unsigned SrcVecNumElts =
9441 Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
9442 CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
9443 SrcVecNumElts);
9444 return true;
9445 };
9446 MVT CastVT;
9447 if (getScaledOffsetDup(V, Lane, CastVT)) {
9448 V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
9449 } else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9450 // The lane is incremented by the index of the extract.
9451 // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
9452 Lane += V.getConstantOperandVal(1);
9453 V = V.getOperand(0);
9454 } else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
9455 // The lane is decremented if we are splatting from the 2nd operand.
9456 // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
9457 unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
9458 Lane -= Idx * VT.getVectorNumElements() / 2;
9459 V = WidenVector(V.getOperand(Idx), DAG);
9460 } else if (VT.getSizeInBits() == 64) {
9461 // Widen the operand to 128-bit register with undef.
9462 V = WidenVector(V, DAG);
9463 }
9464 return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
9465}
9466
9467SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9468 SelectionDAG &DAG) const {
9469 SDLoc dl(Op);
9470 EVT VT = Op.getValueType();
9471
9472 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
9473
9474 if (useSVEForFixedLengthVectorVT(VT))
9475 return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
9476
9477 // Convert shuffles that are directly supported on NEON to target-specific
9478 // DAG nodes, instead of keeping them as shuffles and matching them again
9479 // during code selection. This is more efficient and avoids the possibility
9480 // of inconsistencies between legalization and selection.
9481 ArrayRef<int> ShuffleMask = SVN->getMask();
9482
9483 SDValue V1 = Op.getOperand(0);
9484 SDValue V2 = Op.getOperand(1);
9485
9486  assert(V1.getValueType() == VT && "Unexpected VECTOR_SHUFFLE type!");
9487  assert(ShuffleMask.size() == VT.getVectorNumElements() &&
9488         "Unexpected VECTOR_SHUFFLE mask size!");
9489
9490 if (SVN->isSplat()) {
9491 int Lane = SVN->getSplatIndex();
9492 // If this is undef splat, generate it via "just" vdup, if possible.
9493 if (Lane == -1)
9494 Lane = 0;
9495
9496 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
9497 return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
9498 V1.getOperand(0));
9499 // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
9500 // constant. If so, we can just reference the lane's definition directly.
9501 if (V1.getOpcode() == ISD::BUILD_VECTOR &&
9502 !isa<ConstantSDNode>(V1.getOperand(Lane)))
9503 return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
9504
9505 // Otherwise, duplicate from the lane of the input vector.
9506 unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
9507 return constructDup(V1, Lane, dl, VT, Opcode, DAG);
9508 }
9509
9510 // Check if the mask matches a DUP for a wider element
9511 for (unsigned LaneSize : {64U, 32U, 16U}) {
9512 unsigned Lane = 0;
9513 if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
9514 unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
9515 : LaneSize == 32 ? AArch64ISD::DUPLANE32
9516 : AArch64ISD::DUPLANE16;
9517 // Cast V1 to an integer vector with required lane size
9518 MVT NewEltTy = MVT::getIntegerVT(LaneSize);
9519 unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
9520 MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount);
9521 V1 = DAG.getBitcast(NewVecTy, V1);
9522      // Construct the DUP instruction
9523 V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
9524 // Cast back to the original type
9525 return DAG.getBitcast(VT, V1);
9526 }
9527 }
9528
9529 if (isREVMask(ShuffleMask, VT, 64))
9530 return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
9531 if (isREVMask(ShuffleMask, VT, 32))
9532 return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
9533 if (isREVMask(ShuffleMask, VT, 16))
9534 return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
9535
9536 if (((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) ||
9537 (VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) &&
9538 ShuffleVectorInst::isReverseMask(ShuffleMask)) {
9539 SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1);
9540 return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev,
9541 DAG.getConstant(8, dl, MVT::i32));
9542 }
9543
9544 bool ReverseEXT = false;
9545 unsigned Imm;
9546 if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
9547 if (ReverseEXT)
9548 std::swap(V1, V2);
9549 Imm *= getExtFactor(V1);
9550 return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
9551 DAG.getConstant(Imm, dl, MVT::i32));
9552 } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
9553 Imm *= getExtFactor(V1);
9554 return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
9555 DAG.getConstant(Imm, dl, MVT::i32));
9556 }
9557
9558 unsigned WhichResult;
9559 if (isZIPMask(ShuffleMask, VT, WhichResult)) {
9560 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
9561 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
9562 }
9563 if (isUZPMask(ShuffleMask, VT, WhichResult)) {
9564 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
9565 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
9566 }
9567 if (isTRNMask(ShuffleMask, VT, WhichResult)) {
9568 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
9569 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
9570 }
9571
9572 if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
9573 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
9574 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
9575 }
9576 if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
9577 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
9578 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
9579 }
9580 if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
9581 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
9582 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
9583 }
9584
9585 if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG))
9586 return Concat;
9587
9588 bool DstIsLeft;
9589 int Anomaly;
9590 int NumInputElements = V1.getValueType().getVectorNumElements();
9591 if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
9592 SDValue DstVec = DstIsLeft ? V1 : V2;
9593 SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
9594
9595 SDValue SrcVec = V1;
9596 int SrcLane = ShuffleMask[Anomaly];
9597 if (SrcLane >= NumInputElements) {
9598 SrcVec = V2;
9599 SrcLane -= VT.getVectorNumElements();
9600 }
9601 SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
9602
9603 EVT ScalarVT = VT.getVectorElementType();
9604
9605 if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger())
9606 ScalarVT = MVT::i32;
9607
9608 return DAG.getNode(
9609 ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
9610 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
9611 DstLaneV);
9612 }
9613
9614 // If the shuffle is not directly supported and it has 4 elements, use
9615 // the PerfectShuffle-generated table to synthesize it from other shuffles.
9616 unsigned NumElts = VT.getVectorNumElements();
9617 if (NumElts == 4) {
9618 unsigned PFIndexes[4];
9619 for (unsigned i = 0; i != 4; ++i) {
9620 if (ShuffleMask[i] < 0)
9621 PFIndexes[i] = 8;
9622 else
9623 PFIndexes[i] = ShuffleMask[i];
9624 }
9625
9626 // Compute the index in the perfect shuffle table.
9627 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
9628 PFIndexes[2] * 9 + PFIndexes[3];
9629 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9630 unsigned Cost = (PFEntry >> 30);
9631
9632 if (Cost <= 4)
9633 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9634 }
9635
9636 return GenerateTBL(Op, ShuffleMask, DAG);
9637}
9638
9639SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
9640 SelectionDAG &DAG) const {
9641 SDLoc dl(Op);
9642 EVT VT = Op.getValueType();
9643 EVT ElemVT = VT.getScalarType();
9644 SDValue SplatVal = Op.getOperand(0);
9645
9646 if (useSVEForFixedLengthVectorVT(VT))
9647 return LowerToScalableOp(Op, DAG);
9648
9649 // Extend input splat value where needed to fit into a GPR (32b or 64b only)
9650 // FPRs don't have this restriction.
9651 switch (ElemVT.getSimpleVT().SimpleTy) {
9652 case MVT::i1: {
9653 // The only legal i1 vectors are SVE vectors, so we can use SVE-specific
9654 // lowering code.
9655 if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
9656 if (ConstVal->isOne())
9657 return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
9658 // TODO: Add special case for constant false
9659 }
9660 // The general case of i1. There isn't any natural way to do this,
9661 // so we use some trickery with whilelo.
9662 SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
9663 SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
9664 DAG.getValueType(MVT::i1));
9665 SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl,
9666 MVT::i64);
9667 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID,
9668 DAG.getConstant(0, dl, MVT::i64), SplatVal);
9669 }
9670 case MVT::i8:
9671 case MVT::i16:
9672 case MVT::i32:
9673 SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
9674 break;
9675 case MVT::i64:
9676 SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
9677 break;
9678 case MVT::f16:
9679 case MVT::bf16:
9680 case MVT::f32:
9681 case MVT::f64:
9682 // Fine as is
9683 break;
9684 default:
9685 report_fatal_error("Unsupported SPLAT_VECTOR input operand type");
9686 }
9687
9688 return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
9689}
9690
9691SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
9692 SelectionDAG &DAG) const {
9693 SDLoc DL(Op);
9694
9695 EVT VT = Op.getValueType();
9696 if (!isTypeLegal(VT) || !VT.isScalableVector())
9697 return SDValue();
9698
9699 // Current lowering only supports the SVE-ACLE types.
9700 if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
9701 return SDValue();
9702
9703 // The DUPQ operation is independent of element type so normalise to i64s.
9704 SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
9705 SDValue Idx128 = Op.getOperand(2);
9706
9707 // DUPQ can be used when idx is in range.
9708 auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
9709 if (CIdx && (CIdx->getZExtValue() <= 3)) {
9710 SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
9711 SDNode *DUPQ =
9712 DAG.getMachineNode(AArch64::DUP_ZZI_Q, DL, MVT::nxv2i64, V, CI);
9713 return DAG.getNode(ISD::BITCAST, DL, VT, SDValue(DUPQ, 0));
9714 }
9715
9716 // The ACLE says this must produce the same result as:
9717 // svtbl(data, svadd_x(svptrue_b64(),
9718 // svand_x(svptrue_b64(), svindex_u64(0, 1), 1),
9719 // index * 2))
9720 SDValue One = DAG.getConstant(1, DL, MVT::i64);
9721 SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);
9722
9723 // create the vector 0,1,0,1,...
9724 SDValue SV = DAG.getStepVector(DL, MVT::nxv2i64);
9725 SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);
9726
9727 // create the vector idx64,idx64+1,idx64,idx64+1,...
9728 SDValue Idx64 = DAG.getNode(ISD::ADD, DL, MVT::i64, Idx128, Idx128);
9729 SDValue SplatIdx64 = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Idx64);
9730 SDValue ShuffleMask = DAG.getNode(ISD::ADD, DL, MVT::nxv2i64, SV, SplatIdx64);
9731
9732 // create the vector Val[idx64],Val[idx64+1],Val[idx64],Val[idx64+1],...
9733 SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
9734 return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
9735}
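For the TBL fallback above, the per-lane index works out to 2*Idx128 for even lanes and 2*Idx128+1 for odd lanes, which replicates the requested 128-bit quadword. A small standalone illustration (the lane count and index are made up for the example):

#include <cstdint>
#include <cstdio>

int main() {
  const unsigned NumElts = 8;  // pretend the nxv2i64 vector has 8 lanes here
  const uint64_t Idx128 = 2;   // quadword index requested by the intrinsic
  for (unsigned Lane = 0; Lane != NumElts; ++Lane) {
    // (step & 1) + 2*Idx128, the same ShuffleMask as built above.
    uint64_t TblIdx = (Lane & 1) + 2 * Idx128;
    std::printf("lane %u reads source element %llu\n", Lane,
                (unsigned long long)TblIdx); // 4,5,4,5,...
  }
  return 0;
}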
9736
9737
9738static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
9739 APInt &UndefBits) {
9740 EVT VT = BVN->getValueType(0);
9741 APInt SplatBits, SplatUndef;
9742 unsigned SplatBitSize;
9743 bool HasAnyUndefs;
9744 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
9745 unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
9746
9747 for (unsigned i = 0; i < NumSplats; ++i) {
9748 CnstBits <<= SplatBitSize;
9749 UndefBits <<= SplatBitSize;
9750 CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
9751 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
9752 }
9753
9754 return true;
9755 }
9756
9757 return false;
9758}
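resolveBuildVector above replicates the splat pattern until it fills the vector width; a scalar sketch with plain 64-bit integers, assuming a 64-bit vector and a 16-bit splat:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned VecBits = 64, SplatBitSize = 16;
  const uint64_t SplatBits = 0x00FF; // the per-splat constant
  uint64_t CnstBits = 0;
  // Same shift-and-or loop as above, just without APInt.
  for (unsigned i = 0; i < VecBits / SplatBitSize; ++i)
    CnstBits = (CnstBits << SplatBitSize) | SplatBits;
  assert(CnstBits == 0x00FF00FF00FF00FFULL);
  return 0;
}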
9759
9760// Try 64-bit splatted SIMD immediate.
9761static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9762 const APInt &Bits) {
9763 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9764 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9765 EVT VT = Op.getValueType();
9766 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
9767
9768 if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
9769 Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
9770
9771 SDLoc dl(Op);
9772 SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9773 DAG.getConstant(Value, dl, MVT::i32));
9774 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9775 }
9776 }
9777
9778 return SDValue();
9779}
9780
9781// Try 32-bit splatted SIMD immediate.
9782static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9783 const APInt &Bits,
9784 const SDValue *LHS = nullptr) {
9785 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9786 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9787 EVT VT = Op.getValueType();
9788 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
9789 bool isAdvSIMDModImm = false;
9790 uint64_t Shift;
9791
9792 if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) {
9793 Value = AArch64_AM::encodeAdvSIMDModImmType1(Value);
9794 Shift = 0;
9795 }
9796 else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
9797 Value = AArch64_AM::encodeAdvSIMDModImmType2(Value);
9798 Shift = 8;
9799 }
9800 else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
9801 Value = AArch64_AM::encodeAdvSIMDModImmType3(Value);
9802 Shift = 16;
9803 }
9804 else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
9805 Value = AArch64_AM::encodeAdvSIMDModImmType4(Value);
9806 Shift = 24;
9807 }
9808
9809 if (isAdvSIMDModImm) {
9810 SDLoc dl(Op);
9811 SDValue Mov;
9812
9813 if (LHS)
9814 Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
9815 DAG.getConstant(Value, dl, MVT::i32),
9816 DAG.getConstant(Shift, dl, MVT::i32));
9817 else
9818 Mov = DAG.getNode(NewOp, dl, MovTy,
9819 DAG.getConstant(Value, dl, MVT::i32),
9820 DAG.getConstant(Shift, dl, MVT::i32));
9821
9822 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9823 }
9824 }
9825
9826 return SDValue();
9827}
9828
9829// Try 16-bit splatted SIMD immediate.
9830static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9831 const APInt &Bits,
9832 const SDValue *LHS = nullptr) {
9833 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9834 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9835 EVT VT = Op.getValueType();
9836 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
9837 bool isAdvSIMDModImm = false;
9838 uint64_t Shift;
9839
9840 if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) {
9841 Value = AArch64_AM::encodeAdvSIMDModImmType5(Value);
9842 Shift = 0;
9843 }
9844 else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
9845 Value = AArch64_AM::encodeAdvSIMDModImmType6(Value);
9846 Shift = 8;
9847 }
9848
9849 if (isAdvSIMDModImm) {
9850 SDLoc dl(Op);
9851 SDValue Mov;
9852
9853 if (LHS)
9854 Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
9855 DAG.getConstant(Value, dl, MVT::i32),
9856 DAG.getConstant(Shift, dl, MVT::i32));
9857 else
9858 Mov = DAG.getNode(NewOp, dl, MovTy,
9859 DAG.getConstant(Value, dl, MVT::i32),
9860 DAG.getConstant(Shift, dl, MVT::i32));
9861
9862 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9863 }
9864 }
9865
9866 return SDValue();
9867}
9868
9869// Try 32-bit splatted SIMD immediate with shifted ones.
9870static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
9871 SelectionDAG &DAG, const APInt &Bits) {
9872 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9873 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9874 EVT VT = Op.getValueType();
9875 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
9876 bool isAdvSIMDModImm = false;
9877 uint64_t Shift;
9878
9879 if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
9880 Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
9881 Shift = 264;
9882 }
9883 else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
9884 Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
9885 Shift = 272;
9886 }
9887
9888 if (isAdvSIMDModImm) {
9889 SDLoc dl(Op);
9890 SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9891 DAG.getConstant(Value, dl, MVT::i32),
9892 DAG.getConstant(Shift, dl, MVT::i32));
9893 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9894 }
9895 }
9896
9897 return SDValue();
9898}
9899
9900// Try 8-bit splatted SIMD immediate.
9901static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9902 const APInt &Bits) {
9903 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9904 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9905 EVT VT = Op.getValueType();
9906 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
9907
9908 if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
9909 Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
9910
9911 SDLoc dl(Op);
9912 SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9913 DAG.getConstant(Value, dl, MVT::i32));
9914 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9915 }
9916 }
9917
9918 return SDValue();
9919}
9920
9921// Try FP splatted SIMD immediate.
9922static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9923 const APInt &Bits) {
9924 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9925 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9926 EVT VT = Op.getValueType();
9927 bool isWide = (VT.getSizeInBits() == 128);
9928 MVT MovTy;
9929 bool isAdvSIMDModImm = false;
9930
9931 if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
9932 Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
9933 MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
9934 }
9935 else if (isWide &&
9936 (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
9937 Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
9938 MovTy = MVT::v2f64;
9939 }
9940
9941 if (isAdvSIMDModImm) {
9942 SDLoc dl(Op);
9943 SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9944 DAG.getConstant(Value, dl, MVT::i32));
9945 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9946 }
9947 }
9948
9949 return SDValue();
9950}
9951
9952// Specialized code to quickly find if PotentialBVec is a BuildVector that
9953// consists of only the same constant int value, returned in reference arg
9954// ConstVal
9955static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
9956 uint64_t &ConstVal) {
9957 BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
9958 if (!Bvec)
9959 return false;
9960 ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
9961 if (!FirstElt)
9962 return false;
9963 EVT VT = Bvec->getValueType(0);
9964 unsigned NumElts = VT.getVectorNumElements();
9965 for (unsigned i = 1; i < NumElts; ++i)
9966 if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
9967 return false;
9968 ConstVal = FirstElt->getZExtValue();
9969 return true;
9970}
9971
9972static unsigned getIntrinsicID(const SDNode *N) {
9973 unsigned Opcode = N->getOpcode();
9974 switch (Opcode) {
9975 default:
9976 return Intrinsic::not_intrinsic;
9977 case ISD::INTRINSIC_WO_CHAIN: {
9978 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
9979 if (IID < Intrinsic::num_intrinsics)
9980 return IID;
9981 return Intrinsic::not_intrinsic;
9982 }
9983 }
9984}
9985
9986// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
9987 // to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
9988 // BUILD_VECTOR with constant element C1, C2 is a constant, and:
9989// - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
9990// - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
9991// The (or (lsl Y, C2), (and X, BvecC1)) case is also handled.
9992static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
9993 EVT VT = N->getValueType(0);
9994
9995 if (!VT.isVector())
9996 return SDValue();
9997
9998 SDLoc DL(N);
9999
10000 SDValue And;
10001 SDValue Shift;
10002
10003 SDValue FirstOp = N->getOperand(0);
10004 unsigned FirstOpc = FirstOp.getOpcode();
10005 SDValue SecondOp = N->getOperand(1);
10006 unsigned SecondOpc = SecondOp.getOpcode();
10007
10008 // Is one of the operands an AND or a BICi? The AND may have been optimised to
10009 // a BICi in order to use an immediate instead of a register.
10010 // Is the other operand a shl or lshr? This will have been turned into:
10011 // AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift.
10012 if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) &&
10013 (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR)) {
10014 And = FirstOp;
10015 Shift = SecondOp;
10016
10017 } else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) &&
10018 (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR)) {
10019 And = SecondOp;
10020 Shift = FirstOp;
10021 } else
10022 return SDValue();
10023
10024 bool IsAnd = And.getOpcode() == ISD::AND;
10025 bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR;
10026
10027 // Is the shift amount constant?
10028 ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
10029 if (!C2node)
10030 return SDValue();
10031
10032 uint64_t C1;
10033 if (IsAnd) {
10034 // Is the and mask vector all constant?
10035 if (!isAllConstantBuildVector(And.getOperand(1), C1))
10036 return SDValue();
10037 } else {
10038 // Reconstruct the corresponding AND immediate from the two BICi immediates.
10039 ConstantSDNode *C1nodeImm = dyn_cast<ConstantSDNode>(And.getOperand(1));
10040 ConstantSDNode *C1nodeShift = dyn_cast<ConstantSDNode>(And.getOperand(2));
10041 assert(C1nodeImm && C1nodeShift);
10042 C1 = ~(C1nodeImm->getZExtValue() << C1nodeShift->getZExtValue());
10043 }
10044
10045 // Is C1 == ~(Ones(ElemSizeInBits) << C2) or
10046 // C1 == ~(Ones(ElemSizeInBits) >> C2), taking into account
10047 // how much one can shift elements of a particular size?
10048 uint64_t C2 = C2node->getZExtValue();
10049 unsigned ElemSizeInBits = VT.getScalarSizeInBits();
10050 if (C2 > ElemSizeInBits)
10051 return SDValue();
10052
10053 APInt C1AsAPInt(ElemSizeInBits, C1);
10054 APInt RequiredC1 = IsShiftRight ? APInt::getHighBitsSet(ElemSizeInBits, C2)
10055 : APInt::getLowBitsSet(ElemSizeInBits, C2);
10056 if (C1AsAPInt != RequiredC1)
10057 return SDValue();
10058
10059 SDValue X = And.getOperand(0);
10060 SDValue Y = Shift.getOperand(0);
10061
10062 unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
10063 SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
10064
10065 LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
10066 LLVM_DEBUG(N->dump(&DAG));
10067 LLVM_DEBUG(dbgs() << "into: \n");
10068 LLVM_DEBUG(ResultSLI->dump(&DAG));
10069
10070 ++NumShiftInserts;
10071 return ResultSLI;
10072}
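The mask condition checked in tryLowerToSLI above can be verified with plain integers: for the SLI case, C1 must be exactly the low C2 bits, so (X & C1) keeps the bits that (Y << C2) leaves as zero and the OR behaves like a shift-and-insert. A worked 16-bit example (the operand values are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned ElemSizeInBits = 16;
  const uint64_t C2 = 5;
  const uint64_t Ones = (1u << ElemSizeInBits) - 1;
  const uint64_t C1 = ~(Ones << C2) & Ones; // low C2 bits set: 0x001F
  assert(C1 == 0x1F);

  const uint64_t X = 0xABCD, Y = 0x0123;
  const uint64_t OrResult = (X & C1) | ((Y << C2) & Ones);
  // The low C2 bits still come from X and the rest from the shifted Y,
  // which is what a single SLI X, Y, #C2 produces per element.
  assert((OrResult & C1) == (X & C1));
  assert((OrResult & ~C1 & Ones) == ((Y << C2) & Ones));
  return 0;
}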
10073
10074SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
10075 SelectionDAG &DAG) const {
10076 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
10077 return LowerToScalableOp(Op, DAG);
10078
10079 // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
10080 if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
10081 return Res;
10082
10083 EVT VT = Op.getValueType();
10084
10085 SDValue LHS = Op.getOperand(0);
10086 BuildVectorSDNode *BVN =
10087 dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
10088 if (!BVN) {
10089 // OR commutes, so try swapping the operands.
10090 LHS = Op.getOperand(1);
10091 BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
10092 }
10093 if (!BVN)
10094 return Op;
10095
10096 APInt DefBits(VT.getSizeInBits(), 0);
10097 APInt UndefBits(VT.getSizeInBits(), 0);
10098 if (resolveBuildVector(BVN, DefBits, UndefBits)) {
10099 SDValue NewOp;
10100
10101 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
10102 DefBits, &LHS)) ||
10103 (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
10104 DefBits, &LHS)))
10105 return NewOp;
10106
10107 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
10108 UndefBits, &LHS)) ||
10109 (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
10110 UndefBits, &LHS)))
10111 return NewOp;
10112 }
10113
10114 // We can always fall back to a non-immediate OR.
10115 return Op;
10116}
10117
10118// Normalize the operands of BUILD_VECTOR. The value of constant operands will
10119// be truncated to fit element width.
10120static SDValue NormalizeBuildVector(SDValue Op,
10121 SelectionDAG &DAG) {
10122 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
10123 SDLoc dl(Op);
10124 EVT VT = Op.getValueType();
10125 EVT EltTy = VT.getVectorElementType();
10126
10127 if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
10128 return Op;
10129
10130 SmallVector<SDValue, 16> Ops;
10131 for (SDValue Lane : Op->ops()) {
10132 // For integer vectors, type legalization would have promoted the
10133 // operands already. Otherwise, if Op is a floating-point splat
10134 // (with operands cast to integers), then the only possibilities
10135 // are constants and UNDEFs.
10136 if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
10137 APInt LowBits(EltTy.getSizeInBits(),
10138 CstLane->getZExtValue());
10139 Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
10140 } else if (Lane.getNode()->isUndef()) {
10141 Lane = DAG.getUNDEF(MVT::i32);
10142 } else {
10143 assert(Lane.getValueType() == MVT::i32 &&
10144        "Unexpected BUILD_VECTOR operand type");
10145 }
10146 Ops.push_back(Lane);
10147 }
10148 return DAG.getBuildVector(VT, dl, Ops);
10149}
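NormalizeBuildVector above keeps only the low element-width bits of over-wide constant operands; a one-line scalar equivalent, assuming an i8 element:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned EltBits = 8;
  const uint64_t CstLane = 0x1234;                   // over-wide constant
  const uint64_t LowBits = CstLane & ((1u << EltBits) - 1);
  assert(LowBits == 0x34);                           // value kept in the lane
  return 0;
}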
10150
10151static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
10152 EVT VT = Op.getValueType();
10153
10154 APInt DefBits(VT.getSizeInBits(), 0);
10155 APInt UndefBits(VT.getSizeInBits(), 0);
10156 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
10157 if (resolveBuildVector(BVN, DefBits, UndefBits)) {
10158 SDValue NewOp;
10159 if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
10160 (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10161 (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
10162 (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10163 (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
10164 (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
10165 return NewOp;
10166
10167 DefBits = ~DefBits;
10168 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
10169 (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
10170 (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
10171 return NewOp;
10172
10173 DefBits = UndefBits;
10174 if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
10175 (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10176 (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
10177 (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10178 (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
10179 (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
10180 return NewOp;
10181
10182 DefBits = ~UndefBits;
10183 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
10184 (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
10185 (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
10186 return NewOp;
10187 }
10188
10189 return SDValue();
10190}
10191
10192SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
10193 SelectionDAG &DAG) const {
10194 EVT VT = Op.getValueType();
10195
10196 // Try to build a simple constant vector.
10197 Op = NormalizeBuildVector(Op, DAG);
10198 if (VT.isInteger()) {
10199 // Certain vector constants, used to express things like logical NOT and
10200 // arithmetic NEG, are passed through unmodified. This allows special
10201 // patterns for these operations to match, which will lower these constants
10202 // to whatever is proven necessary.
10203 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
10204 if (BVN->isConstant())
10205 if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
10206 unsigned BitSize = VT.getVectorElementType().getSizeInBits();
10207 APInt Val(BitSize,
10208 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
10209 if (Val.isNullValue() || Val.isAllOnesValue())
10210 return Op;
10211 }
10212 }
10213
10214 if (SDValue V = ConstantBuildVector(Op, DAG))
10215 return V;
10216
10217 // Scan through the operands to find some interesting properties we can
10218 // exploit:
10219 // 1) If only one value is used, we can use a DUP, or
10220 // 2) if only the low element is not undef, we can just insert that, or
10221 // 3) if only one constant value is used (w/ some non-constant lanes),
10222 // we can splat the constant value into the whole vector then fill
10223 // in the non-constant lanes.
10224 // 4) FIXME: If different constant values are used, but we can intelligently
10225 // select the values we'll be overwriting for the non-constant
10226 // lanes such that we can directly materialize the vector
10227 // some other way (MOVI, e.g.), we can be sneaky.
10228 // 5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
10229 SDLoc dl(Op);
10230 unsigned NumElts = VT.getVectorNumElements();
10231 bool isOnlyLowElement = true;
10232 bool usesOnlyOneValue = true;
10233 bool usesOnlyOneConstantValue = true;
10234 bool isConstant = true;
10235 bool AllLanesExtractElt = true;
10236 unsigned NumConstantLanes = 0;
10237 unsigned NumDifferentLanes = 0;
10238 unsigned NumUndefLanes = 0;
10239 SDValue Value;
10240 SDValue ConstantValue;
10241 for (unsigned i = 0; i < NumElts; ++i) {
10242 SDValue V = Op.getOperand(i);
10243 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
10244 AllLanesExtractElt = false;
10245 if (V.isUndef()) {
10246 ++NumUndefLanes;
10247 continue;
10248 }
10249 if (i > 0)
10250 isOnlyLowElement = false;
10251 if (!isIntOrFPConstant(V))
10252 isConstant = false;
10253
10254 if (isIntOrFPConstant(V)) {
10255 ++NumConstantLanes;
10256 if (!ConstantValue.getNode())
10257 ConstantValue = V;
10258 else if (ConstantValue != V)
10259 usesOnlyOneConstantValue = false;
10260 }
10261
10262 if (!Value.getNode())
10263 Value = V;
10264 else if (V != Value) {
10265 usesOnlyOneValue = false;
10266 ++NumDifferentLanes;
10267 }
10268 }
10269
10270 if (!Value.getNode()) {
10271 LLVM_DEBUG(
10272     dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
10273 return DAG.getUNDEF(VT);
10274 }
10275
10276 // Convert BUILD_VECTOR where all elements but the lowest are undef into
10277 // SCALAR_TO_VECTOR, except for when we have a single-element constant vector
10278 // as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
10279 if (isOnlyLowElement && !(NumElts == 1 && isIntOrFPConstant(Value))) {
10280 LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
10281                      "SCALAR_TO_VECTOR node\n");
10282 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
10283 }
10284
10285 if (AllLanesExtractElt) {
10286 SDNode *Vector = nullptr;
10287 bool Even = false;
10288 bool Odd = false;
10289 // Check whether the extract elements match the Even pattern <0,2,4,...> or
10290 // the Odd pattern <1,3,5,...>.
10291 for (unsigned i = 0; i < NumElts; ++i) {
10292 SDValue V = Op.getOperand(i);
10293 const SDNode *N = V.getNode();
10294 if (!isa<ConstantSDNode>(N->getOperand(1)))
10295 break;
10296 SDValue N0 = N->getOperand(0);
10297
10298 // All elements are extracted from the same vector.
10299 if (!Vector) {
10300 Vector = N0.getNode();
10301 // Check that the type of EXTRACT_VECTOR_ELT matches the type of
10302 // BUILD_VECTOR.
10303 if (VT.getVectorElementType() !=
10304 N0.getValueType().getVectorElementType())
10305 break;
10306 } else if (Vector != N0.getNode()) {
10307 Odd = false;
10308 Even = false;
10309 break;
10310 }
10311
10312 // Extracted values are either at Even indices <0,2,4,...> or at Odd
10313 // indices <1,3,5,...>.
10314 uint64_t Val = N->getConstantOperandVal(1);
10315 if (Val == 2 * i) {
10316 Even = true;
10317 continue;
10318 }
10319 if (Val - 1 == 2 * i) {
10320 Odd = true;
10321 continue;
10322 }
10323
10324 // Something does not match: abort.
10325 Odd = false;
10326 Even = false;
10327 break;
10328 }
10329 if (Even || Odd) {
10330 SDValue LHS =
10331 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
10332 DAG.getConstant(0, dl, MVT::i64));
10333 SDValue RHS =
10334 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
10335 DAG.getConstant(NumElts, dl, MVT::i64));
10336
10337 if (Even && !Odd)
10338 return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), LHS,
10339 RHS);
10340 if (Odd && !Even)
10341 return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), LHS,
10342 RHS);
10343 }
10344 }
10345
10346 // Use DUP for non-constant splats. For f32 constant splats, reduce to
10347 // i32 and try again.
10348 if (usesOnlyOneValue) {
10349 if (!isConstant) {
10350 if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
10351 Value.getValueType() != VT) {
10352 LLVM_DEBUG(
10353     dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
10354 return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
10355 }
10356
10357 // This is actually a DUPLANExx operation, which keeps everything vectory.
10358
10359 SDValue Lane = Value.getOperand(1);
10360 Value = Value.getOperand(0);
10361 if (Value.getValueSizeInBits() == 64) {
10362 LLVM_DEBUG(
10363     dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
10364               "widening it\n");
10365 Value = WidenVector(Value, DAG);
10366 }
10367
10368 unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
10369 return DAG.getNode(Opcode, dl, VT, Value, Lane);
10370 }
10371
10372 if (VT.getVectorElementType().isFloatingPoint()) {
10373 SmallVector<SDValue, 8> Ops;
10374 EVT EltTy = VT.getVectorElementType();
10375 assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
10376          EltTy == MVT::f64) && "Unsupported floating-point vector type");
10377 LLVM_DEBUG(
10378     dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
10379               "BITCASTS, and try again\n");
10380 MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
10381 for (unsigned i = 0; i < NumElts; ++i)
10382 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
10383 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
10384 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
10385 LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
10386            Val.dump(););
10387 Val = LowerBUILD_VECTOR(Val, DAG);
10388 if (Val.getNode())
10389 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
10390 }
10391 }
10392
10393 // If we need to insert a small number of different non-constant elements and
10394 // the vector width is sufficiently large, prefer using DUP with the common
10395 // value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
10396 // skip the constant lane handling below.
10397 bool PreferDUPAndInsert =
10398 !isConstant && NumDifferentLanes >= 1 &&
10399 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
10400 NumDifferentLanes >= NumConstantLanes;
10401
10402 // If there was only one constant value used and for more than one lane,
10403 // start by splatting that value, then replace the non-constant lanes. This
10404 // is better than the default, which will perform a separate initialization
10405 // for each lane.
10406 if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
10407 // Firstly, try to materialize the splat constant.
10408 SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
10409 Val = ConstantBuildVector(Vec, DAG);
10410 if (!Val) {
10411 // Otherwise, materialize the constant and splat it.
10412 Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
10413 DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
10414 }
10415
10416 // Now insert the non-constant lanes.
10417 for (unsigned i = 0; i < NumElts; ++i) {
10418 SDValue V = Op.getOperand(i);
10419 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
10420 if (!isIntOrFPConstant(V))
10421 // Note that type legalization likely mucked about with the VT of the
10422 // source operand, so we may have to convert it here before inserting.
10423 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
10424 }
10425 return Val;
10426 }
10427
10428 // This will generate a load from the constant pool.
10429 if (isConstant) {
10430 LLVM_DEBUG(
10431     dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
10432               "expansion\n");
10433 return SDValue();
10434 }
10435
10436 // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
10437 if (NumElts >= 4) {
10438 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
10439 return shuffle;
10440 }
10441
10442 if (PreferDUPAndInsert) {
10443 // First, build a constant vector with the common element.
10444 SmallVector<SDValue, 8> Ops(NumElts, Value);
10445 SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
10446 // Next, insert the elements that do not match the common value.
10447 for (unsigned I = 0; I < NumElts; ++I)
10448 if (Op.getOperand(I) != Value)
10449 NewVector =
10450 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
10451 Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
10452
10453 return NewVector;
10454 }
10455
10456 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
10457 // know the default expansion would otherwise fall back on something even
10458 // worse. For a vector with one or two non-undef values, that's
10459 // scalar_to_vector for the elements followed by a shuffle (provided the
10460 // shuffle is valid for the target) and materialization element by element
10461 // on the stack followed by a load for everything else.
10462 if (!isConstant && !usesOnlyOneValue) {
10463 LLVM_DEBUG(
10464     dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
10465               "of INSERT_VECTOR_ELT\n");
10466
10467 SDValue Vec = DAG.getUNDEF(VT);
10468 SDValue Op0 = Op.getOperand(0);
10469 unsigned i = 0;
10470
10471 // Use SCALAR_TO_VECTOR for lane zero to
10472 // a) Avoid a RMW dependency on the full vector register, and
10473 // b) Allow the register coalescer to fold away the copy if the
10474 // value is already in an S or D register, and we're forced to emit an
10475 // INSERT_SUBREG that we can't fold anywhere.
10476 //
10477 // We also allow types like i8 and i16 which are illegal scalar but legal
10478 // vector element types. After type-legalization the inserted value is
10479 // extended (i32) and it is safe to cast them to the vector type by ignoring
10480 // the upper bits of the lowest lane (e.g. v8i8, v4i16).
10481 if (!Op0.isUndef()) {
10482 LLVM_DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
10483 Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
10484 ++i;
10485 }
10486 LLVM_DEBUG(if (i < NumElts) dbgs()
10487                << "Creating nodes for the other vector elements:\n";);
10488 for (; i < NumElts; ++i) {
10489 SDValue V = Op.getOperand(i);
10490 if (V.isUndef())
10491 continue;
10492 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
10493 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
10494 }
10495 return Vec;
10496 }
10497
10498 LLVM_DEBUG(
10499     dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
10500               "better alternative\n");
10501 return SDValue();
10502}
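The PreferDUPAndInsert heuristic inside LowerBUILD_VECTOR above fires when only a few lanes differ from a common non-constant value; a standalone check with made-up lane counts for a v8i8-style vector:

#include <cassert>

int main() {
  const unsigned NumElts = 8, NumUndefLanes = 0;
  const unsigned NumDifferentLanes = 1, NumConstantLanes = 0;
  const bool isConstant = false;

  // Same expression as in the lowering above.
  const bool PreferDUPAndInsert =
      !isConstant && NumDifferentLanes >= 1 &&
      NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
      NumDifferentLanes >= NumConstantLanes;

  assert(PreferDUPAndInsert); // one DUP plus one INSERT beats 8 inserts
  return 0;
}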
10503
10504SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
10505 SelectionDAG &DAG) const {
10506 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
10507 return LowerFixedLengthConcatVectorsToSVE(Op, DAG);
10508
10509 assert(Op.getValueType().isScalableVector() &&
10510        isTypeLegal(Op.getValueType()) &&
10511        "Expected legal scalable vector type!");
10512
10513 if (isTypeLegal(Op.getOperand(0).getValueType())) {
10514 unsigned NumOperands = Op->getNumOperands();
10515 assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
10516        "Unexpected number of operands in CONCAT_VECTORS");
10517
10518 if (NumOperands == 2)
10519 return Op;
10520
10521 // Concat each pair of subvectors and pack into the lower half of the array.
10522 SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
10523 while (ConcatOps.size() > 1) {
10524 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
10525 SDValue V1 = ConcatOps[I];
10526 SDValue V2 = ConcatOps[I + 1];
10527 EVT SubVT = V1.getValueType();
10528 EVT PairVT = SubVT.getDoubleNumVectorElementsVT(*DAG.getContext());
10529 ConcatOps[I / 2] =
10530 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), PairVT, V1, V2);
10531 }
10532 ConcatOps.resize(ConcatOps.size() / 2);
10533 }
10534 return ConcatOps[0];
10535 }
10536
10537 return SDValue();
10538}
10539
10540SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10541 SelectionDAG &DAG) const {
10542 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
10543
10544 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
10545 return LowerFixedLengthInsertVectorElt(Op, DAG);
10546
10547 // Check for non-constant or out of range lane.
10548 EVT VT = Op.getOperand(0).getValueType();
10549
10550 if (VT.getScalarType() == MVT::i1) {
10551 EVT VectorVT = getPromotedVTForPredicate(VT);
10552 SDLoc DL(Op);
10553 SDValue ExtendedVector =
10554 DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, VectorVT);
10555 SDValue ExtendedValue =
10556 DAG.getAnyExtOrTrunc(Op.getOperand(1), DL,
10557 VectorVT.getScalarType().getSizeInBits() < 32
10558 ? MVT::i32
10559 : VectorVT.getScalarType());
10560 ExtendedVector =
10561 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VectorVT, ExtendedVector,
10562 ExtendedValue, Op.getOperand(2));
10563 return DAG.getAnyExtOrTrunc(ExtendedVector, DL, VT);
10564 }
10565
10566 ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10567 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
10568 return SDValue();
10569
10570 // Insertion/extraction are legal for V128 types.
10571 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
10572 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
10573 VT == MVT::v8f16 || VT == MVT::v8bf16)
10574 return Op;
10575
10576 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
10577 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
10578 VT != MVT::v4bf16)
10579 return SDValue();
10580
10581 // For V64 types, we perform insertion by expanding the value
10582 // to a V128 type and performing the insertion on that.
10583 SDLoc DL(Op);
10584 SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
10585 EVT WideTy = WideVec.getValueType();
10586
10587 SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
10588 Op.getOperand(1), Op.getOperand(2));
10589 // Re-narrow the resultant vector.
10590 return NarrowVector(Node, DAG);
10591}
10592
10593SDValue
10594AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
10595 SelectionDAG &DAG) const {
10596 assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
10597 EVT VT = Op.getOperand(0).getValueType();
10598
10599 if (VT.getScalarType() == MVT::i1) {
10600 // We can't directly extract from an SVE predicate; extend it first.
10601 // (This isn't the only possible lowering, but it's straightforward.)
10602 EVT VectorVT = getPromotedVTForPredicate(VT);
10603 SDLoc DL(Op);
10604 SDValue Extend =
10605 DAG.getNode(ISD::ANY_EXTEND, DL, VectorVT, Op.getOperand(0));
10606 MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
10607 SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractTy,
10608 Extend, Op.getOperand(1));
10609 return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
10610 }
10611
10612 if (useSVEForFixedLengthVectorVT(VT))
10613 return LowerFixedLengthExtractVectorElt(Op, DAG);
10614
10615 // Check for non-constant or out of range lane.
10616 ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
10617 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
10618 return SDValue();
10619
10620 // Insertion/extraction are legal for V128 types.
10621 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
10622 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
10623 VT == MVT::v8f16 || VT == MVT::v8bf16)
10624 return Op;
10625
10626 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
10627 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
10628 VT != MVT::v4bf16)
10629 return SDValue();
10630
10631 // For V64 types, we perform extraction by expanding the value
10632 // to a V128 type and performing the extraction on that.
10633 SDLoc DL(Op);
10634 SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
10635 EVT WideTy = WideVec.getValueType();
10636
10637 EVT ExtrTy = WideTy.getVectorElementType();
10638 if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
10639 ExtrTy = MVT::i32;
10640
10641 // For extractions, we just return the result directly.
10642 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
10643 Op.getOperand(1));
10644}
10645
10646SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
10647 SelectionDAG &DAG) const {
10648 assert(Op.getValueType().isFixedLengthVector() &&
10649        "Only cases that extract a fixed length vector are supported!");
10650
10651 EVT InVT = Op.getOperand(0).getValueType();
10652 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
10653 unsigned Size = Op.getValueSizeInBits();
10654
10655 if (InVT.isScalableVector()) {
10656 // This will be matched by custom code during ISelDAGToDAG.
10657 if (Idx == 0 && isPackedVectorType(InVT, DAG))
10658 return Op;
10659
10660 return SDValue();
10661 }
10662
10663 // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
10664 if (Idx == 0 && InVT.getSizeInBits() <= 128)
10665 return Op;
10666
10667 // If this is extracting the upper 64-bits of a 128-bit vector, we match
10668 // that directly.
10669 if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
10670 InVT.getSizeInBits() == 128)
10671 return Op;
10672
10673 return SDValue();
10674}
10675
10676SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
10677 SelectionDAG &DAG) const {
10678 assert(Op.getValueType().isScalableVector() &&
10679        "Only expect to lower inserts into scalable vectors!");
10680
10681 EVT InVT = Op.getOperand(1).getValueType();
10682 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
10683
10684 if (InVT.isScalableVector()) {
10685 SDLoc DL(Op);
10686 EVT VT = Op.getValueType();
10687
10688 if (!isTypeLegal(VT) || !VT.isInteger())
10689 return SDValue();
10690
10691 SDValue Vec0 = Op.getOperand(0);
10692 SDValue Vec1 = Op.getOperand(1);
10693
10694 // Ensure the subvector is half the size of the main vector.
10695 if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
10696 return SDValue();
10697
10698 // Extend elements of smaller vector...
10699 EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
10700 SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
10701
10702 if (Idx == 0) {
10703 SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
10704 return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0);
10705 } else if (Idx == InVT.getVectorMinNumElements()) {
10706 SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
10707 return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec);
10708 }
10709
10710 return SDValue();
10711 }
10712
10713 // This will be matched by custom code during ISelDAGToDAG.
10714 if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
10715 return Op;
10716
10717 return SDValue();
10718}
10719
10720SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
10721 EVT VT = Op.getValueType();
10722
10723 if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
10724 return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
10725
10726 assert(VT.isScalableVector() && "Expected a scalable vector.");
10727
10728 bool Signed = Op.getOpcode() == ISD::SDIV;
10729 unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
10730
10731 if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
10732 return LowerToPredicatedOp(Op, DAG, PredOpcode);
10733
10734 // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
10735 // operations, and truncate the result.
10736 EVT WidenedVT;
10737 if (VT == MVT::nxv16i8)
10738 WidenedVT = MVT::nxv8i16;
10739 else if (VT == MVT::nxv8i16)
10740 WidenedVT = MVT::nxv4i32;
10741 else
10742 llvm_unreachable("Unexpected Custom DIV operation")::llvm::llvm_unreachable_internal("Unexpected Custom DIV operation"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 10742)
;
10743
10744 SDLoc dl(Op);
10745 unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
10746 unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
10747 SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
10748 SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
10749 SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
10750 SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
10751 SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
10752 SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
10753 return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
10754}
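A scalar sketch of the widening strategy in LowerDIV above: an i16 divide is carried out in 32 bits (SVE has no 8/16-bit DIV) and the result is narrowed back, which is what the UZP1 of the two widened halves achieves lane-wise. The operand values here are arbitrary.

#include <cassert>
#include <cstdint>

int main() {
  const int16_t A = -30000, B = 7;
  const int32_t WideQuot = int32_t(A) / int32_t(B);        // widened SDIV
  const int16_t Narrowed = static_cast<int16_t>(WideQuot); // UZP1-style narrow
  assert(Narrowed == A / B);
  return 0;
}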
10755
10756bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
10757 // Currently no fixed length shuffles that require SVE are legal.
10758 if (useSVEForFixedLengthVectorVT(VT))
10759 return false;
10760
10761 if (VT.getVectorNumElements() == 4 &&
10762 (VT.is128BitVector() || VT.is64BitVector())) {
10763 unsigned PFIndexes[4];
10764 for (unsigned i = 0; i != 4; ++i) {
10765 if (M[i] < 0)
10766 PFIndexes[i] = 8;
10767 else
10768 PFIndexes[i] = M[i];
10769 }
10770
10771 // Compute the index in the perfect shuffle table.
10772 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10773 PFIndexes[2] * 9 + PFIndexes[3];
10774 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10775 unsigned Cost = (PFEntry >> 30);
10776
10777 if (Cost <= 4)
10778 return true;
10779 }
10780
10781 bool DummyBool;
10782 int DummyInt;
10783 unsigned DummyUnsigned;
10784
10785 return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
10786 isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
10787 isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
10788 // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
10789 isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
10790 isZIPMask(M, VT, DummyUnsigned) ||
10791 isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
10792 isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
10793 isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
10794 isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
10795 isConcatMask(M, VT, VT.getSizeInBits() == 128));
10796}
10797
10798/// getVShiftImm - Check if this is a valid build_vector for the immediate
10799/// operand of a vector shift operation, where all the elements of the
10800/// build_vector must have the same constant integer value.
10801static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
10802 // Ignore bit_converts.
10803 while (Op.getOpcode() == ISD::BITCAST)
10804 Op = Op.getOperand(0);
10805 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
10806 APInt SplatBits, SplatUndef;
10807 unsigned SplatBitSize;
10808 bool HasAnyUndefs;
10809 if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
10810 HasAnyUndefs, ElementBits) ||
10811 SplatBitSize > ElementBits)
10812 return false;
10813 Cnt = SplatBits.getSExtValue();
10814 return true;
10815}
10816
10817/// isVShiftLImm - Check if this is a valid build_vector for the immediate
10818/// operand of a vector shift left operation. That value must be in the range:
10819/// 0 <= Value < ElementBits for a left shift; or
10820/// 0 <= Value <= ElementBits for a long left shift.
10821static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
10822 assert(VT.isVector() && "vector shift count is not a vector type");
10823 int64_t ElementBits = VT.getScalarSizeInBits();
10824 if (!getVShiftImm(Op, ElementBits, Cnt))
10825 return false;
10826 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
10827}
10828
10829/// isVShiftRImm - Check if this is a valid build_vector for the immediate
10830/// operand of a vector shift right operation. The value must be in the range:
10831 /// 1 <= Value <= ElementBits for a right shift; or 1 <= Value <= ElementBits/2 for a narrowing right shift.
10832static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
10833 assert(VT.isVector() && "vector shift count is not a vector type");
10834 int64_t ElementBits = VT.getScalarSizeInBits();
10835 if (!getVShiftImm(Op, ElementBits, Cnt))
10836 return false;
10837 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
10838}
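// [Illustrative sketch, not from the original source] Concretely, for a v4i32
// shift (ElementBits = 32) the helpers above accept a splat immediate such as
//   shl <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>   ; Cnt = 3
// with these ranges: left shift 0..31 (0..32 when isLong), right shift 1..32
// (1..16 when isNarrow). A splat of 32 therefore only passes the long-left and
// non-narrowing right-shift checks.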
10839
10840SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
10841 SelectionDAG &DAG) const {
10842 EVT VT = Op.getValueType();
10843
10844 if (VT.getScalarType() == MVT::i1) {
10845 // Lower i1 truncate to `(x & 1) != 0`.
10846 SDLoc dl(Op);
10847 EVT OpVT = Op.getOperand(0).getValueType();
10848 SDValue Zero = DAG.getConstant(0, dl, OpVT);
10849 SDValue One = DAG.getConstant(1, dl, OpVT);
10850 SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One);
10851 return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE);
10852 }
10853
10854 if (!VT.isVector() || VT.isScalableVector())
10855 return SDValue();
10856
10857 if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
10858 return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
10859
10860 return SDValue();
10861}
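// [Illustrative sketch, not from the original source] The i1 path above turns
//   trunc i32 %x to i1
// into the equivalent of
//   %and = and i32 %x, 1
//   %cc  = setcc ne %and, 0
// so only the low bit of the source decides the i1 result.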
10862
10863SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
10864 SelectionDAG &DAG) const {
10865 EVT VT = Op.getValueType();
10866 SDLoc DL(Op);
10867 int64_t Cnt;
10868
10869 if (!Op.getOperand(1).getValueType().isVector())
10870 return Op;
10871 unsigned EltSize = VT.getScalarSizeInBits();
10872
10873 switch (Op.getOpcode()) {
10874 default:
10875 llvm_unreachable("unexpected shift opcode");
10876
10877 case ISD::SHL:
10878 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
10879 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
10880
10881 if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
10882 return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
10883 DAG.getConstant(Cnt, DL, MVT::i32));
10884 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
10885 DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
10886 MVT::i32),
10887 Op.getOperand(0), Op.getOperand(1));
10888 case ISD::SRA:
10889 case ISD::SRL:
10890 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
10891 unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
10892 : AArch64ISD::SRL_PRED;
10893 return LowerToPredicatedOp(Op, DAG, Opc);
10894 }
10895
10896 // Right shift immediate
10897 if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
10898 unsigned Opc =
10899 (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
10900 return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
10901 DAG.getConstant(Cnt, DL, MVT::i32));
10902 }
10903
10904 // Right shift register. Note, there is not a shift right register
10905 // instruction, but the shift left register instruction takes a signed
10906 // value, where negative numbers specify a right shift.
10907 unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
10908 : Intrinsic::aarch64_neon_ushl;
10909 // negate the shift amount
10910 SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
10911 Op.getOperand(1));
10912 SDValue NegShiftLeft =
10913 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
10914 DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
10915 NegShift);
10916 return NegShiftLeft;
10917 }
10918
10919 return SDValue();
10920}
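// [Illustrative sketch, not from the original source] For the register-shift
// case above, a variable right shift such as
//   %r = lshr <4 x i32> %v, %amt
// is emitted as a left shift by the negated amount,
//   %neg = sub <4 x i32> zeroinitializer, %amt
//   %r   = llvm.aarch64.neon.ushl(%v, %neg)        ; sshl for ISD::SRA
// because NEON only provides a signed-amount shift-left instruction.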
10921
10922static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
10923 AArch64CC::CondCode CC, bool NoNans, EVT VT,
10924 const SDLoc &dl, SelectionDAG &DAG) {
10925 EVT SrcVT = LHS.getValueType();
10926 assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
10927 "function only supposed to emit natural comparisons");
10928
10929 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10930 APInt CnstBits(VT.getSizeInBits(), 0);
10931 APInt UndefBits(VT.getSizeInBits(), 0);
10932 bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
10933 bool IsZero = IsCnst && (CnstBits == 0);
10934
10935 if (SrcVT.getVectorElementType().isFloatingPoint()) {
10936 switch (CC) {
10937 default:
10938 return SDValue();
10939 case AArch64CC::NE: {
10940 SDValue Fcmeq;
10941 if (IsZero)
10942 Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
10943 else
10944 Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
10945 return DAG.getNOT(dl, Fcmeq, VT);
10946 }
10947 case AArch64CC::EQ:
10948 if (IsZero)
10949 return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
10950 return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
10951 case AArch64CC::GE:
10952 if (IsZero)
10953 return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
10954 return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
10955 case AArch64CC::GT:
10956 if (IsZero)
10957 return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
10958 return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
10959 case AArch64CC::LS:
10960 if (IsZero)
10961 return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
10962 return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
10963 case AArch64CC::LT:
10964 if (!NoNans)
10965 return SDValue();
10966 // If we ignore NaNs then we can use the MI implementation.
10967 LLVM_FALLTHROUGH;
10968 case AArch64CC::MI:
10969 if (IsZero)
10970 return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
10971 return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
10972 }
10973 }
10974
10975 switch (CC) {
10976 default:
10977 return SDValue();
10978 case AArch64CC::NE: {
10979 SDValue Cmeq;
10980 if (IsZero)
10981 Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
10982 else
10983 Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
10984 return DAG.getNOT(dl, Cmeq, VT);
10985 }
10986 case AArch64CC::EQ:
10987 if (IsZero)
10988 return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
10989 return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
10990 case AArch64CC::GE:
10991 if (IsZero)
10992 return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
10993 return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
10994 case AArch64CC::GT:
10995 if (IsZero)
10996 return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
10997 return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
10998 case AArch64CC::LE:
10999 if (IsZero)
11000 return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
11001 return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
11002 case AArch64CC::LS:
11003 return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
11004 case AArch64CC::LO:
11005 return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
11006 case AArch64CC::LT:
11007 if (IsZero)
11008 return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
11009 return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
11010 case AArch64CC::HI:
11011 return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
11012 case AArch64CC::HS:
11013 return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
11014 }
11015}
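// [Illustrative sketch, not from the original source] Two examples of the
// mapping above:
//   setcc ne %a, %b  ->  NOT (CMEQ %a, %b)          ; FCMEQ for floating point
//   setcc eq %a, 0   ->  CMEQz %a                   ; compare-against-zero form
// The unsigned orderings LS and LO reuse CMHS/CMHI with the operands swapped,
// since there are no dedicated "lower" node forms.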
11016
11017SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
11018 SelectionDAG &DAG) const {
11019 if (Op.getValueType().isScalableVector())
11020 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
11021
11022 if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
11023 return LowerFixedLengthVectorSetccToSVE(Op, DAG);
11024
11025 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11026 SDValue LHS = Op.getOperand(0);
11027 SDValue RHS = Op.getOperand(1);
11028 EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
11029 SDLoc dl(Op);
11030
11031 if (LHS.getValueType().getVectorElementType().isInteger()) {
11032 assert(LHS.getValueType() == RHS.getValueType());
11033 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
11034 SDValue Cmp =
11035 EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
11036 return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
11037 }
11038
11039 const bool FullFP16 =
11040 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
11041
11042 // Make v4f16 (only) fcmp operations utilise vector instructions
11043 // v8f16 support will be a little more complicated
11044 if (!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) {
11045 if (LHS.getValueType().getVectorNumElements() == 4) {
11046 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
11047 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
11048 SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
11049 DAG.ReplaceAllUsesWith(Op, NewSetcc);
11050 CmpVT = MVT::v4i32;
11051 } else
11052 return SDValue();
11053 }
11054
11055 assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
11056 LHS.getValueType().getVectorElementType() != MVT::f128);
11057
11058 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
11059 // clean. Some of them require two branches to implement.
11060 AArch64CC::CondCode CC1, CC2;
11061 bool ShouldInvert;
11062 changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
11063
11064 bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
11065 SDValue Cmp =
11066 EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
11067 if (!Cmp.getNode())
11068 return SDValue();
11069
11070 if (CC2 != AArch64CC::AL) {
11071 SDValue Cmp2 =
11072 EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
11073 if (!Cmp2.getNode())
11074 return SDValue();
11075
11076 Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
11077 }
11078
11079 Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
11080
11081 if (ShouldInvert)
11082 Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
11083
11084 return Cmp;
11085}
11086
11087static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
11088 SelectionDAG &DAG) {
11089 SDValue VecOp = ScalarOp.getOperand(0);
11090 auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
11091 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
11092 DAG.getConstant(0, DL, MVT::i64));
11093}
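// [Illustrative sketch, not from the original source] getReductionSDNode turns
// e.g. `vecreduce_add <4 x i32> %v` into
//   %rdx = AArch64ISD::UADDV %v        ; reduction value lands in lane 0
//   %res = extract_vector_elt %rdx, 0
// which is the shape used by the NEON cases below.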
11094
11095SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
11096 SelectionDAG &DAG) const {
11097 SDValue Src = Op.getOperand(0);
11098
11099 // Try to lower fixed length reductions to SVE.
11100 EVT SrcVT = Src.getValueType();
11101 bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND ||
11102 Op.getOpcode() == ISD::VECREDUCE_OR ||
11103 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11104 Op.getOpcode() == ISD::VECREDUCE_FADD ||
11105 (Op.getOpcode() != ISD::VECREDUCE_ADD &&
11106 SrcVT.getVectorElementType() == MVT::i64);
11107 if (SrcVT.isScalableVector() ||
11108 useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
11109
11110 if (SrcVT.getVectorElementType() == MVT::i1)
11111 return LowerPredReductionToSVE(Op, DAG);
11112
11113 switch (Op.getOpcode()) {
11114 case ISD::VECREDUCE_ADD:
11115 return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
11116 case ISD::VECREDUCE_AND:
11117 return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
11118 case ISD::VECREDUCE_OR:
11119 return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
11120 case ISD::VECREDUCE_SMAX:
11121 return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
11122 case ISD::VECREDUCE_SMIN:
11123 return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
11124 case ISD::VECREDUCE_UMAX:
11125 return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
11126 case ISD::VECREDUCE_UMIN:
11127 return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
11128 case ISD::VECREDUCE_XOR:
11129 return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
11130 case ISD::VECREDUCE_FADD:
11131 return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
11132 case ISD::VECREDUCE_FMAX:
11133 return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
11134 case ISD::VECREDUCE_FMIN:
11135 return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
11136 default:
11137 llvm_unreachable("Unhandled fixed length reduction");
11138 }
11139 }
11140
11141 // Lower NEON reductions.
11142 SDLoc dl(Op);
11143 switch (Op.getOpcode()) {
11144 case ISD::VECREDUCE_ADD:
11145 return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
11146 case ISD::VECREDUCE_SMAX:
11147 return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
11148 case ISD::VECREDUCE_SMIN:
11149 return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
11150 case ISD::VECREDUCE_UMAX:
11151 return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
11152 case ISD::VECREDUCE_UMIN:
11153 return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
11154 case ISD::VECREDUCE_FMAX: {
11155 return DAG.getNode(
11156 ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
11157 DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
11158 Src);
11159 }
11160 case ISD::VECREDUCE_FMIN: {
11161 return DAG.getNode(
11162 ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
11163 DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
11164 Src);
11165 }
11166 default:
11167 llvm_unreachable("Unhandled reduction");
11168 }
11169}
11170
11171SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
11172 SelectionDAG &DAG) const {
11173 auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
11174 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
11175 return SDValue();
11176
11177 // LSE has an atomic load-add instruction, but not a load-sub.
11178 SDLoc dl(Op);
11179 MVT VT = Op.getSimpleValueType();
11180 SDValue RHS = Op.getOperand(2);
11181 AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
11182 RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
11183 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
11184 Op.getOperand(0), Op.getOperand(1), RHS,
11185 AN->getMemOperand());
11186}
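// [Illustrative sketch, not from the original source] The rewrite above turns
//   %old = atomicrmw sub i64* %p, i64 %x
// into an ISD::ATOMIC_LOAD_ADD whose value operand is (0 - %x), so it can be
// selected to the LSE LDADD family; there is no LDSUB instruction.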
11187
11188SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
11189 SelectionDAG &DAG) const {
11190 auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
11191 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
11192 return SDValue();
11193
11194 // LSE has an atomic load-clear instruction, but not a load-and.
11195 SDLoc dl(Op);
11196 MVT VT = Op.getSimpleValueType();
11197 SDValue RHS = Op.getOperand(2);
11198 AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
11199 RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
11200 return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
11201 Op.getOperand(0), Op.getOperand(1), RHS,
11202 AN->getMemOperand());
11203}
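// [Illustrative sketch, not from the original source] Similarly, the rewrite
// above turns
//   %old = atomicrmw and i64* %p, i64 %m
// into an ISD::ATOMIC_LOAD_CLR whose value operand is (%m xor -1): LDCLR
// clears the bits set in its operand, so AND with %m becomes CLR of ~%m.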
11204
11205SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
11206 SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
11207 SDLoc dl(Op);
11208 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11209 SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
11210
11211 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
11212 const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
11213 if (Subtarget->hasCustomCallingConv())
11214 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
11215
11216 Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
11217 DAG.getConstant(4, dl, MVT::i64));
11218 Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
11219 Chain =
11220 DAG.getNode(AArch64ISD::CALL, dl, DAG.getVTList(MVT::Other, MVT::Glue),
11221 Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
11222 DAG.getRegisterMask(Mask), Chain.getValue(1));
11223 // To match the actual intent better, we should read the output from X15 here
11224 // again (instead of potentially spilling it to the stack), but rereading Size
11225 // from X15 here doesn't work at -O0, since it thinks that X15 is undefined
11226 // here.
11227
11228 Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
11229 DAG.getConstant(4, dl, MVT::i64));
11230 return Chain;
11231}
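// [Illustrative sketch, not from the original source] The probe sequence above
// follows the Windows __chkstk convention as reflected in the code: the
// requested size is scaled down by 16 (the SRL by 4) and passed in X15,
// __chkstk is called, and the size is scaled back up (the SHL by 4) before the
// caller in LowerDYNAMIC_STACKALLOC subtracts it from SP.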
11232
11233SDValue
11234AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
11235 SelectionDAG &DAG) const {
11236 assert(Subtarget->isTargetWindows() &&
11237 "Only Windows alloca probing supported");
11238 SDLoc dl(Op);
11239 // Get the inputs.
11240 SDNode *Node = Op.getNode();
11241 SDValue Chain = Op.getOperand(0);
11242 SDValue Size = Op.getOperand(1);
11243 MaybeAlign Align =
11244 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
11245 EVT VT = Node->getValueType(0);
11246
11247 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
11248 "no-stack-arg-probe")) {
11249 SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
11250 Chain = SP.getValue(1);
11251 SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
11252 if (Align)
11253 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
11254 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
11255 Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
11256 SDValue Ops[2] = {SP, Chain};
11257 return DAG.getMergeValues(Ops, dl);
11258 }
11259
11260 Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
11261
11262 Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
11263
11264 SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
11265 Chain = SP.getValue(1);
11266 SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
11267 if (Align)
11268 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
11269 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
11270 Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
11271
11272 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
11273 DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
11274
11275 SDValue Ops[2] = {SP, Chain};
11276 return DAG.getMergeValues(Ops, dl);
11277}
11278
11279SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
11280 SelectionDAG &DAG) const {
11281 EVT VT = Op.getValueType();
11282 assert(VT != MVT::i64 && "Expected illegal VSCALE node");
11283
11284 SDLoc DL(Op);
11285 APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue();
11286 return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sextOrSelf(64)),
11287 DL, VT);
11288}
11289
11290/// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
11291template <unsigned NumVecs>
11292static bool
11293setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
11294 AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
11295 Info.opc = ISD::INTRINSIC_VOID;
11296 // Retrieve EC from first vector argument.
11297 const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
11298 ElementCount EC = VT.getVectorElementCount();
11299#ifndef NDEBUG
11300 // Check the assumption that all input vectors are the same type.
11301 for (unsigned I = 0; I < NumVecs; ++I)
11302 assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
11303 "Invalid type.");
11304#endif
11305 // memVT is `NumVecs * VT`.
11306 Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
11307 EC * NumVecs);
11308 Info.ptrVal = CI.getArgOperand(CI.getNumArgOperands() - 1);
11309 Info.offset = 0;
11310 Info.align.reset();
11311 Info.flags = MachineMemOperand::MOStore;
11312 return true;
11313}
11314
11315/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
11316/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
11317/// specified in the intrinsic calls.
11318bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
11319 const CallInst &I,
11320 MachineFunction &MF,
11321 unsigned Intrinsic) const {
11322 auto &DL = I.getModule()->getDataLayout();
11323 switch (Intrinsic) {
11324 case Intrinsic::aarch64_sve_st2:
11325 return setInfoSVEStN<2>(*this, DL, Info, I);
11326 case Intrinsic::aarch64_sve_st3:
11327 return setInfoSVEStN<3>(*this, DL, Info, I);
11328 case Intrinsic::aarch64_sve_st4:
11329 return setInfoSVEStN<4>(*this, DL, Info, I);
11330 case Intrinsic::aarch64_neon_ld2:
11331 case Intrinsic::aarch64_neon_ld3:
11332 case Intrinsic::aarch64_neon_ld4:
11333 case Intrinsic::aarch64_neon_ld1x2:
11334 case Intrinsic::aarch64_neon_ld1x3:
11335 case Intrinsic::aarch64_neon_ld1x4:
11336 case Intrinsic::aarch64_neon_ld2lane:
11337 case Intrinsic::aarch64_neon_ld3lane:
11338 case Intrinsic::aarch64_neon_ld4lane:
11339 case Intrinsic::aarch64_neon_ld2r:
11340 case Intrinsic::aarch64_neon_ld3r:
11341 case Intrinsic::aarch64_neon_ld4r: {
11342 Info.opc = ISD::INTRINSIC_W_CHAIN;
11343 // Conservatively set memVT to the entire set of vectors loaded.
11344 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
11345 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
11346 Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
11347 Info.offset = 0;
11348 Info.align.reset();
11349 // volatile loads with NEON intrinsics not supported
11350 Info.flags = MachineMemOperand::MOLoad;
11351 return true;
11352 }
11353 case Intrinsic::aarch64_neon_st2:
11354 case Intrinsic::aarch64_neon_st3:
11355 case Intrinsic::aarch64_neon_st4:
11356 case Intrinsic::aarch64_neon_st1x2:
11357 case Intrinsic::aarch64_neon_st1x3:
11358 case Intrinsic::aarch64_neon_st1x4:
11359 case Intrinsic::aarch64_neon_st2lane:
11360 case Intrinsic::aarch64_neon_st3lane:
11361 case Intrinsic::aarch64_neon_st4lane: {
11362 Info.opc = ISD::INTRINSIC_VOID;
11363 // Conservatively set memVT to the entire set of vectors stored.
11364 unsigned NumElts = 0;
11365 for (unsigned ArgI = 0, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
11366 Type *ArgTy = I.getArgOperand(ArgI)->getType();
11367 if (!ArgTy->isVectorTy())
11368 break;
11369 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
11370 }
11371 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
11372 Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
11373 Info.offset = 0;
11374 Info.align.reset();
11375 // volatile stores with NEON intrinsics not supported
11376 Info.flags = MachineMemOperand::MOStore;
11377 return true;
11378 }
11379 case Intrinsic::aarch64_ldaxr:
11380 case Intrinsic::aarch64_ldxr: {
11381 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
11382 Info.opc = ISD::INTRINSIC_W_CHAIN;
11383 Info.memVT = MVT::getVT(PtrTy->getElementType());
11384 Info.ptrVal = I.getArgOperand(0);
11385 Info.offset = 0;
11386 Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11387 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
11388 return true;
11389 }
11390 case Intrinsic::aarch64_stlxr:
11391 case Intrinsic::aarch64_stxr: {
11392 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
11393 Info.opc = ISD::INTRINSIC_W_CHAIN;
11394 Info.memVT = MVT::getVT(PtrTy->getElementType());
11395 Info.ptrVal = I.getArgOperand(1);
11396 Info.offset = 0;
11397 Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11398 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
11399 return true;
11400 }
11401 case Intrinsic::aarch64_ldaxp:
11402 case Intrinsic::aarch64_ldxp:
11403 Info.opc = ISD::INTRINSIC_W_CHAIN;
11404 Info.memVT = MVT::i128;
11405 Info.ptrVal = I.getArgOperand(0);
11406 Info.offset = 0;
11407 Info.align = Align(16);
11408 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
11409 return true;
11410 case Intrinsic::aarch64_stlxp:
11411 case Intrinsic::aarch64_stxp:
11412 Info.opc = ISD::INTRINSIC_W_CHAIN;
11413 Info.memVT = MVT::i128;
11414 Info.ptrVal = I.getArgOperand(2);
11415 Info.offset = 0;
11416 Info.align = Align(16);
11417 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
11418 return true;
11419 case Intrinsic::aarch64_sve_ldnt1: {
11420 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
11421 Info.opc = ISD::INTRINSIC_W_CHAIN;
11422 Info.memVT = MVT::getVT(I.getType());
11423 Info.ptrVal = I.getArgOperand(1);
11424 Info.offset = 0;
11425 Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11426 Info.flags = MachineMemOperand::MOLoad;
11427 if (Intrinsic == Intrinsic::aarch64_sve_ldnt1)
11428 Info.flags |= MachineMemOperand::MONonTemporal;
11429 return true;
11430 }
11431 case Intrinsic::aarch64_sve_stnt1: {
11432 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
11433 Info.opc = ISD::INTRINSIC_W_CHAIN;
11434 Info.memVT = MVT::getVT(I.getOperand(0)->getType());
11435 Info.ptrVal = I.getArgOperand(2);
11436 Info.offset = 0;
11437 Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11438 Info.flags = MachineMemOperand::MOStore;
11439 if (Intrinsic == Intrinsic::aarch64_sve_stnt1)
11440 Info.flags |= MachineMemOperand::MONonTemporal;
11441 return true;
11442 }
11443 default:
11444 break;
11445 }
11446
11447 return false;
11448}
11449
11450bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
11451 ISD::LoadExtType ExtTy,
11452 EVT NewVT) const {
11453 // TODO: This may be worth removing. Check regression tests for diffs.
11454 if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
11455 return false;
11456
11457 // If we're reducing the load width in order to avoid having to use an extra
11458 // instruction to do extension then it's probably a good idea.
11459 if (ExtTy != ISD::NON_EXTLOAD)
11460 return true;
11461 // Don't reduce load width if it would prevent us from combining a shift into
11462 // the offset.
11463 MemSDNode *Mem = dyn_cast<MemSDNode>(Load);
11464 assert(Mem);
11465 const SDValue &Base = Mem->getBasePtr();
11466 if (Base.getOpcode() == ISD::ADD &&
11467 Base.getOperand(1).getOpcode() == ISD::SHL &&
11468 Base.getOperand(1).hasOneUse() &&
11469 Base.getOperand(1).getOperand(1).getOpcode() == ISD::Constant) {
11470 // The shift can be combined if it matches the size of the value being
11471 // loaded (and so reducing the width would make it not match).
11472 uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
11473 uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
11474 if (ShiftAmount == Log2_32(LoadBytes))
11475 return false;
11476 }
11477 // We have no reason to disallow reducing the load width, so allow it.
11478 return true;
11479}
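// [Illustrative sketch, not from the original source] Example of the
// shift-fold check above: for
//   %addr = add i64 %base, (shl i64 %idx, 3)
//   %val  = load i64, i64* %addr
// the shift amount 3 equals log2(8 load bytes) and can fold into
// `ldr x, [base, idx, lsl #3]`, so narrowing the load to 32 bits
// (log2(4) == 2) would break the fold and is rejected.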
11480
11481 // Truncations from 64-bit GPR to 32-bit GPR are free.
11482bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
11483 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
11484 return false;
11485 uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize();
11486 uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize();
11487 return NumBits1 > NumBits2;
11488}
11489bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
11490 if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
11491 return false;
11492 uint64_t NumBits1 = VT1.getFixedSizeInBits();
11493 uint64_t NumBits2 = VT2.getFixedSizeInBits();
11494 return NumBits1 > NumBits2;
11495}
11496
11497/// Check if it is profitable to hoist instruction in then/else to if.
11498 /// Not profitable if I and its user can form an FMA instruction
11499/// because we prefer FMSUB/FMADD.
11500bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
11501 if (I->getOpcode() != Instruction::FMul)
11502 return true;
11503
11504 if (!I->hasOneUse())
11505 return true;
11506
11507 Instruction *User = I->user_back();
11508
11509 if (User &&
11510 !(User->getOpcode() == Instruction::FSub ||
11511 User->getOpcode() == Instruction::FAdd))
11512 return true;
11513
11514 const TargetOptions &Options = getTargetMachine().Options;
11515 const Function *F = I->getFunction();
11516 const DataLayout &DL = F->getParent()->getDataLayout();
11517 Type *Ty = User->getOperand(0)->getType();
11518
11519 return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
11520 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
11521 (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11522 Options.UnsafeFPMath));
11523}
11524
11525// All 32-bit GPR operations implicitly zero the high-half of the corresponding
11526// 64-bit GPR.
11527bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
11528 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
11529 return false;
11530 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
11531 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
11532 return NumBits1 == 32 && NumBits2 == 64;
11533}
11534bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
11535 if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
11536 return false;
11537 unsigned NumBits1 = VT1.getSizeInBits();
11538 unsigned NumBits2 = VT2.getSizeInBits();
11539 return NumBits1 == 32 && NumBits2 == 64;
11540}
11541
11542bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
11543 EVT VT1 = Val.getValueType();
11544 if (isZExtFree(VT1, VT2)) {
11545 return true;
11546 }
11547
11548 if (Val.getOpcode() != ISD::LOAD)
11549 return false;
11550
11551 // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
11552 return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
11553 VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
11554 VT1.getSizeInBits() <= 32);
11555}
11556
11557bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
11558 if (isa<FPExtInst>(Ext))
11559 return false;
11560
11561 // Vector types are not free.
11562 if (Ext->getType()->isVectorTy())
11563 return false;
11564
11565 for (const Use &U : Ext->uses()) {
11566 // The extension is free if we can fold it with a left shift in an
11567 // addressing mode or an arithmetic operation: add, sub, and cmp.
11568
11569 // Is there a shift?
11570 const Instruction *Instr = cast<Instruction>(U.getUser());
11571
11572 // Is this a constant shift?
11573 switch (Instr->getOpcode()) {
11574 case Instruction::Shl:
11575 if (!isa<ConstantInt>(Instr->getOperand(1)))
11576 return false;
11577 break;
11578 case Instruction::GetElementPtr: {
11579 gep_type_iterator GTI = gep_type_begin(Instr);
11580 auto &DL = Ext->getModule()->getDataLayout();
11581 std::advance(GTI, U.getOperandNo()-1);
11582 Type *IdxTy = GTI.getIndexedType();
11583 // This extension will end up with a shift because of the scaling factor.
11584 // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
11585 // Get the shift amount based on the scaling factor:
11586 // log2(sizeof(IdxTy)) - log2(8).
11587 uint64_t ShiftAmt =
11588 countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3;
11589 // Is the constant foldable in the shift of the addressing mode?
11590 // I.e., shift amount is between 1 and 4 inclusive.
11591 if (ShiftAmt == 0 || ShiftAmt > 4)
11592 return false;
11593 break;
11594 }
11595 case Instruction::Trunc:
11596 // Check if this is a noop.
11597 // trunc(sext ty1 to ty2) to ty1.
11598 if (Instr->getType() == Ext->getOperand(0)->getType())
11599 continue;
11600 LLVM_FALLTHROUGH;
11601 default:
11602 return false;
11603 }
11604
11605 // At this point we can use the bfm family, so this extension is free
11606 // for that use.
11607 }
11608 return true;
11609}
11610
11611/// Check if both Op1 and Op2 are shufflevector extracts of either the lower
11612/// or upper half of the vector elements.
11613static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
11614 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
11615 auto *FullTy = FullV->getType();
11616 auto *HalfTy = HalfV->getType();
11617 return FullTy->getPrimitiveSizeInBits().getFixedSize() ==
11618 2 * HalfTy->getPrimitiveSizeInBits().getFixedSize();
11619 };
11620
11621 auto extractHalf = [](Value *FullV, Value *HalfV) {
11622 auto *FullVT = cast<FixedVectorType>(FullV->getType());
11623 auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
11624 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
11625 };
11626
11627 ArrayRef<int> M1, M2;
11628 Value *S1Op1, *S2Op1;
11629 if (!match(Op1, m_Shuffle(m_Value(S1Op1), m_Undef(), m_Mask(M1))) ||
11630 !match(Op2, m_Shuffle(m_Value(S2Op1), m_Undef(), m_Mask(M2))))
11631 return false;
11632
11633 // Check that the shuffle results are half as wide as their source vectors
11634 // and that we extract half of the elements of the input vectors.
11635 if (!areTypesHalfed(S1Op1, Op1) || !areTypesHalfed(S2Op1, Op2) ||
11636 !extractHalf(S1Op1, Op1) || !extractHalf(S2Op1, Op2))
11637 return false;
11638
11639 // Check the mask extracts either the lower or upper half of vector
11640 // elements.
11641 int M1Start = -1;
11642 int M2Start = -1;
11643 int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
11644 if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
11645 !ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
11646 M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
11647 return false;
11648
11649 return true;
11650}
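// [Illustrative sketch, not from the original source] A pair accepted above:
// with <8 x i16> inputs,
//   %h1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
//   %h2 = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// both extract the upper half starting at the same index, so a widening
// operation on them can use the "high" instruction forms (e.g. umull2).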
11651
11652/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
11653/// of the vector elements.
11654static bool areExtractExts(Value *Ext1, Value *Ext2) {
11655 auto areExtDoubled = [](Instruction *Ext) {
11656 return Ext->getType()->getScalarSizeInBits() ==
11657 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
11658 };
11659
11660 if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
11661 !match(Ext2, m_ZExtOrSExt(m_Value())) ||
11662 !areExtDoubled(cast<Instruction>(Ext1)) ||
11663 !areExtDoubled(cast<Instruction>(Ext2)))
11664 return false;
11665
11666 return true;
11667}
11668
11669/// Check if Op could be used with vmull_high_p64 intrinsic.
11670static bool isOperandOfVmullHighP64(Value *Op) {
11671 Value *VectorOperand = nullptr;
11672 ConstantInt *ElementIndex = nullptr;
11673 return match(Op, m_ExtractElt(m_Value(VectorOperand),
11674 m_ConstantInt(ElementIndex))) &&
11675 ElementIndex->getValue() == 1 &&
11676 isa<FixedVectorType>(VectorOperand->getType()) &&
11677 cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
11678}
11679
11680/// Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
11681static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
11682 return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2);
11683}
11684
11685/// Check if sinking \p I's operands to I's basic block is profitable, because
11686/// the operands can be folded into a target instruction, e.g.
11687 /// shufflevector extracts and/or sext/zext can be folded into (u,s)subl(2).
11688bool AArch64TargetLowering::shouldSinkOperands(
11689 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
11690 if (!I->getType()->isVectorTy())
11691 return false;
11692
11693 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
11694 switch (II->getIntrinsicID()) {
11695 case Intrinsic::aarch64_neon_umull:
11696 if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
11697 return false;
11698 Ops.push_back(&II->getOperandUse(0));
11699 Ops.push_back(&II->getOperandUse(1));
11700 return true;
11701
11702 case Intrinsic::aarch64_neon_pmull64:
11703 if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
11704 II->getArgOperand(1)))
11705 return false;
11706 Ops.push_back(&II->getArgOperandUse(0));
11707 Ops.push_back(&II->getArgOperandUse(1));
11708 return true;
11709
11710 default:
11711 return false;
11712 }
11713 }
11714
11715 switch (I->getOpcode()) {
11716 case Instruction::Sub:
11717 case Instruction::Add: {
11718 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
11719 return false;
11720
11721 // If the exts' operands extract either the lower or upper elements, we
11722 // can sink them too.
11723 auto Ext1 = cast<Instruction>(I->getOperand(0));
11724 auto Ext2 = cast<Instruction>(I->getOperand(1));
11725 if (areExtractShuffleVectors(Ext1->getOperand(0), Ext2->getOperand(0))) {
11726 Ops.push_back(&Ext1->getOperandUse(0));
11727 Ops.push_back(&Ext2->getOperandUse(0));
11728 }
11729
11730 Ops.push_back(&I->getOperandUse(0));
11731 Ops.push_back(&I->getOperandUse(1));
11732
11733 return true;
11734 }
11735 case Instruction::Mul: {
11736 bool IsProfitable = false;
11737 for (auto &Op : I->operands()) {
11738 // Make sure we are not already sinking this operand
11739 if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
11740 continue;
11741
11742 ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
11743 if (!Shuffle || !Shuffle->isZeroEltSplat())
11744 continue;
11745
11746 Value *ShuffleOperand = Shuffle->getOperand(0);
11747 InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
11748 if (!Insert)
11749 continue;
11750
11751 Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
11752 if (!OperandInstr)
11753 continue;
11754
11755 ConstantInt *ElementConstant =
11756 dyn_cast<ConstantInt>(Insert->getOperand(2));
11757 // Check that the insertelement is inserting into element 0
11758 if (!ElementConstant || ElementConstant->getZExtValue() != 0)
11759 continue;
11760
11761 unsigned Opcode = OperandInstr->getOpcode();
11762 if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt)
11763 continue;
11764
11765 Ops.push_back(&Shuffle->getOperandUse(0));
11766 Ops.push_back(&Op);
11767 IsProfitable = true;
11768 }
11769
11770 return IsProfitable;
11771 }
11772 default:
11773 return false;
11774 }
11775 return false;
11776}
11777
11778bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
11779 Align &RequiredAligment) const {
11780 if (!LoadedType.isSimple() ||
11781 (!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
11782 return false;
11783 // Cyclone supports unaligned accesses.
11784 RequiredAligment = Align(1);
11785 unsigned NumBits = LoadedType.getSizeInBits();
11786 return NumBits == 32 || NumBits == 64;
11787}
11788
11789/// A helper function for determining the number of interleaved accesses we
11790/// will generate when lowering accesses of the given type.
11791unsigned
11792AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
11793 const DataLayout &DL) const {
11794 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
11795}
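// [Illustrative sketch, not from the original source] For example, a
// <16 x i32> vector is 512 bits, so (512 + 127) / 128 = 4 interleaved
// accesses, while a 64-bit <4 x i16> vector needs (64 + 127) / 128 = 1.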
11796
11797MachineMemOperand::Flags
11798AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
11799 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
11800 I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
11801 return MOStridedAccess;
11802 return MachineMemOperand::MONone;
11803}
11804
11805bool AArch64TargetLowering::isLegalInterleavedAccessType(
11806 VectorType *VecTy, const DataLayout &DL) const {
11807
11808 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
11809 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
11810
11811 // Ensure the number of vector elements is greater than 1.
11812 if (cast<FixedVectorType>(VecTy)->getNumElements() < 2)
11813 return false;
11814
11815 // Ensure the element type is legal.
11816 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
11817 return false;
11818
11819 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
11820 // 128 will be split into multiple interleaved accesses.
11821 return VecSize == 64 || VecSize % 128 == 0;
11822}
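// [Illustrative sketch, not from the original source] Under the rules above,
// <2 x i32> (64 bits) and <4 x i32> (128 bits) are legal interleaved access
// types, <8 x i64> (512 bits, a multiple of 128) is legal but will be split
// into four accesses, while <3 x i32> (96 bits) and single-element vectors
// are rejected.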
11823
11824/// Lower an interleaved load into a ldN intrinsic.
11825///
11826/// E.g. Lower an interleaved load (Factor = 2):
11827/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
11828/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
11829/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
11830///
11831/// Into:
11832/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
11833 /// %vec0 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 0
11834 /// %vec1 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 1
11835bool AArch64TargetLowering::lowerInterleavedLoad(
11836 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
11837 ArrayRef<unsigned> Indices, unsigned Factor) const {
11838 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
11839 "Invalid interleave factor");
11840 assert(!Shuffles.empty() && "Empty shufflevector input");
11841 assert(Shuffles.size() == Indices.size() &&
11842 "Unmatched number of shufflevectors and indices");
11843
11844 const DataLayout &DL = LI->getModule()->getDataLayout();
11845
11846 VectorType *VTy = Shuffles[0]->getType();
11847
11848 // Skip if we do not have NEON and skip illegal vector types. We can
11849 // "legalize" wide vector types into multiple interleaved accesses as long as
11850 // the vector types are divisible by 128.
11851 if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VTy, DL))
11852 return false;
11853
11854 unsigned NumLoads = getNumInterleavedAccesses(VTy, DL);
11855
11856 auto *FVTy = cast<FixedVectorType>(VTy);
11857
11858 // A pointer vector can not be the return type of the ldN intrinsics. Need to
11859 // load integer vectors first and then convert to pointer vectors.
11860 Type *EltTy = FVTy->getElementType();
11861 if (EltTy->isPointerTy())
11862 FVTy =
11863 FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
11864
11865 IRBuilder<> Builder(LI);
11866
11867 // The base address of the load.
11868 Value *BaseAddr = LI->getPointerOperand();
11869
11870 if (NumLoads > 1) {
11871 // If we're going to generate more than one load, reset the sub-vector type
11872 // to something legal.
11873 FVTy = FixedVectorType::get(FVTy->getElementType(),
11874 FVTy->getNumElements() / NumLoads);
11875
11876 // We will compute the pointer operand of each load from the original base
11877 // address using GEPs. Cast the base address to a pointer to the scalar
11878 // element type.
11879 BaseAddr = Builder.CreateBitCast(
11880 BaseAddr,
11881 FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
11882 }
11883
11884 Type *PtrTy = FVTy->getPointerTo(LI->getPointerAddressSpace());
11885 Type *Tys[2] = {FVTy, PtrTy};
11886 static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
11887 Intrinsic::aarch64_neon_ld3,
11888 Intrinsic::aarch64_neon_ld4};
11889 Function *LdNFunc =
11890 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
11891
11892 // Holds sub-vectors extracted from the load intrinsic return values. The
11893 // sub-vectors are associated with the shufflevector instructions they will
11894 // replace.
11895 DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
11896
11897 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
11898
11899 // If we're generating more than one load, compute the base address of
11900 // subsequent loads as an offset from the previous.
11901 if (LoadCount > 0)
11902 BaseAddr = Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
11903 FVTy->getNumElements() * Factor);
11904
11905 CallInst *LdN = Builder.CreateCall(
11906 LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
11907
11908 // Extract and store the sub-vectors returned by the load intrinsic.
11909 for (unsigned i = 0; i < Shuffles.size(); i++) {
11910 ShuffleVectorInst *SVI = Shuffles[i];
11911 unsigned Index = Indices[i];
11912
11913 Value *SubVec = Builder.CreateExtractValue(LdN, Index);
11914
11915 // Convert the integer vector to pointer vector if the element is pointer.
11916 if (EltTy->isPointerTy())
11917 SubVec = Builder.CreateIntToPtr(
11918 SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
11919 FVTy->getNumElements()));
11920 SubVecs[SVI].push_back(SubVec);
11921 }
11922 }
11923
11924 // Replace uses of the shufflevector instructions with the sub-vectors
11925 // returned by the load intrinsic. If a shufflevector instruction is
11926 // associated with more than one sub-vector, those sub-vectors will be
11927 // concatenated into a single wide vector.
11928 for (ShuffleVectorInst *SVI : Shuffles) {
11929 auto &SubVec = SubVecs[SVI];
11930 auto *WideVec =
11931 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
11932 SVI->replaceAllUsesWith(WideVec);
11933 }
11934
11935 return true;
11936}
11937
11938/// Lower an interleaved store into a stN intrinsic.
11939///
11940/// E.g. Lower an interleaved store (Factor = 3):
11941/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
11942/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
11943/// store <12 x i32> %i.vec, <12 x i32>* %ptr
11944///
11945/// Into:
11946/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
11947/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
11948/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
11949/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
11950///
11951/// Note that the new shufflevectors will be removed and we'll only generate one
11952/// st3 instruction in CodeGen.
11953///
11954/// Example for a more general valid mask (Factor 3). Lower:
11955/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
11956/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
11957/// store <12 x i32> %i.vec, <12 x i32>* %ptr
11958///
11959/// Into:
11960/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
11961/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
11962/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
11963/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
11964bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
11965 ShuffleVectorInst *SVI,
11966 unsigned Factor) const {
11967 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
11968 "Invalid interleave factor");
11969
11970 auto *VecTy = cast<FixedVectorType>(SVI->getType());
11971 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
11972
11973 unsigned LaneLen = VecTy->getNumElements() / Factor;
11974 Type *EltTy = VecTy->getElementType();
11975 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
11976
11977 const DataLayout &DL = SI->getModule()->getDataLayout();
11978
11979 // Skip if we do not have NEON and skip illegal vector types. We can
11980 // "legalize" wide vector types into multiple interleaved accesses as long as
11981 // the vector types are divisible by 128.
11982 if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
11983 return false;
11984
11985 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
11986
11987 Value *Op0 = SVI->getOperand(0);
11988 Value *Op1 = SVI->getOperand(1);
11989 IRBuilder<> Builder(SI);
11990
11991 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
11992 // vectors to integer vectors.
11993 if (EltTy->isPointerTy()) {
11994 Type *IntTy = DL.getIntPtrType(EltTy);
11995 unsigned NumOpElts =
11996 cast<FixedVectorType>(Op0->getType())->getNumElements();
11997
11998 // Convert to the corresponding integer vector.
11999 auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);
12000 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
12001 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
12002
12003 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
12004 }
12005
12006 // The base address of the store.
12007 Value *BaseAddr = SI->getPointerOperand();
12008
12009 if (NumStores > 1) {
12010 // If we're going to generate more than one store, reset the lane length
12011 // and sub-vector type to something legal.
12012 LaneLen /= NumStores;
12013 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
12014
12015 // We will compute the pointer operand of each store from the original base
12016 // address using GEPs. Cast the base address to a pointer to the scalar
12017 // element type.
12018 BaseAddr = Builder.CreateBitCast(
12019 BaseAddr,
12020 SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
12021 }
12022
12023 auto Mask = SVI->getShuffleMask();
12024
12025 Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
12026 Type *Tys[2] = {SubVecTy, PtrTy};
12027 static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
12028 Intrinsic::aarch64_neon_st3,
12029 Intrinsic::aarch64_neon_st4};
12030 Function *StNFunc =
12031 Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
12032
12033 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
12034
12035 SmallVector<Value *, 5> Ops;
12036
12037 // Split the shufflevector operands into sub vectors for the new stN call.
12038 for (unsigned i = 0; i < Factor; i++) {
12039 unsigned IdxI = StoreCount * LaneLen * Factor + i;
12040 if (Mask[IdxI] >= 0) {
12041 Ops.push_back(Builder.CreateShuffleVector(
12042 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
12043 } else {
12044 unsigned StartMask = 0;
12045 for (unsigned j = 1; j < LaneLen; j++) {
12046 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
12047 if (Mask[IdxJ * Factor + IdxI] >= 0) {
12048 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
12049 break;
12050 }
12051 }
12052 // Note: Filling undef gaps with random elements is ok, since
12053 // those elements were being written anyway (with undefs).
12054 // In the case of all undefs we're defaulting to using elems from 0
12055 // Note: StartMask cannot be negative, it's checked in
12056 // isReInterleaveMask
12057 Ops.push_back(Builder.CreateShuffleVector(
12058 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
12059 }
12060 }
12061
12062 // If we're generating more than one store, we compute the base address of
12063 // subsequent stores as an offset from the previous.
12064 if (StoreCount > 0)
12065 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
12066 BaseAddr, LaneLen * Factor);
12067
12068 Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
12069 Builder.CreateCall(StNFunc, Ops);
12070 }
12071 return true;
12072}
12073
12074// Lower an SVE structured load intrinsic returning a tuple type to target
12075// specific intrinsic taking the same input but returning a multi-result value
12076// of the split tuple type.
12077//
12078// E.g. Lowering an LD3:
12079//
12080// call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32(
12081// <vscale x 4 x i1> %pred,
12082// <vscale x 4 x i32>* %addr)
12083//
12084// Output DAG:
12085//
12086// t0: ch = EntryToken
12087// t2: nxv4i1,ch = CopyFromReg t0, Register:nxv4i1 %0
12088// t4: i64,ch = CopyFromReg t0, Register:i64 %1
12089// t5: nxv4i32,nxv4i32,nxv4i32,ch = AArch64ISD::SVE_LD3 t0, t2, t4
12090// t6: nxv12i32 = concat_vectors t5, t5:1, t5:2
12091//
12092// This is called pre-legalization to avoid widening/splitting issues with
12093// non-power-of-2 tuple types used for LD3, such as nxv12i32.
12094SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
12095 ArrayRef<SDValue> LoadOps,
12096 EVT VT, SelectionDAG &DAG,
12097 const SDLoc &DL) const {
12098 assert(VT.isScalableVector() && "Can only lower scalable vectors");
12099
12100 unsigned N, Opcode;
12101 static std::map<unsigned, std::pair<unsigned, unsigned>> IntrinsicMap = {
12102 {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}},
12103 {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}},
12104 {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
12105
12106 std::tie(N, Opcode) = IntrinsicMap[Intrinsic];
12107 assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
12108        "invalid tuple vector type!");
12109
12110 EVT SplitVT =
12111 EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
12112 VT.getVectorElementCount().divideCoefficientBy(N));
12113 assert(isTypeLegal(SplitVT));
12114
12115 SmallVector<EVT, 5> VTs(N, SplitVT);
12116 VTs.push_back(MVT::Other); // Chain
12117 SDVTList NodeTys = DAG.getVTList(VTs);
12118
12119 SDValue PseudoLoad = DAG.getNode(Opcode, DL, NodeTys, LoadOps);
12120 SmallVector<SDValue, 4> PseudoLoadOps;
12121 for (unsigned I = 0; I < N; ++I)
12122 PseudoLoadOps.push_back(SDValue(PseudoLoad.getNode(), I));
12123 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, PseudoLoadOps);
12124}
12125
12126EVT AArch64TargetLowering::getOptimalMemOpType(
12127 const MemOp &Op, const AttributeList &FuncAttributes) const {
12128 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
12129 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
12130 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
12131 // Only use AdvSIMD to implement memsets of 32 bytes and above. Below that, it
12132 // would take one instruction to materialize the v2i64 zero and one store (with
12133 // a restrictive addressing mode), so just do i64 stores.
12134 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
12135 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
12136 if (Op.isAligned(AlignCheck))
12137 return true;
12138 bool Fast;
12139 return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
12140 MachineMemOperand::MONone, &Fast) &&
12141 Fast;
12142 };
12143
12144 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
12145 AlignmentIsAcceptable(MVT::v16i8, Align(16)))
12146 return MVT::v16i8;
12147 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
12148 return MVT::f128;
12149 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
12150 return MVT::i64;
12151 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
12152 return MVT::i32;
12153 return MVT::Other;
12154}
12155
12156LLT AArch64TargetLowering::getOptimalMemOpLLT(
12157 const MemOp &Op, const AttributeList &FuncAttributes) const {
12158 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
12159 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
12160 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
12161 // Only use AdvSIMD to implement memsets of 32 bytes and above. Below that, it
12162 // would take one instruction to materialize the v2i64 zero and one store (with
12163 // a restrictive addressing mode), so just do i64 stores.
12164 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
12165 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
12166 if (Op.isAligned(AlignCheck))
12167 return true;
12168 bool Fast;
12169 return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
12170 MachineMemOperand::MONone, &Fast) &&
12171 Fast;
12172 };
12173
12174 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
12175 AlignmentIsAcceptable(MVT::v2i64, Align(16)))
12176 return LLT::fixed_vector(2, 64);
12177 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
12178 return LLT::scalar(128);
12179 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
12180 return LLT::scalar(64);
12181 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
12182 return LLT::scalar(32);
12183 return LLT();
12184}
12185
12186// 12-bit optionally shifted immediates are legal for adds.
12187bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
12188 if (Immed == std::numeric_limits<int64_t>::min()) {
12189 LLVM_DEBUG(dbgs() << "Illegal add imm " << Immed
12190            << ": avoid UB for INT64_MIN\n");
12191 return false;
12192 }
12193 // Same encoding for add/sub, just flip the sign.
12194 Immed = std::abs(Immed);
12195 bool IsLegal = ((Immed >> 12) == 0 ||
12196 ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
12197 LLVM_DEBUG(dbgs() << "Is " << Immed
12198            << " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
12199 return IsLegal;
12200}
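
// Illustrative sketch (not part of the original source): a standalone
// restatement of the legality rule above, assuming only <cstdint> and
// <cstdlib>. 0xfff and 0xfff000 are legal; 0x1001 and 0x1001000 are not.
#include <cstdint>
#include <cstdlib>
static bool isLegalAddImmSketch(int64_t Imm) {
  if (Imm == INT64_MIN)
    return false;                 // std::llabs(INT64_MIN) would be UB
  Imm = std::llabs(Imm);
  return (Imm >> 12) == 0 ||                        // plain 12-bit immediate
         ((Imm & 0xfff) == 0 && (Imm >> 24) == 0);  // 12-bit immediate, LSL #12
}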
12201
12202// Integer comparisons are implemented with ADDS/SUBS, so the range of valid
12203// immediates is the same as for an add or a sub.
12204bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
12205 return isLegalAddImmediate(Immed);
12206}
12207
12208/// isLegalAddressingMode - Return true if the addressing mode represented
12209/// by AM is legal for this target, for a load/store of the specified type.
12210bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
12211 const AddrMode &AM, Type *Ty,
12212 unsigned AS, Instruction *I) const {
12213 // AArch64 has five basic addressing modes:
12214 // reg
12215 // reg + 9-bit signed offset
12216 // reg + SIZE_IN_BYTES * 12-bit unsigned offset
12217 // reg1 + reg2
12218 // reg + SIZE_IN_BYTES * reg
12219
12220 // No global is ever allowed as a base.
12221 if (AM.BaseGV)
12222 return false;
12223
12224 // No reg+reg+imm addressing.
12225 if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
12226 return false;
12227
12228 // FIXME: Update this method to support scalable addressing modes.
12229 if (isa<ScalableVectorType>(Ty)) {
12230 uint64_t VecElemNumBytes =
12231 DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
12232 return AM.HasBaseReg && !AM.BaseOffs &&
12233 (AM.Scale == 0 || (uint64_t)AM.Scale == VecElemNumBytes);
12234 }
12235
12236 // check reg + imm case:
12237 // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
12238 uint64_t NumBytes = 0;
12239 if (Ty->isSized()) {
12240 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
12241 NumBytes = NumBits / 8;
12242 if (!isPowerOf2_64(NumBits))
12243 NumBytes = 0;
12244 }
12245
12246 if (!AM.Scale) {
12247 int64_t Offset = AM.BaseOffs;
12248
12249 // 9-bit signed offset
12250 if (isInt<9>(Offset))
12251 return true;
12252
12253 // 12-bit unsigned offset
12254 unsigned shift = Log2_64(NumBytes);
12255 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
12256 // Must be a multiple of NumBytes (NumBytes is a power of 2)
12257 (Offset >> shift) << shift == Offset)
12258 return true;
12259 return false;
12260 }
12261
12262 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
12263
12264 return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
12265}
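
// Illustrative sketch (not part of the original source): the reg + immediate
// offset check above, restated for a power-of-two access size. Assumes only
// <cstdint>; the helper name and parameters are made up.
#include <cstdint>
static bool isLegalRegImmOffsetSketch(int64_t Offset, uint64_t NumBytes) {
  // 9-bit signed offset (unscaled form).
  if (Offset >= -256 && Offset <= 255)
    return true;
  // 12-bit unsigned offset, scaled by the access size. For an i64 access
  // (NumBytes == 8) this admits 8, 16, ..., 32760.
  return NumBytes != 0 && Offset > 0 && Offset % (int64_t)NumBytes == 0 &&
         (uint64_t)Offset / NumBytes <= 4095;
}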
12266
12267bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
12268 // Consider splitting large offset of struct or array.
12269 return true;
12270}
12271
12272InstructionCost AArch64TargetLowering::getScalingFactorCost(
12273 const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const {
12274 // Scaling factors are not free at all.
12276 // Operands                     | Rt Latency
12277 // -------------------------------------------
12278 // Rt, [Xn, Xm]                 | 4
12279 // -------------------------------------------
12280 // Rt, [Xn, Xm, lsl #imm]       | Rn: 4 Rm: 5
12281 // Rt, [Xn, Wm, <extend> #imm]  |
12281 if (isLegalAddressingMode(DL, AM, Ty, AS))
12282 // Scale represents reg2 * scale, thus account for 1 if
12283 // it is not equal to 0 or 1.
12284 return AM.Scale != 0 && AM.Scale != 1;
12285 return -1;
12286}
12287
12288bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
12289 const MachineFunction &MF, EVT VT) const {
12290 VT = VT.getScalarType();
12291
12292 if (!VT.isSimple())
12293 return false;
12294
12295 switch (VT.getSimpleVT().SimpleTy) {
12296 case MVT::f16:
12297 return Subtarget->hasFullFP16();
12298 case MVT::f32:
12299 case MVT::f64:
12300 return true;
12301 default:
12302 break;
12303 }
12304
12305 return false;
12306}
12307
12308bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
12309 Type *Ty) const {
12310 switch (Ty->getScalarType()->getTypeID()) {
12311 case Type::FloatTyID:
12312 case Type::DoubleTyID:
12313 return true;
12314 default:
12315 return false;
12316 }
12317}
12318
12319bool AArch64TargetLowering::generateFMAsInMachineCombiner(
12320 EVT VT, CodeGenOpt::Level OptLevel) const {
12321 return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector();
12322}
12323
12324const MCPhysReg *
12325AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
12326 // LR is a callee-save register, but we must treat it as clobbered by any call
12327 // site. Hence we include LR in the scratch registers, which are in turn added
12328 // as implicit-defs for stackmaps and patchpoints.
12329 static const MCPhysReg ScratchRegs[] = {
12330 AArch64::X16, AArch64::X17, AArch64::LR, 0
12331 };
12332 return ScratchRegs;
12333}
12334
12335bool
12336AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
12337 CombineLevel Level) const {
12338 N = N->getOperand(0).getNode();
12339 EVT VT = N->getValueType(0);
12340 // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
12341 // it with shift to let it be lowered to UBFX.
12342 if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
12343 isa<ConstantSDNode>(N->getOperand(1))) {
12344 uint64_t TruncMask = N->getConstantOperandVal(1);
12345 if (isMask_64(TruncMask) &&
12346 N->getOperand(0).getOpcode() == ISD::SRL &&
12347 isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
12348 return false;
12349 }
12350 return true;
12351}
12352
12353bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
12354 Type *Ty) const {
12355 assert(Ty->isIntegerTy());
12356
12357 unsigned BitSize = Ty->getPrimitiveSizeInBits();
12358 if (BitSize == 0)
12359 return false;
12360
12361 int64_t Val = Imm.getSExtValue();
12362 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
12363 return true;
12364
12365 if ((int64_t)Val < 0)
12366 Val = ~Val;
12367 if (BitSize == 32)
12368 Val &= (1LL << 32) - 1;
12369
12370 unsigned LZ = countLeadingZeros((uint64_t)Val);
12371 unsigned Shift = (63 - LZ) / 16;
12372 // MOVZ is free so return true for one or fewer MOVK.
12373 return Shift < 3;
12374}
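
// Illustrative sketch (not part of the original source): (63 - LZ) / 16 above
// is the index of the highest non-zero 16-bit chunk of Val, so Shift < 3
// means the value fits in its low 48 bits. Assumes the GCC/Clang
// __builtin_clzll builtin; the helper name is made up.
#include <cstdint>
static unsigned highestChunkIndexSketch(uint64_t Val) {
  if (Val == 0)
    return 0;                                 // clz(0) is undefined, guard it
  unsigned LZ = (unsigned)__builtin_clzll(Val);
  return (63 - LZ) / 16;                      // e.g. 0x0000123400000000 -> 2
}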
12375
12376bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
12377 unsigned Index) const {
12378 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
12379 return false;
12380
12381 return (Index == 0 || Index == ResVT.getVectorNumElements());
12382}
12383
12384/// Turn vector tests of the signbit in the form of:
12385/// xor (sra X, elt_size(X)-1), -1
12386/// into:
12387/// cmge X, X, #0
12388static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
12389 const AArch64Subtarget *Subtarget) {
12390 EVT VT = N->getValueType(0);
12391 if (!Subtarget->hasNEON() || !VT.isVector())
12392 return SDValue();
12393
12394 // There must be an arithmetic shift right before the xor, and the xor must
12395 // be a 'not' operation.
12396 SDValue Shift = N->getOperand(0);
12397 SDValue Ones = N->getOperand(1);
12398 if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() ||
12399 !ISD::isBuildVectorAllOnes(Ones.getNode()))
12400 return SDValue();
12401
12402 // The shift should be smearing the sign bit across each vector element.
12403 auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
12404 EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
12405 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
12406 return SDValue();
12407
12408 return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
12409}
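
// Illustrative sketch (not part of the original source): the per-lane scalar
// identity behind the fold above, written for one i32 lane. Assumes an
// arithmetic right shift for signed values, as AArch64 compilers provide.
#include <cstdint>
static int32_t signbitTestLaneSketch(int32_t X) {
  int32_t Smeared = X >> 31;      // all-ones if X < 0, all-zeros otherwise
  return ~Smeared;                // all-ones iff X >= 0, i.e. CMGE X, #0
}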
12410
12411// Given a vecreduce_add node, detect the below pattern and convert it to the
12412 // node sequence with UABDL, [S|U]ABD and UADDLP.
12413//
12414// i32 vecreduce_add(
12415// v16i32 abs(
12416// v16i32 sub(
12417// v16i32 [sign|zero]_extend(v16i8 a), v16i32 [sign|zero]_extend(v16i8 b))))
12418// =================>
12419// i32 vecreduce_add(
12420// v4i32 UADDLP(
12421// v8i16 add(
12422// v8i16 zext(
12423// v8i8 [S|U]ABD low8:v16i8 a, low8:v16i8 b
12424// v8i16 zext(
12425// v8i8 [S|U]ABD high8:v16i8 a, high8:v16i8 b
12426static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
12427 SelectionDAG &DAG) {
12428 // Assumed i32 vecreduce_add
12429 if (N->getValueType(0) != MVT::i32)
12430 return SDValue();
12431
12432 SDValue VecReduceOp0 = N->getOperand(0);
12433 unsigned Opcode = VecReduceOp0.getOpcode();
12434 // Assumed v16i32 abs
12435 if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
12436 return SDValue();
12437
12438 SDValue ABS = VecReduceOp0;
12439 // Assumed v16i32 sub
12440 if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
12441 ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
12442 return SDValue();
12443
12444 SDValue SUB = ABS->getOperand(0);
12445 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
12446 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
12447 // Assumed v16i32 type
12448 if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
12449 SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
12450 return SDValue();
12451
12452 // Assumed zext or sext
12453 bool IsZExt = false;
12454 if (Opcode0 == ISD::ZERO_EXTEND && Opcode1 == ISD::ZERO_EXTEND) {
12455 IsZExt = true;
12456 } else if (Opcode0 == ISD::SIGN_EXTEND && Opcode1 == ISD::SIGN_EXTEND) {
12457 IsZExt = false;
12458 } else
12459 return SDValue();
12460
12461 SDValue EXT0 = SUB->getOperand(0);
12462 SDValue EXT1 = SUB->getOperand(1);
12463 // Assumed zext's operand has v16i8 type
12464 if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 ||
12465 EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
12466 return SDValue();
12467
12468 // Pattern is detected. Let's convert it to a sequence of nodes.
12469 SDLoc DL(N);
12470
12471 // First, create the node pattern of UABD/SABD.
12472 SDValue UABDHigh8Op0 =
12473 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
12474 DAG.getConstant(8, DL, MVT::i64));
12475 SDValue UABDHigh8Op1 =
12476 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
12477 DAG.getConstant(8, DL, MVT::i64));
12478 SDValue UABDHigh8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
12479 UABDHigh8Op0, UABDHigh8Op1);
12480 SDValue UABDL = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDHigh8);
12481
12482 // Second, create the node pattern of UABAL.
12483 SDValue UABDLo8Op0 =
12484 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
12485 DAG.getConstant(0, DL, MVT::i64));
12486 SDValue UABDLo8Op1 =
12487 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
12488 DAG.getConstant(0, DL, MVT::i64));
12489 SDValue UABDLo8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
12490 UABDLo8Op0, UABDLo8Op1);
12491 SDValue ZExtUABD = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDLo8);
12492 SDValue UABAL = DAG.getNode(ISD::ADD, DL, MVT::v8i16, UABDL, ZExtUABD);
12493
12494 // Third, create the node of UADDLP.
12495 SDValue UADDLP = DAG.getNode(AArch64ISD::UADDLP, DL, MVT::v4i32, UABAL);
12496
12497 // Fourth, create the node of VECREDUCE_ADD.
12498 return DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, UADDLP);
12499}
12500
12501// Turn a v8i8/v16i8 extended vecreduce into a udot/sdot and vecreduce
12502// vecreduce.add(ext(A)) to vecreduce.add(DOT(zero, A, one))
12503// vecreduce.add(mul(ext(A), ext(B))) to vecreduce.add(DOT(zero, A, B))
12504static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
12505 const AArch64Subtarget *ST) {
12506 if (!ST->hasDotProd())
12507 return performVecReduceAddCombineWithUADDLP(N, DAG);
12508
12509 SDValue Op0 = N->getOperand(0);
12510 if (N->getValueType(0) != MVT::i32 ||
12511 Op0.getValueType().getVectorElementType() != MVT::i32)
12512 return SDValue();
12513
12514 unsigned ExtOpcode = Op0.getOpcode();
12515 SDValue A = Op0;
12516 SDValue B;
12517 if (ExtOpcode == ISD::MUL) {
12518 A = Op0.getOperand(0);
12519 B = Op0.getOperand(1);
12520 if (A.getOpcode() != B.getOpcode() ||
12521 A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
12522 return SDValue();
12523 ExtOpcode = A.getOpcode();
12524 }
12525 if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND)
12526 return SDValue();
12527
12528 EVT Op0VT = A.getOperand(0).getValueType();
12529 if (Op0VT != MVT::v8i8 && Op0VT != MVT::v16i8)
12530 return SDValue();
12531
12532 SDLoc DL(Op0);
12533 // For non-mla reductions B can be set to 1. For MLA we take the operand of
12534 // the extend B.
12535 if (!B)
12536 B = DAG.getConstant(1, DL, Op0VT);
12537 else
12538 B = B.getOperand(0);
12539
12540 SDValue Zeros =
12541 DAG.getConstant(0, DL, Op0VT == MVT::v8i8 ? MVT::v2i32 : MVT::v4i32);
12542 auto DotOpcode =
12543 (ExtOpcode == ISD::ZERO_EXTEND) ? AArch64ISD::UDOT : AArch64ISD::SDOT;
12544 SDValue Dot = DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros,
12545 A.getOperand(0), B);
12546 return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
12547}
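
// Illustrative sketch (not part of the original source): the scalar
// arithmetic the DOT rewrite above relies on -- summing zero-extended bytes
// is a dot product against a splat of ones. Assumes <cstddef> and <cstdint>;
// the helper name is made up.
#include <cstddef>
#include <cstdint>
static uint32_t sumOfBytesSketch(const uint8_t *A, size_t N) {
  uint32_t Acc = 0;
  for (size_t I = 0; I != N; ++I)
    Acc += (uint32_t)A[I] * 1u;   // what udot(zeros, A, splat(1)) accumulates
  return Acc;
}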
12548
12549static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
12550 TargetLowering::DAGCombinerInfo &DCI,
12551 const AArch64Subtarget *Subtarget) {
12552 if (DCI.isBeforeLegalizeOps())
12553 return SDValue();
12554
12555 return foldVectorXorShiftIntoCmp(N, DAG, Subtarget);
12556}
12557
12558SDValue
12559AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
12560 SelectionDAG &DAG,
12561 SmallVectorImpl<SDNode *> &Created) const {
12562 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12563 if (isIntDivCheap(N->getValueType(0), Attr))
12564 return SDValue(N,0); // Lower SDIV as SDIV
12565
12566 // fold (sdiv X, pow2)
12567 EVT VT = N->getValueType(0);
12568 if ((VT != MVT::i32 && VT != MVT::i64) ||
12569 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
12570 return SDValue();
12571
12572 SDLoc DL(N);
12573 SDValue N0 = N->getOperand(0);
12574 unsigned Lg2 = Divisor.countTrailingZeros();
12575 SDValue Zero = DAG.getConstant(0, DL, VT);
12576 SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
12577
12578 // Add (N0 < 0) ? Pow2 - 1 : 0;
12579 SDValue CCVal;
12580 SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
12581 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
12582 SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
12583
12584 Created.push_back(Cmp.getNode());
12585 Created.push_back(Add.getNode());
12586 Created.push_back(CSel.getNode());
12587
12588 // Divide by pow2.
12589 SDValue SRA =
12590 DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
12591
12592 // If we're dividing by a positive value, we're done. Otherwise, we must
12593 // negate the result.
12594 if (Divisor.isNonNegative())
12595 return SRA;
12596
12597 Created.push_back(SRA.getNode());
12598 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
12599}
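
// Illustrative sketch (not part of the original source): the scalar
// arithmetic the CSEL/SRA sequence above implements for a positive
// power-of-two divisor 2^Lg2, assuming an arithmetic right shift and no
// overflow of X + Bias.
#include <cstdint>
static int64_t sdivByPow2Sketch(int64_t X, unsigned Lg2) {
  int64_t Bias = (X < 0) ? (int64_t(1) << Lg2) - 1 : 0; // CSEL: Add vs. N0
  return (X + Bias) >> Lg2;                             // SRA rounds toward zero
  // e.g. X = -7, Lg2 = 2: (-7 + 3) >> 2 = -1, matching -7 / 4 in C++.
}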
12600
12601static bool IsSVECntIntrinsic(SDValue S) {
12602 switch(getIntrinsicID(S.getNode())) {
12603 default:
12604 break;
12605 case Intrinsic::aarch64_sve_cntb:
12606 case Intrinsic::aarch64_sve_cnth:
12607 case Intrinsic::aarch64_sve_cntw:
12608 case Intrinsic::aarch64_sve_cntd:
12609 return true;
12610 }
12611 return false;
12612}
12613
12614/// Calculates what the pre-extend type is, based on the extension
12615/// operation node provided by \p Extend.
12616///
12617/// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the
12618/// pre-extend type is pulled directly from the operand, while other extend
12619/// operations need a bit more inspection to get this information.
12620///
12621/// \param Extend The SDNode from the DAG that represents the extend operation
12622/// \param DAG The SelectionDAG hosting the \p Extend node
12623///
12624/// \returns The type representing the \p Extend source type, or \p MVT::Other
12625/// if no valid type can be determined
12626static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) {
12627 switch (Extend.getOpcode()) {
12628 case ISD::SIGN_EXTEND:
12629 case ISD::ZERO_EXTEND:
12630 return Extend.getOperand(0).getValueType();
12631 case ISD::AssertSext:
12632 case ISD::AssertZext:
12633 case ISD::SIGN_EXTEND_INREG: {
12634 VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1));
12635 if (!TypeNode)
12636 return MVT::Other;
12637 return TypeNode->getVT();
12638 }
12639 case ISD::AND: {
12640 ConstantSDNode *Constant =
12641 dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode());
12642 if (!Constant)
12643 return MVT::Other;
12644
12645 uint32_t Mask = Constant->getZExtValue();
12646
12647 if (Mask == UCHAR_MAX)
12648 return MVT::i8;
12649 else if (Mask == USHRT_MAX)
12650 return MVT::i16;
12651 else if (Mask == UINT_MAX)
12652 return MVT::i32;
12653
12654 return MVT::Other;
12655 }
12656 default:
12657 return MVT::Other;
12658 }
12659
12660 llvm_unreachable("Code path unhandled in calculatePreExtendType!");
12661}
12662
12663/// Combines a dup(sext/zext) node pattern into sext/zext(dup)
12664/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
12665static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
12666 SelectionDAG &DAG) {
12667
12668 ShuffleVectorSDNode *ShuffleNode =
12669 dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode());
12670 if (!ShuffleNode)
12671 return SDValue();
12672
12673 // Ensure the shuffle is a splat of lane 0 before continuing
12674 if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0)
12675 return SDValue();
12676
12677 SDValue InsertVectorElt = VectorShuffle.getOperand(0);
12678
12679 if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
12680 return SDValue();
12681
12682 SDValue InsertLane = InsertVectorElt.getOperand(2);
12683 ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode());
12684 // Ensures the insert is inserting into lane 0
12685 if (!Constant || Constant->getZExtValue() != 0)
12686 return SDValue();
12687
12688 SDValue Extend = InsertVectorElt.getOperand(1);
12689 unsigned ExtendOpcode = Extend.getOpcode();
12690
12691 bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
12692 ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
12693 ExtendOpcode == ISD::AssertSext;
12694 if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
12695 ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
12696 return SDValue();
12697
12698 EVT TargetType = VectorShuffle.getValueType();
12699 EVT PreExtendType = calculatePreExtendType(Extend, DAG);
12700
12701 if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 &&
12702 TargetType != MVT::v2i64) ||
12703 (PreExtendType == MVT::Other))
12704 return SDValue();
12705
12706 // Restrict valid pre-extend data type
12707 if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
12708 PreExtendType != MVT::i32)
12709 return SDValue();
12710
12711 EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);
12712
12713 if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount())
12714 return SDValue();
12715
12716 if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2)
12717 return SDValue();
12718
12719 SDLoc DL(VectorShuffle);
12720
12721 SDValue InsertVectorNode = DAG.getNode(
12722 InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
12723 DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
12724 DAG.getConstant(0, DL, MVT::i64));
12725
12726 std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());
12727
12728 SDValue VectorShuffleNode =
12729 DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
12730 DAG.getUNDEF(PreExtendVT), ShuffleMask);
12731
12732 SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
12733 DL, TargetType, VectorShuffleNode);
12734
12735 return ExtendNode;
12736}
12737
12738/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
12739/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
12740static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
12741 // If the value type isn't a vector, none of the operands are going to be dups
12742 if (!Mul->getValueType(0).isVector())
12743 return SDValue();
12744
12745 SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG);
12746 SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG);
12747
12748 // If neither operand has been changed, don't make any further changes
12749 if (!Op0 && !Op1)
12750 return SDValue();
12751
12752 SDLoc DL(Mul);
12753 return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0),
12754 Op0 ? Op0 : Mul->getOperand(0),
12755 Op1 ? Op1 : Mul->getOperand(1));
12756}
12757
12758static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
12759 TargetLowering::DAGCombinerInfo &DCI,
12760 const AArch64Subtarget *Subtarget) {
12761
12762 if (SDValue Ext = performMulVectorExtendCombine(N, DAG))
12763 return Ext;
12764
12765 if (DCI.isBeforeLegalizeOps())
12766 return SDValue();
12767
12768 // The below optimizations require a constant RHS.
12769 if (!isa<ConstantSDNode>(N->getOperand(1)))
12770 return SDValue();
12771
12772 SDValue N0 = N->getOperand(0);
12773 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
12774 const APInt &ConstValue = C->getAPIntValue();
12775
12776 // Allow the scaling to be folded into the `cnt` instruction by preventing
12777 // the scaling from being obscured here. This makes it easier to pattern match.
12778 if (IsSVECntIntrinsic(N0) ||
12779 (N0->getOpcode() == ISD::TRUNCATE &&
12780 (IsSVECntIntrinsic(N0->getOperand(0)))))
12781 if (ConstValue.sge(1) && ConstValue.sle(16))
12782 return SDValue();
12783
12784 // Multiplication of a power of two plus/minus one can be done more
12785 // cheaply as a shift+add/sub. For now, this is true unilaterally. If
12786 // future CPUs have a cheaper MADD instruction, this may need to be
12787 // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
12788 // 64-bit is 5 cycles, so this is always a win.
12789 // More aggressively, some multiplications N0 * C can be lowered to
12790 // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
12791 // e.g. 6=3*2=(2+1)*2.
12792 // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
12793 // which equals (1+2)*16-(1+2).
12794
12795 // TrailingZeroes is used to test if the mul can be lowered to
12796 // shift+add+shift.
12797 unsigned TrailingZeroes = ConstValue.countTrailingZeros();
12798 if (TrailingZeroes) {
12799 // Conservatively do not lower to shift+add+shift if the mul might be
12800 // folded into smul or umul.
12801 if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
12802 isZeroExtended(N0.getNode(), DAG)))
12803 return SDValue();
12804 // Conservatively do not lower to shift+add+shift if the mul might be
12805 // folded into madd or msub.
12806 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
12807 N->use_begin()->getOpcode() == ISD::SUB))
12808 return SDValue();
12809 }
12810 // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
12811 // and shift+add+shift.
12812 APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
12813
12814 unsigned ShiftAmt, AddSubOpc;
12815 // Is the shifted value the LHS operand of the add/sub?
12816 bool ShiftValUseIsN0 = true;
12817 // Do we need to negate the result?
12818 bool NegateResult = false;
12819
12820 if (ConstValue.isNonNegative()) {
12821 // (mul x, 2^N + 1) => (add (shl x, N), x)
12822 // (mul x, 2^N - 1) => (sub (shl x, N), x)
12823 // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
12824 APInt SCVMinus1 = ShiftedConstValue - 1;
12825 APInt CVPlus1 = ConstValue + 1;
12826 if (SCVMinus1.isPowerOf2()) {
12827 ShiftAmt = SCVMinus1.logBase2();
12828 AddSubOpc = ISD::ADD;
12829 } else if (CVPlus1.isPowerOf2()) {
12830 ShiftAmt = CVPlus1.logBase2();
12831 AddSubOpc = ISD::SUB;
12832 } else
12833 return SDValue();
12834 } else {
12835 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
12836 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
12837 APInt CVNegPlus1 = -ConstValue + 1;
12838 APInt CVNegMinus1 = -ConstValue - 1;
12839 if (CVNegPlus1.isPowerOf2()) {
12840 ShiftAmt = CVNegPlus1.logBase2();
12841 AddSubOpc = ISD::SUB;
12842 ShiftValUseIsN0 = false;
12843 } else if (CVNegMinus1.isPowerOf2()) {
12844 ShiftAmt = CVNegMinus1.logBase2();
12845 AddSubOpc = ISD::ADD;
12846 NegateResult = true;
12847 } else
12848 return SDValue();
12849 }
12850
12851 SDLoc DL(N);
12852 EVT VT = N->getValueType(0);
12853 SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
12854 DAG.getConstant(ShiftAmt, DL, MVT::i64));
12855
12856 SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0;
12857 SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal;
12858 SDValue Res = DAG.getNode(AddSubOpc, DL, VT, AddSubN0, AddSubN1);
12859 assert(!(NegateResult && TrailingZeroes) &&
12860        "NegateResult and TrailingZeroes cannot both be true for now.");
12861 // Negate the result.
12862 if (NegateResult)
12863 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
12864 // Shift the result.
12865 if (TrailingZeroes)
12866 return DAG.getNode(ISD::SHL, DL, VT, Res,
12867 DAG.getConstant(TrailingZeroes, DL, MVT::i64));
12868 return Res;
12869}
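
// Illustrative sketch (not part of the original source): the shift+add+shift
// decomposition chosen above for C = (2^N + 1) * 2^M, worked for C = 6
// (N = 1, M = 1). Assumes only <cstdint>.
#include <cstdint>
static int64_t mulBy6Sketch(int64_t X) {
  int64_t AddShl = (X << 1) + X;  // (mul x, 2^N + 1) => (add (shl x, N), x)
  return AddShl << 1;             // then shift left by the trailing zero count M
}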
12870
12871static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
12872 SelectionDAG &DAG) {
12873 // Take advantage of vector comparisons producing 0 or -1 in each lane to
12874 // optimize away operation when it's from a constant.
12875 //
12876 // The general transformation is:
12877 // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
12878 // AND(VECTOR_CMP(x,y), constant2)
12879 // constant2 = UNARYOP(constant)
12880
12881 // Early exit if this isn't a vector operation, the operand of the
12882 // unary operation isn't a bitwise AND, or if the sizes of the operations
12883 // aren't the same.
12884 EVT VT = N->getValueType(0);
12885 if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
12886 N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
12887 VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
12888 return SDValue();
12889
12890 // Now check that the other operand of the AND is a constant. We could
12891 // make the transformation for non-constant splats as well, but it's unclear
12892 // that would be a benefit as it would not eliminate any operations, just
12893 // perform one more step in scalar code before moving to the vector unit.
12894 if (BuildVectorSDNode *BV =
12895 dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
12896 // Bail out if the vector isn't a constant.
12897 if (!BV->isConstant())
12898 return SDValue();
12899
12900 // Everything checks out. Build up the new and improved node.
12901 SDLoc DL(N);
12902 EVT IntVT = BV->getValueType(0);
12903 // Create a new constant of the appropriate type for the transformed
12904 // DAG.
12905 SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
12906 // The AND node needs bitcasts to/from an integer vector type around it.
12907 SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
12908 SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
12909 N->getOperand(0)->getOperand(0), MaskConst);
12910 SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
12911 return Res;
12912 }
12913
12914 return SDValue();
12915}
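
// Illustrative sketch (not part of the original source): why masking with a
// 0 / all-ones compare lane commutes with the conversion above, shown for one
// lane with sint_to_fp as the unary op. Assumes <cassert>, <cstdint>, <cstring>.
#include <cassert>
#include <cstdint>
#include <cstring>
static void maskedConvertLaneSketch(uint32_t CmpLane, int32_t C) {
  assert(CmpLane == 0u || CmpLane == ~0u);
  // UNARYOP(AND(cmp, C)):
  float Direct = (float)(int32_t)(CmpLane & (uint32_t)C);
  // AND(cmp, UNARYOP(C)), applied to the raw bit pattern:
  float ConvC = (float)C;
  uint32_t Bits;
  std::memcpy(&Bits, &ConvC, sizeof Bits);
  Bits &= CmpLane;
  float Folded;
  std::memcpy(&Folded, &Bits, sizeof Folded);
  assert(Direct == Folded); // holds because the mask lane is all-zeros or all-ones
}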
12916
12917static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
12918 const AArch64Subtarget *Subtarget) {
12919 // First try to optimize away the conversion when it's conditionally from
12920 // a constant. Vectors only.
12921 if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
12922 return Res;
12923
12924 EVT VT = N->getValueType(0);
12925 if (VT != MVT::f32 && VT != MVT::f64)
12926 return SDValue();
12927
12928 // Only optimize when the source and destination types have the same width.
12929 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
12930 return SDValue();
12931
12932 // If the result of an integer load is only used by an integer-to-float
12933 // conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
12934 // This eliminates an "integer-to-vector-move" UOP and improves throughput.
12935 SDValue N0 = N->getOperand(0);
12936 if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12937 // Do not change the width of a volatile load.
12938 !cast<LoadSDNode>(N0)->isVolatile()) {
12939 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12940 SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
12941 LN0->getPointerInfo(), LN0->getAlignment(),
12942 LN0->getMemOperand()->getFlags());
12943
12944 // Make sure successors of the original load stay after it by updating them
12945 // to use the new Chain.
12946 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
12947
12948 unsigned Opcode =
12949 (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
12950 return DAG.getNode(Opcode, SDLoc(N), VT, Load);
12951 }
12952
12953 return SDValue();
12954}
12955
12956/// Fold a floating-point multiply by power of two into floating-point to
12957/// fixed-point conversion.
12958static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
12959 TargetLowering::DAGCombinerInfo &DCI,
12960 const AArch64Subtarget *Subtarget) {
12961 if (!Subtarget->hasNEON())
12962 return SDValue();
12963
12964 if (!N->getValueType(0).isSimple())
12965 return SDValue();
12966
12967 SDValue Op = N->getOperand(0);
12968 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
12969 Op.getOpcode() != ISD::FMUL)
12970 return SDValue();
12971
12972 SDValue ConstVec = Op->getOperand(1);
12973 if (!isa<BuildVectorSDNode>(ConstVec))
12974 return SDValue();
12975
12976 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
12977 uint32_t FloatBits = FloatTy.getSizeInBits();
12978 if (FloatBits != 32 && FloatBits != 64)
12979 return SDValue();
12980
12981 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
12982 uint32_t IntBits = IntTy.getSizeInBits();
12983 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
12984 return SDValue();
12985
12986 // Avoid conversions where iN is larger than the float (e.g., float -> i64).
12987 if (IntBits > FloatBits)
12988 return SDValue();
12989
12990 BitVector UndefElements;
12991 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
12992 int32_t Bits = IntBits == 64 ? 64 : 32;
12993 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
12994 if (C == -1 || C == 0 || C > Bits)
12995 return SDValue();
12996
12997 MVT ResTy;
12998 unsigned NumLanes = Op.getValueType().getVectorNumElements();
12999 switch (NumLanes) {
13000 default:
13001 return SDValue();
13002 case 2:
13003 ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
13004 break;
13005 case 4:
13006 ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
13007 break;
13008 }
13009
13010 if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
13011 return SDValue();
13012
13013 assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
13014        "Illegal vector type after legalization");
13015
13016 SDLoc DL(N);
13017 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
13018 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
13019 : Intrinsic::aarch64_neon_vcvtfp2fxu;
13020 SDValue FixConv =
13021 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
13022 DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
13023 Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
13024 // We can handle smaller integers by generating an extra trunc.
13025 if (IntBits < FloatBits)
13026 FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
13027
13028 return FixConv;
13029}
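
// Illustrative sketch (not part of the original source): the scalar identity
// behind the fold above -- converting x * 2^C to an integer is a fixed-point
// conversion of x with C fractional bits (ignoring saturation and overflow).
// Assumes <cmath> and <cstdint>; the helper name is made up.
#include <cmath>
#include <cstdint>
static int32_t fpToFixedSketch(float X, unsigned C) {
  // Matches the truncating convert with C fractional bits built above.
  return (int32_t)(X * std::ldexp(1.0f, (int)C));
}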
13030
13031/// Fold a floating-point divide by power of two into fixed-point to
13032/// floating-point conversion.
13033static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
13034 TargetLowering::DAGCombinerInfo &DCI,
13035 const AArch64Subtarget *Subtarget) {
13036 if (!Subtarget->hasNEON())
13037 return SDValue();
13038
13039 SDValue Op = N->getOperand(0);
13040 unsigned Opc = Op->getOpcode();
13041 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
13042 !Op.getOperand(0).getValueType().isSimple() ||
13043 (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
13044 return SDValue();
13045
13046 SDValue ConstVec = N->getOperand(1);
13047 if (!isa<BuildVectorSDNode>(ConstVec))
13048 return SDValue();
13049
13050 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
13051 int32_t IntBits = IntTy.getSizeInBits();
13052 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
13053 return SDValue();
13054
13055 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
13056 int32_t FloatBits = FloatTy.getSizeInBits();
13057 if (FloatBits != 32 && FloatBits != 64)
13058 return SDValue();
13059
13060 // Avoid conversions where iN is larger than the float (e.g., i64 -> float).
13061 if (IntBits > FloatBits)
13062 return SDValue();
13063
13064 BitVector UndefElements;
13065 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
13066 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
13067 if (C == -1 || C == 0 || C > FloatBits)
13068 return SDValue();
13069
13070 MVT ResTy;
13071 unsigned NumLanes = Op.getValueType().getVectorNumElements();
13072 switch (NumLanes) {
13073 default:
13074 return SDValue();
13075 case 2:
13076 ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
13077 break;
13078 case 4:
13079 ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
13080 break;
13081 }
13082
13083 if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
13084 return SDValue();
13085
13086 SDLoc DL(N);
13087 SDValue ConvInput = Op.getOperand(0);
13088 bool IsSigned = Opc == ISD::SINT_TO_FP;
13089 if (IntBits < FloatBits)
13090 ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
13091 ResTy, ConvInput);
13092
13093 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
13094 : Intrinsic::aarch64_neon_vcvtfxu2fp;
13095 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
13096 DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
13097 DAG.getConstant(C, DL, MVT::i32));
13098}
13099
13100/// An EXTR instruction is made up of two shifts, ORed together. This helper
13101/// searches for and classifies those shifts.
13102static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
13103 bool &FromHi) {
13104 if (N.getOpcode() == ISD::SHL)
13105 FromHi = false;
13106 else if (N.getOpcode() == ISD::SRL)
13107 FromHi = true;
13108 else
13109 return false;
13110
13111 if (!isa<ConstantSDNode>(N.getOperand(1)))
13112 return false;
13113
13114 ShiftAmount = N->getConstantOperandVal(1);
13115 Src = N->getOperand(0);
13116 return true;
13117}
13118
13119/// EXTR instruction extracts a contiguous chunk of bits from two existing
13120/// registers viewed as a high/low pair. This function looks for the pattern:
13121/// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
13122/// with an EXTR. Can't quite be done in TableGen because the two immediates
13123/// aren't independent.
13124static SDValue tryCombineToEXTR(SDNode *N,
13125 TargetLowering::DAGCombinerInfo &DCI) {
13126 SelectionDAG &DAG = DCI.DAG;
13127 SDLoc DL(N);
13128 EVT VT = N->getValueType(0);
13129
13130 assert(N->getOpcode() == ISD::OR && "Unexpected root");
13131
13132 if (VT != MVT::i32 && VT != MVT::i64)
13133 return SDValue();
13134
13135 SDValue LHS;
13136 uint32_t ShiftLHS = 0;
13137 bool LHSFromHi = false;
13138 if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
13139 return SDValue();
13140
13141 SDValue RHS;
13142 uint32_t ShiftRHS = 0;
13143 bool RHSFromHi = false;
13144 if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
13145 return SDValue();
13146
13147 // If they're both trying to come from the high part of the register, they're
13148 // not really an EXTR.
13149 if (LHSFromHi == RHSFromHi)
13150 return SDValue();
13151
13152 if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
13153 return SDValue();
13154
13155 if (LHSFromHi) {
13156 std::swap(LHS, RHS);
13157 std::swap(ShiftLHS, ShiftRHS);
13158 }
13159
13160 return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
13161 DAG.getConstant(ShiftRHS, DL, MVT::i64));
13162}
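
// Illustrative sketch (not part of the original source): the bit-level window
// the EXTR pattern above captures, shown for i32 and N = 16 (so the RHS shift
// is RegWidth - N = 16). Assumes only <cstdint>.
#include <cstdint>
static uint32_t extrSketch(uint32_t Hi, uint32_t Lo) {
  return (Hi << 16) | (Lo >> 16); // the 32-bit window starting at bit 16 of Hi:Lo
}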
13163
13164static SDValue tryCombineToBSL(SDNode *N,
13165 TargetLowering::DAGCombinerInfo &DCI) {
13166 EVT VT = N->getValueType(0);
13167 SelectionDAG &DAG = DCI.DAG;
13168 SDLoc DL(N);
13169
13170 if (!VT.isVector())
13171 return SDValue();
13172
13173 // The combining code currently only works for NEON vectors. In particular,
13174 // it does not work for SVE when dealing with vectors wider than 128 bits.
13175 if (!VT.is64BitVector() && !VT.is128BitVector())
13176 return SDValue();
13177
13178 SDValue N0 = N->getOperand(0);
13179 if (N0.getOpcode() != ISD::AND)
13180 return SDValue();
13181
13182 SDValue N1 = N->getOperand(1);
13183 if (N1.getOpcode() != ISD::AND)
13184 return SDValue();
13185
13186 // InstCombine does (not (neg a)) => (add a -1).
13187 // Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
13188 // Loop over all combinations of AND operands.
13189 for (int i = 1; i >= 0; --i) {
13190 for (int j = 1; j >= 0; --j) {
13191 SDValue O0 = N0->getOperand(i);
13192 SDValue O1 = N1->getOperand(j);
13193 SDValue Sub, Add, SubSibling, AddSibling;
13194
13195 // Find a SUB and an ADD operand, one from each AND.
13196 if (O0.getOpcode() == ISD::SUB && O1.getOpcode() == ISD::ADD) {
13197 Sub = O0;
13198 Add = O1;
13199 SubSibling = N0->getOperand(1 - i);
13200 AddSibling = N1->getOperand(1 - j);
13201 } else if (O0.getOpcode() == ISD::ADD && O1.getOpcode() == ISD::SUB) {
13202 Add = O0;
13203 Sub = O1;
13204 AddSibling = N0->getOperand(1 - i);
13205 SubSibling = N1->getOperand(1 - j);
13206 } else
13207 continue;
13208
13209 if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
13210 continue;
13211
13212 // The constant ones vector is always the right-hand operand of the Add.
13213 if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
13214 continue;
13215
13216 if (Sub.getOperand(1) != Add.getOperand(0))
13217 continue;
13218
13219 return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
13220 }
13221 }
13222
13223 // (or (and a b) (and (not a) c)) => (bsl a b c)
13224 // We only have to look for constant vectors here since the general, variable
13225 // case can be handled in TableGen.
13226 unsigned Bits = VT.getScalarSizeInBits();
13227 uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
13228 for (int i = 1; i >= 0; --i)
13229 for (int j = 1; j >= 0; --j) {
13230 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
13231 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
13232 if (!BVN0 || !BVN1)
13233 continue;
13234
13235 bool FoundMatch = true;
13236 for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
13237 ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
13238 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
13239 if (!CN0 || !CN1 ||
13240 CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
13241 FoundMatch = false;
13242 break;
13243 }
13244 }
13245
13246 if (FoundMatch)
13247 return DAG.getNode(AArch64ISD::BSP, DL, VT, SDValue(BVN0, 0),
13248 N0->getOperand(1 - i), N1->getOperand(1 - j));
13249 }
13250
13251 return SDValue();
13252}
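
// Illustrative sketch (not part of the original source): the per-lane bitwise
// select that both matches above ultimately produce,
// (or (and a b) (and (not a) c)). Assumes only <cstdint>.
#include <cstdint>
static uint64_t bitwiseSelectSketch(uint64_t Mask, uint64_t B, uint64_t C) {
  return (B & Mask) | (C & ~Mask); // bsl a b c, with a as the selection mask
}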
13253
13254static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13255 const AArch64Subtarget *Subtarget) {
13256 // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
13257 SelectionDAG &DAG = DCI.DAG;
13258 EVT VT = N->getValueType(0);
13259
13260 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
13261 return SDValue();
13262
13263 if (SDValue Res = tryCombineToEXTR(N, DCI))
13264 return Res;
13265
13266 if (SDValue Res = tryCombineToBSL(N, DCI))
13267 return Res;
13268
13269 return SDValue();
13270}
13271
13272static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT) {
13273 if (!MemVT.getVectorElementType().isSimple())
13274 return false;
13275
13276 uint64_t MaskForTy = 0ull;
13277 switch (MemVT.getVectorElementType().getSimpleVT().SimpleTy) {
13278 case MVT::i8:
13279 MaskForTy = 0xffull;
13280 break;
13281 case MVT::i16:
13282 MaskForTy = 0xffffull;
13283 break;
13284 case MVT::i32:
13285 MaskForTy = 0xffffffffull;
13286 break;
13287 default:
13288 return false;
13289 break;
13290 }
13291
13292 if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
13293 if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
13294 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
13295
13296 return false;
13297}
13298
13299static SDValue performSVEAndCombine(SDNode *N,
13300 TargetLowering::DAGCombinerInfo &DCI) {
13301 if (DCI.isBeforeLegalizeOps())
13302 return SDValue();
13303
13304 SelectionDAG &DAG = DCI.DAG;
13305 SDValue Src = N->getOperand(0);
13306 unsigned Opc = Src->getOpcode();
13307
13308 // Zero/any extend of an unsigned unpack
13309 if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
13310 SDValue UnpkOp = Src->getOperand(0);
13311 SDValue Dup = N->getOperand(1);
13312
13313 if (Dup.getOpcode() != AArch64ISD::DUP)
13314 return SDValue();
13315
13316 SDLoc DL(N);
13317 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
      if (!C)
        return SDValue(); // dyn_cast may fail; don't dereference a null pointer
13318 uint64_t ExtVal = C->getZExtValue();
13319
13320 // If the mask is fully covered by the unpack, we don't need to push
13321 // a new AND onto the operand
13322 EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
13323 if ((ExtVal == 0xFF && EltTy == MVT::i8) ||
13324 (ExtVal == 0xFFFF && EltTy == MVT::i16) ||
13325 (ExtVal == 0xFFFFFFFF && EltTy == MVT::i32))
13326 return Src;
13327
13328 // Truncate to prevent a DUP with an over-wide constant
13329 APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
13330
13331 // Otherwise, make sure we propagate the AND to the operand
13332 // of the unpack
13333 Dup = DAG.getNode(AArch64ISD::DUP, DL,
13334 UnpkOp->getValueType(0),
13335 DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
13336
13337 SDValue And = DAG.getNode(ISD::AND, DL,
13338 UnpkOp->getValueType(0), UnpkOp, Dup);
13339
13340 return DAG.getNode(Opc, DL, N->getValueType(0), And);
13341 }
13342
13343 if (!EnableCombineMGatherIntrinsics)
13344 return SDValue();
13345
13346 SDValue Mask = N->getOperand(1);
13347
13348 if (!Src.hasOneUse())
13349 return SDValue();
13350
13351 EVT MemVT;
13352
13353 // SVE load instructions perform an implicit zero-extend, which makes them
13354 // perfect candidates for combining.
13355 switch (Opc) {
13356 case AArch64ISD::LD1_MERGE_ZERO:
13357 case AArch64ISD::LDNF1_MERGE_ZERO:
13358 case AArch64ISD::LDFF1_MERGE_ZERO:
13359 MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
13360 break;
13361 case AArch64ISD::GLD1_MERGE_ZERO:
13362 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
13363 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
13364 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
13365 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
13366 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
13367 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
13368 case AArch64ISD::GLDFF1_MERGE_ZERO:
13369 case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
13370 case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
13371 case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
13372 case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
13373 case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
13374 case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
13375 case AArch64ISD::GLDNT1_MERGE_ZERO:
13376 MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
13377 break;
13378 default:
13379 return SDValue();
13380 }
13381
13382 if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT))
13383 return Src;
13384
13385 return SDValue();
13386}
13387
13388static SDValue performANDCombine(SDNode *N,
13389 TargetLowering::DAGCombinerInfo &DCI) {
13390 SelectionDAG &DAG = DCI.DAG;
13391 SDValue LHS = N->getOperand(0);
13392 EVT VT = N->getValueType(0);
13393 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13394 return SDValue();
13395
13396 if (VT.isScalableVector())
13397 return performSVEAndCombine(N, DCI);
13398
13399 // The combining code below works only for NEON vectors. In particular, it
13400 // does not work for SVE when dealing with vectors wider than 128 bits.
13401 if (!(VT.is64BitVector() || VT.is128BitVector()))
13402 return SDValue();
13403
13404 BuildVectorSDNode *BVN =
13405 dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
13406 if (!BVN)
13407 return SDValue();
13408
13409 // AND does not accept an immediate, so check if we can use a BIC immediate
13410 // instruction instead. We do this here instead of using a (and x, (mvni imm))
13411 // pattern in isel, because some immediates may be lowered to the preferred
13412 // (and x, (movi imm)) form, even though an mvni representation also exists.
13413 APInt DefBits(VT.getSizeInBits(), 0);
13414 APInt UndefBits(VT.getSizeInBits(), 0);
13415 if (resolveBuildVector(BVN, DefBits, UndefBits)) {
13416 SDValue NewOp;
13417
13418 DefBits = ~DefBits;
13419 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
13420 DefBits, &LHS)) ||
13421 (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
13422 DefBits, &LHS)))
13423 return NewOp;
13424
13425 UndefBits = ~UndefBits;
13426 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
13427 UndefBits, &LHS)) ||
13428 (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
13429 UndefBits, &LHS)))
13430 return NewOp;
13431 }
13432
13433 return SDValue();
13434}
13435
13436static SDValue performSRLCombine(SDNode *N,
13437 TargetLowering::DAGCombinerInfo &DCI) {
13438 SelectionDAG &DAG = DCI.DAG;
13439 EVT VT = N->getValueType(0);
13440 if (VT != MVT::i32 && VT != MVT::i64)
13441 return SDValue();
13442
13443 // Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the
13444 // high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32)
13445 // to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero.
13446 SDValue N0 = N->getOperand(0);
13447 if (N0.getOpcode() == ISD::BSWAP) {
13448 SDLoc DL(N);
13449 SDValue N1 = N->getOperand(1);
13450 SDValue N00 = N0.getOperand(0);
13451 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
13452 uint64_t ShiftAmt = C->getZExtValue();
13453 if (VT == MVT::i32 && ShiftAmt == 16 &&
13454 DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16)))
13455 return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
13456 if (VT == MVT::i64 && ShiftAmt == 32 &&
13457 DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32)))
13458 return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
13459 }
13460 }
13461 return SDValue();
13462}
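
// Illustrative sketch (not part of the original source): why the shift above
// can become a rotate when the high half of x is zero -- the bits a rotate
// would bring back around are exactly that zero half. Assumes the GCC/Clang
// __builtin_bswap32 builtin; the helper name is made up.
#include <cstdint>
static bool srlEqualsRotrSketch(uint32_t X) {
  if ((X >> 16) != 0)
    return false;                             // precondition of the combine
  uint32_t Swapped = __builtin_bswap32(X);    // low 16 bits are now zero
  return (Swapped >> 16) ==
         ((Swapped >> 16) | (Swapped << 16)); // srl by 16 == rotr by 16 here
}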
13463
13464// Attempt to form urhadd(OpA, OpB) from
13465// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
13466// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
13467// The original form of the first expression is
13468// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the
13469// (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)).
13470// Before this function is called the srl will have been lowered to
13471// AArch64ISD::VLSHR.
13472// This pass can also recognize signed variants of the patterns that use sign
13473// extension instead of zero extension and form a srhadd(OpA, OpB) or a
13474// shadd(OpA, OpB) from them.
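// Illustrative sketch (assumed semantics, not code from this file): for
// unsigned 8-bit lanes the targeted instructions compute, per lane,
//   urhadd(a, b) = (a + b + 1) >> 1   // rounding halving add
//   uhadd(a, b)  = (a + b) >> 1       // truncating halving add
// with the addition performed in 16 bits so the carry is not lost, which is
// why the extend/shift/truncate chain above can collapse into a single node.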
13475static SDValue
13476performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13477 SelectionDAG &DAG) {
13478 EVT VT = N->getValueType(0);
13479
13480 // Since we are looking for a right shift by a constant value of 1 and we are
13481 // operating on types at least 16 bits in length (sign/zero extended OpA and
13482 // OpB, which are at least 8 bits), it follows that the truncate will always
13483 // discard the shifted-in bit and therefore the right shift will be logical
13484 // regardless of the signedness of OpA and OpB.
13485 SDValue Shift = N->getOperand(0);
13486 if (Shift.getOpcode() != AArch64ISD::VLSHR)
13487 return SDValue();
13488
13489 // Is the right shift using an immediate value of 1?
13490 uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
13491 if (ShiftAmount != 1)
13492 return SDValue();
13493
13494 SDValue ExtendOpA, ExtendOpB;
13495 SDValue ShiftOp0 = Shift.getOperand(0);
13496 unsigned ShiftOp0Opc = ShiftOp0.getOpcode();
13497 if (ShiftOp0Opc == ISD::SUB) {
13498
13499 SDValue Xor = ShiftOp0.getOperand(1);
13500 if (Xor.getOpcode() != ISD::XOR)
13501 return SDValue();
13502
13503 // Is the XOR using an all-ones constant on its right-hand side?
13504 uint64_t C;
13505 if (!isAllConstantBuildVector(Xor.getOperand(1), C))
13506 return SDValue();
13507
13508 unsigned ElemSizeInBits = VT.getScalarSizeInBits();
13509 APInt CAsAPInt(ElemSizeInBits, C);
13510 if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits))
13511 return SDValue();
13512
13513 ExtendOpA = Xor.getOperand(0);
13514 ExtendOpB = ShiftOp0.getOperand(0);
13515 } else if (ShiftOp0Opc == ISD::ADD) {
13516 ExtendOpA = ShiftOp0.getOperand(0);
13517 ExtendOpB = ShiftOp0.getOperand(1);
13518 } else
13519 return SDValue();
13520
13521 unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
13522 unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
13523 if (!(ExtendOpAOpc == ExtendOpBOpc &&
13524 (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
13525 return SDValue();
13526
13527 // Is the result of the right shift being truncated to the same value type as
13528 // the original operands, OpA and OpB?
13529 SDValue OpA = ExtendOpA.getOperand(0);
13530 SDValue OpB = ExtendOpB.getOperand(0);
13531 EVT OpAVT = OpA.getValueType();
13532 assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
13533 if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
13534 return SDValue();
13535
13536 SDLoc DL(N);
13537 bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
13538 bool IsRHADD = ShiftOp0Opc == ISD::SUB;
13539 unsigned HADDOpc = IsSignExtend
13540 ? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
13541 : (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD);
13542 SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB);
13543
13544 return ResultHADD;
13545}
13546
13547static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
13548 switch (Opcode) {
13549 case ISD::FADD:
13550 return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
13551 case ISD::ADD:
13552 return VT == MVT::i64;
13553 default:
13554 return false;
13555 }
13556}
13557
13558static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
13559 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13560 ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
13561
13562 EVT VT = N->getValueType(0);
13563 const bool FullFP16 =
13564 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
13565
13566 // Rewrite for pairwise fadd pattern
13567 // (f32 (extract_vector_elt
13568 // (fadd (vXf32 Other)
13569 // (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0))
13570 // ->
13571 // (f32 (fadd (extract_vector_elt (vXf32 Other) 0)
13572 // (extract_vector_elt (vXf32 Other) 1))
13573 if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
13574 hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) {
13575 SDLoc DL(N0);
13576 SDValue N00 = N0->getOperand(0);
13577 SDValue N01 = N0->getOperand(1);
13578
13579 ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01);
13580 SDValue Other = N00;
13581
13582 // And handle the commutative case.
13583 if (!Shuffle) {
13584 Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
13585 Other = N01;
13586 }
13587
13588 if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
13589 Other == Shuffle->getOperand(0)) {
13590 return DAG.getNode(N0->getOpcode(), DL, VT,
13591 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
13592 DAG.getConstant(0, DL, MVT::i64)),
13593 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
13594 DAG.getConstant(1, DL, MVT::i64)));
13595 }
13596 }
13597
13598 return SDValue();
13599}
13600
13601static SDValue performConcatVectorsCombine(SDNode *N,
13602 TargetLowering::DAGCombinerInfo &DCI,
13603 SelectionDAG &DAG) {
13604 SDLoc dl(N);
13605 EVT VT = N->getValueType(0);
13606 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13607 unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
13608
13609 // Optimize concat_vectors of truncated vectors, where the intermediate
13610 // type is illegal, to avoid said illegality, e.g.,
13611 // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
13612 // (v2i16 (truncate (v2i64)))))
13613 // ->
13614 // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
13615 // (v4i32 (bitcast (v2i64))),
13616 // <0, 2, 4, 6>)))
13617 // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
13618 // on both input and result type, so we might generate worse code.
13619 // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
13620 if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
13621 N1Opc == ISD::TRUNCATE) {
13622 SDValue N00 = N0->getOperand(0);
13623 SDValue N10 = N1->getOperand(0);
13624 EVT N00VT = N00.getValueType();
13625
13626 if (N00VT == N10.getValueType() &&
13627 (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
13628 N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
13629 MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
13630 SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
13631 for (size_t i = 0; i < Mask.size(); ++i)
13632 Mask[i] = i * 2;
13633 return DAG.getNode(ISD::TRUNCATE, dl, VT,
13634 DAG.getVectorShuffle(
13635 MidVT, dl,
13636 DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
13637 DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
13638 }
13639 }
13640
13641 // Wait 'til after everything is legalized to try this. That way we have
13642 // legal vector types and such.
13643 if (DCI.isBeforeLegalizeOps())
13644 return SDValue();
13645
13646 // Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted
13647 // subvectors from the same original vectors. Combine these into a single
13648 // [us]rhadd or [us]hadd that operates on the two original vectors. Example:
13649 // (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>),
13650 // extract_subvector (v16i8 OpB,
13651 // <0>))),
13652 // (v8i8 (urhadd (extract_subvector (v16i8 OpA, <8>),
13653 // extract_subvector (v16i8 OpB,
13654 // <8>)))))
13655 // ->
13656 // (v16i8(urhadd(v16i8 OpA, v16i8 OpB)))
13657 if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
13658 (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD ||
13659 N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) {
13660 SDValue N00 = N0->getOperand(0);
13661 SDValue N01 = N0->getOperand(1);
13662 SDValue N10 = N1->getOperand(0);
13663 SDValue N11 = N1->getOperand(1);
13664
13665 EVT N00VT = N00.getValueType();
13666 EVT N10VT = N10.getValueType();
13667
13668 if (N00->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13669 N01->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13670 N10->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13671 N11->getOpcode() == ISD::EXTRACT_SUBVECTOR && N00VT == N10VT) {
13672 SDValue N00Source = N00->getOperand(0);
13673 SDValue N01Source = N01->getOperand(0);
13674 SDValue N10Source = N10->getOperand(0);
13675 SDValue N11Source = N11->getOperand(0);
13676
13677 if (N00Source == N10Source && N01Source == N11Source &&
13678 N00Source.getValueType() == VT && N01Source.getValueType() == VT) {
13679 assert(N0.getValueType() == N1.getValueType());
13680
13681 uint64_t N00Index = N00.getConstantOperandVal(1);
13682 uint64_t N01Index = N01.getConstantOperandVal(1);
13683 uint64_t N10Index = N10.getConstantOperandVal(1);
13684 uint64_t N11Index = N11.getConstantOperandVal(1);
13685
13686 if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
13687 N10Index == N00VT.getVectorNumElements())
13688 return DAG.getNode(N0Opc, dl, VT, N00Source, N01Source);
13689 }
13690 }
13691 }
13692
13693 // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
13694 // splat. The indexed instructions are going to be expecting a DUPLANE64, so
13695 // canonicalise to that.
13696 if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) {
13697 assert(VT.getScalarSizeInBits() == 64);
13698 return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
13699 DAG.getConstant(0, dl, MVT::i64));
13700 }
13701
13702 // Canonicalise concat_vectors so that the right-hand vector has as few
13703 // bit-casts as possible before its real operation. The primary matching
13704 // destination for these operations will be the narrowing "2" instructions,
13705 // which depend on the operation being performed on this right-hand vector.
13706 // For example,
13707 // (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
13708 // becomes
13709 // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
13710
13711 if (N->getNumOperands() != 2 || N1Opc != ISD::BITCAST)
13712 return SDValue();
13713 SDValue RHS = N1->getOperand(0);
13714 MVT RHSTy = RHS.getValueType().getSimpleVT();
13715 // If the RHS is not a vector, this is not the pattern we're looking for.
13716 if (!RHSTy.isVector())
13717 return SDValue();
13718
13719 LLVM_DEBUG(
13720 dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
13721
13722 MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
13723 RHSTy.getVectorNumElements() * 2);
13724 return DAG.getNode(ISD::BITCAST, dl, VT,
13725 DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
13726 DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
13727 RHS));
13728}
13729
13730static SDValue
13731performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13732 SelectionDAG &DAG) {
13733 SDValue Vec = N->getOperand(0);
13734 SDValue SubVec = N->getOperand(1);
13735 uint64_t IdxVal = N->getConstantOperandVal(2);
13736 EVT VecVT = Vec.getValueType();
13737 EVT SubVT = SubVec.getValueType();
13738
13739 // Only do this for legal fixed vector types.
13740 if (!VecVT.isFixedLengthVector() ||
13741 !DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
13742 !DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
13743 return SDValue();
13744
13745 // Ignore widening patterns.
13746 if (IdxVal == 0 && Vec.isUndef())
13747 return SDValue();
13748
13749 // Subvector must be half the width and an "aligned" insertion.
13750 unsigned NumSubElts = SubVT.getVectorNumElements();
13751 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
13752 (IdxVal != 0 && IdxVal != NumSubElts))
13753 return SDValue();
13754
13755 // Fold insert_subvector -> concat_vectors
13756 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
13757 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
13758 SDLoc DL(N);
13759 SDValue Lo, Hi;
13760 if (IdxVal == 0) {
13761 Lo = SubVec;
13762 Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
13763 DAG.getVectorIdxConstant(NumSubElts, DL));
13764 } else {
13765 Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
13766 DAG.getVectorIdxConstant(0, DL));
13767 Hi = SubVec;
13768 }
13769 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
13770}
13771
13772static SDValue tryCombineFixedPointConvert(SDNode *N,
13773 TargetLowering::DAGCombinerInfo &DCI,
13774 SelectionDAG &DAG) {
13775 // Wait until after everything is legalized to try this. That way we have
13776 // legal vector types and such.
13777 if (DCI.isBeforeLegalizeOps())
13778 return SDValue();
13779 // Transform a scalar conversion of a value from a lane extract into a
13780 // lane extract of a vector conversion. E.g., from foo1 to foo2:
13781 // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
13782 // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
13783 //
13784 // The second form interacts better with instruction selection and the
13785 // register allocator to avoid cross-class register copies that aren't
13786 // coalescable due to a lane reference.
13787
13788 // Check the operand and see if it originates from a lane extract.
13789 SDValue Op1 = N->getOperand(1);
13790 if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
13791 // Yep, no additional predication needed. Perform the transform.
13792 SDValue IID = N->getOperand(0);
13793 SDValue Shift = N->getOperand(2);
13794 SDValue Vec = Op1.getOperand(0);
13795 SDValue Lane = Op1.getOperand(1);
13796 EVT ResTy = N->getValueType(0);
13797 EVT VecResTy;
13798 SDLoc DL(N);
13799
13800 // The vector width should be 128 bits by the time we get here, even
13801 // if it started as 64 bits (the extract_vector handling will have
13802 // done so).
13803 assert(Vec.getValueSizeInBits() == 128 &&
13804 "unexpected vector size on extract_vector_elt!");
13805 if (Vec.getValueType() == MVT::v4i32)
13806 VecResTy = MVT::v4f32;
13807 else if (Vec.getValueType() == MVT::v2i64)
13808 VecResTy = MVT::v2f64;
13809 else
13810 llvm_unreachable("unexpected vector type!")::llvm::llvm_unreachable_internal("unexpected vector type!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 13810)
;
13811
13812 SDValue Convert =
13813 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
13814 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
13815 }
13816 return SDValue();
13817}
13818
13819// AArch64 high-vector "long" operations are formed by performing the non-high
13820// version on an extract_subvector of each operand which gets the high half:
13821//
13822// (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
13823//
13824// However, there are cases which don't have an extract_high explicitly, but
13825// have another operation that can be made compatible with one for free. For
13826// example:
13827//
13828// (dupv64 scalar) --> (extract_high (dup128 scalar))
13829//
13830// This routine does the actual conversion of such DUPs, once outer routines
13831// have determined that everything else is in order.
13832// It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
13833// similarly here.
13834static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
13835 switch (N.getOpcode()) {
13836 case AArch64ISD::DUP:
13837 case AArch64ISD::DUPLANE8:
13838 case AArch64ISD::DUPLANE16:
13839 case AArch64ISD::DUPLANE32:
13840 case AArch64ISD::DUPLANE64:
13841 case AArch64ISD::MOVI:
13842 case AArch64ISD::MOVIshift:
13843 case AArch64ISD::MOVIedit:
13844 case AArch64ISD::MOVImsl:
13845 case AArch64ISD::MVNIshift:
13846 case AArch64ISD::MVNImsl:
13847 break;
13848 default:
13849 // FMOV could be supported, but isn't very useful, as it would only occur
13850 // if you passed a bitcast'd floating point immediate to an eligible long
13851 // integer op (addl, smull, ...).
13852 return SDValue();
13853 }
13854
13855 MVT NarrowTy = N.getSimpleValueType();
13856 if (!NarrowTy.is64BitVector())
13857 return SDValue();
13858
13859 MVT ElementTy = NarrowTy.getVectorElementType();
13860 unsigned NumElems = NarrowTy.getVectorNumElements();
13861 MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
13862
13863 SDLoc dl(N);
13864 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy,
13865 DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()),
13866 DAG.getConstant(NumElems, dl, MVT::i64));
13867}
13868
13869static bool isEssentiallyExtractHighSubvector(SDValue N) {
13870 if (N.getOpcode() == ISD::BITCAST)
13871 N = N.getOperand(0);
13872 if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13873 return false;
13874 return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
13875 N.getOperand(0).getValueType().getVectorNumElements() / 2;
13876}
13877
13878/// Helper structure to keep track of ISD::SET_CC operands.
13879struct GenericSetCCInfo {
13880 const SDValue *Opnd0;
13881 const SDValue *Opnd1;
13882 ISD::CondCode CC;
13883};
13884
13885/// Helper structure to keep track of a SET_CC lowered into AArch64 code.
13886struct AArch64SetCCInfo {
13887 const SDValue *Cmp;
13888 AArch64CC::CondCode CC;
13889};
13890
13891/// Helper structure to keep track of SetCC information.
13892union SetCCInfo {
13893 GenericSetCCInfo Generic;
13894 AArch64SetCCInfo AArch64;
13895};
13896
13897 /// Helper structure to be able to read SetCC information. If the
13898 /// IsAArch64 field is set to true, Info is an AArch64SetCCInfo; otherwise
13899 /// Info is a GenericSetCCInfo.
13900struct SetCCInfoAndKind {
13901 SetCCInfo Info;
13902 bool IsAArch64;
13903};
13904
13905 /// Check whether or not \p Op is a SET_CC operation, either a generic or
13906 /// an AArch64 lowered one.
13907 /// \p SetCCInfo is filled accordingly.
13908 /// \post SetCCInfo is meaningful only when this function returns true.
13909 /// \return True when \p Op is a kind of SET_CC operation, false
13910 /// otherwise.
13911static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
13912 // If this is a setcc, this is straightforward.
13913 if (Op.getOpcode() == ISD::SETCC) {
13914 SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
13915 SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
13916 SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13917 SetCCInfo.IsAArch64 = false;
13918 return true;
13919 }
13920 // Otherwise, check if this is a matching csel instruction.
13921 // In other words:
13922 // - csel 1, 0, cc
13923 // - csel 0, 1, !cc
13924 if (Op.getOpcode() != AArch64ISD::CSEL)
13925 return false;
13926 // Set the information about the operands.
13927 // TODO: we want the operands of the Cmp not the csel
13928 SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
13929 SetCCInfo.IsAArch64 = true;
13930 SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
13931 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
13932
13933 // Check that the operands match the constraints:
13934 // (1) Both operands must be constants.
13935 // (2) One must be 1 and the other must be 0.
13936 ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
13937 ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
13938
13939 // Check (1).
13940 if (!TValue || !FValue)
13941 return false;
13942
13943 // Check (2).
13944 if (!TValue->isOne()) {
13945 // Update the comparison when we are interested in !cc.
13946 std::swap(TValue, FValue);
13947 SetCCInfo.Info.AArch64.CC =
13948 AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
13949 }
13950 return TValue->isOne() && FValue->isNullValue();
13951}
13952
13953// Returns true if Op is setcc or zext of setcc.
13954static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
13955 if (isSetCC(Op, Info))
13956 return true;
13957 return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
13958 isSetCC(Op->getOperand(0), Info));
13959}
13960
13961// The folding we want to perform is:
13962// (add x, [zext] (setcc cc ...) )
13963// -->
13964// (csel x, (add x, 1), !cc ...)
13965//
13966// The latter will get matched to a CSINC instruction.
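// Illustrative sketch (hypothetical source, not from this file): for
//   int f(int x, int a, int b) { return x + (a < b); }
// the (add x, (setcc lt a, b)) DAG handled here is expected to become a
// compare followed by CSINC, i.e. roughly "cmp w1, w2; cinc w0, w0, lt".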
13967static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
13968 assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
13969 SDValue LHS = Op->getOperand(0);
13970 SDValue RHS = Op->getOperand(1);
13971 SetCCInfoAndKind InfoAndKind;
13972
13973 // If both operands are a SET_CC, then we don't want to perform this
13974 // folding and create another csel as this results in more instructions
13975 // (and higher register usage).
13976 if (isSetCCOrZExtSetCC(LHS, InfoAndKind) &&
13977 isSetCCOrZExtSetCC(RHS, InfoAndKind))
13978 return SDValue();
13979
13980 // If neither operand is a SET_CC, give up.
13981 if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
13982 std::swap(LHS, RHS);
13983 if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
13984 return SDValue();
13985 }
13986
13987 // FIXME: This could be generalized to work for FP comparisons.
13988 EVT CmpVT = InfoAndKind.IsAArch64
13989 ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
13990 : InfoAndKind.Info.Generic.Opnd0->getValueType();
13991 if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
13992 return SDValue();
13993
13994 SDValue CCVal;
13995 SDValue Cmp;
13996 SDLoc dl(Op);
13997 if (InfoAndKind.IsAArch64) {
13998 CCVal = DAG.getConstant(
13999 AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
14000 MVT::i32);
14001 Cmp = *InfoAndKind.Info.AArch64.Cmp;
14002 } else
14003 Cmp = getAArch64Cmp(
14004 *InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1,
14005 ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
14006 dl);
14007
14008 EVT VT = Op->getValueType(0);
14009 LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
14010 return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
14011}
14012
14013// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
14014static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
14015 EVT VT = N->getValueType(0);
14016 // Only handle scalar integer result types here.
14017 if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
14018 return SDValue();
14019
14020 SDValue LHS = N->getOperand(0);
14021 SDValue RHS = N->getOperand(1);
14022 if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14023 RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
14024 return SDValue();
14025
14026 auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
14027 auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
14028 if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue())
14029 return SDValue();
14030
14031 SDValue Op1 = LHS->getOperand(0);
14032 SDValue Op2 = RHS->getOperand(0);
14033 EVT OpVT1 = Op1.getValueType();
14034 EVT OpVT2 = Op2.getValueType();
14035 if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
14036 Op2.getOpcode() != AArch64ISD::UADDV ||
14037 OpVT1.getVectorElementType() != VT)
14038 return SDValue();
14039
14040 SDValue Val1 = Op1.getOperand(0);
14041 SDValue Val2 = Op2.getOperand(0);
14042 EVT ValVT = Val1->getValueType(0);
14043 SDLoc DL(N);
14044 SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
14045 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
14046 DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
14047 DAG.getConstant(0, DL, MVT::i64));
14048}
14049
14050// ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
14051static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
14052 EVT VT = N->getValueType(0);
14053 if (N->getOpcode() != ISD::ADD)
14054 return SDValue();
14055
14056 SDValue Dot = N->getOperand(0);
14057 SDValue A = N->getOperand(1);
14058 // Handle commutativity
14059 auto isZeroDot = [](SDValue Dot) {
14060 return (Dot.getOpcode() == AArch64ISD::UDOT ||
14061 Dot.getOpcode() == AArch64ISD::SDOT) &&
14062 isZerosVector(Dot.getOperand(0).getNode());
14063 };
14064 if (!isZeroDot(Dot))
14065 std::swap(Dot, A);
14066 if (!isZeroDot(Dot))
14067 return SDValue();
14068
14069 return DAG.getNode(Dot.getOpcode(), SDLoc(N), VT, A, Dot.getOperand(1),
14070 Dot.getOperand(2));
14071}
14072
14073// The basic add/sub long vector instructions have variants with "2" on the end
14074// which act on the high-half of their inputs. They are normally matched by
14075// patterns like:
14076//
14077// (add (zeroext (extract_high LHS)),
14078// (zeroext (extract_high RHS)))
14079// -> uaddl2 vD, vN, vM
14080//
14081// However, if one of the extracts is something like a duplicate, this
14082// instruction can still be used profitably. This function puts the DAG into a
14083// more appropriate form for those patterns to trigger.
14084static SDValue performAddSubLongCombine(SDNode *N,
14085 TargetLowering::DAGCombinerInfo &DCI,
14086 SelectionDAG &DAG) {
14087 if (DCI.isBeforeLegalizeOps())
14088 return SDValue();
14089
14090 MVT VT = N->getSimpleValueType(0);
14091 if (!VT.is128BitVector()) {
14092 if (N->getOpcode() == ISD::ADD)
14093 return performSetccAddFolding(N, DAG);
14094 return SDValue();
14095 }
14096
14097 // Make sure both branches are extended in the same way.
14098 SDValue LHS = N->getOperand(0);
14099 SDValue RHS = N->getOperand(1);
14100 if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
14101 LHS.getOpcode() != ISD::SIGN_EXTEND) ||
14102 LHS.getOpcode() != RHS.getOpcode())
14103 return SDValue();
14104
14105 unsigned ExtType = LHS.getOpcode();
14106
14107 // It's not worth doing if at least one of the inputs isn't already an
14108 // extract, but we don't know which it'll be so we have to try both.
14109 if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
14110 RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
14111 if (!RHS.getNode())
14112 return SDValue();
14113
14114 RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
14115 } else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
14116 LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
14117 if (!LHS.getNode())
14118 return SDValue();
14119
14120 LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
14121 }
14122
14123 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
14124}
14125
14126static SDValue performAddSubCombine(SDNode *N,
14127 TargetLowering::DAGCombinerInfo &DCI,
14128 SelectionDAG &DAG) {
14129 // Try to change sum of two reductions.
14130 if (SDValue Val = performUADDVCombine(N, DAG))
14131 return Val;
14132 if (SDValue Val = performAddDotCombine(N, DAG))
14133 return Val;
14134
14135 return performAddSubLongCombine(N, DCI, DAG);
14136}
14137
14138// Massage DAGs which we can use the high-half "long" operations on into
14139// something isel will recognize better. E.g.
14140//
14141// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
14142// (aarch64_neon_umull (extract_high (v2i64 vec)))
14143// (extract_high (v2i64 (dup128 scalar)))))
14144//
14145static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
14146 TargetLowering::DAGCombinerInfo &DCI,
14147 SelectionDAG &DAG) {
14148 if (DCI.isBeforeLegalizeOps())
14149 return SDValue();
14150
14151 SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
14152 SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
14153 assert(LHS.getValueType().is64BitVector() &&
14154 RHS.getValueType().is64BitVector() &&
14155 "unexpected shape for long operation");
14156
14157 // Either node could be a DUP, but it's not worth doing both of them (you'd
14158 // just as well use the non-high version) so look for a corresponding extract
14159 // operation on the other "wing".
14160 if (isEssentiallyExtractHighSubvector(LHS)) {
14161 RHS = tryExtendDUPToExtractHigh(RHS, DAG);
14162 if (!RHS.getNode())
14163 return SDValue();
14164 } else if (isEssentiallyExtractHighSubvector(RHS)) {
14165 LHS = tryExtendDUPToExtractHigh(LHS, DAG);
14166 if (!LHS.getNode())
14167 return SDValue();
14168 }
14169
14170 if (IID == Intrinsic::not_intrinsic)
14171 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
14172
14173 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
14174 N->getOperand(0), LHS, RHS);
14175}
14176
14177static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
14178 MVT ElemTy = N->getSimpleValueType(0).getScalarType();
14179 unsigned ElemBits = ElemTy.getSizeInBits();
14180
14181 int64_t ShiftAmount;
14182 if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
14183 APInt SplatValue, SplatUndef;
14184 unsigned SplatBitSize;
14185 bool HasAnyUndefs;
14186 if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
14187 HasAnyUndefs, ElemBits) ||
14188 SplatBitSize != ElemBits)
14189 return SDValue();
14190
14191 ShiftAmount = SplatValue.getSExtValue();
14192 } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
14193 ShiftAmount = CVN->getSExtValue();
14194 } else
14195 return SDValue();
14196
14197 unsigned Opcode;
14198 bool IsRightShift;
14199 switch (IID) {
14200 default:
14201 llvm_unreachable("Unknown shift intrinsic")::llvm::llvm_unreachable_internal("Unknown shift intrinsic", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 14201)
;
14202 case Intrinsic::aarch64_neon_sqshl:
14203 Opcode = AArch64ISD::SQSHL_I;
14204 IsRightShift = false;
14205 break;
14206 case Intrinsic::aarch64_neon_uqshl:
14207 Opcode = AArch64ISD::UQSHL_I;
14208 IsRightShift = false;
14209 break;
14210 case Intrinsic::aarch64_neon_srshl:
14211 Opcode = AArch64ISD::SRSHR_I;
14212 IsRightShift = true;
14213 break;
14214 case Intrinsic::aarch64_neon_urshl:
14215 Opcode = AArch64ISD::URSHR_I;
14216 IsRightShift = true;
14217 break;
14218 case Intrinsic::aarch64_neon_sqshlu:
14219 Opcode = AArch64ISD::SQSHLU_I;
14220 IsRightShift = false;
14221 break;
14222 case Intrinsic::aarch64_neon_sshl:
14223 case Intrinsic::aarch64_neon_ushl:
14224 // For positive shift amounts we can use SHL, as ushl/sshl perform a regular
14225 // left shift in that case. Below, we only replace the current node with
14226 // VSHL if this condition is met.
14227 Opcode = AArch64ISD::VSHL;
14228 IsRightShift = false;
14229 break;
14230 }
14231
14232 if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
14233 SDLoc dl(N);
14234 return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
14235 DAG.getConstant(-ShiftAmount, dl, MVT::i32));
14236 } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
14237 SDLoc dl(N);
14238 return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
14239 DAG.getConstant(ShiftAmount, dl, MVT::i32));
14240 }
14241
14242 return SDValue();
14243}
14244
14245// The CRC32[BH] instructions ignore the high bits of their data operand. Since
14246// the intrinsics must be legal and take an i32, this means there's almost
14247// certainly going to be a zext in the DAG which we can eliminate.
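// Illustrative sketch (assumed usage, not from this file): for
//   uint32_t f(uint32_t crc, uint8_t b) { return __crc32b(crc, b); }
// the i8 argument is zero-extended to the i32 the intrinsic requires, so the
// DAG contains (and data, 0xff); since CRC32B only reads the low 8 bits of
// its data operand anyway, that masking can be dropped.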
14248static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
14249 SDValue AndN = N->getOperand(2);
14250 if (AndN.getOpcode() != ISD::AND)
14251 return SDValue();
14252
14253 ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
14254 if (!CMask || CMask->getZExtValue() != Mask)
14255 return SDValue();
14256
14257 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32,
14258 N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
14259}
14260
14261static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
14262 SelectionDAG &DAG) {
14263 SDLoc dl(N);
14264 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
14265 DAG.getNode(Opc, dl,
14266 N->getOperand(1).getSimpleValueType(),
14267 N->getOperand(1)),
14268 DAG.getConstant(0, dl, MVT::i64));
14269}
14270
14271static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
14272 SDLoc DL(N);
14273 SDValue Op1 = N->getOperand(1);
14274 SDValue Op2 = N->getOperand(2);
14275 EVT ScalarTy = Op2.getValueType();
14276 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
14277 ScalarTy = MVT::i32;
14278
14279 // Lower index_vector(base, step) to mul(step_vector(1), splat(step)) + splat(base).
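// For example (illustration only): index_vector(base=2, step=3) over nxv4i32
// becomes <0,1,2,3,...> * splat(3) + splat(2) = <2,5,8,11,...>.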
14280 SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0));
14281 SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
14282 SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
14283 SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
14284 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
14285}
14286
14287static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) {
14288 SDLoc dl(N);
14289 SDValue Scalar = N->getOperand(3);
14290 EVT ScalarTy = Scalar.getValueType();
14291
14292 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
14293 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
14294
14295 SDValue Passthru = N->getOperand(1);
14296 SDValue Pred = N->getOperand(2);
14297 return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
14298 Pred, Scalar, Passthru);
14299}
14300
14301static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
14302 SDLoc dl(N);
14303 LLVMContext &Ctx = *DAG.getContext();
14304 EVT VT = N->getValueType(0);
14305
14306 assert(VT.isScalableVector() && "Expected a scalable vector.")(static_cast <bool> (VT.isScalableVector() && "Expected a scalable vector."
) ? void (0) : __assert_fail ("VT.isScalableVector() && \"Expected a scalable vector.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 14306, __extension__ __PRETTY_FUNCTION__))
;
14307
14308 // Current lowering only supports the SVE-ACLE types.
14309 if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
14310 return SDValue();
14311
14312 unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8;
14313 unsigned ByteSize = VT.getSizeInBits().getKnownMinSize() / 8;
14314 EVT ByteVT =
14315 EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize));
14316
14317 // Convert everything to the domain of EXT (i.e. bytes).
14318 SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
14319 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
14320 SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
14321 DAG.getConstant(ElemSize, dl, MVT::i32));
14322
14323 SDValue EXT = DAG.getNode(AArch64ISD::EXT, dl, ByteVT, Op0, Op1, Op2);
14324 return DAG.getNode(ISD::BITCAST, dl, VT, EXT);
14325}
14326
14327static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
14328 TargetLowering::DAGCombinerInfo &DCI,
14329 SelectionDAG &DAG) {
14330 if (DCI.isBeforeLegalize())
14331 return SDValue();
14332
14333 SDValue Comparator = N->getOperand(3);
14334 if (Comparator.getOpcode() == AArch64ISD::DUP ||
14335 Comparator.getOpcode() == ISD::SPLAT_VECTOR) {
14336 unsigned IID = getIntrinsicID(N);
14337 EVT VT = N->getValueType(0);
14338 EVT CmpVT = N->getOperand(2).getValueType();
14339 SDValue Pred = N->getOperand(1);
14340 SDValue Imm;
14341 SDLoc DL(N);
14342
14343 switch (IID) {
14344 default:
14345 llvm_unreachable("Called with wrong intrinsic!")::llvm::llvm_unreachable_internal("Called with wrong intrinsic!"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 14345)
;
14346 break;
14347
14348 // Signed comparisons
14349 case Intrinsic::aarch64_sve_cmpeq_wide:
14350 case Intrinsic::aarch64_sve_cmpne_wide:
14351 case Intrinsic::aarch64_sve_cmpge_wide:
14352 case Intrinsic::aarch64_sve_cmpgt_wide:
14353 case Intrinsic::aarch64_sve_cmplt_wide:
14354 case Intrinsic::aarch64_sve_cmple_wide: {
14355 if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
14356 int64_t ImmVal = CN->getSExtValue();
14357 if (ImmVal >= -16 && ImmVal <= 15)
14358 Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
14359 else
14360 return SDValue();
14361 }
14362 break;
14363 }
14364 // Unsigned comparisons
14365 case Intrinsic::aarch64_sve_cmphs_wide:
14366 case Intrinsic::aarch64_sve_cmphi_wide:
14367 case Intrinsic::aarch64_sve_cmplo_wide:
14368 case Intrinsic::aarch64_sve_cmpls_wide: {
14369 if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
14370 uint64_t ImmVal = CN->getZExtValue();
14371 if (ImmVal <= 127)
14372 Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
14373 else
14374 return SDValue();
14375 }
14376 break;
14377 }
14378 }
14379
14380 if (!Imm)
14381 return SDValue();
14382
14383 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
14384 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred,
14385 N->getOperand(2), Splat, DAG.getCondCode(CC));
14386 }
14387
14388 return SDValue();
14389}
14390
14391static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
14392 AArch64CC::CondCode Cond) {
14393 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14394
14395 SDLoc DL(Op);
14396 assert(Op.getValueType().isScalableVector() &&
14397 TLI.isTypeLegal(Op.getValueType()) &&
14398 "Expected legal scalable vector type!");
14399
14400 // Ensure target-specific opcodes use a legal type.
14401 EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
14402 SDValue TVal = DAG.getConstant(1, DL, OutVT);
14403 SDValue FVal = DAG.getConstant(0, DL, OutVT);
14404
14405 // Set condition code (CC) flags.
14406 SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
14407
14408 // Convert CC to integer based on requested condition.
14409 // NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
14410 SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
14411 SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
14412 return DAG.getZExtOrTrunc(Res, DL, VT);
14413}
14414
14415static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
14416 SelectionDAG &DAG) {
14417 SDLoc DL(N);
14418
14419 SDValue Pred = N->getOperand(1);
14420 SDValue VecToReduce = N->getOperand(2);
14421
14422 // NOTE: The integer reduction's result type is not always linked to the
14423 // operand's element type so we construct it from the intrinsic's result type.
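// For example (illustration only): aarch64_sve_saddv over nxv16i8 data
// produces an i64 result, so the reduction itself is built as an nxv2i64
// operation and lane 0 is extracted below.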
14424 EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
14425 SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
14426
14427 // SVE reductions set the whole vector register with the first element
14428 // containing the reduction result, which we'll now extract.
14429 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14430 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
14431 Zero);
14432}
14433
14434static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
14435 SelectionDAG &DAG) {
14436 SDLoc DL(N);
14437
14438 SDValue Pred = N->getOperand(1);
14439 SDValue VecToReduce = N->getOperand(2);
14440
14441 EVT ReduceVT = VecToReduce.getValueType();
14442 SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
14443
14444 // SVE reductions set the whole vector register with the first element
14445 // containing the reduction result, which we'll now extract.
14446 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14447 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
14448 Zero);
14449}
14450
14451static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
14452 SelectionDAG &DAG) {
14453 SDLoc DL(N);
14454
14455 SDValue Pred = N->getOperand(1);
14456 SDValue InitVal = N->getOperand(2);
14457 SDValue VecToReduce = N->getOperand(3);
14458 EVT ReduceVT = VecToReduce.getValueType();
14459
14460 // Ordered reductions use the first lane of the result vector as the
14461 // reduction's initial value.
14462 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14463 InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT,
14464 DAG.getUNDEF(ReduceVT), InitVal, Zero);
14465
14466 SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce);
14467
14468 // SVE reductions set the whole vector register with the first element
14469 // containing the reduction result, which we'll now extract.
14470 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
14471 Zero);
14472}
14473
14474static bool isAllActivePredicate(SDValue N) {
14475 unsigned NumElts = N.getValueType().getVectorMinNumElements();
14476
14477 // Look through cast.
14478 while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
14479 N = N.getOperand(0);
14480 // When reinterpreting from a type with fewer elements the "new" elements
14481 // are not active, so bail if they're likely to be used.
14482 if (N.getValueType().getVectorMinNumElements() < NumElts)
14483 return false;
14484 }
14485
14486 // "ptrue p.<ty>, all" can be considered all active when <ty> is the same size
14487 // or smaller than the implicit element type represented by N.
14488 // NOTE: A larger element count implies a smaller element type.
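// For example (illustration only): "ptrue p0.b, all" (nxv16i1) covers any
// requirement, whereas "ptrue p0.d, all" (nxv2i1) only sets every eighth
// predicate bit and so is not all-active when 16 lanes are needed.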
14489 if (N.getOpcode() == AArch64ISD::PTRUE &&
14490 N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
14491 return N.getValueType().getVectorMinNumElements() >= NumElts;
14492
14493 return false;
14494}
14495
14496// If a merged operation has no inactive lanes we can relax it to a predicated
14497// or unpredicated operation, which potentially allows better isel (perhaps
14498// using immediate forms) or relaxing register reuse requirements.
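// Illustrative sketch (assumed IR, not from this file): for
//   call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(
//       <vscale x 4 x i1> %pg_all, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
// where %pg_all is a "ptrue all" predicate, no lane is inactive, so the call
// can be relaxed to a plain ISD::ADD of %a and %b (the UnpredOp path below).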
14499static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
14500 SelectionDAG &DAG,
14501 bool UnpredOp = false) {
14502 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
14503 assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
14504 SDValue Pg = N->getOperand(1);
14505
14506 // ISD way to specify an all active predicate.
14507 if (isAllActivePredicate(Pg)) {
14508 if (UnpredOp)
14509 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), N->getOperand(2),
14510 N->getOperand(3));
14511 else
14512 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg,
14513 N->getOperand(2), N->getOperand(3));
14514 }
14515
14516 // FUTURE: SplatVector(true)
14517 return SDValue();
14518}
14519
14520static SDValue performIntrinsicCombine(SDNode *N,
14521 TargetLowering::DAGCombinerInfo &DCI,
14522 const AArch64Subtarget *Subtarget) {
14523 SelectionDAG &DAG = DCI.DAG;
14524 unsigned IID = getIntrinsicID(N);
14525 switch (IID) {
14526 default:
14527 break;
14528 case Intrinsic::aarch64_neon_vcvtfxs2fp:
14529 case Intrinsic::aarch64_neon_vcvtfxu2fp:
14530 return tryCombineFixedPointConvert(N, DCI, DAG);
14531 case Intrinsic::aarch64_neon_saddv:
14532 return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
14533 case Intrinsic::aarch64_neon_uaddv:
14534 return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
14535 case Intrinsic::aarch64_neon_sminv:
14536 return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
14537 case Intrinsic::aarch64_neon_uminv:
14538 return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
14539 case Intrinsic::aarch64_neon_smaxv:
14540 return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
14541 case Intrinsic::aarch64_neon_umaxv:
14542 return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
14543 case Intrinsic::aarch64_neon_fmax:
14544 return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
14545 N->getOperand(1), N->getOperand(2));
14546 case Intrinsic::aarch64_neon_fmin:
14547 return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
14548 N->getOperand(1), N->getOperand(2));
14549 case Intrinsic::aarch64_neon_fmaxnm:
14550 return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
14551 N->getOperand(1), N->getOperand(2));
14552 case Intrinsic::aarch64_neon_fminnm:
14553 return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
14554 N->getOperand(1), N->getOperand(2));
14555 case Intrinsic::aarch64_neon_smull:
14556 case Intrinsic::aarch64_neon_umull:
14557 case Intrinsic::aarch64_neon_pmull:
14558 case Intrinsic::aarch64_neon_sqdmull:
14559 return tryCombineLongOpWithDup(IID, N, DCI, DAG);
14560 case Intrinsic::aarch64_neon_sqshl:
14561 case Intrinsic::aarch64_neon_uqshl:
14562 case Intrinsic::aarch64_neon_sqshlu:
14563 case Intrinsic::aarch64_neon_srshl:
14564 case Intrinsic::aarch64_neon_urshl:
14565 case Intrinsic::aarch64_neon_sshl:
14566 case Intrinsic::aarch64_neon_ushl:
14567 return tryCombineShiftImm(IID, N, DAG);
14568 case Intrinsic::aarch64_crc32b:
14569 case Intrinsic::aarch64_crc32cb:
14570 return tryCombineCRC32(0xff, N, DAG);
14571 case Intrinsic::aarch64_crc32h:
14572 case Intrinsic::aarch64_crc32ch:
14573 return tryCombineCRC32(0xffff, N, DAG);
14574 case Intrinsic::aarch64_sve_saddv:
14575 // There is no i64 version of SADDV because the sign is irrelevant.
14576 if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
14577 return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
14578 else
14579 return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
14580 case Intrinsic::aarch64_sve_uaddv:
14581 return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
14582 case Intrinsic::aarch64_sve_smaxv:
14583 return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
14584 case Intrinsic::aarch64_sve_umaxv:
14585 return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
14586 case Intrinsic::aarch64_sve_sminv:
14587 return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
14588 case Intrinsic::aarch64_sve_uminv:
14589 return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
14590 case Intrinsic::aarch64_sve_orv:
14591 return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
14592 case Intrinsic::aarch64_sve_eorv:
14593 return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
14594 case Intrinsic::aarch64_sve_andv:
14595 return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
14596 case Intrinsic::aarch64_sve_index:
14597 return LowerSVEIntrinsicIndex(N, DAG);
14598 case Intrinsic::aarch64_sve_dup:
14599 return LowerSVEIntrinsicDUP(N, DAG);
14600 case Intrinsic::aarch64_sve_dup_x:
14601 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
14602 N->getOperand(1));
14603 case Intrinsic::aarch64_sve_ext:
14604 return LowerSVEIntrinsicEXT(N, DAG);
14605 case Intrinsic::aarch64_sve_mul:
14606 return convertMergedOpToPredOp(N, AArch64ISD::MUL_PRED, DAG);
14607 case Intrinsic::aarch64_sve_smulh:
14608 return convertMergedOpToPredOp(N, AArch64ISD::MULHS_PRED, DAG);
14609 case Intrinsic::aarch64_sve_umulh:
14610 return convertMergedOpToPredOp(N, AArch64ISD::MULHU_PRED, DAG);
14611 case Intrinsic::aarch64_sve_smin:
14612 return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
14613 case Intrinsic::aarch64_sve_umin:
14614 return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
14615 case Intrinsic::aarch64_sve_smax:
14616 return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
14617 case Intrinsic::aarch64_sve_umax:
14618 return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
14619 case Intrinsic::aarch64_sve_lsl:
14620 return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
14621 case Intrinsic::aarch64_sve_lsr:
14622 return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
14623 case Intrinsic::aarch64_sve_asr:
14624 return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
14625 case Intrinsic::aarch64_sve_fadd:
14626 return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
14627 case Intrinsic::aarch64_sve_fsub:
14628 return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
14629 case Intrinsic::aarch64_sve_fmul:
14630 return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
14631 case Intrinsic::aarch64_sve_add:
14632 return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
14633 case Intrinsic::aarch64_sve_sub:
14634 return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
14635 case Intrinsic::aarch64_sve_and:
14636 return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
14637 case Intrinsic::aarch64_sve_bic:
14638 return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
14639 case Intrinsic::aarch64_sve_eor:
14640 return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
14641 case Intrinsic::aarch64_sve_orr:
14642 return convertMergedOpToPredOp(N, ISD::OR, DAG, true);
14643 case Intrinsic::aarch64_sve_sqadd:
14644 return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true);
14645 case Intrinsic::aarch64_sve_sqsub:
14646 return convertMergedOpToPredOp(N, ISD::SSUBSAT, DAG, true);
14647 case Intrinsic::aarch64_sve_uqadd:
14648 return convertMergedOpToPredOp(N, ISD::UADDSAT, DAG, true);
14649 case Intrinsic::aarch64_sve_uqsub:
14650 return convertMergedOpToPredOp(N, ISD::USUBSAT, DAG, true);
14651 case Intrinsic::aarch64_sve_sqadd_x:
14652 return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
14653 N->getOperand(1), N->getOperand(2));
14654 case Intrinsic::aarch64_sve_sqsub_x:
14655 return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
14656 N->getOperand(1), N->getOperand(2));
14657 case Intrinsic::aarch64_sve_uqadd_x:
14658 return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
14659 N->getOperand(1), N->getOperand(2));
14660 case Intrinsic::aarch64_sve_uqsub_x:
14661 return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
14662 N->getOperand(1), N->getOperand(2));
14663 case Intrinsic::aarch64_sve_cmphs:
14664 if (!N->getOperand(2).getValueType().isFloatingPoint())
14665 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14666 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14667 N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
14668 break;
14669 case Intrinsic::aarch64_sve_cmphi:
14670 if (!N->getOperand(2).getValueType().isFloatingPoint())
14671 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14672 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14673 N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
14674 break;
14675 case Intrinsic::aarch64_sve_fcmpge:
14676 case Intrinsic::aarch64_sve_cmpge:
14677 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14678 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14679 N->getOperand(3), DAG.getCondCode(ISD::SETGE));
14680 break;
14681 case Intrinsic::aarch64_sve_fcmpgt:
14682 case Intrinsic::aarch64_sve_cmpgt:
14683 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14684 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14685 N->getOperand(3), DAG.getCondCode(ISD::SETGT));
14686 break;
14687 case Intrinsic::aarch64_sve_fcmpeq:
14688 case Intrinsic::aarch64_sve_cmpeq:
14689 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14690 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14691 N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
14692 break;
14693 case Intrinsic::aarch64_sve_fcmpne:
14694 case Intrinsic::aarch64_sve_cmpne:
14695 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14696 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14697 N->getOperand(3), DAG.getCondCode(ISD::SETNE));
14698 break;
14699 case Intrinsic::aarch64_sve_fcmpuo:
14700 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14701 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14702 N->getOperand(3), DAG.getCondCode(ISD::SETUO));
14703 break;
14704 case Intrinsic::aarch64_sve_fadda:
14705 return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
14706 case Intrinsic::aarch64_sve_faddv:
14707 return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);
14708 case Intrinsic::aarch64_sve_fmaxnmv:
14709 return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);
14710 case Intrinsic::aarch64_sve_fmaxv:
14711 return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);
14712 case Intrinsic::aarch64_sve_fminnmv:
14713 return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
14714 case Intrinsic::aarch64_sve_fminv:
14715 return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG);
14716 case Intrinsic::aarch64_sve_sel:
14717 return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
14718 N->getOperand(1), N->getOperand(2), N->getOperand(3));
14719 case Intrinsic::aarch64_sve_cmpeq_wide:
14720 return tryConvertSVEWideCompare(N, ISD::SETEQ, DCI, DAG);
14721 case Intrinsic::aarch64_sve_cmpne_wide:
14722 return tryConvertSVEWideCompare(N, ISD::SETNE, DCI, DAG);
14723 case Intrinsic::aarch64_sve_cmpge_wide:
14724 return tryConvertSVEWideCompare(N, ISD::SETGE, DCI, DAG);
14725 case Intrinsic::aarch64_sve_cmpgt_wide:
14726 return tryConvertSVEWideCompare(N, ISD::SETGT, DCI, DAG);
14727 case Intrinsic::aarch64_sve_cmplt_wide:
14728 return tryConvertSVEWideCompare(N, ISD::SETLT, DCI, DAG);
14729 case Intrinsic::aarch64_sve_cmple_wide:
14730 return tryConvertSVEWideCompare(N, ISD::SETLE, DCI, DAG);
14731 case Intrinsic::aarch64_sve_cmphs_wide:
14732 return tryConvertSVEWideCompare(N, ISD::SETUGE, DCI, DAG);
14733 case Intrinsic::aarch64_sve_cmphi_wide:
14734 return tryConvertSVEWideCompare(N, ISD::SETUGT, DCI, DAG);
14735 case Intrinsic::aarch64_sve_cmplo_wide:
14736 return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
14737 case Intrinsic::aarch64_sve_cmpls_wide:
14738 return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
14739 case Intrinsic::aarch64_sve_ptest_any:
14740 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
14741 AArch64CC::ANY_ACTIVE);
14742 case Intrinsic::aarch64_sve_ptest_first:
14743 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
14744 AArch64CC::FIRST_ACTIVE);
14745 case Intrinsic::aarch64_sve_ptest_last:
14746 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
14747 AArch64CC::LAST_ACTIVE);
14748 }
14749 return SDValue();
14750}
14751
14752static SDValue performExtendCombine(SDNode *N,
14753 TargetLowering::DAGCombinerInfo &DCI,
14754 SelectionDAG &DAG) {
14755 // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
14756 // we can convert that DUP into another extract_high (of a bigger DUP), which
14757 // helps the backend to decide that an sabdl2 would be useful, saving a real
14758 // extract_high operation.
14759 if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
14760 (N->getOperand(0).getOpcode() == ISD::ABDU ||
14761 N->getOperand(0).getOpcode() == ISD::ABDS)) {
14762 SDNode *ABDNode = N->getOperand(0).getNode();
14763 SDValue NewABD =
14764 tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG);
14765 if (!NewABD.getNode())
14766 return SDValue();
14767
14768 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
14769 }
14770 return SDValue();
14771}
14772
14773static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
14774 SDValue SplatVal, unsigned NumVecElts) {
14775  assert(!St.isTruncatingStore() && "cannot split truncating vector store");
14776 unsigned OrigAlignment = St.getAlignment();
14777 unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
14778
14779 // Create scalar stores. This is at least as good as the code sequence for a
14780 // split unaligned store which is a dup.s, ext.b, and two stores.
14781 // Most of the time the three stores should be replaced by store pair
14782 // instructions (stp).
14783 SDLoc DL(&St);
14784 SDValue BasePtr = St.getBasePtr();
14785 uint64_t BaseOffset = 0;
14786
14787 const MachinePointerInfo &PtrInfo = St.getPointerInfo();
14788 SDValue NewST1 =
14789 DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
14790 OrigAlignment, St.getMemOperand()->getFlags());
14791
14792  // As this is in ISel, we will not merge this add, which may degrade results.
14793 if (BasePtr->getOpcode() == ISD::ADD &&
14794 isa<ConstantSDNode>(BasePtr->getOperand(1))) {
14795 BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
14796 BasePtr = BasePtr->getOperand(0);
14797 }
14798
14799 unsigned Offset = EltOffset;
14800 while (--NumVecElts) {
14801 unsigned Alignment = MinAlign(OrigAlignment, Offset);
14802 SDValue OffsetPtr =
14803 DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
14804 DAG.getConstant(BaseOffset + Offset, DL, MVT::i64));
14805 NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
14806 PtrInfo.getWithOffset(Offset), Alignment,
14807 St.getMemOperand()->getFlags());
14808 Offset += EltOffset;
14809 }
14810 return NewST1;
14811}
14812
14813// Returns an SVE type that ContentTy can be trivially sign or zero extended
14814// into.
14815static MVT getSVEContainerType(EVT ContentTy) {
14816  assert(ContentTy.isSimple() && "No SVE containers for extended types");
14817
14818 switch (ContentTy.getSimpleVT().SimpleTy) {
14819 default:
14820    llvm_unreachable("No known SVE container for this MVT type");
14821 case MVT::nxv2i8:
14822 case MVT::nxv2i16:
14823 case MVT::nxv2i32:
14824 case MVT::nxv2i64:
14825 case MVT::nxv2f32:
14826 case MVT::nxv2f64:
14827 return MVT::nxv2i64;
14828 case MVT::nxv4i8:
14829 case MVT::nxv4i16:
14830 case MVT::nxv4i32:
14831 case MVT::nxv4f32:
14832 return MVT::nxv4i32;
14833 case MVT::nxv8i8:
14834 case MVT::nxv8i16:
14835 case MVT::nxv8f16:
14836 case MVT::nxv8bf16:
14837 return MVT::nxv8i16;
14838 case MVT::nxv16i8:
14839 return MVT::nxv16i8;
14840 }
14841}
14842
14843static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
14844 SDLoc DL(N);
14845 EVT VT = N->getValueType(0);
14846
14847 if (VT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
14848 return SDValue();
14849
14850 EVT ContainerVT = VT;
14851 if (ContainerVT.isInteger())
14852 ContainerVT = getSVEContainerType(ContainerVT);
14853
14854 SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other);
14855 SDValue Ops[] = { N->getOperand(0), // Chain
14856 N->getOperand(2), // Pg
14857 N->getOperand(3), // Base
14858 DAG.getValueType(VT) };
14859
14860 SDValue Load = DAG.getNode(Opc, DL, VTs, Ops);
14861 SDValue LoadChain = SDValue(Load.getNode(), 1);
14862
14863 if (ContainerVT.isInteger() && (VT != ContainerVT))
14864 Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0));
14865
14866 return DAG.getMergeValues({ Load, LoadChain }, DL);
14867}
14868
14869static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
14870 SDLoc DL(N);
14871 EVT VT = N->getValueType(0);
14872 EVT PtrTy = N->getOperand(3).getValueType();
14873
14874 if (VT == MVT::nxv8bf16 &&
14875 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
14876 return SDValue();
14877
14878 EVT LoadVT = VT;
14879 if (VT.isFloatingPoint())
14880 LoadVT = VT.changeTypeToInteger();
14881
14882 auto *MINode = cast<MemIntrinsicSDNode>(N);
14883 SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
14884 SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
14885 MINode->getOperand(3), DAG.getUNDEF(PtrTy),
14886 MINode->getOperand(2), PassThru,
14887 MINode->getMemoryVT(), MINode->getMemOperand(),
14888 ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
14889
14890 if (VT.isFloatingPoint()) {
14891 SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
14892 return DAG.getMergeValues(Ops, DL);
14893 }
14894
14895 return L;
14896}
14897
14898template <unsigned Opcode>
14899static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
14900 static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
14901 Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
14902 "Unsupported opcode.");
14903 SDLoc DL(N);
14904 EVT VT = N->getValueType(0);
14905 if (VT == MVT::nxv8bf16 &&
14906 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
14907 return SDValue();
14908
14909 EVT LoadVT = VT;
14910 if (VT.isFloatingPoint())
14911 LoadVT = VT.changeTypeToInteger();
14912
14913 SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
14914 SDValue Load = DAG.getNode(Opcode, DL, {LoadVT, MVT::Other}, Ops);
14915 SDValue LoadChain = SDValue(Load.getNode(), 1);
14916
14917 if (VT.isFloatingPoint())
14918 Load = DAG.getNode(ISD::BITCAST, DL, VT, Load.getValue(0));
14919
14920 return DAG.getMergeValues({Load, LoadChain}, DL);
14921}
14922
14923static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) {
14924 SDLoc DL(N);
14925 SDValue Data = N->getOperand(2);
14926 EVT DataVT = Data.getValueType();
14927 EVT HwSrcVt = getSVEContainerType(DataVT);
14928 SDValue InputVT = DAG.getValueType(DataVT);
14929
14930 if (DataVT == MVT::nxv8bf16 &&
14931 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
14932 return SDValue();
14933
14934 if (DataVT.isFloatingPoint())
14935 InputVT = DAG.getValueType(HwSrcVt);
14936
14937 SDValue SrcNew;
14938 if (Data.getValueType().isFloatingPoint())
14939 SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Data);
14940 else
14941 SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data);
14942
14943 SDValue Ops[] = { N->getOperand(0), // Chain
14944 SrcNew,
14945 N->getOperand(4), // Base
14946 N->getOperand(3), // Pg
14947 InputVT
14948 };
14949
14950 return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
14951}
14952
14953static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
14954 SDLoc DL(N);
14955
14956 SDValue Data = N->getOperand(2);
14957 EVT DataVT = Data.getValueType();
14958 EVT PtrTy = N->getOperand(4).getValueType();
14959
14960 if (DataVT == MVT::nxv8bf16 &&
14961 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
14962 return SDValue();
14963
14964 if (DataVT.isFloatingPoint())
14965 Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
14966
14967 auto *MINode = cast<MemIntrinsicSDNode>(N);
14968 return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
14969 DAG.getUNDEF(PtrTy), MINode->getOperand(3),
14970 MINode->getMemoryVT(), MINode->getMemOperand(),
14971 ISD::UNINDEXED, false, false);
14972}
14973
14974/// Replace a vector store of a zero splat by scalar stores of WZR/XZR. The
14975/// load store optimizer pass will merge them to store pair stores. This should
14976/// be better than a movi to create the vector zero followed by a vector store
14977/// if the zero constant is not re-used, since one instruction and one register
14978/// live range will be removed.
14979///
14980/// For example, the final generated code should be:
14981///
14982/// stp xzr, xzr, [x0]
14983///
14984/// instead of:
14985///
14986/// movi v0.2d, #0
14987/// str q0, [x0]
14988///
14989static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
14990 SDValue StVal = St.getValue();
14991 EVT VT = StVal.getValueType();
14992
14993 // Avoid scalarizing zero splat stores for scalable vectors.
14994 if (VT.isScalableVector())
14995 return SDValue();
14996
14997 // It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or
14998 // 2, 3 or 4 i32 elements.
14999 int NumVecElts = VT.getVectorNumElements();
15000 if (!(((NumVecElts == 2 || NumVecElts == 3) &&
15001 VT.getVectorElementType().getSizeInBits() == 64) ||
15002 ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
15003 VT.getVectorElementType().getSizeInBits() == 32)))
15004 return SDValue();
15005
15006 if (StVal.getOpcode() != ISD::BUILD_VECTOR)
15007 return SDValue();
15008
15009 // If the zero constant has more than one use then the vector store could be
15010 // better since the constant mov will be amortized and stp q instructions
15011 // should be able to be formed.
15012 if (!StVal.hasOneUse())
15013 return SDValue();
15014
15015 // If the store is truncating then it's going down to i16 or smaller, which
15016 // means it can be implemented in a single store anyway.
15017 if (St.isTruncatingStore())
15018 return SDValue();
15019
15020 // If the immediate offset of the address operand is too large for the stp
15021 // instruction, then bail out.
15022 if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
15023 int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
15024 if (Offset < -512 || Offset > 504)
15025 return SDValue();
15026 }
15027
15028 for (int I = 0; I < NumVecElts; ++I) {
15029 SDValue EltVal = StVal.getOperand(I);
15030 if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal))
15031 return SDValue();
15032 }
15033
15034 // Use a CopyFromReg WZR/XZR here to prevent
15035 // DAGCombiner::MergeConsecutiveStores from undoing this transformation.
15036 SDLoc DL(&St);
15037 unsigned ZeroReg;
15038 EVT ZeroVT;
15039 if (VT.getVectorElementType().getSizeInBits() == 32) {
15040 ZeroReg = AArch64::WZR;
15041 ZeroVT = MVT::i32;
15042 } else {
15043 ZeroReg = AArch64::XZR;
15044 ZeroVT = MVT::i64;
15045 }
15046 SDValue SplatVal =
15047 DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT);
15048 return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
15049}
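A minimal sketch of the kind of source this combine targets (a hypothetical illustration written for this report, not code from the file): a store of an all-zero v2i64 that, after the rewrite above, should lower to a single store pair of XZR instead of a movi plus a vector store.

// Hypothetical illustration (assumes Clang's vector_size extension); not code from this file.
typedef long long v2i64 __attribute__((vector_size(16)));

void store_zero(v2i64 *p) {
  v2i64 Zero = {0, 0};
  *p = Zero; // a v2i64 zero-splat store; expected to lower to "stp xzr, xzr, [x0]"
}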
15050
15051/// Replace a vector store of a scalar splat by scalar stores of the scalar
15052/// value. The load store optimizer pass will merge them to store pair stores.
15053/// This has better performance than a splat of the scalar followed by a split
15054/// vector store. Even if the stores are not merged it is four stores vs a dup,
15055/// followed by an ext.b and two stores.
15056static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
15057 SDValue StVal = St.getValue();
15058 EVT VT = StVal.getValueType();
15059
15060 // Don't replace floating point stores, they possibly won't be transformed to
15061 // stp because of the store pair suppress pass.
15062 if (VT.isFloatingPoint())
15063 return SDValue();
15064
15065 // We can express a splat as store pair(s) for 2 or 4 elements.
15066 unsigned NumVecElts = VT.getVectorNumElements();
15067 if (NumVecElts != 4 && NumVecElts != 2)
15068 return SDValue();
15069
15070 // If the store is truncating then it's going down to i16 or smaller, which
15071 // means it can be implemented in a single store anyway.
15072 if (St.isTruncatingStore())
15073 return SDValue();
15074
15075 // Check that this is a splat.
15076 // Make sure that each of the relevant vector element locations are inserted
15077 // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
15078 std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
15079 SDValue SplatVal;
15080 for (unsigned I = 0; I < NumVecElts; ++I) {
15081 // Check for insert vector elements.
15082 if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
15083 return SDValue();
15084
15085 // Check that same value is inserted at each vector element.
15086 if (I == 0)
15087 SplatVal = StVal.getOperand(1);
15088 else if (StVal.getOperand(1) != SplatVal)
15089 return SDValue();
15090
15091 // Check insert element index.
15092 ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
15093 if (!CIndex)
15094 return SDValue();
15095 uint64_t IndexVal = CIndex->getZExtValue();
15096 if (IndexVal >= NumVecElts)
15097 return SDValue();
15098 IndexNotInserted.reset(IndexVal);
15099
15100 StVal = StVal.getOperand(0);
15101 }
15102 // Check that all vector element locations were inserted to.
15103 if (IndexNotInserted.any())
15104 return SDValue();
15105
15106 return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
15107}
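For comparison, a hedged sketch of the non-zero scalar-splat store that replaceSplatVectorStore is meant to catch (illustrative only; the type and function names are assumptions):

// Hypothetical illustration (assumes Clang's vector_size extension); not code from this file.
typedef int v4i32 __attribute__((vector_size(16)));

void store_splat(v4i32 *p, int x) {
  v4i32 V = {x, x, x, x};
  *p = V; // a v4i32 splat store; four scalar stores should pair into two stp instructions
}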
15108
15109static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
15110 SelectionDAG &DAG,
15111 const AArch64Subtarget *Subtarget) {
15112
15113 StoreSDNode *S = cast<StoreSDNode>(N);
15114 if (S->isVolatile() || S->isIndexed())
15115 return SDValue();
15116
15117 SDValue StVal = S->getValue();
15118 EVT VT = StVal.getValueType();
15119
15120 if (!VT.isFixedLengthVector())
15121 return SDValue();
15122
15123 // If we get a splat of zeros, convert this vector store to a store of
15124 // scalars. They will be merged into store pairs of xzr thereby removing one
15125 // instruction and one register.
15126 if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S))
15127 return ReplacedZeroSplat;
15128
15129 // FIXME: The logic for deciding if an unaligned store should be split should
15130 // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
15131 // a call to that function here.
15132
15133 if (!Subtarget->isMisaligned128StoreSlow())
15134 return SDValue();
15135
15136 // Don't split at -Oz.
15137 if (DAG.getMachineFunction().getFunction().hasMinSize())
15138 return SDValue();
15139
15140 // Don't split v2i64 vectors. Memcpy lowering produces those and splitting
15141 // those up regresses performance on micro-benchmarks and olden/bh.
15142 if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
15143 return SDValue();
15144
15145 // Split unaligned 16B stores. They are terrible for performance.
15146 // Don't split stores with alignment of 1 or 2. Code that uses clang vector
15147 // extensions can use this to mark that it does not want splitting to happen
15148 // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
15149 // eliminating alignment hazards is only 1 in 8 for alignment of 2.
15150 if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
15151 S->getAlignment() <= 2)
15152 return SDValue();
15153
15154 // If we get a splat of a scalar convert this vector store to a store of
15155 // scalars. They will be merged into store pairs thereby removing two
15156 // instructions.
15157 if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S))
15158 return ReplacedSplat;
15159
15160 SDLoc DL(S);
15161
15162 // Split VT into two.
15163 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
15164 unsigned NumElts = HalfVT.getVectorNumElements();
15165 SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
15166 DAG.getConstant(0, DL, MVT::i64));
15167 SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
15168 DAG.getConstant(NumElts, DL, MVT::i64));
15169 SDValue BasePtr = S->getBasePtr();
15170 SDValue NewST1 =
15171 DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
15172 S->getAlignment(), S->getMemOperand()->getFlags());
15173 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
15174 DAG.getConstant(8, DL, MVT::i64));
15175 return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
15176 S->getPointerInfo(), S->getAlignment(),
15177 S->getMemOperand()->getFlags());
15178}
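As a rough illustration of the final split path in splitStores (an assumption about how such a store can arise, not code from the file), an underaligned 16-byte vector store has the shape that gets broken into two halves:

// Hypothetical illustration; the aligned(4) attribute deliberately gives the store an
// alignment between 2 and 16, which is the range the check above splits. Marking the
// type aligned(1) or aligned(2) instead would opt out of the split, as the comment notes.
typedef int v4i32_a4 __attribute__((vector_size(16), aligned(4)));

void store_underaligned(v4i32_a4 *p, v4i32_a4 v) {
  *p = v; // a 128-bit store with alignment 4; may be split into two 64-bit stores
}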
15179
15180static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG) {
15181  assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
15182
15183 // splice(pg, op1, undef) -> op1
15184 if (N->getOperand(2).isUndef())
15185 return N->getOperand(1);
15186
15187 return SDValue();
15188}
15189
15190static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
15191 SDLoc DL(N);
15192 SDValue Op0 = N->getOperand(0);
15193 SDValue Op1 = N->getOperand(1);
15194 EVT ResVT = N->getValueType(0);
15195
15196 // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
15197 if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
15198 if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
15199 SDValue X = Op0.getOperand(0).getOperand(0);
15200 return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
15201 }
15202 }
15203
15204 // uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
15205 if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
15206 if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
15207 SDValue Z = Op1.getOperand(0).getOperand(1);
15208 return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
15209 }
15210 }
15211
15212 return SDValue();
15213}
15214
15215static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG) {
15216 unsigned Opc = N->getOpcode();
15217
15218  assert(((Opc >= AArch64ISD::GLD1_MERGE_ZERO && // unsigned gather loads
15219           Opc <= AArch64ISD::GLD1_IMM_MERGE_ZERO) ||
15220          (Opc >= AArch64ISD::GLD1S_MERGE_ZERO && // signed gather loads
15221           Opc <= AArch64ISD::GLD1S_IMM_MERGE_ZERO)) &&
15222         "Invalid opcode.");
15223
15224 const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO ||
15225 Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
15226 const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO ||
15227 Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
15228 const bool Extended = Opc == AArch64ISD::GLD1_SXTW_MERGE_ZERO ||
15229 Opc == AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO ||
15230 Opc == AArch64ISD::GLD1_UXTW_MERGE_ZERO ||
15231 Opc == AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO;
15232
15233 SDLoc DL(N);
15234 SDValue Chain = N->getOperand(0);
15235 SDValue Pg = N->getOperand(1);
15236 SDValue Base = N->getOperand(2);
15237 SDValue Offset = N->getOperand(3);
15238 SDValue Ty = N->getOperand(4);
15239
15240 EVT ResVT = N->getValueType(0);
15241
15242 const auto OffsetOpc = Offset.getOpcode();
15243 const bool OffsetIsZExt =
15244 OffsetOpc == AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU;
15245 const bool OffsetIsSExt =
15246 OffsetOpc == AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU;
15247
15248 // Fold sign/zero extensions of vector offsets into GLD1 nodes where possible.
15249 if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
15250 SDValue ExtPg = Offset.getOperand(0);
15251 VTSDNode *ExtFrom = cast<VTSDNode>(Offset.getOperand(2).getNode());
15252 EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType();
15253
15254 // If the predicate for the sign- or zero-extended offset is the
15255 // same as the predicate used for this load and the sign-/zero-extension
15256    // was from 32 bits...
15257 if (ExtPg == Pg && ExtFromEVT == MVT::i32) {
15258 SDValue UnextendedOffset = Offset.getOperand(1);
15259
15260 unsigned NewOpc = getGatherVecOpcode(Scaled, OffsetIsSExt, true);
15261 if (Signed)
15262 NewOpc = getSignExtendedGatherOpcode(NewOpc);
15263
15264 return DAG.getNode(NewOpc, DL, {ResVT, MVT::Other},
15265 {Chain, Pg, Base, UnextendedOffset, Ty});
15266 }
15267 }
15268
15269 return SDValue();
15270}
15271
15272/// Optimize a vector shift instruction and its operand if shifted out
15273/// bits are not used.
15274static SDValue performVectorShiftCombine(SDNode *N,
15275 const AArch64TargetLowering &TLI,
15276 TargetLowering::DAGCombinerInfo &DCI) {
15277  assert(N->getOpcode() == AArch64ISD::VASHR ||
15278         N->getOpcode() == AArch64ISD::VLSHR);
15279
15280 SDValue Op = N->getOperand(0);
15281 unsigned OpScalarSize = Op.getScalarValueSizeInBits();
15282
15283 unsigned ShiftImm = N->getConstantOperandVal(1);
15284  assert(OpScalarSize > ShiftImm && "Invalid shift imm");
15285
15286 APInt ShiftedOutBits = APInt::getLowBitsSet(OpScalarSize, ShiftImm);
15287 APInt DemandedMask = ~ShiftedOutBits;
15288
15289 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15290 return SDValue(N, 0);
15291
15292 return SDValue();
15293}
15294
15295/// Target-specific DAG combine function for post-increment LD1 (lane) and
15296/// post-increment LD1R.
15297static SDValue performPostLD1Combine(SDNode *N,
15298 TargetLowering::DAGCombinerInfo &DCI,
15299 bool IsLaneOp) {
15300 if (DCI.isBeforeLegalizeOps())
15301 return SDValue();
15302
15303 SelectionDAG &DAG = DCI.DAG;
15304 EVT VT = N->getValueType(0);
15305
15306 if (VT.isScalableVector())
15307 return SDValue();
15308
15309 unsigned LoadIdx = IsLaneOp ? 1 : 0;
15310 SDNode *LD = N->getOperand(LoadIdx).getNode();
15311  // If it is not a LOAD, we cannot do this combine.
15312 if (LD->getOpcode() != ISD::LOAD)
15313 return SDValue();
15314
15315 // The vector lane must be a constant in the LD1LANE opcode.
15316 SDValue Lane;
15317 if (IsLaneOp) {
15318 Lane = N->getOperand(2);
15319 auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
15320 if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
15321 return SDValue();
15322 }
15323
15324 LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
15325 EVT MemVT = LoadSDN->getMemoryVT();
15326 // Check if memory operand is the same type as the vector element.
15327 if (MemVT != VT.getVectorElementType())
15328 return SDValue();
15329
15330 // Check if there are other uses. If so, do not combine as it will introduce
15331 // an extra load.
15332 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
15333 ++UI) {
15334 if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
15335 continue;
15336 if (*UI != N)
15337 return SDValue();
15338 }
15339
15340 SDValue Addr = LD->getOperand(1);
15341 SDValue Vector = N->getOperand(0);
15342 // Search for a use of the address operand that is an increment.
15343 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
15344 Addr.getNode()->use_end(); UI != UE; ++UI) {
15345 SDNode *User = *UI;
15346 if (User->getOpcode() != ISD::ADD
15347 || UI.getUse().getResNo() != Addr.getResNo())
15348 continue;
15349
15350 // If the increment is a constant, it must match the memory ref size.
15351 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
15352 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
15353 uint32_t IncVal = CInc->getZExtValue();
15354 unsigned NumBytes = VT.getScalarSizeInBits() / 8;
15355 if (IncVal != NumBytes)
15356 continue;
15357 Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
15358 }
15359
15360 // To avoid cycle construction make sure that neither the load nor the add
15361 // are predecessors to each other or the Vector.
15362 SmallPtrSet<const SDNode *, 32> Visited;
15363 SmallVector<const SDNode *, 16> Worklist;
15364 Visited.insert(Addr.getNode());
15365 Worklist.push_back(User);
15366 Worklist.push_back(LD);
15367 Worklist.push_back(Vector.getNode());
15368 if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
15369 SDNode::hasPredecessorHelper(User, Visited, Worklist))
15370 continue;
15371
15372 SmallVector<SDValue, 8> Ops;
15373 Ops.push_back(LD->getOperand(0)); // Chain
15374 if (IsLaneOp) {
15375 Ops.push_back(Vector); // The vector to be inserted
15376 Ops.push_back(Lane); // The lane to be inserted in the vector
15377 }
15378 Ops.push_back(Addr);
15379 Ops.push_back(Inc);
15380
15381 EVT Tys[3] = { VT, MVT::i64, MVT::Other };
15382 SDVTList SDTys = DAG.getVTList(Tys);
15383 unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
15384 SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
15385 MemVT,
15386 LoadSDN->getMemOperand());
15387
15388 // Update the uses.
15389 SDValue NewResults[] = {
15390 SDValue(LD, 0), // The result of load
15391 SDValue(UpdN.getNode(), 2) // Chain
15392 };
15393 DCI.CombineTo(LD, NewResults);
15394 DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
15395 DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
15396
15397 break;
15398 }
15399 return SDValue();
15400}
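A hedged sketch of the load-splat-plus-pointer-bump pattern the post-increment LD1R combine above looks for (illustrative; the function and type names are assumptions):

// Hypothetical illustration (assumes Clang's vector_size extension); not code from this file.
typedef int v4i32 __attribute__((vector_size(16)));

v4i32 splat_and_advance(const int **pp) {
  const int *p = *pp;
  v4i32 V = {*p, *p, *p, *p}; // a dup of a loaded scalar -> LD1R
  *pp = p + 1;                // the address is bumped by the element size (4 bytes)
  return V;                   // both together match the post-increment LD1R form
}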
15401
15402/// Simplify ``Addr`` given that the top byte of it is ignored by HW during
15403/// address translation.
15404static bool performTBISimplification(SDValue Addr,
15405 TargetLowering::DAGCombinerInfo &DCI,
15406 SelectionDAG &DAG) {
15407 APInt DemandedMask = APInt::getLowBitsSet(64, 56);
15408 KnownBits Known;
15409 TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
15410 !DCI.isBeforeLegalizeOps());
15411 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15412 if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) {
15413 DCI.CommitTargetLoweringOpt(TLO);
15414 return true;
15415 }
15416 return false;
15417}
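A minimal sketch of what top-byte-ignore lets the combine above do (an illustration under the assumption that TBI is in effect; not code from the file): only the low 56 address bits are demanded, so logic that touches nothing but the top byte can be simplified out of the address computation.

#include <cstdint>

// Hypothetical illustration; the tag value and function are made up. With TBI, bits
// 56..63 of a pointer are ignored during address translation, so the OR that plants
// the tag contributes nothing to the 56 demanded address bits and can be dropped.
int load_tagged(int *p, uint8_t tag) {
  uintptr_t addr = reinterpret_cast<uintptr_t>(p) | (static_cast<uintptr_t>(tag) << 56);
  return *reinterpret_cast<int *>(addr);
}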
15418
15419static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
15420  assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) &&
15421         "Expected STORE dag node in input!");
15422
15423 if (auto Store = dyn_cast<StoreSDNode>(N)) {
15424 if (!Store->isTruncatingStore() || Store->isIndexed())
15425 return SDValue();
15426 SDValue Ext = Store->getValue();
15427 auto ExtOpCode = Ext.getOpcode();
15428 if (ExtOpCode != ISD::ZERO_EXTEND && ExtOpCode != ISD::SIGN_EXTEND &&
15429 ExtOpCode != ISD::ANY_EXTEND)
15430 return SDValue();
15431 SDValue Orig = Ext->getOperand(0);
15432 if (Store->getMemoryVT() != Orig.getValueType())
15433 return SDValue();
15434 return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
15435 Store->getBasePtr(), Store->getMemOperand());
15436 }
15437
15438 return SDValue();
15439}
15440
15441static SDValue performSTORECombine(SDNode *N,
15442 TargetLowering::DAGCombinerInfo &DCI,
15443 SelectionDAG &DAG,
15444 const AArch64Subtarget *Subtarget) {
15445 if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
15446 return Split;
15447
15448 if (Subtarget->supportsAddressTopByteIgnored() &&
15449 performTBISimplification(N->getOperand(2), DCI, DAG))
15450 return SDValue(N, 0);
15451
15452 if (SDValue Store = foldTruncStoreOfExt(DAG, N))
15453 return Store;
15454
15455 return SDValue();
15456}
15457
15458/// Target-specific DAG combine function for NEON load/store intrinsics
15459/// to merge base address updates.
15460static SDValue performNEONPostLDSTCombine(SDNode *N,
15461 TargetLowering::DAGCombinerInfo &DCI,
15462 SelectionDAG &DAG) {
15463 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
15464 return SDValue();
15465
15466 unsigned AddrOpIdx = N->getNumOperands() - 1;
15467 SDValue Addr = N->getOperand(AddrOpIdx);
15468
15469 // Search for a use of the address operand that is an increment.
15470 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
15471 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
15472 SDNode *User = *UI;
15473 if (User->getOpcode() != ISD::ADD ||
15474 UI.getUse().getResNo() != Addr.getResNo())
15475 continue;
15476
15477 // Check that the add is independent of the load/store. Otherwise, folding
15478 // it would create a cycle.
15479 SmallPtrSet<const SDNode *, 32> Visited;
15480 SmallVector<const SDNode *, 16> Worklist;
15481 Visited.insert(Addr.getNode());
15482 Worklist.push_back(N);
15483 Worklist.push_back(User);
15484 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
15485 SDNode::hasPredecessorHelper(User, Visited, Worklist))
15486 continue;
15487
15488 // Find the new opcode for the updating load/store.
15489 bool IsStore = false;
15490 bool IsLaneOp = false;
15491 bool IsDupOp = false;
15492 unsigned NewOpc = 0;
15493 unsigned NumVecs = 0;
15494 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
15495 switch (IntNo) {
15496    default: llvm_unreachable("unexpected intrinsic for Neon base update");
15497 case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
15498 NumVecs = 2; break;
15499 case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
15500 NumVecs = 3; break;
15501 case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
15502 NumVecs = 4; break;
15503 case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
15504 NumVecs = 2; IsStore = true; break;
15505 case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
15506 NumVecs = 3; IsStore = true; break;
15507 case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
15508 NumVecs = 4; IsStore = true; break;
15509 case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
15510 NumVecs = 2; break;
15511 case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
15512 NumVecs = 3; break;
15513 case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
15514 NumVecs = 4; break;
15515 case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
15516 NumVecs = 2; IsStore = true; break;
15517 case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
15518 NumVecs = 3; IsStore = true; break;
15519 case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
15520 NumVecs = 4; IsStore = true; break;
15521 case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
15522 NumVecs = 2; IsDupOp = true; break;
15523 case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
15524 NumVecs = 3; IsDupOp = true; break;
15525 case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
15526 NumVecs = 4; IsDupOp = true; break;
15527 case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
15528 NumVecs = 2; IsLaneOp = true; break;
15529 case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
15530 NumVecs = 3; IsLaneOp = true; break;
15531 case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
15532 NumVecs = 4; IsLaneOp = true; break;
15533 case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
15534 NumVecs = 2; IsStore = true; IsLaneOp = true; break;
15535 case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
15536 NumVecs = 3; IsStore = true; IsLaneOp = true; break;
15537 case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
15538 NumVecs = 4; IsStore = true; IsLaneOp = true; break;
15539 }
15540
15541 EVT VecTy;
15542 if (IsStore)
15543 VecTy = N->getOperand(2).getValueType();
15544 else
15545 VecTy = N->getValueType(0);
15546
15547 // If the increment is a constant, it must match the memory ref size.
15548 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
15549 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
15550 uint32_t IncVal = CInc->getZExtValue();
15551 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
15552 if (IsLaneOp || IsDupOp)
15553 NumBytes /= VecTy.getVectorNumElements();
15554 if (IncVal != NumBytes)
15555 continue;
15556 Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
15557 }
15558 SmallVector<SDValue, 8> Ops;
15559 Ops.push_back(N->getOperand(0)); // Incoming chain
15560 // Load lane and store have vector list as input.
15561 if (IsLaneOp || IsStore)
15562 for (unsigned i = 2; i < AddrOpIdx; ++i)
15563 Ops.push_back(N->getOperand(i));
15564 Ops.push_back(Addr); // Base register
15565 Ops.push_back(Inc);
15566
15567 // Return Types.
15568 EVT Tys[6];
15569 unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
15570 unsigned n;
15571 for (n = 0; n < NumResultVecs; ++n)
15572 Tys[n] = VecTy;
15573 Tys[n++] = MVT::i64; // Type of write back register
15574 Tys[n] = MVT::Other; // Type of the chain
15575 SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
15576
15577 MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
15578 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
15579 MemInt->getMemoryVT(),
15580 MemInt->getMemOperand());
15581
15582 // Update the uses.
15583 std::vector<SDValue> NewResults;
15584 for (unsigned i = 0; i < NumResultVecs; ++i) {
15585 NewResults.push_back(SDValue(UpdN.getNode(), i));
15586 }
15587 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
15588 DCI.CombineTo(N, NewResults);
15589 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
15590
15591 break;
15592 }
15593 return SDValue();
15594}
15595
15596// Checks to see if the value is the prescribed width and returns information
15597// about its extension mode.
15598static
15599bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
15600 ExtType = ISD::NON_EXTLOAD;
15601 switch(V.getNode()->getOpcode()) {
15602 default:
15603 return false;
15604 case ISD::LOAD: {
15605 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
15606 if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
15607 || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
15608 ExtType = LoadNode->getExtensionType();
15609 return true;
15610 }
15611 return false;
15612 }
15613 case ISD::AssertSext: {
15614 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
15615 if ((TypeNode->getVT() == MVT::i8 && width == 8)
15616 || (TypeNode->getVT() == MVT::i16 && width == 16)) {
15617 ExtType = ISD::SEXTLOAD;
15618 return true;
15619 }
15620 return false;
15621 }
15622 case ISD::AssertZext: {
15623 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
15624 if ((TypeNode->getVT() == MVT::i8 && width == 8)
15625 || (TypeNode->getVT() == MVT::i16 && width == 16)) {
15626 ExtType = ISD::ZEXTLOAD;
15627 return true;
15628 }
15629 return false;
15630 }
15631 case ISD::Constant:
15632 case ISD::TargetConstant: {
15633 return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
15634 1LL << (width - 1);
15635 }
15636 }
15637
15638 return true;
15639}
15640
15641// This function does a whole lot of voodoo to determine if the tests are
15642// equivalent without and with a mask. Essentially what happens is that given a
15643// DAG resembling:
15644//
15645// +-------------+ +-------------+ +-------------+ +-------------+
15646// | Input | | AddConstant | | CompConstant| | CC |
15647// +-------------+ +-------------+ +-------------+ +-------------+
15648// | | | |
15649// V V | +----------+
15650// +-------------+ +----+ | |
15651// | ADD | |0xff| | |
15652// +-------------+ +----+ | |
15653// | | | |
15654// V V | |
15655// +-------------+ | |
15656// | AND | | |
15657// +-------------+ | |
15658// | | |
15659// +-----+ | |
15660// | | |
15661// V V V
15662// +-------------+
15663// | CMP |
15664// +-------------+
15665//
15666// The AND node may be safely removed for some combinations of inputs. In
15667// particular we need to take into account the extension type of the Input,
15668// the exact values of AddConstant, CompConstant, and CC, along with the nominal
15669// width of the input (this can work for any width inputs, the above graph is
15670// specific to 8 bits.
15671//
15672// The specific equations were worked out by generating output tables for each
15673// AArch64CC value in terms of AddConstant (w1) and CompConstant (w2). The
15674// problem was simplified by working with 4 bit inputs, which means we only
15675// needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
15676// extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
15677// patterns present in both extensions (0,7). For every distinct set of
15678// AddConstant and CompConstant bit patterns we can consider the masked and
15679// unmasked versions to be equivalent if the result of this function is true for
15680// all 16 distinct bit patterns for the current extension type of Input (w0).
15681//
15682// sub w8, w0, w1
15683// and w10, w8, #0x0f
15684// cmp w8, w2
15685// cset w9, AArch64CC
15686// cmp w10, w2
15687// cset w11, AArch64CC
15688// cmp w9, w11
15689// cset w0, eq
15690// ret
15691//
15692// Since the above function shows when the outputs are equivalent it defines
15693// when it is safe to remove the AND. Unfortunately it only runs on AArch64 and
15694// would be expensive to run during compiles. The equations below were written
15695// in a test harness that confirmed they gave outputs equivalent to the above
15696// function for all inputs, so they can be used to determine if the removal is
15697// legal instead.
15698//
15699// isEquivalentMaskless() is the code for testing whether the AND can be removed,
15700// factored out of the DAG recognition because the DAG can take several forms.
15701
15702static bool isEquivalentMaskless(unsigned CC, unsigned width,
15703 ISD::LoadExtType ExtType, int AddConstant,
15704 int CompConstant) {
15705  // By being careful about our equations and only writing them in terms of
15706  // symbolic values and well-known constants (0, 1, -1, MaxUInt) we can
15707 // make them generally applicable to all bit widths.
15708 int MaxUInt = (1 << width);
15709
15710 // For the purposes of these comparisons sign extending the type is
15711 // equivalent to zero extending the add and displacing it by half the integer
15712 // width. Provided we are careful and make sure our equations are valid over
15713 // the whole range we can just adjust the input and avoid writing equations
15714 // for sign extended inputs.
15715 if (ExtType == ISD::SEXTLOAD)
15716 AddConstant -= (1 << (width-1));
15717
15718 switch(CC) {
15719 case AArch64CC::LE:
15720 case AArch64CC::GT:
15721 if ((AddConstant == 0) ||
15722 (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
15723 (AddConstant >= 0 && CompConstant < 0) ||
15724 (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
15725 return true;
15726 break;
15727 case AArch64CC::LT:
15728 case AArch64CC::GE:
15729 if ((AddConstant == 0) ||
15730 (AddConstant >= 0 && CompConstant <= 0) ||
15731 (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
15732 return true;
15733 break;
15734 case AArch64CC::HI:
15735 case AArch64CC::LS:
15736 if ((AddConstant >= 0 && CompConstant < 0) ||
15737 (AddConstant <= 0 && CompConstant >= -1 &&
15738 CompConstant < AddConstant + MaxUInt))
15739 return true;
15740 break;
15741 case AArch64CC::PL:
15742 case AArch64CC::MI:
15743 if ((AddConstant == 0) ||
15744 (AddConstant > 0 && CompConstant <= 0) ||
15745 (AddConstant < 0 && CompConstant <= AddConstant))
15746 return true;
15747 break;
15748 case AArch64CC::LO:
15749 case AArch64CC::HS:
15750 if ((AddConstant >= 0 && CompConstant <= 0) ||
15751 (AddConstant <= 0 && CompConstant >= 0 &&
15752 CompConstant <= AddConstant + MaxUInt))
15753 return true;
15754 break;
15755 case AArch64CC::EQ:
15756 case AArch64CC::NE:
15757 if ((AddConstant > 0 && CompConstant < 0) ||
15758 (AddConstant < 0 && CompConstant >= 0 &&
15759 CompConstant < AddConstant + MaxUInt) ||
15760 (AddConstant >= 0 && CompConstant >= 0 &&
15761 CompConstant >= AddConstant) ||
15762 (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
15763 return true;
15764 break;
15765 case AArch64CC::VS:
15766 case AArch64CC::VC:
15767 case AArch64CC::AL:
15768 case AArch64CC::NV:
15769 return true;
15770 case AArch64CC::Invalid:
15771 break;
15772 }
15773
15774 return false;
15775}
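A rough C++ model of the brute-force check the comment above describes, specialized to one condition code; this is a sketch of the idea under the stated 4-bit assumptions, not the actual harness:

// Hypothetical sketch of the brute-force check, specialized to the LO (unsigned <)
// condition code. It mirrors the sub/and/cmp/cset sequence shown above: for every
// 4-bit input w0 of the chosen extension kind it checks that comparing the masked
// and unmasked values of w0 - w1 against w2 gives the same answer.
static bool ccLO(int A, int B) {
  return static_cast<unsigned>(A) < static_cast<unsigned>(B);
}

bool equivalentMasklessLO(int W1, int W2, bool SignExtended) {
  int Lo = SignExtended ? -8 : 0, Hi = SignExtended ? 7 : 15;
  for (int W0 = Lo; W0 <= Hi; ++W0) {
    int Full = W0 - W1;      // sub w8, w0, w1
    int Masked = Full & 0xf; // and w10, w8, #0x0f
    if (ccLO(Full, W2) != ccLO(Masked, W2)) // cmp/cset on both values, then compare
      return false;
  }
  return true;
}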
15776
15777static
15778SDValue performCONDCombine(SDNode *N,
15779 TargetLowering::DAGCombinerInfo &DCI,
15780 SelectionDAG &DAG, unsigned CCIndex,
15781 unsigned CmpIndex) {
15782 unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
15783 SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
15784 unsigned CondOpcode = SubsNode->getOpcode();
15785
15786 if (CondOpcode != AArch64ISD::SUBS)
15787 return SDValue();
15788
15789 // There is a SUBS feeding this condition. Is it fed by a mask we can
15790 // use?
15791
15792 SDNode *AndNode = SubsNode->getOperand(0).getNode();
15793 unsigned MaskBits = 0;
15794
15795 if (AndNode->getOpcode() != ISD::AND)
15796 return SDValue();
15797
15798 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
15799 uint32_t CNV = CN->getZExtValue();
15800 if (CNV == 255)
15801 MaskBits = 8;
15802 else if (CNV == 65535)
15803 MaskBits = 16;
15804 }
15805
15806 if (!MaskBits)
15807 return SDValue();
15808
15809 SDValue AddValue = AndNode->getOperand(0);
15810
15811 if (AddValue.getOpcode() != ISD::ADD)
15812 return SDValue();
15813
15814 // The basic dag structure is correct, grab the inputs and validate them.
15815
15816 SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
15817 SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
15818 SDValue SubsInputValue = SubsNode->getOperand(1);
15819
15820 // The mask is present and the provenance of all the values is a smaller type,
15821  // let's see if the mask is superfluous.
15822
15823 if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
15824 !isa<ConstantSDNode>(SubsInputValue.getNode()))
15825 return SDValue();
15826
15827 ISD::LoadExtType ExtType;
15828
15829 if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
15830 !checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
15831 !checkValueWidth(AddInputValue1, MaskBits, ExtType) )
15832 return SDValue();
15833
15834 if(!isEquivalentMaskless(CC, MaskBits, ExtType,
15835 cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
15836 cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
15837 return SDValue();
15838
15839 // The AND is not necessary, remove it.
15840
15841 SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
15842 SubsNode->getValueType(1));
15843 SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
15844
15845 SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
15846 DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
15847
15848 return SDValue(N, 0);
15849}
15850
15851// Optimize compare with zero and branch.
15852static SDValue performBRCONDCombine(SDNode *N,
15853 TargetLowering::DAGCombinerInfo &DCI,
15854 SelectionDAG &DAG) {
15855 MachineFunction &MF = DAG.getMachineFunction();
15856 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
15857 // will not be produced, as they are conditional branch instructions that do
15858 // not set flags.
15859 if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
15860 return SDValue();
15861
15862 if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
15863 N = NV.getNode();
15864 SDValue Chain = N->getOperand(0);
15865 SDValue Dest = N->getOperand(1);
15866 SDValue CCVal = N->getOperand(2);
15867 SDValue Cmp = N->getOperand(3);
15868
15869  assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
15870 unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
15871 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
15872 return SDValue();
15873
15874 unsigned CmpOpc = Cmp.getOpcode();
15875 if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
15876 return SDValue();
15877
15878 // Only attempt folding if there is only one use of the flag and no use of the
15879 // value.
15880 if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
15881 return SDValue();
15882
15883 SDValue LHS = Cmp.getOperand(0);
15884 SDValue RHS = Cmp.getOperand(1);
15885
15886  assert(LHS.getValueType() == RHS.getValueType() &&
15887         "Expected the value type to be the same for both operands!");
15888 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
15889 return SDValue();
15890
15891 if (isNullConstant(LHS))
15892 std::swap(LHS, RHS);
15893
15894 if (!isNullConstant(RHS))
15895 return SDValue();
15896
15897 if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
15898 LHS.getOpcode() == ISD::SRL)
15899 return SDValue();
15900
15901 // Fold the compare into the branch instruction.
15902 SDValue BR;
15903 if (CC == AArch64CC::EQ)
15904 BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
15905 else
15906 BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
15907
15908 // Do not add new nodes to DAG combiner worklist.
15909 DCI.CombineTo(N, BR, false);
15910
15911 return SDValue();
15912}
15913
15914// Optimize CSEL instructions
15915static SDValue performCSELCombine(SDNode *N,
15916 TargetLowering::DAGCombinerInfo &DCI,
15917 SelectionDAG &DAG) {
15918 // CSEL x, x, cc -> x
15919 if (N->getOperand(0) == N->getOperand(1))
15920 return N->getOperand(0);
15921
15922 return performCONDCombine(N, DCI, DAG, 2, 3);
15923}
15924
15925static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
15926  assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
15927 SDValue LHS = N->getOperand(0);
15928 SDValue RHS = N->getOperand(1);
15929 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
15930
15931 // setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
15932 if (Cond == ISD::SETNE && isOneConstant(RHS) &&
15933 LHS->getOpcode() == AArch64ISD::CSEL &&
15934 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
15935 LHS->hasOneUse()) {
15936 SDLoc DL(N);
15937
15938 // Invert CSEL's condition.
15939 auto *OpCC = cast<ConstantSDNode>(LHS.getOperand(2));
15940 auto OldCond = static_cast<AArch64CC::CondCode>(OpCC->getZExtValue());
15941 auto NewCond = getInvertedCondCode(OldCond);
15942
15943 // csel 0, 1, !cond, X
15944 SDValue CSEL =
15945 DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0),
15946 LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32),
15947 LHS.getOperand(3));
15948 return DAG.getZExtOrTrunc(CSEL, DL, N->getValueType(0));
15949 }
15950
15951 return SDValue();
15952}
15953
15954static SDValue performSetccMergeZeroCombine(SDNode *N, SelectionDAG &DAG) {
15955  assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
15956         "Unexpected opcode!");
15957
15958 SDValue Pred = N->getOperand(0);
15959 SDValue LHS = N->getOperand(1);
15960 SDValue RHS = N->getOperand(2);
15961 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
15962
15963 // setcc_merge_zero pred (sign_extend (setcc_merge_zero ... pred ...)), 0, ne
15964 // => inner setcc_merge_zero
15965 if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) &&
15966 LHS->getOpcode() == ISD::SIGN_EXTEND &&
15967 LHS->getOperand(0)->getValueType(0) == N->getValueType(0) &&
15968 LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
15969 LHS->getOperand(0)->getOperand(0) == Pred)
15970 return LHS->getOperand(0);
15971
15972 return SDValue();
15973}
15974
15975// Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
15976// as well as whether the test should be inverted. This code is required to
15977// catch these cases (as opposed to standard dag combines) because
15978// AArch64ISD::TBZ is matched during legalization.
15979static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
15980 SelectionDAG &DAG) {
15981
15982 if (!Op->hasOneUse())
15983 return Op;
15984
15985 // We don't handle undef/constant-fold cases below, as they should have
15986 // already been taken care of (e.g. and of 0, test of undefined shifted bits,
15987 // etc.)
15988
15989 // (tbz (trunc x), b) -> (tbz x, b)
15990 // This case is just here to enable more of the below cases to be caught.
15991 if (Op->getOpcode() == ISD::TRUNCATE &&
15992 Bit < Op->getValueType(0).getSizeInBits()) {
15993 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
15994 }
15995
15996 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
15997 if (Op->getOpcode() == ISD::ANY_EXTEND &&
15998 Bit < Op->getOperand(0).getValueSizeInBits()) {
15999 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16000 }
16001
16002 if (Op->getNumOperands() != 2)
16003 return Op;
16004
16005 auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
16006 if (!C)
16007 return Op;
16008
16009 switch (Op->getOpcode()) {
16010 default:
16011 return Op;
16012
16013 // (tbz (and x, m), b) -> (tbz x, b)
16014 case ISD::AND:
16015 if ((C->getZExtValue() >> Bit) & 1)
16016 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16017 return Op;
16018
16019 // (tbz (shl x, c), b) -> (tbz x, b-c)
16020 case ISD::SHL:
16021 if (C->getZExtValue() <= Bit &&
16022 (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
16023 Bit = Bit - C->getZExtValue();
16024 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16025 }
16026 return Op;
16027
16028 // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
16029 case ISD::SRA:
16030 Bit = Bit + C->getZExtValue();
16031 if (Bit >= Op->getValueType(0).getSizeInBits())
16032 Bit = Op->getValueType(0).getSizeInBits() - 1;
16033 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16034
16035 // (tbz (srl x, c), b) -> (tbz x, b+c)
16036 case ISD::SRL:
16037 if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
16038 Bit = Bit + C->getZExtValue();
16039 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16040 }
16041 return Op;
16042
16043 // (tbz (xor x, -1), b) -> (tbnz x, b)
16044 case ISD::XOR:
16045 if ((C->getZExtValue() >> Bit) & 1)
16046 Invert = !Invert;
16047 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16048 }
16049}
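// Worked example for the recursion above: starting from
//   (tbz (srl (and x, 0xff0), 4), 2)
// the SRL case rewrites bit 2 as bit 6, and the AND case then strips the
// mask because bit 6 of 0xff0 is set, leaving (tbz x, 6) with Invert false.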
16050
16051// Optimize test single bit zero/non-zero and branch.
16052static SDValue performTBZCombine(SDNode *N,
16053 TargetLowering::DAGCombinerInfo &DCI,
16054 SelectionDAG &DAG) {
16055 unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
16056 bool Invert = false;
16057 SDValue TestSrc = N->getOperand(1);
16058 SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
16059
16060 if (TestSrc == NewTestSrc)
16061 return SDValue();
16062
16063 unsigned NewOpc = N->getOpcode();
16064 if (Invert) {
16065 if (NewOpc == AArch64ISD::TBZ)
16066 NewOpc = AArch64ISD::TBNZ;
16067 else {
16068 assert(NewOpc == AArch64ISD::TBNZ);
16069 NewOpc = AArch64ISD::TBZ;
16070 }
16071 }
16072
16073 SDLoc DL(N);
16074 return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
16075 DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
16076}
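// E.g. (tbz (xor x, -1), b, dest): getTestBitOperand strips the XOR and sets
// Invert, so the node is re-emitted as (tbnz x, b, dest) with the same chain
// and destination operands.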
16077
16078// vselect (v1i1 setcc) ->
16079// vselect (v1iXX setcc) (XX is the size of the compared operand type)
16080// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
16081// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
16082// such VSELECT.
16083static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
16084 SDValue N0 = N->getOperand(0);
16085 EVT CCVT = N0.getValueType();
16086
16087 // Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
16088 // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
16089 // supported types.
16090 SDValue SetCC = N->getOperand(0);
16091 if (SetCC.getOpcode() == ISD::SETCC &&
16092 SetCC.getOperand(2) == DAG.getCondCode(ISD::SETGT)) {
16093 SDValue CmpLHS = SetCC.getOperand(0);
16094 EVT VT = CmpLHS.getValueType();
16095 SDNode *CmpRHS = SetCC.getOperand(1).getNode();
16096 SDNode *SplatLHS = N->getOperand(1).getNode();
16097 SDNode *SplatRHS = N->getOperand(2).getNode();
16098 APInt SplatLHSVal;
16099 if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
16100 VT.isSimple() &&
16101 is_contained(
16102 makeArrayRef({MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
16103 MVT::v2i32, MVT::v4i32, MVT::v2i64}),
16104 VT.getSimpleVT().SimpleTy) &&
16105 ISD::isConstantSplatVector(SplatLHS, SplatLHSVal) &&
16106 SplatLHSVal.isOneValue() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
16107 ISD::isConstantSplatVectorAllOnes(SplatRHS)) {
16108 unsigned NumElts = VT.getVectorNumElements();
16109 SmallVector<SDValue, 8> Ops(
16110 NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
16111 VT.getScalarType()));
16112 SDValue Val = DAG.getBuildVector(VT, SDLoc(N), Ops);
16113
16114 auto Shift = DAG.getNode(ISD::SRA, SDLoc(N), VT, CmpLHS, Val);
16115 auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
16116 return Or;
16117 }
16118 }
16119
16120 if (N0.getOpcode() != ISD::SETCC ||
16121 CCVT.getVectorElementCount() != ElementCount::getFixed(1) ||
16122 CCVT.getVectorElementType() != MVT::i1)
16123 return SDValue();
16124
16125 EVT ResVT = N->getValueType(0);
16126 EVT CmpVT = N0.getOperand(0).getValueType();
16127 // Only combine when the result type is of the same size as the compared
16128 // operands.
16129 if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
16130 return SDValue();
16131
16132 SDValue IfTrue = N->getOperand(1);
16133 SDValue IfFalse = N->getOperand(2);
16134 SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
16135 N0.getOperand(0), N0.getOperand(1),
16136 cast<CondCodeSDNode>(N0.getOperand(2))->get());
16137 return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
16138 IfTrue, IfFalse);
16139}
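// Concrete instance of the sign pattern above, for v4i32:
//   vselect (setgt x, splat(-1)), splat(1), splat(-1)
//     -> or (sra x, 31), splat(1)
// The arithmetic shift yields 0 or -1 per lane, and OR-ing with 1 produces
// the required 1 (x >= 0) or -1 (x < 0) without a compare-select pair.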
16140
16141/// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
16142/// the compare-mask instructions rather than going via NZCV, even if LHS and
16143/// RHS are really scalar. This replaces any scalar setcc in the above pattern
16144/// with a vector one followed by a DUP shuffle on the result.
16145static SDValue performSelectCombine(SDNode *N,
16146 TargetLowering::DAGCombinerInfo &DCI) {
16147 SelectionDAG &DAG = DCI.DAG;
16148 SDValue N0 = N->getOperand(0);
16149 EVT ResVT = N->getValueType(0);
16150
16151 if (N0.getOpcode() != ISD::SETCC)
16152 return SDValue();
16153
16154 if (ResVT.isScalableVector())
16155 return SDValue();
16156
16157 // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
16158 // scalar SetCCResultType. We also don't expect vectors, because we assume
16159 // that selects fed by vector SETCCs are canonicalized to VSELECT.
16160 assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
16161 "Scalar-SETCC feeding SELECT has unexpected result type!");
16162
16163 // If NumMaskElts == 0, the comparison is larger than the select result. The
16164 // largest real NEON comparison is 64 bits per lane, which means the result is
16165 // at most 32 bits and an illegal vector. Just bail out for now.
16166 EVT SrcVT = N0.getOperand(0).getValueType();
16167
16168 // Don't try to do this optimization when the setcc itself has i1 operands.
16169 // There are no legal vectors of i1, so this would be pointless.
16170 if (SrcVT == MVT::i1)
16171 return SDValue();
16172
16173 int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
16174 if (!ResVT.isVector() || NumMaskElts == 0)
16175 return SDValue();
16176
16177 SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
16178 EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
16179
16180 // Also bail out if the vector CCVT isn't the same size as ResVT.
16181 // This can happen if the SETCC operand size doesn't divide the ResVT size
16182 // (e.g., f64 vs v3f32).
16183 if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
16184 return SDValue();
16185
16186 // Make sure we didn't create illegal types, if we're not supposed to.
16187 assert(DCI.isBeforeLegalize() ||
16188 DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
16189
16190 // First perform a vector comparison, where lane 0 is the one we're interested
16191 // in.
16192 SDLoc DL(N0);
16193 SDValue LHS =
16194 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
16195 SDValue RHS =
16196 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
16197 SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
16198
16199 // Now duplicate the comparison mask we want across all other lanes.
16200 SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
16201 SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
16202 Mask = DAG.getNode(ISD::BITCAST, DL,
16203 ResVT.changeVectorElementTypeToInteger(), Mask);
16204
16205 return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
16206}
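// E.g. (select (setcc olt, f64 a, f64 b), v2f64 x, v2f64 y): the scalar
// operands are placed in lane 0 of v2f64 vectors, a v2i64 vector setcc is
// performed, lane 0 of the mask is DUP'd across both lanes, and a normal
// VSELECT picks x or y, avoiding a round trip through the NZCV flags.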
16207
16208/// Get rid of unnecessary NVCASTs (that don't change the type).
16209static SDValue performNVCASTCombine(SDNode *N) {
16210 if (N->getValueType(0) == N->getOperand(0).getValueType())
16211 return N->getOperand(0);
16212
16213 return SDValue();
16214}
16215
16216// If all users of the globaladdr are of the form (globaladdr + constant), find
16217// the smallest constant, fold it into the globaladdr's offset and rewrite the
16218// globaladdr as (globaladdr + constant) - constant.
16219static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
16220 const AArch64Subtarget *Subtarget,
16221 const TargetMachine &TM) {
16222 auto *GN = cast<GlobalAddressSDNode>(N);
16223 if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
16224 AArch64II::MO_NO_FLAG)
16225 return SDValue();
16226
16227 uint64_t MinOffset = -1ull;
16228 for (SDNode *N : GN->uses()) {
16229 if (N->getOpcode() != ISD::ADD)
16230 return SDValue();
16231 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
16232 if (!C)
16233 C = dyn_cast<ConstantSDNode>(N->getOperand(1));
16234 if (!C)
16235 return SDValue();
16236 MinOffset = std::min(MinOffset, C->getZExtValue());
16237 }
16238 uint64_t Offset = MinOffset + GN->getOffset();
16239
16240 // Require that the new offset is larger than the existing one. Otherwise, we
16241 // can end up oscillating between two possible DAGs, for example,
16242 // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
16243 if (Offset <= uint64_t(GN->getOffset()))
16244 return SDValue();
16245
16246 // Check whether folding this offset is legal. It must not go out of bounds of
16247 // the referenced object to avoid violating the code model, and must be
16248 // smaller than 2^21 because this is the largest offset expressible in all
16249 // object formats.
16250 //
16251 // This check also prevents us from folding negative offsets, which will end
16252 // up being treated in the same way as large positive ones. They could also
16253 // cause code model violations, and aren't really common enough to matter.
16254 if (Offset >= (1 << 21))
16255 return SDValue();
16256
16257 const GlobalValue *GV = GN->getGlobal();
16258 Type *T = GV->getValueType();
16259 if (!T->isSized() ||
16260 Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
16261 return SDValue();
16262
16263 SDLoc DL(GN);
16264 SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
16265 return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
16266 DAG.getConstant(MinOffset, DL, MVT::i64));
16267}
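// Worked example: if @g (offset 0) is used only by (add @g, 16) and
// (add @g, 40), MinOffset is 16 and the node is rewritten as
//   (sub (globaladdr @g + 16), 16)
// so the users simplify to @g+16 and (@g+16)+24, keeping every materialised
// offset inside the 2^21 limit checked above.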
16268
16269// Turns the vector of indices into a vector of byte offsets by scaling Offset
16270// by (BitWidth / 8).
16271static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
16272 SDLoc DL, unsigned BitWidth) {
16273 assert(Offset.getValueType().isScalableVector() &&
16274 "This method is only for scalable vectors of offsets");
16275
16276 SDValue Shift = DAG.getConstant(Log2_32(BitWidth / 8), DL, MVT::i64);
16277 SDValue SplatShift = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Shift);
16278
16279 return DAG.getNode(ISD::SHL, DL, MVT::nxv2i64, Offset, SplatShift);
16280}
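// E.g. with BitWidth == 32 the shift amount is Log2(32 / 8) == 2, so each
// index lane i in the nxv2i64 offset vector becomes the byte offset 4 * i.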
16281
16282/// Check if the value of \p OffsetInBytes can be used as an immediate for
16283/// the gather load/prefetch and scatter store instructions with vector base and
16284/// immediate offset addressing mode:
16285///
16286/// [<Zn>.[S|D]{, #<imm>}]
16287///
16288/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
16289inline static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes,
16290 unsigned ScalarSizeInBytes) {
16291 // The immediate is not a multiple of the scalar size.
16292 if (OffsetInBytes % ScalarSizeInBytes)
16293 return false;
16294
16295 // The immediate is out of range.
16296 if (OffsetInBytes / ScalarSizeInBytes > 31)
16297 return false;
16298
16299 return true;
16300}
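// E.g. for 32-bit elements (ScalarSizeInBytes == 4) the valid immediates are
// 0, 4, 8, ..., 124: an offset of 6 fails the multiple-of-4 check, and 128
// fails the range check because 128 / 4 == 32 is outside [0, 31].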
16301
16302/// Check if the value of \p Offset represents a valid immediate for the SVE
16303/// gather load/prefetch and scatter store instructions with vector base and
16304/// immediate offset addressing mode:
16305///
16306/// [<Zn>.[S|D]{, #<imm>}]
16307///
16308/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
16309static bool isValidImmForSVEVecImmAddrMode(SDValue Offset,
16310 unsigned ScalarSizeInBytes) {
16311 ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
16312 return OffsetConst && isValidImmForSVEVecImmAddrMode(
16313 OffsetConst->getZExtValue(), ScalarSizeInBytes);
16314}
16315
16316static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
16317 unsigned Opcode,
16318 bool OnlyPackedOffsets = true) {
16319 const SDValue Src = N->getOperand(2);
16320 const EVT SrcVT = Src->getValueType(0);
16321 assert(SrcVT.isScalableVector() &&
16322 "Scatter stores are only possible for SVE vectors");
16323
16324 SDLoc DL(N);
16325 MVT SrcElVT = SrcVT.getVectorElementType().getSimpleVT();
16326
16327 // Make sure that source data will fit into an SVE register
16328 if (SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
16329 return SDValue();
16330
16331 // For FPs, ACLE only supports _packed_ single and double precision types.
16332 if (SrcElVT.isFloatingPoint())
16333 if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
16334 return SDValue();
16335
16336 // Depending on the addressing mode, this is either a pointer or a vector of
16337 // pointers (that fits into one register)
16338 SDValue Base = N->getOperand(4);
16339 // Depending on the addressing mode, this is either a single offset or a
16340 // vector of offsets (that fits into one register)
16341 SDValue Offset = N->getOperand(5);
16342
16343 // For "scalar + vector of indices", just scale the indices. This only
16344 // applies to non-temporal scatters because there's no instruction that takes
16345 // indices.
16346 if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
16347 Offset =
16348 getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits());
16349 Opcode = AArch64ISD::SSTNT1_PRED;
16350 }
16351
16352 // In the case of non-temporal scatter stores there's only one SVE instruction
16353 // per data-size: "scalar + vector", i.e.
16354 // * stnt1{b|h|w|d} { z0.s }, p0, [z0.s, x0]
16355 // Since we do have intrinsics that allow the arguments to be in a different
16356 // order, we may need to swap them to match the spec.
16357 if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector())
16358 std::swap(Base, Offset);
16359
16360 // SST1_IMM requires that the offset is an immediate that is:
16361 // * a multiple of #SizeInBytes,
16362 // * in the range [0, 31 x #SizeInBytes],
16363 // where #SizeInBytes is the size in bytes of the stored items. For
16364 // immediates outside that range and non-immediate scalar offsets use SST1 or
16365 // SST1_UXTW instead.
16366 if (Opcode == AArch64ISD::SST1_IMM_PRED) {
16367 if (!isValidImmForSVEVecImmAddrMode(Offset,
16368 SrcVT.getScalarSizeInBits() / 8)) {
16369 if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
16370 Opcode = AArch64ISD::SST1_UXTW_PRED;
16371 else
16372 Opcode = AArch64ISD::SST1_PRED;
16373
16374 std::swap(Base, Offset);
16375 }
16376 }
16377
16378 auto &TLI = DAG.getTargetLoweringInfo();
16379 if (!TLI.isTypeLegal(Base.getValueType()))
16380 return SDValue();
16381
16382 // Some scatter store variants allow unpacked offsets, but only as nxv2i32
16383 // vectors. These are implicitly sign-extended (sxtw) or zero-extended (uxtw)
16384 // to nxv2i64. Legalize accordingly.
16385 if (!OnlyPackedOffsets &&
16386 Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
16387 Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
16388
16389 if (!TLI.isTypeLegal(Offset.getValueType()))
16390 return SDValue();
16391
16392 // Source value type that is representable in hardware
16393 EVT HwSrcVt = getSVEContainerType(SrcVT);
16394
16395 // Keep the original type of the input data to store - this is needed to be
16396 // able to select the correct instruction, e.g. ST1B, ST1H, ST1W and ST1D. For
16397 // FP values we want the integer equivalent, so just use HwSrcVt.
16398 SDValue InputVT = DAG.getValueType(SrcVT);
16399 if (SrcVT.isFloatingPoint())
16400 InputVT = DAG.getValueType(HwSrcVt);
16401
16402 SDVTList VTs = DAG.getVTList(MVT::Other);
16403 SDValue SrcNew;
16404
16405 if (Src.getValueType().isFloatingPoint())
16406 SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Src);
16407 else
16408 SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Src);
16409
16410 SDValue Ops[] = {N->getOperand(0), // Chain
16411 SrcNew,
16412 N->getOperand(3), // Pg
16413 Base,
16414 Offset,
16415 InputVT};
16416
16417 return DAG.getNode(Opcode, DL, VTs, Ops);
16418}
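// Rough example of the SST1_IMM fallback above: a scatter of nxv4i32 data
// whose scalar offset is 256 bytes (256 / 4 == 64 > 31) is rebuilt as
// SST1_UXTW_PRED, with the out-of-range scalar moved into the base position
// and the nxv4i32 vector of addresses used as the (uxtw'd) offset operand.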
16419
16420static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
16421 unsigned Opcode,
16422 bool OnlyPackedOffsets = true) {
16423 const EVT RetVT = N->getValueType(0);
16424 assert(RetVT.isScalableVector() &&
16425 "Gather loads are only possible for SVE vectors");
16426
16427 SDLoc DL(N);
16428
16429 // Make sure that the loaded data will fit into an SVE register
16430 if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
16431 return SDValue();
16432
16433 // Depending on the addressing mode, this is either a pointer or a vector of
16434 // pointers (that fits into one register)
16435 SDValue Base = N->getOperand(3);
16436 // Depending on the addressing mode, this is either a single offset or a
16437 // vector of offsets (that fits into one register)
16438 SDValue Offset = N->getOperand(4);
16439
16440 // For "scalar + vector of indices", just scale the indices. This only
16441 // applies to non-temporal gathers because there's no instruction that takes
16442 // indices.
16443 if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
16444 Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
16445 RetVT.getScalarSizeInBits());
16446 Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
16447 }
16448
16449 // In the case of non-temporal gather loads there's only one SVE instruction
16450 // per data-size: "scalar + vector", i.e.
16451 // * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
16452 // Since we do have intrinsics that allow the arguments to be in a different
16453 // order, we may need to swap them to match the spec.
16454 if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO &&
16455 Offset.getValueType().isVector())
16456 std::swap(Base, Offset);
16457
16458 // GLD{FF}1_IMM requires that the offset is an immediate that is:
16459 // * a multiple of #SizeInBytes,
16460 // * in the range [0, 31 x #SizeInBytes],
16461 // where #SizeInBytes is the size in bytes of the loaded items. For
16462 // immediates outside that range and non-immediate scalar offsets use
16463 // GLD1_MERGE_ZERO or GLD1_UXTW_MERGE_ZERO instead.
16464 if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
16465 Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) {
16466 if (!isValidImmForSVEVecImmAddrMode(Offset,
16467 RetVT.getScalarSizeInBits() / 8)) {
16468 if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
16469 Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
16470 ? AArch64ISD::GLD1_UXTW_MERGE_ZERO
16471 : AArch64ISD::GLDFF1_UXTW_MERGE_ZERO;
16472 else
16473 Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
16474 ? AArch64ISD::GLD1_MERGE_ZERO
16475 : AArch64ISD::GLDFF1_MERGE_ZERO;
16476
16477 std::swap(Base, Offset);
16478 }
16479 }
16480
16481 auto &TLI = DAG.getTargetLoweringInfo();
16482 if (!TLI.isTypeLegal(Base.getValueType()))
16483 return SDValue();
16484
16485 // Some gather load variants allow unpacked offsets, but only as nxv2i32
16486 // vectors. These are implicitly sign-extended (sxtw) or zero-extended (uxtw)
16487 // to nxv2i64. Legalize accordingly.
16488 if (!OnlyPackedOffsets &&
16489 Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
16490 Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
16491
16492 // Return value type that is representable in hardware
16493 EVT HwRetVt = getSVEContainerType(RetVT);
16494
16495 // Keep the original output value type around - this is needed to be able to
16496 // select the correct instruction, e.g. LD1B, LD1H, LD1W and LD1D. For FP
16497 // values we want the integer equivalent, so just use HwRetVT.
16498 SDValue OutVT = DAG.getValueType(RetVT);
16499 if (RetVT.isFloatingPoint())
16500 OutVT = DAG.getValueType(HwRetVt);
16501
16502 SDVTList VTs = DAG.getVTList(HwRetVt, MVT::Other);
16503 SDValue Ops[] = {N->getOperand(0), // Chain
16504 N->getOperand(2), // Pg
16505 Base, Offset, OutVT};
16506
16507 SDValue Load = DAG.getNode(Opcode, DL, VTs, Ops);
16508 SDValue LoadChain = SDValue(Load.getNode(), 1);
16509
16510 if (RetVT.isInteger() && (RetVT != HwRetVt))
16511 Load = DAG.getNode(ISD::TRUNCATE, DL, RetVT, Load.getValue(0));
16512
16513 // If the original return value was FP, bitcast accordingly. Doing it here
16514 // means that we can avoid adding TableGen patterns for FPs.
16515 if (RetVT.isFloatingPoint())
16516 Load = DAG.getNode(ISD::BITCAST, DL, RetVT, Load.getValue(0));
16517
16518 return DAG.getMergeValues({Load, LoadChain}, DL);
16519}
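// E.g. a gather returning nxv2i16 is emitted with the container type nxv2i64
// and truncated back to nxv2i16 afterwards; for FP results the integer
// container value is bitcast back instead, which is why no extra TableGen
// patterns are needed for the FP variants.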
16520
16521static SDValue
16522performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16523 SelectionDAG &DAG) {
16524 SDLoc DL(N);
16525 SDValue Src = N->getOperand(0);
16526 unsigned Opc = Src->getOpcode();
16527
16528 // Sign extend of an unsigned unpack -> signed unpack
16529 if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
16530
16531 unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
16532 : AArch64ISD::SUNPKLO;
16533
16534 // Push the sign extend to the operand of the unpack
16535 // This is necessary where, for example, the operand of the unpack
16536 // is another unpack:
16537 // 4i32 sign_extend_inreg (4i32 uunpklo(8i16 uunpklo (16i8 opnd)), from 4i8)
16538 // ->
16539 // 4i32 sunpklo (8i16 sign_extend_inreg(8i16 uunpklo (16i8 opnd), from 8i8)
16540 // ->
16541 // 4i32 sunpklo(8i16 sunpklo(16i8 opnd))
16542 SDValue ExtOp = Src->getOperand(0);
16543 auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
16544 EVT EltTy = VT.getVectorElementType();
16545 (void)EltTy;
16546
16547 assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
16548 "Sign extending from an invalid type");
16549
16550 EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());
16551
16552 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ExtOp.getValueType(),
16553 ExtOp, DAG.getValueType(ExtVT));
16554
16555 return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
16556 }
16557
16558 if (DCI.isBeforeLegalizeOps())
16559 return SDValue();
16560
16561 if (!EnableCombineMGatherIntrinsics)
16562 return SDValue();
16563
16564 // SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
16565 // for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.
16566 unsigned NewOpc;
16567 unsigned MemVTOpNum = 4;
16568 switch (Opc) {
16569 case AArch64ISD::LD1_MERGE_ZERO:
16570 NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
16571 MemVTOpNum = 3;
16572 break;
16573 case AArch64ISD::LDNF1_MERGE_ZERO:
16574 NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO;
16575 MemVTOpNum = 3;
16576 break;
16577 case AArch64ISD::LDFF1_MERGE_ZERO:
16578 NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO;
16579 MemVTOpNum = 3;
16580 break;
16581 case AArch64ISD::GLD1_MERGE_ZERO:
16582 NewOpc = AArch64ISD::GLD1S_MERGE_ZERO;
16583 break;
16584 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
16585 NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
16586 break;
16587 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
16588 NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
16589 break;
16590 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
16591 NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
16592 break;
16593 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
16594 NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
16595 break;
16596 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
16597 NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
16598 break;
16599 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
16600 NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO;
16601 break;
16602 case AArch64ISD::GLDFF1_MERGE_ZERO:
16603 NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO;
16604 break;
16605 case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
16606 NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO;
16607 break;
16608 case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
16609 NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO;
16610 break;
16611 case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
16612 NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO;
16613 break;
16614 case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
16615 NewOpc = AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO;
16616 break;
16617 case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
16618 NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO;
16619 break;
16620 case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
16621 NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO;
16622 break;
16623 case AArch64ISD::GLDNT1_MERGE_ZERO:
16624 NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO;
16625 break;
16626 default:
16627 return SDValue();
16628 }
16629
16630 EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16631 EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
16632
16633 if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
16634 return SDValue();
16635
16636 EVT DstVT = N->getValueType(0);
16637 SDVTList VTs = DAG.getVTList(DstVT, MVT::Other);
16638
16639 SmallVector<SDValue, 5> Ops;
16640 for (unsigned I = 0; I < Src->getNumOperands(); ++I)
16641 Ops.push_back(Src->getOperand(I));
16642
16643 SDValue ExtLoad = DAG.getNode(NewOpc, SDLoc(N), VTs, Ops);
16644 DCI.CombineTo(N, ExtLoad);
16645 DCI.CombineTo(Src.getNode(), ExtLoad, ExtLoad.getValue(1));
16646
16647 // Return N so it doesn't get rechecked
16648 return SDValue(N, 0);
16649}
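// E.g. sign_extend_inreg (GLD1_MERGE_ZERO pg, base, offs, vt), vt becomes
// GLD1S_MERGE_ZERO with the same operands, provided the inreg VT matches the
// gather's memory VT (operand MemVTOpNum) and the load has a single use.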
16650
16651/// Legalize the gather prefetch (scalar + vector addressing mode) when the
16652/// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
16653/// != nxv2i32) do not need legalization.
16654static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG) {
16655 const unsigned OffsetPos = 4;
16656 SDValue Offset = N->getOperand(OffsetPos);
16657
16658 // Not an unpacked vector, bail out.
16659 if (Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
16660 return SDValue();
16661
16662 // Extend the unpacked offset vector to 64-bit lanes.
16663 SDLoc DL(N);
16664 Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset);
16665 SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
16666 // Replace the offset operand with the 64-bit one.
16667 Ops[OffsetPos] = Offset;
16668
16669 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
16670}
16671
16672/// Combines a node carrying the intrinsic
16673/// `aarch64_sve_prf<T>_gather_scalar_offset` into a node that uses
16674/// `aarch64_sve_prfb_gather_uxtw_index` when the scalar offset passed to
16675/// `aarch64_sve_prf<T>_gather_scalar_offset` is not a valid immediate for the
16676/// SVE gather prefetch instruction with vector plus immediate addressing mode.
16677static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG,
16678 unsigned ScalarSizeInBytes) {
16679 const unsigned ImmPos = 4, OffsetPos = 3;
16680 // No need to combine the node if the immediate is valid...
16681 if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
16682 return SDValue();
16683
16684 // ...otherwise swap the offset base with the offset...
16685 SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
16686 std::swap(Ops[ImmPos], Ops[OffsetPos]);
16687 // ...and remap the intrinsic `aarch64_sve_prf<T>_gather_scalar_offset` to
16688 // `aarch64_sve_prfb_gather_uxtw_index`.
16689 SDLoc DL(N);
16690 Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL,
16691 MVT::i64);
16692
16693 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
16694}
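// E.g. a prfw gather with a scalar offset of 256 (not expressible as
// imm * 4 with imm <= 31) is remapped to aarch64_sve_prfb_gather_uxtw_index,
// with the vector base and the scalar offset operands swapped so the scalar
// becomes the base and the former vector base the byte-granular index.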
16695
16696// Return true if the vector operation can guarantee only the first lane of its
16697// result contains data, with all bits in other lanes set to zero.
16698static bool isLanes1toNKnownZero(SDValue Op) {
16699 switch (Op.getOpcode()) {
16700 default:
16701 return false;
16702 case AArch64ISD::ANDV_PRED:
16703 case AArch64ISD::EORV_PRED:
16704 case AArch64ISD::FADDA_PRED:
16705 case AArch64ISD::FADDV_PRED:
16706 case AArch64ISD::FMAXNMV_PRED:
16707 case AArch64ISD::FMAXV_PRED:
16708 case AArch64ISD::FMINNMV_PRED:
16709 case AArch64ISD::FMINV_PRED:
16710 case AArch64ISD::ORV_PRED:
16711 case AArch64ISD::SADDV_PRED:
16712 case AArch64ISD::SMAXV_PRED:
16713 case AArch64ISD::SMINV_PRED:
16714 case AArch64ISD::UADDV_PRED:
16715 case AArch64ISD::UMAXV_PRED:
16716 case AArch64ISD::UMINV_PRED:
16717 return true;
16718 }
16719}
16720
16721static SDValue removeRedundantInsertVectorElt(SDNode *N) {
16722 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
16723 SDValue InsertVec = N->getOperand(0);
16724 SDValue InsertElt = N->getOperand(1);
16725 SDValue InsertIdx = N->getOperand(2);
16726
16727 // We only care about inserts into the first element...
16728 if (!isNullConstant(InsertIdx))
16729 return SDValue();
16730 // ...of a zero'd vector...
16731 if (!ISD::isConstantSplatVectorAllZeros(InsertVec.getNode()))
16732 return SDValue();
16733 // ...where the inserted data was previously extracted...
16734 if (InsertElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16735 return SDValue();
16736
16737 SDValue ExtractVec = InsertElt.getOperand(0);
16738 SDValue ExtractIdx = InsertElt.getOperand(1);
16739
16740 // ...from the first element of a vector.
16741 if (!isNullConstant(ExtractIdx))
16742 return SDValue();
16743
16744 // If we get here we are effectively trying to zero lanes 1-N of a vector.
16745
16746 // Ensure there's no type conversion going on.
16747 if (N->getValueType(0) != ExtractVec.getValueType())
16748 return SDValue();
16749
16750 if (!isLanes1toNKnownZero(ExtractVec))
16751 return SDValue();
16752
16753 // The explicit zeroing is redundant.
16754 return ExtractVec;
16755}
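// Worked example: for
//   (insert_vector_elt zeroinitializer,
//                      (extract_vector_elt (UADDV_PRED p, z), 0), 0)
// the reduction already guarantees lanes 1..N are zero, so the whole
// insert/extract sequence is replaced by the UADDV_PRED result itself.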
16756
16757static SDValue
16758performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16759 if (SDValue Res = removeRedundantInsertVectorElt(N))
16760 return Res;
16761
16762 return performPostLD1Combine(N, DCI, true);
16763}
16764
16765SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
16766 EVT Ty = N->getValueType(0);
16767 if (Ty.isInteger())
16768 return SDValue();
16769
16770 EVT IntTy = Ty.changeVectorElementTypeToInteger();
16771 EVT ExtIntTy = getPackedSVEVectorVT(IntTy.getVectorElementCount());
16772 if (ExtIntTy.getVectorElementType().getScalarSizeInBits() <
16773 IntTy.getVectorElementType().getScalarSizeInBits())
16774 return SDValue();
16775
16776 SDLoc DL(N);
16777 SDValue LHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(0)),
16778 DL, ExtIntTy);
16779 SDValue RHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(1)),
16780 DL, ExtIntTy);
16781 SDValue Idx = N->getOperand(2);
16782 SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ExtIntTy, LHS, RHS, Idx);
16783 SDValue Trunc = DAG.getAnyExtOrTrunc(Splice, DL, IntTy);
16784 return DAG.getBitcast(Ty, Trunc);
16785}
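// E.g. an nxv2f32 splice is performed on integers: bitcast to nxv2i32,
// any-extend to the packed type nxv2i64, splice there, then truncate and
// bitcast back to nxv2f32.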
16786
16787SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
16788 DAGCombinerInfo &DCI) const {
16789 SelectionDAG &DAG = DCI.DAG;
16790 switch (N->getOpcode()) {
16791 default:
16792 LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
16793 break;
16794 case ISD::ADD:
16795 case ISD::SUB:
16796 return performAddSubCombine(N, DCI, DAG);
16797 case ISD::XOR:
16798 return performXorCombine(N, DAG, DCI, Subtarget);
16799 case ISD::MUL:
16800 return performMulCombine(N, DAG, DCI, Subtarget);
16801 case ISD::SINT_TO_FP:
16802 case ISD::UINT_TO_FP:
16803 return performIntToFpCombine(N, DAG, Subtarget);
16804 case ISD::FP_TO_SINT:
16805 case ISD::FP_TO_UINT:
16806 return performFpToIntCombine(N, DAG, DCI, Subtarget);
16807 case ISD::FDIV:
16808 return performFDivCombine(N, DAG, DCI, Subtarget);
16809 case ISD::OR:
16810 return performORCombine(N, DCI, Subtarget);
16811 case ISD::AND:
16812 return performANDCombine(N, DCI);
16813 case ISD::SRL:
16814 return performSRLCombine(N, DCI);
16815 case ISD::INTRINSIC_WO_CHAIN:
16816 return performIntrinsicCombine(N, DCI, Subtarget);
16817 case ISD::ANY_EXTEND:
16818 case ISD::ZERO_EXTEND:
16819 case ISD::SIGN_EXTEND:
16820 return performExtendCombine(N, DCI, DAG);
16821 case ISD::SIGN_EXTEND_INREG:
16822 return performSignExtendInRegCombine(N, DCI, DAG);
16823 case ISD::TRUNCATE:
16824 return performVectorTruncateCombine(N, DCI, DAG);
16825 case ISD::CONCAT_VECTORS:
16826 return performConcatVectorsCombine(N, DCI, DAG);
16827 case ISD::INSERT_SUBVECTOR:
16828 return performInsertSubvectorCombine(N, DCI, DAG);
16829 case ISD::SELECT:
16830 return performSelectCombine(N, DCI);
16831 case ISD::VSELECT:
16832 return performVSelectCombine(N, DCI.DAG);
16833 case ISD::SETCC:
16834 return performSETCCCombine(N, DAG);
16835 case ISD::LOAD:
16836 if (performTBISimplification(N->getOperand(1), DCI, DAG))
16837 return SDValue(N, 0);
16838 break;
16839 case ISD::STORE:
16840 return performSTORECombine(N, DCI, DAG, Subtarget);
16841 case ISD::VECTOR_SPLICE:
16842 return performSVESpliceCombine(N, DAG);
16843 case AArch64ISD::BRCOND:
16844 return performBRCONDCombine(N, DCI, DAG);
16845 case AArch64ISD::TBNZ:
16846 case AArch64ISD::TBZ:
16847 return performTBZCombine(N, DCI, DAG);
16848 case AArch64ISD::CSEL:
16849 return performCSELCombine(N, DCI, DAG);
16850 case AArch64ISD::DUP:
16851 return performPostLD1Combine(N, DCI, false);
16852 case AArch64ISD::NVCAST:
16853 return performNVCASTCombine(N);
16854 case AArch64ISD::SPLICE:
16855 return performSpliceCombine(N, DAG);
16856 case AArch64ISD::UZP1:
16857 return performUzpCombine(N, DAG);
16858 case AArch64ISD::SETCC_MERGE_ZERO:
16859 return performSetccMergeZeroCombine(N, DAG);
16860 case AArch64ISD::GLD1_MERGE_ZERO:
16861 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
16862 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
16863 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
16864 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
16865 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
16866 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
16867 case AArch64ISD::GLD1S_MERGE_ZERO:
16868 case AArch64ISD::GLD1S_SCALED_MERGE_ZERO:
16869 case AArch64ISD::GLD1S_UXTW_MERGE_ZERO:
16870 case AArch64ISD::GLD1S_SXTW_MERGE_ZERO:
16871 case AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO:
16872 case AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO:
16873 case AArch64ISD::GLD1S_IMM_MERGE_ZERO:
16874 return performGLD1Combine(N, DAG);
16875 case AArch64ISD::VASHR:
16876 case AArch64ISD::VLSHR:
16877 return performVectorShiftCombine(N, *this, DCI);
16878 case ISD::INSERT_VECTOR_ELT:
16879 return performInsertVectorEltCombine(N, DCI);
16880 case ISD::EXTRACT_VECTOR_ELT:
16881 return performExtractVectorEltCombine(N, DAG);
16882 case ISD::VECREDUCE_ADD:
16883 return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
16884 case ISD::INTRINSIC_VOID:
16885 case ISD::INTRINSIC_W_CHAIN:
16886 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
16887 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
16888 return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/);
16889 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
16890 return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/);
16891 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
16892 return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/);
16893 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
16894 return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/);
16895 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
16896 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
16897 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
16898 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
16899 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
16900 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
16901 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
16902 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
16903 return legalizeSVEGatherPrefetchOffsVec(N, DAG);
16904 case Intrinsic::aarch64_neon_ld2:
16905 case Intrinsic::aarch64_neon_ld3:
16906 case Intrinsic::aarch64_neon_ld4:
16907 case Intrinsic::aarch64_neon_ld1x2:
16908 case Intrinsic::aarch64_neon_ld1x3:
16909 case Intrinsic::aarch64_neon_ld1x4:
16910 case Intrinsic::aarch64_neon_ld2lane:
16911 case Intrinsic::aarch64_neon_ld3lane:
16912 case Intrinsic::aarch64_neon_ld4lane:
16913 case Intrinsic::aarch64_neon_ld2r:
16914 case Intrinsic::aarch64_neon_ld3r:
16915 case Intrinsic::aarch64_neon_ld4r:
16916 case Intrinsic::aarch64_neon_st2:
16917 case Intrinsic::aarch64_neon_st3:
16918 case Intrinsic::aarch64_neon_st4:
16919 case Intrinsic::aarch64_neon_st1x2:
16920 case Intrinsic::aarch64_neon_st1x3:
16921 case Intrinsic::aarch64_neon_st1x4:
16922 case Intrinsic::aarch64_neon_st2lane:
16923 case Intrinsic::aarch64_neon_st3lane:
16924 case Intrinsic::aarch64_neon_st4lane:
16925 return performNEONPostLDSTCombine(N, DCI, DAG);
16926 case Intrinsic::aarch64_sve_ldnt1:
16927 return performLDNT1Combine(N, DAG);
16928 case Intrinsic::aarch64_sve_ld1rq:
16929 return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(N, DAG);
16930 case Intrinsic::aarch64_sve_ld1ro:
16931 return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(N, DAG);
16932 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
16933 return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
16934 case Intrinsic::aarch64_sve_ldnt1_gather:
16935 return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
16936 case Intrinsic::aarch64_sve_ldnt1_gather_index:
16937 return performGatherLoadCombine(N, DAG,
16938 AArch64ISD::GLDNT1_INDEX_MERGE_ZERO);
16939 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
16940 return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
16941 case Intrinsic::aarch64_sve_ld1:
16942 return performLD1Combine(N, DAG, AArch64ISD::LD1_MERGE_ZERO);
16943 case Intrinsic::aarch64_sve_ldnf1:
16944 return performLD1Combine(N, DAG, AArch64ISD::LDNF1_MERGE_ZERO);
16945 case Intrinsic::aarch64_sve_ldff1:
16946 return performLD1Combine(N, DAG, AArch64ISD::LDFF1_MERGE_ZERO);
16947 case Intrinsic::aarch64_sve_st1:
16948 return performST1Combine(N, DAG);
16949 case Intrinsic::aarch64_sve_stnt1:
16950 return performSTNT1Combine(N, DAG);
16951 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
16952 return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
16953 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
16954 return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
16955 case Intrinsic::aarch64_sve_stnt1_scatter:
16956 return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
16957 case Intrinsic::aarch64_sve_stnt1_scatter_index:
16958 return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX_PRED);
16959 case Intrinsic::aarch64_sve_ld1_gather:
16960 return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO);
16961 case Intrinsic::aarch64_sve_ld1_gather_index:
16962 return performGatherLoadCombine(N, DAG,
16963 AArch64ISD::GLD1_SCALED_MERGE_ZERO);
16964 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
16965 return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW_MERGE_ZERO,
16966 /*OnlyPackedOffsets=*/false);
16967 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
16968 return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW_MERGE_ZERO,
16969 /*OnlyPackedOffsets=*/false);
16970 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
16971 return performGatherLoadCombine(N, DAG,
16972 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO,
16973 /*OnlyPackedOffsets=*/false);
16974 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
16975 return performGatherLoadCombine(N, DAG,
16976 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO,
16977 /*OnlyPackedOffsets=*/false);
16978 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
16979 return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM_MERGE_ZERO);
16980 case Intrinsic::aarch64_sve_ldff1_gather:
16981 return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_MERGE_ZERO);
16982 case Intrinsic::aarch64_sve_ldff1_gather_index:
16983 return performGatherLoadCombine(N, DAG,
16984 AArch64ISD::GLDFF1_SCALED_MERGE_ZERO);
16985 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
16986 return performGatherLoadCombine(N, DAG,
16987 AArch64ISD::GLDFF1_SXTW_MERGE_ZERO,
16988 /*OnlyPackedOffsets=*/false);
16989 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
16990 return performGatherLoadCombine(N, DAG,
16991 AArch64ISD::GLDFF1_UXTW_MERGE_ZERO,
16992 /*OnlyPackedOffsets=*/false);
16993 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
16994 return performGatherLoadCombine(N, DAG,
16995 AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO,
16996 /*OnlyPackedOffsets=*/false);
16997 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
16998 return performGatherLoadCombine(N, DAG,
16999 AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO,
17000 /*OnlyPackedOffsets=*/false);
17001 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
17002 return performGatherLoadCombine(N, DAG,
17003 AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
17004 case Intrinsic::aarch64_sve_st1_scatter:
17005 return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED);
17006 case Intrinsic::aarch64_sve_st1_scatter_index:
17007 return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SCALED_PRED);
17008 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
17009 return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW_PRED,
17010 /*OnlyPackedOffsets=*/false);
17011 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
17012 return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW_PRED,
17013 /*OnlyPackedOffsets=*/false);
17014 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
17015 return performScatterStoreCombine(N, DAG,
17016 AArch64ISD::SST1_SXTW_SCALED_PRED,
17017 /*OnlyPackedOffsets=*/false);
17018 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
17019 return performScatterStoreCombine(N, DAG,
17020 AArch64ISD::SST1_UXTW_SCALED_PRED,
17021 /*OnlyPackedOffsets=*/false);
17022 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
17023 return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED);
17024 case Intrinsic::aarch64_sve_tuple_get: {
17025 SDLoc DL(N);
17026 SDValue Chain = N->getOperand(0);
17027 SDValue Src1 = N->getOperand(2);
17028 SDValue Idx = N->getOperand(3);
17029
17030 uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
17031 EVT ResVT = N->getValueType(0);
17032 uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
17033 SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
17034 SDValue Val =
17035 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
17036 return DAG.getMergeValues({Val, Chain}, DL);
17037 }
17038 case Intrinsic::aarch64_sve_tuple_set: {
17039 SDLoc DL(N);
17040 SDValue Chain = N->getOperand(0);
17041 SDValue Tuple = N->getOperand(2);
17042 SDValue Idx = N->getOperand(3);
17043 SDValue Vec = N->getOperand(4);
17044
17045 EVT TupleVT = Tuple.getValueType();
17046 uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
17047
17048 uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
17049 uint64_t NumLanes =
17050 Vec.getValueType().getVectorElementCount().getKnownMinValue();
17051
17052 if ((TupleLanes % NumLanes) != 0)
17053 report_fatal_error("invalid tuple vector!");
17054
17055 uint64_t NumVecs = TupleLanes / NumLanes;
17056
17057 SmallVector<SDValue, 4> Opnds;
17058 for (unsigned I = 0; I < NumVecs; ++I) {
17059 if (I == IdxConst)
17060 Opnds.push_back(Vec);
17061 else {
17062 SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
17063 Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
17064 Vec.getValueType(), Tuple, ExtIdx));
17065 }
17066 }
17067 SDValue Concat =
17068 DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
17069 return DAG.getMergeValues({Concat, Chain}, DL);
17070 }
17071 case Intrinsic::aarch64_sve_tuple_create2:
17072 case Intrinsic::aarch64_sve_tuple_create3:
17073 case Intrinsic::aarch64_sve_tuple_create4: {
17074 SDLoc DL(N);
17075 SDValue Chain = N->getOperand(0);
17076
17077 SmallVector<SDValue, 4> Opnds;
17078 for (unsigned I = 2; I < N->getNumOperands(); ++I)
17079 Opnds.push_back(N->getOperand(I));
17080
17081 EVT VT = Opnds[0].getValueType();
17082 EVT EltVT = VT.getVectorElementType();
17083 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
17084 VT.getVectorElementCount() *
17085 (N->getNumOperands() - 2));
17086 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds);
17087 return DAG.getMergeValues({Concat, Chain}, DL);
17088 }
17089 case Intrinsic::aarch64_sve_ld2:
17090 case Intrinsic::aarch64_sve_ld3:
17091 case Intrinsic::aarch64_sve_ld4: {
17092 SDLoc DL(N);
17093 SDValue Chain = N->getOperand(0);
17094 SDValue Mask = N->getOperand(2);
17095 SDValue BasePtr = N->getOperand(3);
17096 SDValue LoadOps[] = {Chain, Mask, BasePtr};
17097 unsigned IntrinsicID =
17098 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
17099 SDValue Result =
17100 LowerSVEStructLoad(IntrinsicID, LoadOps, N->getValueType(0), DAG, DL);
17101 return DAG.getMergeValues({Result, Chain}, DL);
17102 }
17103 case Intrinsic::aarch64_rndr:
17104 case Intrinsic::aarch64_rndrrs: {
17105 unsigned IntrinsicID =
17106 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
17107 auto Register =
17108 (IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
17109 : AArch64SysReg::RNDRRS);
17110 SDLoc DL(N);
17111 SDValue A = DAG.getNode(
17112 AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other),
17113 N->getOperand(0), DAG.getConstant(Register, DL, MVT::i64));
17114 SDValue B = DAG.getNode(
17115 AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
17116 DAG.getConstant(0, DL, MVT::i32),
17117 DAG.getConstant(AArch64CC::NE, DL, MVT::i32), A.getValue(1));
17118 return DAG.getMergeValues(
17119 {A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
17120 }
17121 default:
17122 break;
17123 }
17124 break;
17125 case ISD::GlobalAddress:
17126 return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
17127 }
17128 return SDValue();
17129}
17130
17131// Check if the return value is used only as a return value, as otherwise
17132// we can't perform a tail-call. In particular, we need to check for
17133// target ISD nodes that are returns and any other "odd" constructs
17134// that the generic analysis code won't necessarily catch.
17135bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
17136 SDValue &Chain) const {
17137 if (N->getNumValues() != 1)
17138 return false;
17139 if (!N->hasNUsesOfValue(1, 0))
17140 return false;
17141
17142 SDValue TCChain = Chain;
17143 SDNode *Copy = *N->use_begin();
17144 if (Copy->getOpcode() == ISD::CopyToReg) {
17145 // If the copy has a glue operand, we conservatively assume it isn't safe to
17146 // perform a tail call.
17147 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
17148 MVT::Glue)
17149 return false;
17150 TCChain = Copy->getOperand(0);
17151 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
17152 return false;
17153
17154 bool HasRet = false;
17155 for (SDNode *Node : Copy->uses()) {
17156 if (Node->getOpcode() != AArch64ISD::RET_FLAG)
17157 return false;
17158 HasRet = true;
17159 }
17160
17161 if (!HasRet)
17162 return false;
17163
17164 Chain = TCChain;
17165 return true;
17166}
17167
17168// Return whether an instruction can potentially be optimized to a tail
17169// call. This will cause the optimizers to attempt to move, or duplicate,
17170// return instructions to help enable tail call optimizations for this
17171// instruction.
17172bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
17173 return CI->isTailCall();
17174}
17175
17176bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
17177 SDValue &Offset,
17178 ISD::MemIndexedMode &AM,
17179 bool &IsInc,
17180 SelectionDAG &DAG) const {
17181 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
17182 return false;
17183
17184 Base = Op->getOperand(0);
17185 // All of the indexed addressing mode instructions take a signed
17186 // 9 bit immediate offset.
17187 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
17188 int64_t RHSC = RHS->getSExtValue();
17189 if (Op->getOpcode() == ISD::SUB)
17190 RHSC = -(uint64_t)RHSC;
17191 if (!isInt<9>(RHSC))
17192 return false;
17193 IsInc = (Op->getOpcode() == ISD::ADD);
17194 Offset = Op->getOperand(1);
17195 return true;
17196 }
17197 return false;
17198}
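// Illustrative examples of what the signed 9-bit constraint above admits
// (assembly is only a sketch; the actual selection happens later):
//
//   ldr x0, [x1, #16]!    // pre-indexed:  base updated before the access
//   ldr x0, [x1], #-32    // post-indexed: base updated after the access
//
// Offsets must satisfy isInt<9>, i.e. lie in [-256, 255]; larger offsets are
// rejected here and end up as a separate ADD/SUB plus an unindexed access.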
17199
17200bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
17201 SDValue &Offset,
17202 ISD::MemIndexedMode &AM,
17203 SelectionDAG &DAG) const {
17204 EVT VT;
17205 SDValue Ptr;
17206 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
17207 VT = LD->getMemoryVT();
17208 Ptr = LD->getBasePtr();
17209 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
17210 VT = ST->getMemoryVT();
17211 Ptr = ST->getBasePtr();
17212 } else
17213 return false;
17214
17215 bool IsInc;
17216 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
17217 return false;
17218 AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
17219 return true;
17220}
17221
17222bool AArch64TargetLowering::getPostIndexedAddressParts(
17223 SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
17224 ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
17225 EVT VT;
17226 SDValue Ptr;
17227 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
17228 VT = LD->getMemoryVT();
17229 Ptr = LD->getBasePtr();
17230 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
17231 VT = ST->getMemoryVT();
17232 Ptr = ST->getBasePtr();
17233 } else
17234 return false;
17235
17236 bool IsInc;
17237 if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
17238 return false;
17239 // Post-indexing updates the base, so it's not a valid transform
17240 // if that's not the same as the load's pointer.
17241 if (Ptr != Base)
17242 return false;
17243 AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
17244 return true;
17245}
17246
17247void AArch64TargetLowering::ReplaceBITCASTResults(
17248 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
17249 SDLoc DL(N);
17250 SDValue Op = N->getOperand(0);
17251 EVT VT = N->getValueType(0);
17252 EVT SrcVT = Op.getValueType();
17253
17254 if (VT.isScalableVector() && !isTypeLegal(VT) && isTypeLegal(SrcVT)) {
17255 assert(!VT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17256        "Expected fp->int bitcast!");
17257 SDValue CastResult = getSVESafeBitCast(getSVEContainerType(VT), Op, DAG);
17258 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, CastResult));
17259 return;
17260 }
17261
17262 if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
17263 return;
17264
17265 Op = SDValue(
17266 DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
17267 DAG.getUNDEF(MVT::i32), Op,
17268 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
17269 0);
17270 Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
17271 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
17272}
17273
17274static void ReplaceReductionResults(SDNode *N,
17275 SmallVectorImpl<SDValue> &Results,
17276 SelectionDAG &DAG, unsigned InterOp,
17277 unsigned AcrossOp) {
17278 EVT LoVT, HiVT;
17279 SDValue Lo, Hi;
17280 SDLoc dl(N);
17281 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
17282 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
17283 SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
17284 SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
17285 Results.push_back(SplitVal);
17286}
17287
17288static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
17289 SDLoc DL(N);
17290 SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N);
17291 SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
17292 DAG.getNode(ISD::SRL, DL, MVT::i128, N,
17293 DAG.getConstant(64, DL, MVT::i64)));
17294 return std::make_pair(Lo, Hi);
17295}
17296
17297void AArch64TargetLowering::ReplaceExtractSubVectorResults(
17298 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
17299 SDValue In = N->getOperand(0);
17300 EVT InVT = In.getValueType();
17301
17302 // Common code will handle these just fine.
17303 if (!InVT.isScalableVector() || !InVT.isInteger())
17304 return;
17305
17306 SDLoc DL(N);
17307 EVT VT = N->getValueType(0);
17308
17309 // The following checks bail if this is not a halving operation.
17310
17311 ElementCount ResEC = VT.getVectorElementCount();
17312
17313 if (InVT.getVectorElementCount() != (ResEC * 2))
17314 return;
17315
17316 auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
17317 if (!CIndex)
17318 return;
17319
17320 unsigned Index = CIndex->getZExtValue();
17321 if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
17322 return;
17323
17324 unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
17325 EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());
17326
17327 SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
17328 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Half));
17329}
17330
17331// Create an even/odd pair of X registers holding integer value V.
17332static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
17333 SDLoc dl(V.getNode());
17334 SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
17335 SDValue VHi = DAG.getAnyExtOrTrunc(
17336 DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
17337 dl, MVT::i64);
17338 if (DAG.getDataLayout().isBigEndian())
17339 std::swap(VLo, VHi);
17340 SDValue RegClass =
17341 DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
17342 SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
17343 SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
17344 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
17345 return SDValue(
17346 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
17347}
17348
17349static void ReplaceCMP_SWAP_128Results(SDNode *N,
17350 SmallVectorImpl<SDValue> &Results,
17351 SelectionDAG &DAG,
17352 const AArch64Subtarget *Subtarget) {
17353 assert(N->getValueType(0) == MVT::i128 &&
17354        "AtomicCmpSwap on types less than 128 should be legal");
17355
17356 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
17357 if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
17358 // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
17359 // so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
17360 SDValue Ops[] = {
17361 createGPRPairNode(DAG, N->getOperand(2)), // Compare value
17362 createGPRPairNode(DAG, N->getOperand(3)), // Store value
17363 N->getOperand(1), // Ptr
17364 N->getOperand(0), // Chain in
17365 };
17366
17367 unsigned Opcode;
17368 switch (MemOp->getMergedOrdering()) {
17369 case AtomicOrdering::Monotonic:
17370 Opcode = AArch64::CASPX;
17371 break;
17372 case AtomicOrdering::Acquire:
17373 Opcode = AArch64::CASPAX;
17374 break;
17375 case AtomicOrdering::Release:
17376 Opcode = AArch64::CASPLX;
17377 break;
17378 case AtomicOrdering::AcquireRelease:
17379 case AtomicOrdering::SequentiallyConsistent:
17380 Opcode = AArch64::CASPALX;
17381 break;
17382 default:
17383 llvm_unreachable("Unexpected ordering!");
17384 }
17385
17386 MachineSDNode *CmpSwap = DAG.getMachineNode(
17387 Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
17388 DAG.setNodeMemRefs(CmpSwap, {MemOp});
17389
17390 unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
17391 if (DAG.getDataLayout().isBigEndian())
17392 std::swap(SubReg1, SubReg2);
17393 SDValue Lo = DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
17394 SDValue(CmpSwap, 0));
17395 SDValue Hi = DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
17396 SDValue(CmpSwap, 0));
17397 Results.push_back(
17398 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
17399 Results.push_back(SDValue(CmpSwap, 1)); // Chain out
17400 return;
17401 }
17402
17403 unsigned Opcode;
17404 switch (MemOp->getMergedOrdering()) {
17405 case AtomicOrdering::Monotonic:
17406 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
17407 break;
17408 case AtomicOrdering::Acquire:
17409 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
17410 break;
17411 case AtomicOrdering::Release:
17412 Opcode = AArch64::CMP_SWAP_128_RELEASE;
17413 break;
17414 case AtomicOrdering::AcquireRelease:
17415 case AtomicOrdering::SequentiallyConsistent:
17416 Opcode = AArch64::CMP_SWAP_128;
17417 break;
17418 default:
17419 llvm_unreachable("Unexpected ordering!");
17420 }
17421
17422 auto Desired = splitInt128(N->getOperand(2), DAG);
17423 auto New = splitInt128(N->getOperand(3), DAG);
17424 SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
17425 New.first, New.second, N->getOperand(0)};
17426 SDNode *CmpSwap = DAG.getMachineNode(
17427 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other),
17428 Ops);
17429 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
17430
17431 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
17432 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
17433 Results.push_back(SDValue(CmpSwap, 3));
17434}
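// Summary of the opcode selection above (derived from the two switches; shown
// only for orientation):
//
//   Ordering          LSE / outlined atomics   LL/SC pseudo
//   Monotonic         CASPX                    CMP_SWAP_128_MONOTONIC
//   Acquire           CASPAX                   CMP_SWAP_128_ACQUIRE
//   Release           CASPLX                   CMP_SWAP_128_RELEASE
//   AcqRel / SeqCst   CASPALX                  CMP_SWAP_128
//
// The CASP forms take their compare and store values in even/odd X register
// pairs, which is why createGPRPairNode wraps them in a REG_SEQUENCE.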
17435
17436void AArch64TargetLowering::ReplaceNodeResults(
17437 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
17438 switch (N->getOpcode()) {
17439 default:
17440 llvm_unreachable("Don't know how to custom expand this");
17441 case ISD::BITCAST:
17442 ReplaceBITCASTResults(N, Results, DAG);
17443 return;
17444 case ISD::VECREDUCE_ADD:
17445 case ISD::VECREDUCE_SMAX:
17446 case ISD::VECREDUCE_SMIN:
17447 case ISD::VECREDUCE_UMAX:
17448 case ISD::VECREDUCE_UMIN:
17449 Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
17450 return;
17451
17452 case ISD::CTPOP:
17453 if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
17454 Results.push_back(Result);
17455 return;
17456 case AArch64ISD::SADDV:
17457 ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
17458 return;
17459 case AArch64ISD::UADDV:
17460 ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
17461 return;
17462 case AArch64ISD::SMINV:
17463 ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
17464 return;
17465 case AArch64ISD::UMINV:
17466 ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
17467 return;
17468 case AArch64ISD::SMAXV:
17469 ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
17470 return;
17471 case AArch64ISD::UMAXV:
17472 ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
17473 return;
17474 case ISD::FP_TO_UINT:
17475 case ISD::FP_TO_SINT:
17476 case ISD::STRICT_FP_TO_SINT:
17477 case ISD::STRICT_FP_TO_UINT:
17478 assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
17479 // Let normal code take care of it by not adding anything to Results.
17480 return;
17481 case ISD::ATOMIC_CMP_SWAP:
17482 ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
17483 return;
17484 case ISD::LOAD: {
17485 assert(SDValue(N, 0).getValueType() == MVT::i128 &&
17486        "unexpected load's value type");
17487 LoadSDNode *LoadNode = cast<LoadSDNode>(N);
17488 if (!LoadNode->isVolatile() || LoadNode->getMemoryVT() != MVT::i128) {
17489 // Non-volatile loads are optimized later in AArch64's load/store
17490 // optimizer.
17491 return;
17492 }
17493
17494 SDValue Result = DAG.getMemIntrinsicNode(
17495 AArch64ISD::LDP, SDLoc(N),
17496 DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
17497 {LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(),
17498 LoadNode->getMemOperand());
17499
17500 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
17501 Result.getValue(0), Result.getValue(1));
17502 Results.append({Pair, Result.getValue(2) /* Chain */});
17503 return;
17504 }
17505 case ISD::EXTRACT_SUBVECTOR:
17506 ReplaceExtractSubVectorResults(N, Results, DAG);
17507 return;
17508 case ISD::INSERT_SUBVECTOR:
17509 // Custom lowering has been requested for INSERT_SUBVECTOR -- but delegate
17510 // to common code for result type legalisation.
17511 return;
17512 case ISD::INTRINSIC_WO_CHAIN: {
17513 EVT VT = N->getValueType(0);
17514 assert((VT == MVT::i8 || VT == MVT::i16) &&
17515        "custom lowering for unexpected type");
17516
17517 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(0));
17518 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
17519 switch (IntID) {
17520 default:
17521 return;
17522 case Intrinsic::aarch64_sve_clasta_n: {
17523 SDLoc DL(N);
17524 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
17525 auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
17526 N->getOperand(1), Op2, N->getOperand(3));
17527 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17528 return;
17529 }
17530 case Intrinsic::aarch64_sve_clastb_n: {
17531 SDLoc DL(N);
17532 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
17533 auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
17534 N->getOperand(1), Op2, N->getOperand(3));
17535 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17536 return;
17537 }
17538 case Intrinsic::aarch64_sve_lasta: {
17539 SDLoc DL(N);
17540 auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
17541 N->getOperand(1), N->getOperand(2));
17542 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17543 return;
17544 }
17545 case Intrinsic::aarch64_sve_lastb: {
17546 SDLoc DL(N);
17547 auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
17548 N->getOperand(1), N->getOperand(2));
17549 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17550 return;
17551 }
17552 }
17553 }
17554 }
17555}
17556
17557bool AArch64TargetLowering::useLoadStackGuardNode() const {
17558 if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
17559 return TargetLowering::useLoadStackGuardNode();
17560 return true;
17561}
17562
17563unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
17564 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
17565 // reciprocal if there are three or more FDIVs.
17566 return 3;
17567}
17568
17569TargetLoweringBase::LegalizeTypeAction
17570AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {
17571 // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
17572 // v4i16, v2i32 instead of to promote.
17573 if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
17574 VT == MVT::v1f32)
17575 return TypeWidenVector;
17576
17577 return TargetLoweringBase::getPreferredVectorAction(VT);
17578}
17579
17580// Loads and stores less than 128 bits are already atomic; ones above that
17581// are doomed anyway, so defer to the default libcall and blame the OS when
17582// things go wrong.
17583bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
17584 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
17585 return Size == 128;
17586}
17587
17588// Loads and stores less than 128 bits are already atomic; ones above that
17589// are doomed anyway, so defer to the default libcall and blame the OS when
17590// things go wrong.
17591TargetLowering::AtomicExpansionKind
17592AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
17593 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
17594 return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
17595}
17596
17597// For the real atomic operations, we have ldxr/stxr up to 128 bits.
17598TargetLowering::AtomicExpansionKind
17599AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
17600 if (AI->isFloatingPointOperation())
17601 return AtomicExpansionKind::CmpXChg;
17602
17603 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
17604 if (Size > 128) return AtomicExpansionKind::None;
17605
17606 // Nand is not supported in LSE.
17607 // Leave 128 bits to LLSC or CmpXChg.
17608 if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
17609 if (Subtarget->hasLSE())
17610 return AtomicExpansionKind::None;
17611 if (Subtarget->outlineAtomics()) {
17612 // [U]Min/[U]Max RMW atomics are used in __sync_fetch_ libcalls so far.
17613 // Don't outline them unless
17614 // (1) high level <atomic> support approved:
17615 // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
17616 // (2) low level libgcc and compiler-rt support implemented by:
17617 // min/max outline atomics helpers
17618 if (AI->getOperation() != AtomicRMWInst::Min &&
17619 AI->getOperation() != AtomicRMWInst::Max &&
17620 AI->getOperation() != AtomicRMWInst::UMin &&
17621 AI->getOperation() != AtomicRMWInst::UMax) {
17622 return AtomicExpansionKind::None;
17623 }
17624 }
17625 }
17626
17627 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
17628 // implement atomicrmw without spilling. If the target address is also on the
17629 // stack and close enough to the spill slot, this can lead to a situation
17630 // where the monitor always gets cleared and the atomic operation can never
17631 // succeed. So at -O0 lower this operation to a CAS loop.
17632 if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
17633 return AtomicExpansionKind::CmpXChg;
17634
17635 return AtomicExpansionKind::LLSC;
17636}
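// The decision above, summarised (a sketch of the control flow, not new
// logic): floating-point RMW -> CmpXChg; size > 128 bits -> None (libcall);
// size < 128 bits with LSE (or with outlined atomics, except [U]Min/[U]Max)
// -> None; at -O0 -> CmpXChg; otherwise -> an LL/SC loop.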
17637
17638TargetLowering::AtomicExpansionKind
17639AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
17640 AtomicCmpXchgInst *AI) const {
17641 // If subtarget has LSE, leave cmpxchg intact for codegen.
17642 if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
17643 return AtomicExpansionKind::None;
17644 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
17645 // implement cmpxchg without spilling. If the address being exchanged is also
17646 // on the stack and close enough to the spill slot, this can lead to a
17647 // situation where the monitor always gets cleared and the atomic operation
17648 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
17649 if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
17650 return AtomicExpansionKind::None;
17651
17652 // 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
17653 // it.
17654 unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
17655 if (Size > 64)
17656 return AtomicExpansionKind::None;
17657
17658 return AtomicExpansionKind::LLSC;
17659}
17660
17661Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
17662 Type *ValueTy, Value *Addr,
17663 AtomicOrdering Ord) const {
17664 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
17665 bool IsAcquire = isAcquireOrStronger(Ord);
17666
17667 // Since i128 isn't legal and intrinsics don't get type-lowered, the ldxp
17668 // intrinsic must return {i64, i64} and we have to recombine them into a
17669 // single i128 here.
17670 if (ValueTy->getPrimitiveSizeInBits() == 128) {
17671 Intrinsic::ID Int =
17672 IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
17673 Function *Ldxr = Intrinsic::getDeclaration(M, Int);
17674
17675 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
17676 Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
17677
17678 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
17679 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
17680 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
17681 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
17682 return Builder.CreateOr(
17683 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
17684 }
17685
17686 Type *Tys[] = { Addr->getType() };
17687 Intrinsic::ID Int =
17688 IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
17689 Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
17690
17691 const DataLayout &DL = M->getDataLayout();
17692 IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
17693 Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);
17694
17695 return Builder.CreateBitCast(Trunc, ValueTy);
17696}
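// For the 128-bit path above, the generated IR looks roughly like this
// (a sketch with illustrative value names; the acquire case uses ldaxp,
// otherwise ldxp):
//
//   %lohi  = call { i64, i64 } @llvm.aarch64.ldaxp(i8* %addr)
//   %lo    = extractvalue { i64, i64 } %lohi, 0
//   %hi    = extractvalue { i64, i64 } %lohi, 1
//   %lo64  = zext i64 %lo to i128
//   %hi64  = zext i64 %hi to i128
//   %shl   = shl i128 %hi64, 64
//   %val64 = or i128 %lo64, %shl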
17697
17698void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
17699 IRBuilderBase &Builder) const {
17700 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
17701 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
17702}
17703
17704Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
17705 Value *Val, Value *Addr,
17706 AtomicOrdering Ord) const {
17707 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
17708 bool IsRelease = isReleaseOrStronger(Ord);
17709
17710 // Since the intrinsics must have legal type, the i128 intrinsics take two
17711 // parameters: "i64, i64". We must marshal Val into the appropriate form
17712 // before the call.
17713 if (Val->getType()->getPrimitiveSizeInBits() == 128) {
17714 Intrinsic::ID Int =
17715 IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
17716 Function *Stxr = Intrinsic::getDeclaration(M, Int);
17717 Type *Int64Ty = Type::getInt64Ty(M->getContext());
17718
17719 Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
17720 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
17721 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
17722 return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
17723 }
17724
17725 Intrinsic::ID Int =
17726 IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
17727 Type *Tys[] = { Addr->getType() };
17728 Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
17729
17730 const DataLayout &DL = M->getDataLayout();
17731 IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
17732 Val = Builder.CreateBitCast(Val, IntValTy);
17733
17734 return Builder.CreateCall(Stxr,
17735 {Builder.CreateZExtOrBitCast(
17736 Val, Stxr->getFunctionType()->getParamType(0)),
17737 Addr});
17738}
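// Likewise, the 128-bit path above marshals the value into two i64 halves
// (a sketch with illustrative names; the release case uses stlxp):
//
//   %lo     = trunc i128 %val to i64
//   %lshr   = lshr i128 %val, 64
//   %hi     = trunc i128 %lshr to i64
//   %status = call i32 @llvm.aarch64.stxp(i64 %lo, i64 %hi, i8* %addr)
//
// A non-zero %status means the exclusive store failed and the enclosing
// LL/SC loop retries.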
17739
17740bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
17741 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
17742 const DataLayout &DL) const {
17743 if (!Ty->isArrayTy()) {
17744 const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
17745 return TySize.isScalable() && TySize.getKnownMinSize() > 128;
17746 }
17747
17748 // All non-aggregate members of the type must have the same type.
17749 SmallVector<EVT> ValueVTs;
17750 ComputeValueVTs(*this, DL, Ty, ValueVTs);
17751 return is_splat(ValueVTs);
17752}
17753
17754bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
17755 EVT) const {
17756 return false;
17757}
17758
17759static Value *UseTlsOffset(IRBuilderBase &IRB, unsigned Offset) {
17760 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
17761 Function *ThreadPointerFunc =
17762 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
17763 return IRB.CreatePointerCast(
17764 IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc),
17765 Offset),
17766 IRB.getInt8PtrTy()->getPointerTo(0));
17767}
17768
17769Value *AArch64TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
17770 // Android provides a fixed TLS slot for the stack cookie. See the definition
17771 // of TLS_SLOT_STACK_GUARD in
17772 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
17773 if (Subtarget->isTargetAndroid())
17774 return UseTlsOffset(IRB, 0x28);
17775
17776 // Fuchsia is similar.
17777 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
17778 if (Subtarget->isTargetFuchsia())
17779 return UseTlsOffset(IRB, -0x10);
17780
17781 return TargetLowering::getIRStackGuard(IRB);
17782}
17783
17784void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
17785 // MSVC CRT provides functionalities for stack protection.
17786 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
17787 // MSVC CRT has a global variable holding security cookie.
17788 M.getOrInsertGlobal("__security_cookie",
17789 Type::getInt8PtrTy(M.getContext()));
17790
17791 // MSVC CRT has a function to validate security cookie.
17792 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
17793 "__security_check_cookie", Type::getVoidTy(M.getContext()),
17794 Type::getInt8PtrTy(M.getContext()));
17795 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
17796 F->setCallingConv(CallingConv::Win64);
17797 F->addParamAttr(0, Attribute::AttrKind::InReg);
17798 }
17799 return;
17800 }
17801 TargetLowering::insertSSPDeclarations(M);
17802}
17803
17804Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
17805 // MSVC CRT has a global variable holding security cookie.
17806 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
17807 return M.getGlobalVariable("__security_cookie");
17808 return TargetLowering::getSDagStackGuard(M);
17809}
17810
17811Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
17812 // MSVC CRT has a function to validate security cookie.
17813 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
17814 return M.getFunction("__security_check_cookie");
17815 return TargetLowering::getSSPStackGuardCheck(M);
17816}
17817
17818Value *
17819AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
17820 // Android provides a fixed TLS slot for the SafeStack pointer. See the
17821 // definition of TLS_SLOT_SAFESTACK in
17822 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
17823 if (Subtarget->isTargetAndroid())
17824 return UseTlsOffset(IRB, 0x48);
17825
17826 // Fuchsia is similar.
17827 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
17828 if (Subtarget->isTargetFuchsia())
17829 return UseTlsOffset(IRB, -0x8);
17830
17831 return TargetLowering::getSafeStackPointerLocation(IRB);
17832}
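// For reference, the fixed TLS offsets used by this hook and getIRStackGuard
// above (the values come straight from the code; see bionic_tls.h and
// <zircon/tls.h>):
//
//   Platform   Stack guard slot   SafeStack pointer slot
//   Android    TP + 0x28          TP + 0x48
//   Fuchsia    TP - 0x10          TP - 0x8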
17833
17834bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
17835 const Instruction &AndI) const {
17836 // Only sink 'and' mask to cmp use block if it is masking a single bit, since
17837 // this is likely to fold the and/cmp/br into a single tbz instruction. It
17838 // may be beneficial to sink in other cases, but we would have to check that
17839 // the cmp would not get folded into the br to form a cbz for these to be
17840 // beneficial.
17841 ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
17842 if (!Mask)
17843 return false;
17844 return Mask->getValue().isPowerOf2();
17845}
17846
17847bool AArch64TargetLowering::
17848 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
17849 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
17850 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
17851 SelectionDAG &DAG) const {
17852 // Does baseline recommend not to perform the fold by default?
17853 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
17854 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
17855 return false;
17856 // Else, if this is a vector shift, prefer 'shl'.
17857 return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL;
17858}
17859
17860bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG,
17861 SDNode *N) const {
17862 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
17863 !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
17864 return false;
17865 return true;
17866}
17867
17868void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
17869 // Update IsSplitCSR in AArch64FunctionInfo.
17870 AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
17871 AFI->setIsSplitCSR(true);
17872}
17873
17874void AArch64TargetLowering::insertCopiesSplitCSR(
17875 MachineBasicBlock *Entry,
17876 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
17877 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
17878 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
17879 if (!IStart)
17880 return;
17881
17882 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
17883 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
17884 MachineBasicBlock::iterator MBBI = Entry->begin();
17885 for (const MCPhysReg *I = IStart; *I; ++I) {
17886 const TargetRegisterClass *RC = nullptr;
17887 if (AArch64::GPR64RegClass.contains(*I))
17888 RC = &AArch64::GPR64RegClass;
17889 else if (AArch64::FPR64RegClass.contains(*I))
17890 RC = &AArch64::FPR64RegClass;
17891 else
17892 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
17893
17894 Register NewVR = MRI->createVirtualRegister(RC);
17895 // Create copy from CSR to a virtual register.
17896 // FIXME: this currently does not emit CFI pseudo-instructions, it works
17897 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
17898 // nounwind. If we want to generalize this later, we may need to emit
17899 // CFI pseudo-instructions.
17900 assert(Entry->getParent()->getFunction().hasFnAttribute(
17901            Attribute::NoUnwind) &&
17902        "Function should be nounwind in insertCopiesSplitCSR!");
17903 Entry->addLiveIn(*I);
17904 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
17905 .addReg(*I);
17906
17907 // Insert the copy-back instructions right before the terminator.
17908 for (auto *Exit : Exits)
17909 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
17910 TII->get(TargetOpcode::COPY), *I)
17911 .addReg(NewVR);
17912 }
17913}
17914
17915bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
17916 // Integer division on AArch64 is expensive. However, when aggressively
17917 // optimizing for code size, we prefer to use a div instruction, as it is
17918 // usually smaller than the alternative sequence.
17919 // The exception to this is vector division. Since AArch64 doesn't have vector
17920 // integer division, leaving the division as-is is a loss even in terms of
17921 // size, because it will have to be scalarized, while the alternative code
17922 // sequence can be performed in vector form.
17923 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
17924 return OptSize && !VT.isVector();
17925}
17926
17927bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
17928 // We want inc-of-add for scalars and sub-of-not for vectors.
17929 return VT.isScalarInteger();
17930}
17931
17932bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
17933 return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
17934}
17935
17936unsigned
17937AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
17938 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
17939 return getPointerTy(DL).getSizeInBits();
17940
17941 return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
17942}
17943
17944void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
17945 MF.getFrameInfo().computeMaxCallFrameSize(MF);
17946 TargetLoweringBase::finalizeLowering(MF);
17947}
17948
17949// Unlike X86, we let frame lowering assign offsets to all catch objects.
17950bool AArch64TargetLowering::needsFixedCatchObjects() const {
17951 return false;
17952}
17953
17954bool AArch64TargetLowering::shouldLocalize(
17955 const MachineInstr &MI, const TargetTransformInfo *TTI) const {
17956 switch (MI.getOpcode()) {
17957 case TargetOpcode::G_GLOBAL_VALUE: {
17958 // On Darwin, TLS global vars get selected into function calls, which
17959 // we don't want localized, as they can get moved into the middle of
17960 // another call sequence.
17961 const GlobalValue &GV = *MI.getOperand(1).getGlobal();
17962 if (GV.isThreadLocal() && Subtarget->isTargetMachO())
17963 return false;
17964 break;
17965 }
17966 // If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as being
17967 // localizable.
17968 case AArch64::ADRP:
17969 case AArch64::G_ADD_LOW:
17970 return true;
17971 default:
17972 break;
17973 }
17974 return TargetLoweringBase::shouldLocalize(MI, TTI);
17975}
17976
17977bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
17978 if (isa<ScalableVectorType>(Inst.getType()))
17979 return true;
17980
17981 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
17982 if (isa<ScalableVectorType>(Inst.getOperand(i)->getType()))
17983 return true;
17984
17985 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
17986 if (isa<ScalableVectorType>(AI->getAllocatedType()))
17987 return true;
17988 }
17989
17990 return false;
17991}
17992
17993// Return the largest legal scalable vector type that matches VT's element type.
17994static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT) {
17995 assert(VT.isFixedLengthVector() &&
17996        DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
17997        "Expected legal fixed length vector!");
17998 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
17999 default:
18000 llvm_unreachable("unexpected element type for SVE container");
18001 case MVT::i8:
18002 return EVT(MVT::nxv16i8);
18003 case MVT::i16:
18004 return EVT(MVT::nxv8i16);
18005 case MVT::i32:
18006 return EVT(MVT::nxv4i32);
18007 case MVT::i64:
18008 return EVT(MVT::nxv2i64);
18009 case MVT::f16:
18010 return EVT(MVT::nxv8f16);
18011 case MVT::f32:
18012 return EVT(MVT::nxv4f32);
18013 case MVT::f64:
18014 return EVT(MVT::nxv2f64);
18015 }
18016}
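// Example of the mapping above: a legal fixed-length v8i32 gets the nxv4i32
// container, i.e. the scalable type holding the same element type at the
// minimum (128-bit) SVE vector granule; the fixed-length data then lives in
// the low portion of that container.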
18017
18018// Return a PTRUE with active lanes corresponding to the extent of VT.
18019static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
18020 EVT VT) {
18021 assert(VT.isFixedLengthVector() &&
18022        DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
18023        "Expected legal fixed length vector!");
18024
18025 unsigned PgPattern =
18026 getSVEPredPatternFromNumElements(VT.getVectorNumElements());
18027 assert(PgPattern && "Unexpected element count for SVE predicate");
18028
18029 // For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use
18030 // AArch64SVEPredPattern::all, which can enable the use of unpredicated
18031 // variants of instructions when available.
18032 const auto &Subtarget =
18033 static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
18034 unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
18035 unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
18036 if (MaxSVESize && MinSVESize == MaxSVESize &&
18037 MaxSVESize == VT.getSizeInBits())
18038 PgPattern = AArch64SVEPredPattern::all;
18039
18040 MVT MaskVT;
18041 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
18042 default:
18043 llvm_unreachable("unexpected element type for SVE predicate");
18044 case MVT::i8:
18045 MaskVT = MVT::nxv16i1;
18046 break;
18047 case MVT::i16:
18048 case MVT::f16:
18049 MaskVT = MVT::nxv8i1;
18050 break;
18051 case MVT::i32:
18052 case MVT::f32:
18053 MaskVT = MVT::nxv4i1;
18054 break;
18055 case MVT::i64:
18056 case MVT::f64:
18057 MaskVT = MVT::nxv2i1;
18058 break;
18059 }
18060
18061 return getPTrue(DAG, DL, MaskVT, PgPattern);
18062}
18063
18064static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
18065 EVT VT) {
18066 assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
18067        "Expected legal scalable vector!");
18068 auto PredTy = VT.changeVectorElementType(MVT::i1);
18069 return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
18070}
18071
18072static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT) {
18073 if (VT.isFixedLengthVector())
18074 return getPredicateForFixedLengthVector(DAG, DL, VT);
18075
18076 return getPredicateForScalableVector(DAG, DL, VT);
18077}
18078
18079// Grow V to consume an entire SVE register.
18080static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
18081 assert(VT.isScalableVector() &&
18082        "Expected to convert into a scalable vector!");
18083 assert(V.getValueType().isFixedLengthVector() &&
18084        "Expected a fixed length vector operand!");
18085 SDLoc DL(V);
18086 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
18087 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
18088}
18089
18090// Shrink V so it's just big enough to maintain a VT's worth of data.
18091static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
18092 assert(VT.isFixedLengthVector() &&
18093        "Expected to convert into a fixed length vector!");
18094 assert(V.getValueType().isScalableVector() &&
18095        "Expected a scalable vector operand!");
18096 SDLoc DL(V);
18097 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
18098 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
18099}
18100
18101// Convert all fixed length vector loads larger than NEON to masked_loads.
18102SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
18103 SDValue Op, SelectionDAG &DAG) const {
18104 auto Load = cast<LoadSDNode>(Op);
18105
18106 SDLoc DL(Op);
18107 EVT VT = Op.getValueType();
18108 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18109
18110 auto NewLoad = DAG.getMaskedLoad(
18111 ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
18112 getPredicateForFixedLengthVector(DAG, DL, VT), DAG.getUNDEF(ContainerVT),
18113 Load->getMemoryVT(), Load->getMemOperand(), Load->getAddressingMode(),
18114 Load->getExtensionType());
18115
18116 auto Result = convertFromScalableVector(DAG, VT, NewLoad);
18117 SDValue MergedValues[2] = {Result, Load->getChain()};
18118 return DAG.getMergeValues(MergedValues, DL);
18119}
18120
18121static SDValue convertFixedMaskToScalableVector(SDValue Mask,
18122 SelectionDAG &DAG) {
18123 SDLoc DL(Mask);
18124 EVT InVT = Mask.getValueType();
18125 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18126
18127 auto Op1 = convertToScalableVector(DAG, ContainerVT, Mask);
18128 auto Op2 = DAG.getConstant(0, DL, ContainerVT);
18129 auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
18130
18131 EVT CmpVT = Pg.getValueType();
18132 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
18133 {Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)});
18134}
18135
18136// Lower fixed length vector masked loads larger than NEON to SVE masked loads.
18137SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
18138 SDValue Op, SelectionDAG &DAG) const {
18139 auto Load = cast<MaskedLoadSDNode>(Op);
18140
18141 if (Load->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD)
18142 return SDValue();
18143
18144 SDLoc DL(Op);
18145 EVT VT = Op.getValueType();
18146 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18147
18148 SDValue Mask = convertFixedMaskToScalableVector(Load->getMask(), DAG);
18149
18150 SDValue PassThru;
18151 bool IsPassThruZeroOrUndef = false;
18152
18153 if (Load->getPassThru()->isUndef()) {
18154 PassThru = DAG.getUNDEF(ContainerVT);
18155 IsPassThruZeroOrUndef = true;
18156 } else {
18157 if (ContainerVT.isInteger())
18158 PassThru = DAG.getConstant(0, DL, ContainerVT);
18159 else
18160 PassThru = DAG.getConstantFP(0, DL, ContainerVT);
18161 if (isZerosVector(Load->getPassThru().getNode()))
18162 IsPassThruZeroOrUndef = true;
18163 }
18164
18165 auto NewLoad = DAG.getMaskedLoad(
18166 ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
18167 Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
18168 Load->getAddressingMode(), Load->getExtensionType());
18169
18170 if (!IsPassThruZeroOrUndef) {
18171 SDValue OldPassThru =
18172 convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
18173 NewLoad = DAG.getSelect(DL, ContainerVT, Mask, NewLoad, OldPassThru);
18174 }
18175
18176 auto Result = convertFromScalableVector(DAG, VT, NewLoad);
18177 SDValue MergedValues[2] = {Result, Load->getChain()};
18178 return DAG.getMergeValues(MergedValues, DL);
18179}
18180
18181// Convert all fixed length vector stores larger than NEON to masked_stores.
18182SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
18183 SDValue Op, SelectionDAG &DAG) const {
18184 auto Store = cast<StoreSDNode>(Op);
18185
18186 SDLoc DL(Op);
18187 EVT VT = Store->getValue().getValueType();
18188 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18189
18190 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
18191 return DAG.getMaskedStore(
18192 Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
18193 getPredicateForFixedLengthVector(DAG, DL, VT), Store->getMemoryVT(),
18194 Store->getMemOperand(), Store->getAddressingMode(),
18195 Store->isTruncatingStore());
18196}
18197
18198SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
18199 SDValue Op, SelectionDAG &DAG) const {
18200 auto Store = cast<MaskedStoreSDNode>(Op);
18201
18202 if (Store->isTruncatingStore())
18203 return SDValue();
18204
18205 SDLoc DL(Op);
18206 EVT VT = Store->getValue().getValueType();
18207 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18208
18209 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
18210 SDValue Mask = convertFixedMaskToScalableVector(Store->getMask(), DAG);
18211
18212 return DAG.getMaskedStore(
18213 Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
18214 Mask, Store->getMemoryVT(), Store->getMemOperand(),
18215 Store->getAddressingMode(), Store->isTruncatingStore());
18216}
18217
18218SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
18219 SDValue Op, SelectionDAG &DAG) const {
18220 SDLoc dl(Op);
18221 EVT VT = Op.getValueType();
18222 EVT EltVT = VT.getVectorElementType();
18223
18224 bool Signed = Op.getOpcode() == ISD::SDIV;
18225 unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
18226
18227 // Scalable vector i32/i64 DIV is supported.
18228 if (EltVT == MVT::i32 || EltVT == MVT::i64)
18229 return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
18230
18231 // Scalable vector i8/i16 DIV is not supported. Promote it to i32.
18232 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18233 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18234 EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
18235 EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);
18236
18237 // If this is not a full vector, extend, div, and truncate it.
18238 EVT WidenedVT = VT.widenIntegerVectorElementType(*DAG.getContext());
18239 if (DAG.getTargetLoweringInfo().isTypeLegal(WidenedVT)) {
18240 unsigned ExtendOpcode = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
18241 SDValue Op0 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(0));
18242 SDValue Op1 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(1));
18243 SDValue Div = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0, Op1);
18244 return DAG.getNode(ISD::TRUNCATE, dl, VT, Div);
18245 }
18246
18247 // Convert the operands to scalable vectors.
18248 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
18249 SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
18250
18251 // Extend the scalable operands.
18252 unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
18253 unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
18254 SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
18255 SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
18256 SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
18257 SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
18258
18259 // Convert back to fixed vectors so the DIV can be further lowered.
18260 Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
18261 Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
18262 Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
18263 Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
18264 SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
18265 Op0Lo, Op1Lo);
18266 SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
18267 Op0Hi, Op1Hi);
18268
18269 // Convert again to scalable vectors to truncate.
18270 ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
18271 ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
18272 SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
18273 ResultLo, ResultHi);
18274
18275 return convertFromScalableVector(DAG, VT, ScalableResult);
18276}
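// Rough worked example for the i8/i16 path above: a v16i8 sdiv is unpacked
// into low and high halves via SUNPKLO/SUNPKHI on the scalable container,
// each widened half is divided by recursively lowering the narrower
// fixed-length DIV (widening again until the supported i32 element type is
// reached), and the two quotients are concatenated back with UZP1 before
// being converted to the original fixed-length type.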
18277
18278SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
18279 SDValue Op, SelectionDAG &DAG) const {
18280 EVT VT = Op.getValueType();
18281 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18282
18283 SDLoc DL(Op);
18284 SDValue Val = Op.getOperand(0);
18285 EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
18286 Val = convertToScalableVector(DAG, ContainerVT, Val);
18287
18288 bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND;
18289 unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
18290
18291 // Repeatedly unpack Val until the result is of the desired element type.
18292 switch (ContainerVT.getSimpleVT().SimpleTy) {
18293 default:
18294 llvm_unreachable("unimplemented container type");
18295 case MVT::nxv16i8:
18296 Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
18297 if (VT.getVectorElementType() == MVT::i16)
18298 break;
18299 LLVM_FALLTHROUGH;
18300 case MVT::nxv8i16:
18301 Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
18302 if (VT.getVectorElementType() == MVT::i32)
18303 break;
18304 LLVM_FALLTHROUGH;
18305 case MVT::nxv4i32:
18306 Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
18307 assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!");
18308 break;
18309 }
18310
18311 return convertFromScalableVector(DAG, VT, Val);
18312}
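
The repeated unpack above is the vector analogue of extending an integer one element width at a time; extending in stages gives the same value as a single direct extension. A standalone scalar sketch (illustrative only, not part of the analyzed source):

#include <cassert>
#include <cstdint>

int main() {
  int8_t V = -5;
  int16_t S16 = V;   // first unpack stage: i8 -> i16
  int32_t S32 = S16; // second stage: i16 -> i32
  int64_t S64 = S32; // third stage: i32 -> i64
  assert(S64 == static_cast<int64_t>(V)); // same result as extending directly
  return 0;
}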
18313
18314SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
18315 SDValue Op, SelectionDAG &DAG) const {
18316 EVT VT = Op.getValueType();
18317 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18318
18319 SDLoc DL(Op);
18320 SDValue Val = Op.getOperand(0);
18321 EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
18322 Val = convertToScalableVector(DAG, ContainerVT, Val);
18323
18324 // Repeatedly truncate Val until the result is of the desired element type.
18325 switch (ContainerVT.getSimpleVT().SimpleTy) {
18326 default:
18327 llvm_unreachable("unimplemented container type");
18328 case MVT::nxv2i64:
18329 Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv4i32, Val);
18330 Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv4i32, Val, Val);
18331 if (VT.getVectorElementType() == MVT::i32)
18332 break;
18333 LLVM_FALLTHROUGH;
18334 case MVT::nxv4i32:
18335 Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv8i16, Val);
18336 Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv8i16, Val, Val);
18337 if (VT.getVectorElementType() == MVT::i16)
18338 break;
18339 LLVM_FALLTHROUGH;
18340 case MVT::nxv8i16:
18341 Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i8, Val);
18342 Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv16i8, Val, Val);
18343 assert(VT.getVectorElementType() == MVT::i8 && "Unexpected element type!");
18344 break;
18345 }
18346
18347 return convertFromScalableVector(DAG, VT, Val);
18348}
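
Likewise, the repeated UZP1 narrowing above matches truncating one element width at a time: each stage keeps only the low half of every element, so the staged result equals a direct truncation. A standalone scalar sketch (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t V = 0x1122334455667788ull;
  uint32_t T32 = static_cast<uint32_t>(V);   // first narrowing stage
  uint16_t T16 = static_cast<uint16_t>(T32); // second stage
  uint8_t T8 = static_cast<uint8_t>(T16);    // third stage
  assert(T8 == static_cast<uint8_t>(V));     // same low 8 bits as a direct truncate
  return 0;
}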
18349
18350SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
18351 SDValue Op, SelectionDAG &DAG) const {
18352 EVT VT = Op.getValueType();
18353 EVT InVT = Op.getOperand(0).getValueType();
18354 assert(InVT.isFixedLengthVector() && "Expected fixed length vector type!");
18355
18356 SDLoc DL(Op);
18357 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18358 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
18359
18360 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Op.getOperand(1));
18361}
18362
18363SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
18364 SDValue Op, SelectionDAG &DAG) const {
18365 EVT VT = Op.getValueType();
18366 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18367
18368 SDLoc DL(Op);
18369 EVT InVT = Op.getOperand(0).getValueType();
18370 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18371 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
18372
18373 auto ScalableRes = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Op0,
18374 Op.getOperand(1), Op.getOperand(2));
18375
18376 return convertFromScalableVector(DAG, VT, ScalableRes);
18377}
18378
18379// Convert vector operation 'Op' to an equivalent predicated operation whereby
18380// the original operation's type is used to construct a suitable predicate.
18381// NOTE: The results for inactive lanes are undefined.
18382SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
18383 SelectionDAG &DAG,
18384 unsigned NewOp,
18385 bool OverrideNEON) const {
18386 EVT VT = Op.getValueType();
18387 SDLoc DL(Op);
18388 auto Pg = getPredicateForVector(DAG, DL, VT);
18389
18390 if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {
18391 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18392
18393 // Create list of operands by converting existing ones to scalable types.
18394 SmallVector<SDValue, 4> Operands = {Pg};
18395 for (const SDValue &V : Op->op_values()) {
18396 if (isa<CondCodeSDNode>(V)) {
18397 Operands.push_back(V);
18398 continue;
18399 }
18400
18401 if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
18402 EVT VTArg = VTNode->getVT().getVectorElementType();
18403 EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg);
18404 Operands.push_back(DAG.getValueType(NewVTArg));
18405 continue;
18406 }
18407
18408 assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) &&
18409 "Only fixed length vectors are supported!");
18410 Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
18411 }
18412
18413 if (isMergePassthruOpcode(NewOp))
18414 Operands.push_back(DAG.getUNDEF(ContainerVT));
18415
18416 auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
18417 return convertFromScalableVector(DAG, VT, ScalableRes);
18418 }
18419
18420 assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
18421
18422 SmallVector<SDValue, 4> Operands = {Pg};
18423 for (const SDValue &V : Op->op_values()) {
18424 assert((!V.getValueType().isVector() ||
18425 V.getValueType().isScalableVector()) &&
18426 "Only scalable vectors are supported!");
18427 Operands.push_back(V);
18428 }
18429
18430 if (isMergePassthruOpcode(NewOp))
18431 Operands.push_back(DAG.getUNDEF(VT));
18432
18433 return DAG.getNode(NewOp, DL, VT, Operands);
18434}
18435
18436// If a fixed length vector operation has no side effects when applied to
18437// undefined elements, we can safely use scalable vectors to perform the same
18438// operation without needing to worry about predication.
18439SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
18440 SelectionDAG &DAG) const {
18441 EVT VT = Op.getValueType();
18442 assert(useSVEForFixedLengthVectorVT(VT) &&
18443 "Only expected to lower fixed length vector operation!");
18444 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18445
18446 // Create list of operands by converting existing ones to scalable types.
18447 SmallVector<SDValue, 4> Ops;
18448 for (const SDValue &V : Op->op_values()) {
18449 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
18450
18451 // Pass through non-vector operands.
18452 if (!V.getValueType().isVector()) {
18453 Ops.push_back(V);
18454 continue;
18455 }
18456
18457 // "Cast" the fixed length vector to a scalable vector.
18458 assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
18459 "Only fixed length vectors are supported!");
18460 Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
18461 }
18462
18463 auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
18464 return convertFromScalableVector(DAG, VT, ScalableRes);
18465}
18466
18467SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
18468 SelectionDAG &DAG) const {
18469 SDLoc DL(ScalarOp);
18470 SDValue AccOp = ScalarOp.getOperand(0);
18471 SDValue VecOp = ScalarOp.getOperand(1);
18472 EVT SrcVT = VecOp.getValueType();
18473 EVT ResVT = SrcVT.getVectorElementType();
18474
18475 EVT ContainerVT = SrcVT;
18476 if (SrcVT.isFixedLengthVector()) {
18477 ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
18478 VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
18479 }
18480
18481 SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
18482 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
18483
18484 // Convert operands to Scalable.
18485 AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT,
18486 DAG.getUNDEF(ContainerVT), AccOp, Zero);
18487
18488 // Perform reduction.
18489 SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
18490 Pg, AccOp, VecOp);
18491
18492 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
18493}
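
FADDA performs a strictly ordered floating-point reduction: it folds the lanes left to right, starting from the incoming accumulator, which is why the code above places the accumulator in lane 0 of a scalable vector before emitting FADDA_PRED. A standalone scalar sketch of that ordering (illustrative only):

#include <array>
#include <cassert>

int main() {
  float Acc = 1.0f;
  std::array<float, 4> Vec = {0.5f, 0.25f, 0.125f, 0.0625f};
  for (float E : Vec)
    Acc += E; // one lane at a time, in lane order
  assert(Acc == 1.9375f); // every value is exact in float, so the sum is exact
  return 0;
}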
18494
18495SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
18496 SelectionDAG &DAG) const {
18497 SDLoc DL(ReduceOp);
18498 SDValue Op = ReduceOp.getOperand(0);
18499 EVT OpVT = Op.getValueType();
18500 EVT VT = ReduceOp.getValueType();
18501
18502 if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
18503 return SDValue();
18504
18505 SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
18506
18507 switch (ReduceOp.getOpcode()) {
18508 default:
18509 return SDValue();
18510 case ISD::VECREDUCE_OR:
18511 return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
18512 case ISD::VECREDUCE_AND: {
18513 Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
18514 return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
18515 }
18516 case ISD::VECREDUCE_XOR: {
18517 SDValue ID =
18518 DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
18519 SDValue Cntp =
18520 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
18521 return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
18522 }
18523 }
18524
18525 return SDValue();
18526}
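
The VECREDUCE_XOR case above counts the active true lanes with CNTP and then converts the count to the result type; that is enough because XOR-reducing booleans yields the parity of the number of true values, i.e. the low bit of the count. A standalone sketch (illustrative only):

#include <array>
#include <cassert>

int main() {
  std::array<bool, 8> Pred = {true, false, true, true, false, false, true, false};
  bool XorReduce = false;
  unsigned Count = 0;
  for (bool B : Pred) {
    XorReduce ^= B; // what VECREDUCE_XOR computes
    Count += B;     // what CNTP counts
  }
  assert(XorReduce == ((Count & 1u) != 0)); // XOR-reduction equals count parity
  return 0;
}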
18527
18528SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
18529 SDValue ScalarOp,
18530 SelectionDAG &DAG) const {
18531 SDLoc DL(ScalarOp);
18532 SDValue VecOp = ScalarOp.getOperand(0);
18533 EVT SrcVT = VecOp.getValueType();
18534
18535 if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
18536 EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
18537 VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
18538 }
18539
18540 // UADDV always returns an i64 result.
18541 EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
18542 SrcVT.getVectorElementType();
18543 EVT RdxVT = SrcVT;
18544 if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
18545 RdxVT = getPackedSVEVectorVT(ResVT);
18546
18547 SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
18548 SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
18549 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
18550 Rdx, DAG.getConstant(0, DL, MVT::i64));
18551
18552 // The VEC_REDUCE nodes expect an element-sized result.
18553 if (ResVT != ScalarOp.getValueType())
18554 Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());
18555
18556 return Res;
18557}
18558
18559SDValue
18560AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
18561 SelectionDAG &DAG) const {
18562 EVT VT = Op.getValueType();
18563 SDLoc DL(Op);
18564
18565 EVT InVT = Op.getOperand(1).getValueType();
18566 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18567 SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
18568 SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
18569
18570 // Convert the mask to a predicate (NOTE: We don't need to worry about
18571 // inactive lanes since VSELECT is safe when given undefined elements).
18572 EVT MaskVT = Op.getOperand(0).getValueType();
18573 EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT);
18574 auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0));
18575 Mask = DAG.getNode(ISD::TRUNCATE, DL,
18576 MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
18577
18578 auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
18579 Mask, Op1, Op2);
18580
18581 return convertFromScalableVector(DAG, VT, ScalableRes);
18582}
18583
18584SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
18585 SDValue Op, SelectionDAG &DAG) const {
18586 SDLoc DL(Op);
18587 EVT InVT = Op.getOperand(0).getValueType();
18588 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18589
18590 assert(useSVEForFixedLengthVectorVT(InVT) &&
18591 "Only expected to lower fixed length vector operation!");
18592 assert(Op.getValueType() == InVT.changeTypeToInteger() &&
18593 "Expected integer result of the same bit length as the inputs!");
18594
18595 auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
18596 auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
18597 auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
18598
18599 EVT CmpVT = Pg.getValueType();
18600 auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
18601 {Pg, Op1, Op2, Op.getOperand(2)});
18602
18603 EVT PromoteVT = ContainerVT.changeTypeToInteger();
18604 auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
18605 return convertFromScalableVector(DAG, Op.getValueType(), Promote);
18606}
18607
18608SDValue
18609AArch64TargetLowering::LowerFixedLengthBitcastToSVE(SDValue Op,
18610 SelectionDAG &DAG) const {
18611 SDLoc DL(Op);
18612 auto SrcOp = Op.getOperand(0);
18613 EVT VT = Op.getValueType();
18614 EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
18615 EVT ContainerSrcVT =
18616 getContainerForFixedLengthVector(DAG, SrcOp.getValueType());
18617
18618 SrcOp = convertToScalableVector(DAG, ContainerSrcVT, SrcOp);
18619 Op = DAG.getNode(ISD::BITCAST, DL, ContainerDstVT, SrcOp);
18620 return convertFromScalableVector(DAG, VT, Op);
18621}
18622
18623SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
18624 SDValue Op, SelectionDAG &DAG) const {
18625 SDLoc DL(Op);
18626 unsigned NumOperands = Op->getNumOperands();
18627
18628 assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
18629 "Unexpected number of operands in CONCAT_VECTORS");
18630
18631 auto SrcOp1 = Op.getOperand(0);
18632 auto SrcOp2 = Op.getOperand(1);
18633 EVT VT = Op.getValueType();
18634 EVT SrcVT = SrcOp1.getValueType();
18635
18636 if (NumOperands > 2) {
18637 SmallVector<SDValue, 4> Ops;
18638 EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
18639 for (unsigned I = 0; I < NumOperands; I += 2)
18640 Ops.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, PairVT,
18641 Op->getOperand(I), Op->getOperand(I + 1)));
18642
18643 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
18644 }
18645
18646 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18647
18648 SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, SrcVT);
18649 SrcOp1 = convertToScalableVector(DAG, ContainerVT, SrcOp1);
18650 SrcOp2 = convertToScalableVector(DAG, ContainerVT, SrcOp2);
18651
18652 Op = DAG.getNode(AArch64ISD::SPLICE, DL, ContainerVT, Pg, SrcOp1, SrcOp2);
18653
18654 return convertFromScalableVector(DAG, VT, Op);
18655}
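
With more than two operands, the code above concatenates adjacent pairs first and lets the resulting two-operand CONCAT_VECTORS nodes be lowered recursively; the final element order is unchanged. A standalone sketch of the pairwise strategy (illustrative only; concat is a made-up helper):

#include <cassert>
#include <vector>

static std::vector<int> concat(const std::vector<int> &A, const std::vector<int> &B) {
  std::vector<int> R(A);
  R.insert(R.end(), B.begin(), B.end());
  return R;
}

int main() {
  std::vector<int> Ops[4] = {{0, 1}, {2, 3}, {4, 5}, {6, 7}};
  std::vector<int> Lo = concat(Ops[0], Ops[1]); // pair up adjacent operands
  std::vector<int> Hi = concat(Ops[2], Ops[3]);
  std::vector<int> All = concat(Lo, Hi);        // then combine the pair results
  assert(All == (std::vector<int>{0, 1, 2, 3, 4, 5, 6, 7}));
  return 0;
}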
18656
18657SDValue
18658AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(SDValue Op,
18659 SelectionDAG &DAG) const {
18660 EVT VT = Op.getValueType();
18661 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18662
18663 SDLoc DL(Op);
18664 SDValue Val = Op.getOperand(0);
18665 SDValue Pg = getPredicateForVector(DAG, DL, VT);
18666 EVT SrcVT = Val.getValueType();
18667 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18668 EVT ExtendVT = ContainerVT.changeVectorElementType(
18669 SrcVT.getVectorElementType());
18670
18671 Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
18672 Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT.changeTypeToInteger(), Val);
18673
18674 Val = convertToScalableVector(DAG, ContainerVT.changeTypeToInteger(), Val);
18675 Val = getSVESafeBitCast(ExtendVT, Val, DAG);
18676 Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
18677 Pg, Val, DAG.getUNDEF(ContainerVT));
18678
18679 return convertFromScalableVector(DAG, VT, Val);
18680}
18681
18682SDValue
18683AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
18684 SelectionDAG &DAG) const {
18685 EVT VT = Op.getValueType();
18686 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18687
18688 SDLoc DL(Op);
18689 SDValue Val = Op.getOperand(0);
18690 EVT SrcVT = Val.getValueType();
18691 EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
18692 EVT RoundVT = ContainerSrcVT.changeVectorElementType(
18693 VT.getVectorElementType());
18694 SDValue Pg = getPredicateForVector(DAG, DL, RoundVT);
18695
18696 Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18697 Val = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, RoundVT, Pg, Val,
18698 Op.getOperand(1), DAG.getUNDEF(RoundVT));
18699 Val = getSVESafeBitCast(ContainerSrcVT.changeTypeToInteger(), Val, DAG);
18700 Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
18701
18702 Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
18703 return DAG.getNode(ISD::BITCAST, DL, VT, Val);
18704}
18705
18706SDValue
18707AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
18708 SelectionDAG &DAG) const {
18709 EVT VT = Op.getValueType();
18710 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18711
18712 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;
18713 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
18714 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
18715
18716 SDLoc DL(Op);
18717 SDValue Val = Op.getOperand(0);
18718 EVT SrcVT = Val.getValueType();
18719 EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
18720 EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
18721
18722 if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
18723 ContainerDstVT.getVectorElementType().getSizeInBits()) {
18724 SDValue Pg = getPredicateForVector(DAG, DL, VT);
18725
18726 Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
18727 VT.changeTypeToInteger(), Val);
18728
18729 Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18730 Val = getSVESafeBitCast(ContainerDstVT.changeTypeToInteger(), Val, DAG);
18731 // Safe to use a larger than specified operand since we just unpacked the
18732 // data, hence the upper bits are zero.
18733 Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
18734 DAG.getUNDEF(ContainerDstVT));
18735 return convertFromScalableVector(DAG, VT, Val);
18736 } else {
18737 EVT CvtVT = ContainerSrcVT.changeVectorElementType(
18738 ContainerDstVT.getVectorElementType());
18739 SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
18740
18741 Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18742 Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
18743 Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
18744 Val = convertFromScalableVector(DAG, SrcVT, Val);
18745
18746 Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
18747 return DAG.getNode(ISD::BITCAST, DL, VT, Val);
18748 }
18749}
18750
18751SDValue
18752AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
18753 SelectionDAG &DAG) const {
18754 EVT VT = Op.getValueType();
18755 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18756
18757 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
18758 unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
18759 : AArch64ISD::FCVTZU_MERGE_PASSTHRU;
18760
18761 SDLoc DL(Op);
18762 SDValue Val = Op.getOperand(0);
18763 EVT SrcVT = Val.getValueType();
18764 EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
18765 EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
18766
18767 if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
18768 ContainerDstVT.getVectorElementType().getSizeInBits()) {
18769 EVT CvtVT = ContainerDstVT.changeVectorElementType(
18770 ContainerSrcVT.getVectorElementType());
18771 SDValue Pg = getPredicateForVector(DAG, DL, VT);
18772
18773 Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
18774 Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val);
18775
18776 Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18777 Val = getSVESafeBitCast(CvtVT, Val, DAG);
18778 Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
18779 DAG.getUNDEF(ContainerDstVT));
18780 return convertFromScalableVector(DAG, VT, Val);
18781 } else {
18782 EVT CvtVT = ContainerSrcVT.changeTypeToInteger();
18783 SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
18784
18785 // Safe to use a larger than specified result since an fp_to_int where the
18786 // result doesn't fit into the destination is undefined.
18787 Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18788 Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
18789 Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
18790
18791 return DAG.getNode(ISD::TRUNCATE, DL, VT, Val);
18792 }
18793}
18794
18795SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
18796 SDValue Op, SelectionDAG &DAG) const {
18797 EVT VT = Op.getValueType();
18798 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18799
18800 auto *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
18801 auto ShuffleMask = SVN->getMask();
18802
18803 SDLoc DL(Op);
18804 SDValue Op1 = Op.getOperand(0);
18805 SDValue Op2 = Op.getOperand(1);
18806
18807 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18808 Op1 = convertToScalableVector(DAG, ContainerVT, Op1);
18809 Op2 = convertToScalableVector(DAG, ContainerVT, Op2);
18810
18811 bool ReverseEXT = false;
18812 unsigned Imm;
18813 if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
18814 Imm == VT.getVectorNumElements() - 1) {
18815 if (ReverseEXT)
18816 std::swap(Op1, Op2);
18817
18818 EVT ScalarTy = VT.getVectorElementType();
18819 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
18820 ScalarTy = MVT::i32;
18821 SDValue Scalar = DAG.getNode(
18822 ISD::EXTRACT_VECTOR_ELT, DL, ScalarTy, Op1,
18823 DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64));
18824 Op = DAG.getNode(AArch64ISD::INSR, DL, ContainerVT, Op2, Scalar);
18825 return convertFromScalableVector(DAG, VT, Op);
18826 }
18827
18828 return SDValue();
18829}
18830
18831SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
18832 SelectionDAG &DAG) const {
18833 SDLoc DL(Op);
18834 EVT InVT = Op.getValueType();
18835 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18836 (void)TLI;
18837
18838 assert(VT.isScalableVector() && TLI.isTypeLegal(VT) &&
18839 InVT.isScalableVector() && TLI.isTypeLegal(InVT) &&
18840 "Only expect to cast between legal scalable vector types!");
18841 assert((VT.getVectorElementType() == MVT::i1) ==
18842 (InVT.getVectorElementType() == MVT::i1) &&
18843 "Cannot cast between data and predicate scalable vector types!");
18844
18845 if (InVT == VT)
18846 return Op;
18847
18848 if (VT.getVectorElementType() == MVT::i1)
18849 return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
18850
18851 EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
18852 EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
18853
18854 // Pack input if required.
18855 if (InVT != PackedInVT)
18856 Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
18857
18858 Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
18859
18860 // Unpack result if required.
18861 if (VT != PackedVT)
18862 Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
18863
18864 return Op;
18865}
18866
18867bool AArch64TargetLowering::isAllActivePredicate(SDValue N) const {
18868 return ::isAllActivePredicate(N);
18869}
18870
18871EVT AArch64TargetLowering::getPromotedVTForPredicate(EVT VT) const {
18872 return ::getPromotedVTForPredicate(VT);
18873}
18874
18875bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
18876 SDValue Op, const APInt &OriginalDemandedBits,
18877 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
18878 unsigned Depth) const {
18879
18880 unsigned Opc = Op.getOpcode();
18881 switch (Opc) {
18882 case AArch64ISD::VSHL: {
18883 // Match (VSHL (VLSHR Val X) X)
18884 SDValue ShiftL = Op;
18885 SDValue ShiftR = Op->getOperand(0);
18886 if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
18887 return false;
18888
18889 if (!ShiftL.hasOneUse() || !ShiftR.hasOneUse())
18890 return false;
18891
18892 unsigned ShiftLBits = ShiftL->getConstantOperandVal(1);
18893 unsigned ShiftRBits = ShiftR->getConstantOperandVal(1);
18894
18895 // Mismatched shift amounts could be handled as well, but this is not
18896 // implemented.
18897 if (ShiftRBits != ShiftLBits)
18898 return false;
18899
18900 unsigned ScalarSize = Op.getScalarValueSizeInBits();
18901 assert(ScalarSize > ShiftLBits && "Invalid shift imm");
18902
18903 APInt ZeroBits = APInt::getLowBitsSet(ScalarSize, ShiftLBits);
18904 APInt UnusedBits = ~OriginalDemandedBits;
18905
18906 if ((ZeroBits & UnusedBits) != ZeroBits)
18907 return false;
18908
18909 // All bits that are zeroed by (VSHL (VLSHR Val X) X) are not
18910 // used - simplify to just Val.
18911 return TLO.CombineTo(Op, ShiftR->getOperand(0));
18912 }
18913 }
18914
18915 return TargetLowering::SimplifyDemandedBitsForTargetNode(
18916 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
18917}
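
The VSHL/VLSHR combine above relies on the fact that (Val >> N) << N only zeroes the low N bits of Val; if the caller never demands those bits, the shift pair is a no-op and can be replaced by Val. A standalone sketch of that bit-level argument (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned N = 3;
  uint32_t Val = 0xDEADBEEFu;
  uint32_t Shifted = (Val >> N) << N;        // clears only the low N bits
  uint32_t DemandedMask = ~((1u << N) - 1u); // the caller ignores the low N bits
  assert((Shifted & DemandedMask) == (Val & DemandedMask)); // indistinguishable from Val
  return 0;
}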
18918
18919bool AArch64TargetLowering::isConstantUnsignedBitfieldExtactLegal(
18920 unsigned Opc, LLT Ty1, LLT Ty2) const {
18921 return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
18922}

/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/Casting.h

1//===- llvm/Support/Casting.h - Allow flexible, checked, casts --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the isa<X>(), cast<X>(), dyn_cast<X>(), cast_or_null<X>(),
10// and dyn_cast_or_null<X>() templates.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_SUPPORT_CASTING_H
15#define LLVM_SUPPORT_CASTING_H
16
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/type_traits.h"
19#include <cassert>
20#include <memory>
21#include <type_traits>
22
23namespace llvm {
24
25//===----------------------------------------------------------------------===//
26// isa<x> Support Templates
27//===----------------------------------------------------------------------===//
28
29// Define a template that can be specialized by smart pointers to reflect the
30// fact that they are automatically dereferenced, and are not involved with the
31// template selection process... the default implementation is a noop.
32//
33template<typename From> struct simplify_type {
34 using SimpleType = From; // The real type this represents...
35
36 // An accessor to get the real value...
37 static SimpleType &getSimplifiedValue(From &Val) { return Val; }
38};
39
40template<typename From> struct simplify_type<const From> {
41 using NonConstSimpleType = typename simplify_type<From>::SimpleType;
42 using SimpleType =
43 typename add_const_past_pointer<NonConstSimpleType>::type;
44 using RetType =
45 typename add_lvalue_reference_if_not_pointer<SimpleType>::type;
46
47 static RetType getSimplifiedValue(const From& Val) {
48 return simplify_type<From>::getSimplifiedValue(const_cast<From&>(Val));
49 }
50};
51
52// The core of the implementation of isa<X> is here; To and From should be
53// the names of classes. This template can be specialized to customize the
54// implementation of isa<> without rewriting it from scratch.
55template <typename To, typename From, typename Enabler = void>
56struct isa_impl {
57 static inline bool doit(const From &Val) {
58 return To::classof(&Val);
59 }
60};
61
62/// Always allow upcasts, and perform no dynamic check for them.
63template <typename To, typename From>
64struct isa_impl<To, From, std::enable_if_t<std::is_base_of<To, From>::value>> {
65 static inline bool doit(const From &) { return true; }
66};
67
68template <typename To, typename From> struct isa_impl_cl {
69 static inline bool doit(const From &Val) {
70 return isa_impl<To, From>::doit(Val);
71 }
72};
73
74template <typename To, typename From> struct isa_impl_cl<To, const From> {
75 static inline bool doit(const From &Val) {
76 return isa_impl<To, From>::doit(Val);
77 }
78};
79
80template <typename To, typename From>
81struct isa_impl_cl<To, const std::unique_ptr<From>> {
82 static inline bool doit(const std::unique_ptr<From> &Val) {
83 assert(Val && "isa<> used on a null pointer");
84 return isa_impl_cl<To, From>::doit(*Val);
85 }
86};
87
88template <typename To, typename From> struct isa_impl_cl<To, From*> {
89 static inline bool doit(const From *Val) {
90 assert(Val && "isa<> used on a null pointer");
91 return isa_impl<To, From>::doit(*Val);
92 }
93};
94
95template <typename To, typename From> struct isa_impl_cl<To, From*const> {
96 static inline bool doit(const From *Val) {
97 assert(Val && "isa<> used on a null pointer");
98 return isa_impl<To, From>::doit(*Val);
99 }
100};
101
102template <typename To, typename From> struct isa_impl_cl<To, const From*> {
103 static inline bool doit(const From *Val) {
104 assert(Val && "isa<> used on a null pointer");
105 return isa_impl<To, From>::doit(*Val);
106 }
107};
108
109template <typename To, typename From> struct isa_impl_cl<To, const From*const> {
110 static inline bool doit(const From *Val) {
111 assert(Val && "isa<> used on a null pointer");
112 return isa_impl<To, From>::doit(*Val);
113 }
114};
115
116template<typename To, typename From, typename SimpleFrom>
117struct isa_impl_wrap {
118 // When From != SimplifiedType, we can simplify the type some more by using
119 // the simplify_type template.
120 static bool doit(const From &Val) {
121 return isa_impl_wrap<To, SimpleFrom,
122 typename simplify_type<SimpleFrom>::SimpleType>::doit(
123 simplify_type<const From>::getSimplifiedValue(Val));
124 }
125};
126
127template<typename To, typename FromTy>
128struct isa_impl_wrap<To, FromTy, FromTy> {
129 // When From == SimpleType, we are as simple as we are going to get.
130 static bool doit(const FromTy &Val) {
131 return isa_impl_cl<To,FromTy>::doit(Val);
132 }
133};
134
135// isa<X> - Return true if the parameter to the template is an instance of one
136// of the template type arguments. Used like this:
137//
138// if (isa<Type>(myVal)) { ... }
139// if (isa<Type0, Type1, Type2>(myVal)) { ... }
140//
141 template <class X, class Y> LLVM_NODISCARD inline bool isa(const Y &Val) {
142 return isa_impl_wrap<X, const Y,
143 typename simplify_type<const Y>::SimpleType>::doit(Val);
144}
145
146template <typename First, typename Second, typename... Rest, typename Y>
147 LLVM_NODISCARD inline bool isa(const Y &Val) {
148 return isa<First>(Val) || isa<Second, Rest...>(Val);
149}
150
151// isa_and_nonnull<X> - Functionally identical to isa, except that a null value
152// is accepted.
153//
154template <typename... X, class Y>
155 LLVM_NODISCARD inline bool isa_and_nonnull(const Y &Val) {
156 if (!Val)
157 return false;
158 return isa<X...>(Val);
159}
160
161//===----------------------------------------------------------------------===//
162// cast<x> Support Templates
163//===----------------------------------------------------------------------===//
164
165template<class To, class From> struct cast_retty;
166
167// Calculate what type the 'cast' function should return, based on a requested
168// type of To and a source type of From.
169template<class To, class From> struct cast_retty_impl {
170 using ret_type = To &; // Normal case, return Ty&
171};
172template<class To, class From> struct cast_retty_impl<To, const From> {
173 using ret_type = const To &; // Normal case, return Ty&
174};
175
176template<class To, class From> struct cast_retty_impl<To, From*> {
177 using ret_type = To *; // Pointer arg case, return Ty*
178};
179
180template<class To, class From> struct cast_retty_impl<To, const From*> {
181 using ret_type = const To *; // Constant pointer arg case, return const Ty*
182};
183
184template<class To, class From> struct cast_retty_impl<To, const From*const> {
185 using ret_type = const To *; // Constant pointer arg case, return const Ty*
186};
187
188template <class To, class From>
189struct cast_retty_impl<To, std::unique_ptr<From>> {
190private:
191 using PointerType = typename cast_retty_impl<To, From *>::ret_type;
192 using ResultType = std::remove_pointer_t<PointerType>;
193
194public:
195 using ret_type = std::unique_ptr<ResultType>;
196};
197
198template<class To, class From, class SimpleFrom>
199struct cast_retty_wrap {
200 // When the simplified type and the from type are not the same, use the type
201 // simplifier to reduce the type, then reuse cast_retty_impl to get the
202 // resultant type.
203 using ret_type = typename cast_retty<To, SimpleFrom>::ret_type;
204};
205
206template<class To, class FromTy>
207struct cast_retty_wrap<To, FromTy, FromTy> {
208 // When the simplified type is equal to the from type, use it directly.
209 using ret_type = typename cast_retty_impl<To,FromTy>::ret_type;
210};
211
212template<class To, class From>
213struct cast_retty {
214 using ret_type = typename cast_retty_wrap<
215 To, From, typename simplify_type<From>::SimpleType>::ret_type;
216};
217
218// Ensure the non-simple values are converted using the simplify_type template
219// that may be specialized by smart pointers...
220//
221template<class To, class From, class SimpleFrom> struct cast_convert_val {
222 // This is not a simple type, use the template to simplify it...
223 static typename cast_retty<To, From>::ret_type doit(From &Val) {
224 return cast_convert_val<To, SimpleFrom,
17
Returning without writing to 'Val.Node'
225 typename simplify_type<SimpleFrom>::SimpleType>::doit(
226 simplify_type<From>::getSimplifiedValue(Val));
14
Calling 'simplify_type::getSimplifiedValue'
16
Returning from 'simplify_type::getSimplifiedValue'
227 }
228};
229
230template<class To, class FromTy> struct cast_convert_val<To,FromTy,FromTy> {
231 // This _is_ a simple type, just cast it.
232 static typename cast_retty<To, FromTy>::ret_type doit(const FromTy &Val) {
233 typename cast_retty<To, FromTy>::ret_type Res2
234 = (typename cast_retty<To, FromTy>::ret_type)const_cast<FromTy&>(Val);
235 return Res2;
236 }
237};
238
239template <class X> struct is_simple_type {
240 static const bool value =
241 std::is_same<X, typename simplify_type<X>::SimpleType>::value;
242};
243
244// cast<X> - Return the argument parameter cast to the specified type. This
245// casting operator asserts that the type is correct, so it does not return null
246// on failure. It does not allow a null argument (use cast_or_null for that).
247// It is typically used like this:
248//
249// cast<Instruction>(myVal)->getParent()
250//
251template <class X, class Y>
252inline std::enable_if_t<!is_simple_type<Y>::value,
253 typename cast_retty<X, const Y>::ret_type>
254cast(const Y &Val) {
255 assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
256 return cast_convert_val<
257 X, const Y, typename simplify_type<const Y>::SimpleType>::doit(Val);
258}
259
260template <class X, class Y>
261inline typename cast_retty<X, Y>::ret_type cast(Y &Val) {
262 assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
11
Assuming 'Val' is a 'ConstantSDNode'
12
'?' condition is true
263 return cast_convert_val<X, Y,
13
Calling 'cast_convert_val::doit'
18
Returning from 'cast_convert_val::doit'
19
Returning without writing to 'Val.Node'
264 typename simplify_type<Y>::SimpleType>::doit(Val);
265}
266
267template <class X, class Y>
268inline typename cast_retty<X, Y *>::ret_type cast(Y *Val) {
269 assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
270 return cast_convert_val<X, Y*,
271 typename simplify_type<Y*>::SimpleType>::doit(Val);
272}
273
274template <class X, class Y>
275inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type
276cast(std::unique_ptr<Y> &&Val) {
277 assert(isa<X>(Val.get()) && "cast<Ty>() argument of incompatible type!");
278 using ret_type = typename cast_retty<X, std::unique_ptr<Y>>::ret_type;
279 return ret_type(
280 cast_convert_val<X, Y *, typename simplify_type<Y *>::SimpleType>::doit(
281 Val.release()));
282}
283
284// cast_or_null<X> - Functionally identical to cast, except that a null value is
285// accepted.
286//
287template <class X, class Y>
288 LLVM_NODISCARD inline std::enable_if_t<
289 !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type>
290cast_or_null(const Y &Val) {
291 if (!Val)
292 return nullptr;
293 assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
294 return cast<X>(Val);
295}
296
297template <class X, class Y>
298 LLVM_NODISCARD inline std::enable_if_t<!is_simple_type<Y>::value,
299 typename cast_retty<X, Y>::ret_type>
300cast_or_null(Y &Val) {
301 if (!Val)
302 return nullptr;
303 assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
304 return cast<X>(Val);
305}
306
307template <class X, class Y>
308 LLVM_NODISCARD inline typename cast_retty<X, Y *>::ret_type
309cast_or_null(Y *Val) {
310 if (!Val) return nullptr;
311 assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
312 return cast<X>(Val);
313}
314
315template <class X, class Y>
316inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type
317cast_or_null(std::unique_ptr<Y> &&Val) {
318 if (!Val)
319 return nullptr;
320 return cast<X>(std::move(Val));
321}
322
323// dyn_cast<X> - Return the argument parameter cast to the specified type. This
324// casting operator returns null if the argument is of the wrong type, so it can
325// be used to test for a type as well as cast if successful. This should be
326// used in the context of an if statement like this:
327//
328// if (const Instruction *I = dyn_cast<Instruction>(myVal)) { ... }
329//
330
331template <class X, class Y>
332 LLVM_NODISCARD inline std::enable_if_t<
333 !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type>
334dyn_cast(const Y &Val) {
335 return isa<X>(Val) ? cast<X>(Val) : nullptr;
336}
337
338template <class X, class Y>
339LLVM_NODISCARD inline typename cast_retty<X, Y>::ret_type dyn_cast(Y &Val) {
340 return isa<X>(Val) ? cast<X>(Val) : nullptr;
8. Assuming 'Val' is a 'ConstantSDNode'
9. '?' condition is true
10. Calling 'cast<llvm::ConstantSDNode, llvm::SDValue>'
20. Returning from 'cast<llvm::ConstantSDNode, llvm::SDValue>'
21. Returning without writing to 'Val.Node'
341}
342
343template <class X, class Y>
344LLVM_NODISCARD inline typename cast_retty<X, Y *>::ret_type dyn_cast(Y *Val) {
345 return isa<X>(Val) ? cast<X>(Val) : nullptr;
346}
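
Illustrative usage sketch (not part of the analyzed source; the helper is hypothetical): unlike cast, dyn_cast reports a type mismatch by returning null rather than asserting, so its result must always be tested before any member is dereferenced. A missing test of that kind is the pattern the diagnostic at the end of this report is about.

#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// A null result means "not a LoadInst"; the if-check is what makes the
// dereference safe.
static bool isVolatileLoad(const Value *V) {
  if (const auto *LI = dyn_cast<LoadInst>(V))
    return LI->isVolatile();
  return false;
}
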
347
348// dyn_cast_or_null<X> - Functionally identical to dyn_cast, except that a null
349// value is accepted.
350//
351template <class X, class Y>
352LLVM_NODISCARD inline std::enable_if_t<
353 !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type>
354dyn_cast_or_null(const Y &Val) {
355 return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
356}
357
358template <class X, class Y>
359LLVM_NODISCARD inline std::enable_if_t<!is_simple_type<Y>::value,
360 typename cast_retty<X, Y>::ret_type>
361dyn_cast_or_null(Y &Val) {
362 return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
363}
364
365template <class X, class Y>
366LLVM_NODISCARD inline typename cast_retty<X, Y *>::ret_type
367dyn_cast_or_null(Y *Val) {
368 return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
369}
370
371// unique_dyn_cast<X> - Given a unique_ptr<Y>, try to return a unique_ptr<X>,
372// taking ownership of the input pointer iff isa<X>(Val) is true. If the
373// cast is successful, From refers to nullptr on exit and the casted value
374// is returned. If the cast is unsuccessful, the function returns nullptr
375// and From is unchanged.
376template <class X, class Y>
377LLVM_NODISCARD inline auto unique_dyn_cast(std::unique_ptr<Y> &Val)
378 -> decltype(cast<X>(Val)) {
379 if (!isa<X>(Val))
380 return nullptr;
381 return cast<X>(std::move(Val));
382}
383
384template <class X, class Y>
385LLVM_NODISCARD inline auto unique_dyn_cast(std::unique_ptr<Y> &&Val) {
386 return unique_dyn_cast<X, Y>(Val);
387}
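
Illustrative sketch (not part of the analyzed source; the Shape/Circle hierarchy is invented for this example): unique_dyn_cast transfers ownership only when the isa<X> test succeeds, so on failure the caller's unique_ptr still owns the object, exactly as the comment above states.

#include "llvm/Support/Casting.h"
#include <memory>
using namespace llvm;

// Minimal hypothetical hierarchy using LLVM-style classof.
struct Shape {
  enum Kind { SK_Circle, SK_Square } TheKind;
  Shape(Kind K) : TheKind(K) {}
  virtual ~Shape() = default;
};
struct Circle : Shape {
  Circle() : Shape(SK_Circle) {}
  static bool classof(const Shape *S) { return S->TheKind == SK_Circle; }
};

void demo() {
  std::unique_ptr<Shape> S = std::make_unique<Circle>();
  // The isa<Circle> test succeeds, so ownership moves: afterwards S is null
  // and C owns the object. Had S held a Square, C would be null and S would
  // be left untouched.
  std::unique_ptr<Circle> C = unique_dyn_cast<Circle>(S);
  (void)C; // silence unused-variable warnings in this sketch
}
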
388
389// unique_dyn_cast_or_null<X> - Functionally identical to unique_dyn_cast,
390// except that a null value is accepted.
391template <class X, class Y>
392LLVM_NODISCARD inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &Val)
393 -> decltype(cast<X>(Val)) {
394 if (!Val)
395 return nullptr;
396 return unique_dyn_cast<X, Y>(Val);
397}
398
399template <class X, class Y>
400LLVM_NODISCARD inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &&Val) {
401 return unique_dyn_cast_or_null<X, Y>(Val);
402}
403
404} // end namespace llvm
405
406#endif // LLVM_SUPPORT_CASTING_H

/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h

1//===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the SDNode class and derived classes, which are used to
10// represent the nodes and operations present in a SelectionDAG. These nodes
11// and operations are machine code level operations, with some similarities to
12// the GCC RTL representation.
13//
14// Clients should include the SelectionDAG.h file instead of this file directly.
15//
16//===----------------------------------------------------------------------===//
17
18#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
19#define LLVM_CODEGEN_SELECTIONDAGNODES_H
20
21#include "llvm/ADT/APFloat.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/BitVector.h"
24#include "llvm/ADT/FoldingSet.h"
25#include "llvm/ADT/GraphTraits.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/ilist_node.h"
29#include "llvm/ADT/iterator.h"
30#include "llvm/ADT/iterator_range.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/MachineMemOperand.h"
33#include "llvm/CodeGen/Register.h"
34#include "llvm/CodeGen/ValueTypes.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DebugLoc.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/Metadata.h"
40#include "llvm/IR/Operator.h"
41#include "llvm/Support/AlignOf.h"
42#include "llvm/Support/AtomicOrdering.h"
43#include "llvm/Support/Casting.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/MachineValueType.h"
46#include "llvm/Support/TypeSize.h"
47#include <algorithm>
48#include <cassert>
49#include <climits>
50#include <cstddef>
51#include <cstdint>
52#include <cstring>
53#include <iterator>
54#include <string>
55#include <tuple>
56
57namespace llvm {
58
59class APInt;
60class Constant;
61template <typename T> struct DenseMapInfo;
62class GlobalValue;
63class MachineBasicBlock;
64class MachineConstantPoolValue;
65class MCSymbol;
66class raw_ostream;
67class SDNode;
68class SelectionDAG;
69class Type;
70class Value;
71
72void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
73 bool force = false);
74
75/// This represents a list of ValueType's that has been intern'd by
76/// a SelectionDAG. Instances of this simple value class are returned by
77/// SelectionDAG::getVTList(...).
78///
79struct SDVTList {
80 const EVT *VTs;
81 unsigned int NumVTs;
82};
83
84namespace ISD {
85
86 /// Node predicates
87
88/// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the
89/// same constant or undefined, return true and return the constant value in
90/// \p SplatValue.
91bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
92
93/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
94/// all of the elements are ~0 or undef. If \p BuildVectorOnly is set to
95/// true, it only checks BUILD_VECTOR.
96bool isConstantSplatVectorAllOnes(const SDNode *N,
97 bool BuildVectorOnly = false);
98
99/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
100/// all of the elements are 0 or undef. If \p BuildVectorOnly is set to true, it
101/// only checks BUILD_VECTOR.
102bool isConstantSplatVectorAllZeros(const SDNode *N,
103 bool BuildVectorOnly = false);
104
105/// Return true if the specified node is a BUILD_VECTOR where all of the
106/// elements are ~0 or undef.
107bool isBuildVectorAllOnes(const SDNode *N);
108
109/// Return true if the specified node is a BUILD_VECTOR where all of the
110/// elements are 0 or undef.
111bool isBuildVectorAllZeros(const SDNode *N);
112
113/// Return true if the specified node is a BUILD_VECTOR node of all
114/// ConstantSDNode or undef.
115bool isBuildVectorOfConstantSDNodes(const SDNode *N);
116
117/// Return true if the specified node is a BUILD_VECTOR node of all
118/// ConstantFPSDNode or undef.
119bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
120
121/// Return true if the node has at least one operand and all operands of the
122/// specified node are ISD::UNDEF.
123bool allOperandsUndef(const SDNode *N);
124
125} // end namespace ISD
126
127//===----------------------------------------------------------------------===//
128/// Unlike LLVM values, Selection DAG nodes may return multiple
129/// values as the result of a computation. Many nodes return multiple values,
130/// from loads (which define a token and a return value) to ADDC (which returns
131/// a result and a carry value), to calls (which may return an arbitrary number
132/// of values).
133///
134/// As such, each use of a SelectionDAG computation must indicate the node that
135/// computes it as well as which return value to use from that node. This pair
136/// of information is represented with the SDValue value type.
137///
138class SDValue {
139 friend struct DenseMapInfo<SDValue>;
140
141 SDNode *Node = nullptr; // The node defining the value we are using.
142 unsigned ResNo = 0; // Which return value of the node we are using.
143
144public:
145 SDValue() = default;
146 SDValue(SDNode *node, unsigned resno);
147
148 /// get the index which selects a specific result in the SDNode
149 unsigned getResNo() const { return ResNo; }
150
151 /// get the SDNode which holds the desired result
152 SDNode *getNode() const { return Node; }
153
154 /// set the SDNode
155 void setNode(SDNode *N) { Node = N; }
156
157 inline SDNode *operator->() const { return Node; }
158
159 bool operator==(const SDValue &O) const {
160 return Node == O.Node && ResNo == O.ResNo;
161 }
162 bool operator!=(const SDValue &O) const {
163 return !operator==(O);
164 }
165 bool operator<(const SDValue &O) const {
166 return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
167 }
168 explicit operator bool() const {
169 return Node != nullptr;
170 }
171
172 SDValue getValue(unsigned R) const {
173 return SDValue(Node, R);
174 }
175
176 /// Return true if this node is an operand of N.
177 bool isOperandOf(const SDNode *N) const;
178
179 /// Return the ValueType of the referenced return value.
180 inline EVT getValueType() const;
181
182 /// Return the simple ValueType of the referenced return value.
183 MVT getSimpleValueType() const {
184 return getValueType().getSimpleVT();
185 }
186
187 /// Returns the size of the value in bits.
188 ///
189 /// If the value type is a scalable vector type, the scalable property will
190 /// be set and the runtime size will be a positive integer multiple of the
191 /// base size.
192 TypeSize getValueSizeInBits() const {
193 return getValueType().getSizeInBits();
194 }
195
196 uint64_t getScalarValueSizeInBits() const {
197 return getValueType().getScalarType().getFixedSizeInBits();
198 }
199
200 // Forwarding methods - These forward to the corresponding methods in SDNode.
201 inline unsigned getOpcode() const;
202 inline unsigned getNumOperands() const;
203 inline const SDValue &getOperand(unsigned i) const;
204 inline uint64_t getConstantOperandVal(unsigned i) const;
205 inline const APInt &getConstantOperandAPInt(unsigned i) const;
206 inline bool isTargetMemoryOpcode() const;
207 inline bool isTargetOpcode() const;
208 inline bool isMachineOpcode() const;
209 inline bool isUndef() const;
210 inline unsigned getMachineOpcode() const;
211 inline const DebugLoc &getDebugLoc() const;
212 inline void dump() const;
213 inline void dump(const SelectionDAG *G) const;
214 inline void dumpr() const;
215 inline void dumpr(const SelectionDAG *G) const;
216
217 /// Return true if this operand (which must be a chain) reaches the
218 /// specified operand without crossing any side-effecting instructions.
219 /// In practice, this looks through token factors and non-volatile loads.
220 /// In order to remain efficient, this only
221 /// looks a couple of nodes in; it does not do an exhaustive search.
222 bool reachesChainWithoutSideEffects(SDValue Dest,
223 unsigned Depth = 2) const;
224
225 /// Return true if there are no nodes using value ResNo of Node.
226 inline bool use_empty() const;
227
228 /// Return true if there is exactly one node using value ResNo of Node.
229 inline bool hasOneUse() const;
230};
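
Illustrative sketch (not part of the analyzed source; the helper is hypothetical): an SDValue is just the (defining node, result number) pair described above. Result-specific queries such as getValueType go through the SDValue, node-wide queries go through getNode(), and the explicit bool conversion is the supported way to detect the null-node state produced by the default constructor or a failed lookup.

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Assumes V's node defines at least two results (e.g. {value, chain}):
// report the type of result number 1 of the same node.
static EVT secondResultType(SDValue V) {
  if (!V)                        // null Node: nothing to query
    return EVT();
  SDValue Res1 = V.getValue(1);  // same node, result number 1
  return Res1.getValueType();    // forwards to Node->getValueType(1)
}
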
231
232template<> struct DenseMapInfo<SDValue> {
233 static inline SDValue getEmptyKey() {
234 SDValue V;
235 V.ResNo = -1U;
236 return V;
237 }
238
239 static inline SDValue getTombstoneKey() {
240 SDValue V;
241 V.ResNo = -2U;
242 return V;
243 }
244
245 static unsigned getHashValue(const SDValue &Val) {
246 return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
247 (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
248 }
249
250 static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
251 return LHS == RHS;
252 }
253};
254
255/// Allow casting operators to work directly on
256/// SDValues as if they were SDNode*'s.
257template<> struct simplify_type<SDValue> {
258 using SimpleType = SDNode *;
259
260 static SimpleType getSimplifiedValue(SDValue &Val) {
261 return Val.getNode();
15. Returning without writing to 'Val.Node'
262 }
263};
264template<> struct simplify_type<const SDValue> {
265 using SimpleType = /*const*/ SDNode *;
266
267 static SimpleType getSimplifiedValue(const SDValue &Val) {
268 return Val.getNode();
269 }
270};
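
Illustrative sketch (not part of the analyzed source; the helper is hypothetical): these simplify_type specializations are what let the Casting.h templates accept an SDValue directly. The cast machinery peels the SDValue down to its SDNode* via getSimplifiedValue, so a dyn_cast applied to an SDValue yields a node pointer that still has to be null-checked.

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// dyn_cast takes the SDValue itself; the result is a ConstantSDNode*,
// null when the value is not defined by a constant node.
static bool isConstantZero(SDValue Op) {
  if (auto *C = dyn_cast<ConstantSDNode>(Op))
    return C->getAPIntValue() == 0;
  return false;
}
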
271
272/// Represents a use of a SDNode. This class holds an SDValue,
273/// which records the SDNode being used and the result number, a
274/// pointer to the SDNode using the value, and Next and Prev pointers,
275/// which link together all the uses of an SDNode.
276///
277class SDUse {
278 /// Val - The value being used.
279 SDValue Val;
280 /// User - The user of this value.
281 SDNode *User = nullptr;
282 /// Prev, Next - Pointers to the uses list of the SDNode referred by
283 /// this operand.
284 SDUse **Prev = nullptr;
285 SDUse *Next = nullptr;
286
287public:
288 SDUse() = default;
289 SDUse(const SDUse &U) = delete;
290 SDUse &operator=(const SDUse &) = delete;
291
292 /// Normally SDUse will just implicitly convert to an SDValue that it holds.
293 operator const SDValue&() const { return Val; }
294
295 /// If implicit conversion to SDValue doesn't work, the get() method returns
296 /// the SDValue.
297 const SDValue &get() const { return Val; }
298
299 /// This returns the SDNode that contains this Use.
300 SDNode *getUser() { return User; }
301
302 /// Get the next SDUse in the use list.
303 SDUse *getNext() const { return Next; }
304
305 /// Convenience function for get().getNode().
306 SDNode *getNode() const { return Val.getNode(); }
307 /// Convenience function for get().getResNo().
308 unsigned getResNo() const { return Val.getResNo(); }
309 /// Convenience function for get().getValueType().
310 EVT getValueType() const { return Val.getValueType(); }
311
312 /// Convenience function for get().operator==
313 bool operator==(const SDValue &V) const {
314 return Val == V;
315 }
316
317 /// Convenience function for get().operator!=
318 bool operator!=(const SDValue &V) const {
319 return Val != V;
320 }
321
322 /// Convenience function for get().operator<
323 bool operator<(const SDValue &V) const {
324 return Val < V;
325 }
326
327private:
328 friend class SelectionDAG;
329 friend class SDNode;
330 // TODO: unfriend HandleSDNode once we fix its operand handling.
331 friend class HandleSDNode;
332
333 void setUser(SDNode *p) { User = p; }
334
335 /// Remove this use from its existing use list, assign it the
336 /// given value, and add it to the new value's node's use list.
337 inline void set(const SDValue &V);
338 /// Like set, but only supports initializing a newly-allocated
339 /// SDUse with a non-null value.
340 inline void setInitial(const SDValue &V);
341 /// Like set, but only sets the Node portion of the value,
342 /// leaving the ResNo portion unmodified.
343 inline void setNode(SDNode *N);
344
345 void addToList(SDUse **List) {
346 Next = *List;
347 if (Next) Next->Prev = &Next;
348 Prev = List;
349 *List = this;
350 }
351
352 void removeFromList() {
353 *Prev = Next;
354 if (Next) Next->Prev = Prev;
355 }
356};
357
358/// simplify_type specializations - Allow casting operators to work directly on
359/// SDUse values as if they were SDNode*'s.
360template<> struct simplify_type<SDUse> {
361 using SimpleType = SDNode *;
362
363 static SimpleType getSimplifiedValue(SDUse &Val) {
364 return Val.getNode();
365 }
366};
367
368/// These are IR-level optimization flags that may be propagated to SDNodes.
369/// TODO: This data structure should be shared by the IR optimizer and
370/// the backend.
371struct SDNodeFlags {
372private:
373 bool NoUnsignedWrap : 1;
374 bool NoSignedWrap : 1;
375 bool Exact : 1;
376 bool NoNaNs : 1;
377 bool NoInfs : 1;
378 bool NoSignedZeros : 1;
379 bool AllowReciprocal : 1;
380 bool AllowContract : 1;
381 bool ApproximateFuncs : 1;
382 bool AllowReassociation : 1;
383
384 // We assume instructions do not raise floating-point exceptions by default,
385 // and only those marked explicitly may do so. We could choose to represent
386 // this via a positive "FPExcept" flag like on the MI level, but having a
387 // negative "NoFPExcept" flag here (that defaults to true) makes the flag
388 // intersection logic more straightforward.
389 bool NoFPExcept : 1;
390
391public:
392 /// Default constructor turns off all optimization flags.
393 SDNodeFlags()
394 : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
395 NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
396 AllowContract(false), ApproximateFuncs(false),
397 AllowReassociation(false), NoFPExcept(false) {}
398
399 /// Propagate the fast-math-flags from an IR FPMathOperator.
400 void copyFMF(const FPMathOperator &FPMO) {
401 setNoNaNs(FPMO.hasNoNaNs());
402 setNoInfs(FPMO.hasNoInfs());
403 setNoSignedZeros(FPMO.hasNoSignedZeros());
404 setAllowReciprocal(FPMO.hasAllowReciprocal());
405 setAllowContract(FPMO.hasAllowContract());
406 setApproximateFuncs(FPMO.hasApproxFunc());
407 setAllowReassociation(FPMO.hasAllowReassoc());
408 }
409
410 // These are mutators for each flag.
411 void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
412 void setNoSignedWrap(bool b) { NoSignedWrap = b; }
413 void setExact(bool b) { Exact = b; }
414 void setNoNaNs(bool b) { NoNaNs = b; }
415 void setNoInfs(bool b) { NoInfs = b; }
416 void setNoSignedZeros(bool b) { NoSignedZeros = b; }
417 void setAllowReciprocal(bool b) { AllowReciprocal = b; }
418 void setAllowContract(bool b) { AllowContract = b; }
419 void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
420 void setAllowReassociation(bool b) { AllowReassociation = b; }
421 void setNoFPExcept(bool b) { NoFPExcept = b; }
422
423 // These are accessors for each flag.
424 bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
425 bool hasNoSignedWrap() const { return NoSignedWrap; }
426 bool hasExact() const { return Exact; }
427 bool hasNoNaNs() const { return NoNaNs; }
428 bool hasNoInfs() const { return NoInfs; }
429 bool hasNoSignedZeros() const { return NoSignedZeros; }
430 bool hasAllowReciprocal() const { return AllowReciprocal; }
431 bool hasAllowContract() const { return AllowContract; }
432 bool hasApproximateFuncs() const { return ApproximateFuncs; }
433 bool hasAllowReassociation() const { return AllowReassociation; }
434 bool hasNoFPExcept() const { return NoFPExcept; }
435
436 /// Clear any flags in this flag set that aren't also set in Flags. All
437 /// flags will be cleared if Flags are undefined.
438 void intersectWith(const SDNodeFlags Flags) {
439 NoUnsignedWrap &= Flags.NoUnsignedWrap;
440 NoSignedWrap &= Flags.NoSignedWrap;
441 Exact &= Flags.Exact;
442 NoNaNs &= Flags.NoNaNs;
443 NoInfs &= Flags.NoInfs;
444 NoSignedZeros &= Flags.NoSignedZeros;
445 AllowReciprocal &= Flags.AllowReciprocal;
446 AllowContract &= Flags.AllowContract;
447 ApproximateFuncs &= Flags.ApproximateFuncs;
448 AllowReassociation &= Flags.AllowReassociation;
449 NoFPExcept &= Flags.NoFPExcept;
450 }
451};
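
Illustrative sketch (not part of the analyzed source; the helper is hypothetical): flags are typically seeded from the IR with copyFMF and then narrowed when nodes are folded together, so that only guarantees common to both inputs survive. The sketch shows the narrowing half via intersectWith.

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Keep only the optimization flags that hold for both A and B, e.g. when
// folding the two nodes into one.
static SDNodeFlags combinedFlags(const SDNode *A, const SDNode *B) {
  SDNodeFlags Flags = A->getFlags();
  Flags.intersectWith(B->getFlags());
  return Flags;
}
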
452
453/// Represents one node in the SelectionDAG.
454///
455class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
456private:
457 /// The operation that this node performs.
458 int16_t NodeType;
459
460protected:
461 // We define a set of mini-helper classes to help us interpret the bits in our
462 // SubclassData. These are designed to fit within a uint16_t so they pack
463 // with NodeType.
464
465#if defined(_AIX) && (!defined(__GNUC__) || defined(__clang__))
466// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
467// and give the `pack` pragma push semantics.
468#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
469#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
470#else
471#define BEGIN_TWO_BYTE_PACK()
472#define END_TWO_BYTE_PACK()
473#endif
474
475BEGIN_TWO_BYTE_PACK()
476 class SDNodeBitfields {
477 friend class SDNode;
478 friend class MemIntrinsicSDNode;
479 friend class MemSDNode;
480 friend class SelectionDAG;
481
482 uint16_t HasDebugValue : 1;
483 uint16_t IsMemIntrinsic : 1;
484 uint16_t IsDivergent : 1;
485 };
486 enum { NumSDNodeBits = 3 };
487
488 class ConstantSDNodeBitfields {
489 friend class ConstantSDNode;
490
491 uint16_t : NumSDNodeBits;
492
493 uint16_t IsOpaque : 1;
494 };
495
496 class MemSDNodeBitfields {
497 friend class MemSDNode;
498 friend class MemIntrinsicSDNode;
499 friend class AtomicSDNode;
500
501 uint16_t : NumSDNodeBits;
502
503 uint16_t IsVolatile : 1;
504 uint16_t IsNonTemporal : 1;
505 uint16_t IsDereferenceable : 1;
506 uint16_t IsInvariant : 1;
507 };
508 enum { NumMemSDNodeBits = NumSDNodeBits + 4 };
509
510 class LSBaseSDNodeBitfields {
511 friend class LSBaseSDNode;
512 friend class MaskedLoadStoreSDNode;
513 friend class MaskedGatherScatterSDNode;
514
515 uint16_t : NumMemSDNodeBits;
516
517 // This storage is shared between disparate class hierarchies to hold an
518 // enumeration specific to the class hierarchy in use.
519 // LSBaseSDNode => enum ISD::MemIndexedMode
520 // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
521 // MaskedGatherScatterSDNode => enum ISD::MemIndexType
522 uint16_t AddressingMode : 3;
523 };
524 enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
525
526 class LoadSDNodeBitfields {
527 friend class LoadSDNode;
528 friend class MaskedLoadSDNode;
529 friend class MaskedGatherSDNode;
530
531 uint16_t : NumLSBaseSDNodeBits;
532
533 uint16_t ExtTy : 2; // enum ISD::LoadExtType
534 uint16_t IsExpanding : 1;
535 };
536
537 class StoreSDNodeBitfields {
538 friend class StoreSDNode;
539 friend class MaskedStoreSDNode;
540 friend class MaskedScatterSDNode;
541
542 uint16_t : NumLSBaseSDNodeBits;
543
544 uint16_t IsTruncating : 1;
545 uint16_t IsCompressing : 1;
546 };
547
548 union {
549 char RawSDNodeBits[sizeof(uint16_t)];
550 SDNodeBitfields SDNodeBits;
551 ConstantSDNodeBitfields ConstantSDNodeBits;
552 MemSDNodeBitfields MemSDNodeBits;
553 LSBaseSDNodeBitfields LSBaseSDNodeBits;
554 LoadSDNodeBitfields LoadSDNodeBits;
555 StoreSDNodeBitfields StoreSDNodeBits;
556 };
557END_TWO_BYTE_PACK()
558#undef BEGIN_TWO_BYTE_PACK
559#undef END_TWO_BYTE_PACK
560
561 // RawSDNodeBits must cover the entirety of the union. This means that all of
562 // the union's members must have size <= RawSDNodeBits. We write the RHS as
563 // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
564 static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
565 static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
566 static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
567 static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
568 static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide");
569 static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");
570
571private:
572 friend class SelectionDAG;
573 // TODO: unfriend HandleSDNode once we fix its operand handling.
574 friend class HandleSDNode;
575
576 /// Unique id per SDNode in the DAG.
577 int NodeId = -1;
578
579 /// The values that are used by this operation.
580 SDUse *OperandList = nullptr;
581
582 /// The types of the values this node defines. SDNode's may
583 /// define multiple values simultaneously.
584 const EVT *ValueList;
585
586 /// List of uses for this SDNode.
587 SDUse *UseList = nullptr;
588
589 /// The number of entries in the Operand/Value list.
590 unsigned short NumOperands = 0;
591 unsigned short NumValues;
592
593 // The ordering of the SDNodes. It roughly corresponds to the ordering of the
594 // original LLVM instructions.
595 // This is used for turning off scheduling, because we'll forgo
596 // the normal scheduling algorithms and output the instructions according to
597 // this ordering.
598 unsigned IROrder;
599
600 /// Source line information.
601 DebugLoc debugLoc;
602
603 /// Return a pointer to the specified value type.
604 static const EVT *getValueTypeList(EVT VT);
605
606 SDNodeFlags Flags;
607
608public:
609 /// Unique and persistent id per SDNode in the DAG.
610 /// Used for debug printing.
611 uint16_t PersistentId;
612
613 //===--------------------------------------------------------------------===//
614 // Accessors
615 //
616
617 /// Return the SelectionDAG opcode value for this node. For
618 /// pre-isel nodes (those for which isMachineOpcode returns false), these
619 /// are the opcode values in the ISD and <target>ISD namespaces. For
620 /// post-isel opcodes, see getMachineOpcode.
621 unsigned getOpcode() const { return (unsigned short)NodeType; }
622
623 /// Test if this node has a target-specific opcode (in the
624 /// \<target\>ISD namespace).
625 bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
626
627 /// Test if this node has a target-specific opcode that may raise
628 /// FP exceptions (in the \<target\>ISD namespace and greater than
629 /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory
630 /// opcodes are currently automatically considered to possibly raise
631 /// FP exceptions as well.
632 bool isTargetStrictFPOpcode() const {
633 return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
634 }
635
636 /// Test if this node has a target-specific
637 /// memory-referencing opcode (in the \<target\>ISD namespace and
638 /// greater than FIRST_TARGET_MEMORY_OPCODE).
639 bool isTargetMemoryOpcode() const {
640 return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
641 }
642
643 /// Return true if the node's opcode is ISD::UNDEF.
644 bool isUndef() const { return NodeType == ISD::UNDEF; }
645
646 /// Test if this node is a memory intrinsic (with valid pointer information).
647 /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
648 /// non-memory intrinsics (with chains) that are not really instances of
649 /// MemSDNode. For such nodes, we need some extra state to determine the
650 /// proper classof relationship.
651 bool isMemIntrinsic() const {
652 return (NodeType == ISD::INTRINSIC_W_CHAIN ||
653 NodeType == ISD::INTRINSIC_VOID) &&
654 SDNodeBits.IsMemIntrinsic;
655 }
656
657 /// Test if this node is a strict floating point pseudo-op.
658 bool isStrictFPOpcode() {
659 switch (NodeType) {
660 default:
661 return false;
662 case ISD::STRICT_FP16_TO_FP:
663 case ISD::STRICT_FP_TO_FP16:
664#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
665 case ISD::STRICT_##DAGN:
666#include "llvm/IR/ConstrainedOps.def"
667 return true;
668 }
669 }
670
671 /// Test if this node has a post-isel opcode, directly
672 /// corresponding to a MachineInstr opcode.
673 bool isMachineOpcode() const { return NodeType < 0; }
674
675 /// This may only be called if isMachineOpcode returns
676 /// true. It returns the MachineInstr opcode value that the node's opcode
677 /// corresponds to.
678 unsigned getMachineOpcode() const {
679 assert(isMachineOpcode() && "Not a MachineInstr opcode!");
680 return ~NodeType;
681 }
682
683 bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
684 void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
685
686 bool isDivergent() const { return SDNodeBits.IsDivergent; }
687
688 /// Return true if there are no uses of this node.
689 bool use_empty() const { return UseList == nullptr; }
690
691 /// Return true if there is exactly one use of this node.
692 bool hasOneUse() const { return hasSingleElement(uses()); }
693
694 /// Return the number of uses of this node. This method takes
695 /// time proportional to the number of uses.
696 size_t use_size() const { return std::distance(use_begin(), use_end()); }
697
698 /// Return the unique node id.
699 int getNodeId() const { return NodeId; }
700
701 /// Set unique node id.
702 void setNodeId(int Id) { NodeId = Id; }
703
704 /// Return the node ordering.
705 unsigned getIROrder() const { return IROrder; }
706
707 /// Set the node ordering.
708 void setIROrder(unsigned Order) { IROrder = Order; }
709
710 /// Return the source location info.
711 const DebugLoc &getDebugLoc() const { return debugLoc; }
712
713 /// Set source location info. Try to avoid this, putting
714 /// it in the constructor is preferable.
715 void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }
716
717 /// This class provides iterator support for SDUse
718 /// operands that use a specific SDNode.
719 class use_iterator {
720 friend class SDNode;
721
722 SDUse *Op = nullptr;
723
724 explicit use_iterator(SDUse *op) : Op(op) {}
725
726 public:
727 using iterator_category = std::forward_iterator_tag;
728 using value_type = SDUse;
729 using difference_type = std::ptrdiff_t;
730 using pointer = value_type *;
731 using reference = value_type &;
732
733 use_iterator() = default;
734 use_iterator(const use_iterator &I) : Op(I.Op) {}
735
736 bool operator==(const use_iterator &x) const {
737 return Op == x.Op;
738 }
739 bool operator!=(const use_iterator &x) const {
740 return !operator==(x);
741 }
742
743 /// Return true if this iterator is at the end of uses list.
744 bool atEnd() const { return Op == nullptr; }
745
746 // Iterator traversal: forward iteration only.
747 use_iterator &operator++() { // Preincrement
748 assert(Op && "Cannot increment end iterator!")(static_cast <bool> (Op && "Cannot increment end iterator!"
) ? void (0) : __assert_fail ("Op && \"Cannot increment end iterator!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h"
, 748, __extension__ __PRETTY_FUNCTION__))
;
749 Op = Op->getNext();
750 return *this;
751 }
752
753 use_iterator operator++(int) { // Postincrement
754 use_iterator tmp = *this; ++*this; return tmp;
755 }
756
757 /// Retrieve a pointer to the current user node.
758 SDNode *operator*() const {
759 assert(Op && "Cannot dereference end iterator!")(static_cast <bool> (Op && "Cannot dereference end iterator!"
) ? void (0) : __assert_fail ("Op && \"Cannot dereference end iterator!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h"
, 759, __extension__ __PRETTY_FUNCTION__))
;
760 return Op->getUser();
761 }
762
763 SDNode *operator->() const { return operator*(); }
764
765 SDUse &getUse() const { return *Op; }
766
767 /// Retrieve the operand # of this use in its user.
768 unsigned getOperandNo() const {
769 assert(Op && "Cannot dereference end iterator!")(static_cast <bool> (Op && "Cannot dereference end iterator!"
) ? void (0) : __assert_fail ("Op && \"Cannot dereference end iterator!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h"
, 769, __extension__ __PRETTY_FUNCTION__))
;
770 return (unsigned)(Op - Op->getUser()->OperandList);
771 }
772 };
773
774 /// Provide iteration support to walk over all uses of an SDNode.
775 use_iterator use_begin() const {
776 return use_iterator(UseList);
777 }
778
779 static use_iterator use_end() { return use_iterator(nullptr); }
780
781 inline iterator_range<use_iterator> uses() {
782 return make_range(use_begin(), use_end());
783 }
784 inline iterator_range<use_iterator> uses() const {
785 return make_range(use_begin(), use_end());
786 }
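
Illustrative sketch (not part of the analyzed source; the helper is hypothetical): dereferencing a use_iterator yields the user node, while getUse() exposes the SDUse record, so a walk over uses() can distinguish which of this node's results each user actually consumes.

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Count the users that consume result number 0 of N, ignoring users of any
// other value N defines (such as a chain or glue result).
static unsigned countUsersOfResult0(const SDNode *N) {
  unsigned Count = 0;
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
       ++UI)
    if (UI.getUse().getResNo() == 0)
      ++Count;
  return Count;
}
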
787
788 /// Return true if there are exactly NUSES uses of the indicated value.
789 /// This method ignores uses of other values defined by this operation.
790 bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
791
792 /// Return true if there are any uses of the indicated value.
793 /// This method ignores uses of other values defined by this operation.
794 bool hasAnyUseOfValue(unsigned Value) const;
795
796 /// Return true if this node is the only use of N.
797 bool isOnlyUserOf(const SDNode *N) const;
798
799 /// Return true if this node is an operand of N.
800 bool isOperandOf(const SDNode *N) const;
801
802 /// Return true if this node is a predecessor of N.
803 /// NOTE: Implemented on top of hasPredecessor and every bit as
804 /// expensive. Use carefully.
805 bool isPredecessorOf(const SDNode *N) const {
806 return N->hasPredecessor(this);
807 }
808
809 /// Return true if N is a predecessor of this node.
810 /// N is either an operand of this node, or can be reached by recursively
811 /// traversing up the operands.
812 /// NOTE: This is an expensive method. Use it carefully.
813 bool hasPredecessor(const SDNode *N) const;
814
815 /// Returns true if N is a predecessor of any node in Worklist. This
816 /// helper keeps Visited and Worklist sets externally to allow unions of
817 /// searches to be performed in parallel, caching of results across
818 /// queries, and incremental addition to Worklist. Stops early if N is
819 /// found but will resume. Remember to clear Visited and Worklist
820 /// if the DAG changes. MaxSteps gives a maximum number of nodes to visit before
821 /// giving up. The TopologicalPrune flag signals that positive NodeIds are
822 /// topologically ordered (Operands have strictly smaller node id) and search
823 /// can be pruned leveraging this.
824 static bool hasPredecessorHelper(const SDNode *N,
825 SmallPtrSetImpl<const SDNode *> &Visited,
826 SmallVectorImpl<const SDNode *> &Worklist,
827 unsigned int MaxSteps = 0,
828 bool TopologicalPrune = false) {
829 SmallVector<const SDNode *, 8> DeferredNodes;
830 if (Visited.count(N))
831 return true;
832
833 // Node Id's are assigned in three places: As a topological
834 // ordering (> 0), during legalization (results in values set to
835 // 0), new nodes (set to -1). If N has a topological id then we
836 // know that all nodes with ids smaller than it cannot be
837 // successors and we need not check them. Filter out all nodes
838 // that can't match. We add them to the worklist before exit
839 // in case of multiple calls. Note that during selection the topological id
840 // may be violated if a node's predecessor is selected before it. We mark
841 // this at selection by negating the id of unselected successors and
842 // restricting topological pruning to positive ids.
843
844 int NId = N->getNodeId();
845 // If we invalidated the Id, reconstruct the original NId.
846 if (NId < -1)
847 NId = -(NId + 1);
848
849 bool Found = false;
850 while (!Worklist.empty()) {
851 const SDNode *M = Worklist.pop_back_val();
852 int MId = M->getNodeId();
853 if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
854 (MId > 0) && (MId < NId)) {
855 DeferredNodes.push_back(M);
856 continue;
857 }
858 for (const SDValue &OpV : M->op_values()) {
859 SDNode *Op = OpV.getNode();
860 if (Visited.insert(Op).second)
861 Worklist.push_back(Op);
862 if (Op == N)
863 Found = true;
864 }
865 if (Found)
866 break;
867 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
868 break;
869 }
870 // Push deferred nodes back on worklist.
871 Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
872 // If we bailed early, conservatively return found.
873 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
874 return true;
875 return Found;
876 }
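
Illustrative sketch (not part of the analyzed source; the helper and the 1024-node budget are assumptions): the Visited and Worklist containers live in the caller, as the comment above explains, so repeated queries can reuse what has already been explored, and a non-zero MaxSteps makes the answer conservatively true once the budget is exhausted.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Conservatively decide whether N can be reached through Root's operands,
// visiting at most 1024 nodes.
static bool mayDependOn(const SDNode *Root, const SDNode *N) {
  SmallPtrSet<const SDNode *, 16> Visited;
  SmallVector<const SDNode *, 8> Worklist;
  Worklist.push_back(Root);
  return SDNode::hasPredecessorHelper(N, Visited, Worklist, /*MaxSteps=*/1024);
}
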
877
878 /// Return true if all the users of N are contained in Nodes.
879 /// NOTE: Requires at least one match, but doesn't require them all.
880 static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);
881
882 /// Return the number of values used by this operation.
883 unsigned getNumOperands() const { return NumOperands; }
884
885 /// Return the maximum number of operands that a SDNode can hold.
886 static constexpr size_t getMaxNumOperands() {
887 return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
888 }
889
890 /// Helper method returns the integer value of a ConstantSDNode operand.
891 inline uint64_t getConstantOperandVal(unsigned Num) const;
892
893 /// Helper method returns the APInt of a ConstantSDNode operand.
894 inline const APInt &getConstantOperandAPInt(unsigned Num) const;
895
896 const SDValue &getOperand(unsigned Num) const {
897 assert(Num < NumOperands && "Invalid child # of SDNode!");
898 return OperandList[Num];
899 }
900
901 using op_iterator = SDUse *;
902
903 op_iterator op_begin() const { return OperandList; }
904 op_iterator op_end() const { return OperandList+NumOperands; }
905 ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
906
907 /// Iterator for directly iterating over the operand SDValue's.
908 struct value_op_iterator
909 : iterator_adaptor_base<value_op_iterator, op_iterator,
910 std::random_access_iterator_tag, SDValue,
911 ptrdiff_t, value_op_iterator *,
912 value_op_iterator *> {
913 explicit value_op_iterator(SDUse *U = nullptr)
914 : iterator_adaptor_base(U) {}
915
916 const SDValue &operator*() const { return I->get(); }
917 };
918
919 iterator_range<value_op_iterator> op_values() const {
920 return make_range(value_op_iterator(op_begin()),
921 value_op_iterator(op_end()));
922 }
923
924 SDVTList getVTList() const {
925 SDVTList X = { ValueList, NumValues };
926 return X;
927 }
928
929 /// If this node has a glue operand, return the node
930 /// to which the glue operand points. Otherwise return NULL.
931 SDNode *getGluedNode() const {
932 if (getNumOperands() != 0 &&
933 getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
934 return getOperand(getNumOperands()-1).getNode();
935 return nullptr;
936 }
937
938 /// If this node has a glue value with a user, return
939 /// the user (there is at most one). Otherwise return NULL.
940 SDNode *getGluedUser() const {
941 for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
942 if (UI.getUse().get().getValueType() == MVT::Glue)
943 return *UI;
944 return nullptr;
945 }
946
947 SDNodeFlags getFlags() const { return Flags; }
948 void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
949
950 /// Clear any flags in this node that aren't also set in Flags.
951 /// If Flags is not in a defined state then this has no effect.
952 void intersectFlagsWith(const SDNodeFlags Flags);
953
954 /// Return the number of values defined/returned by this operator.
955 unsigned getNumValues() const { return NumValues; }
956
957 /// Return the type of a specified result.
958 EVT getValueType(unsigned ResNo) const {
959 assert(ResNo < NumValues && "Illegal result number!");
960 return ValueList[ResNo];
961 }
962
963 /// Return the type of a specified result as a simple type.
964 MVT getSimpleValueType(unsigned ResNo) const {
965 return getValueType(ResNo).getSimpleVT();
966 }
967
968 /// Returns MVT::getSizeInBits(getValueType(ResNo)).
969 ///
970 /// If the value type is a scalable vector type, the scalable property will
971 /// be set and the runtime size will be a positive integer multiple of the
972 /// base size.
973 TypeSize getValueSizeInBits(unsigned ResNo) const {
974 return getValueType(ResNo).getSizeInBits();
975 }
976
977 using value_iterator = const EVT *;
978
979 value_iterator value_begin() const { return ValueList; }
980 value_iterator value_end() const { return ValueList+NumValues; }
981 iterator_range<value_iterator> values() const {
982 return llvm::make_range(value_begin(), value_end());
983 }
984
985 /// Return the opcode of this operation for printing.
986 std::string getOperationName(const SelectionDAG *G = nullptr) const;
987 static const char* getIndexedModeName(ISD::MemIndexedMode AM);
988 void print_types(raw_ostream &OS, const SelectionDAG *G) const;
989 void print_details(raw_ostream &OS, const SelectionDAG *G) const;
990 void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
991 void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
992
993 /// Print a SelectionDAG node and all children down to
994 /// the leaves. The given SelectionDAG allows target-specific nodes
995 /// to be printed in human-readable form. Unlike printr, this will
996 /// print the whole DAG, including children that appear multiple
997 /// times.
998 ///
999 void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const;
1000
1001 /// Print a SelectionDAG node and children up to
1002 /// depth "depth." The given SelectionDAG allows target-specific
1003 /// nodes to be printed in human-readable form. Unlike printr, this
1004 /// will print children that appear multiple times wherever they are
1005 /// used.
1006 ///
1007 void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
1008 unsigned depth = 100) const;
1009
1010 /// Dump this node, for debugging.
1011 void dump() const;
1012
1013 /// Dump (recursively) this node and its use-def subgraph.
1014 void dumpr() const;
1015
1016 /// Dump this node, for debugging.
1017 /// The given SelectionDAG allows target-specific nodes to be printed
1018 /// in human-readable form.
1019 void dump(const SelectionDAG *G) const;
1020
1021 /// Dump (recursively) this node and its use-def subgraph.
1022 /// The given SelectionDAG allows target-specific nodes to be printed
1023 /// in human-readable form.
1024 void dumpr(const SelectionDAG *G) const;
1025
1026 /// printrFull to dbgs(). The given SelectionDAG allows
1027 /// target-specific nodes to be printed in human-readable form.
1028 /// Unlike dumpr, this will print the whole DAG, including children
1029 /// that appear multiple times.
1030 void dumprFull(const SelectionDAG *G = nullptr) const;
1031
1032 /// printrWithDepth to dbgs(). The given
1033 /// SelectionDAG allows target-specific nodes to be printed in
1034 /// human-readable form. Unlike dumpr, this will print children
1035 /// that appear multiple times wherever they are used.
1036 ///
1037 void dumprWithDepth(const SelectionDAG *G = nullptr,
1038 unsigned depth = 100) const;
1039
1040 /// Gather unique data for the node.
1041 void Profile(FoldingSetNodeID &ID) const;
1042
1043 /// This method should only be used by the SDUse class.
1044 void addUse(SDUse &U) { U.addToList(&UseList); }
1045
1046protected:
1047 static SDVTList getSDVTList(EVT VT) {
1048 SDVTList Ret = { getValueTypeList(VT), 1 };
1049 return Ret;
1050 }
1051
1052 /// Create an SDNode.
1053 ///
1054 /// SDNodes are created without any operands, and never own the operand
1055 /// storage. To add operands, see SelectionDAG::createOperands.
1056 SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
1057 : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
1058 IROrder(Order), debugLoc(std::move(dl)) {
1059 memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
1060 assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
1061 assert(NumValues == VTs.NumVTs &&
1062 "NumValues wasn't wide enough for its operands!");
1063 }
1064
1065 /// Release the operands and set this node to have zero operands.
1066 void DropOperands();
1067};
1068
1069/// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed
1070/// into SDNode creation functions.
1071/// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted
1072/// from the original Instruction, and IROrder is the ordinal position of
1073/// the instruction.
1074/// When an SDNode is created after the DAG is being built, both DebugLoc and
1075/// the IROrder are propagated from the original SDNode.
1076/// So the SDLoc class provides two constructors besides the default one: one to
1077/// be used by the DAGBuilder, the other to be used by everyone else.
1078class SDLoc {
1079private:
1080 DebugLoc DL;
1081 int IROrder = 0;
1082
1083public:
1084 SDLoc() = default;
1085 SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {}
1086 SDLoc(const SDValue V) : SDLoc(V.getNode()) {}
1087 SDLoc(const Instruction *I, int Order) : IROrder(Order) {
1088 assert(Order >= 0 && "bad IROrder");
1089 if (I)
1090 DL = I->getDebugLoc();
1091 }
1092
1093 unsigned getIROrder() const { return IROrder; }
1094 const DebugLoc &getDebugLoc() const { return DL; }
1095};
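
Illustrative sketch (not part of the analyzed source; the combine itself is hypothetical): when a node is rewritten, an SDLoc built from the old node carries its DebugLoc and IR order over to the replacement, which is the intended use of the SDNode-taking constructor above.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Rebuild an ADD with its operands swapped, preserving N's location info.
static SDValue swapAddOperands(SelectionDAG &DAG, SDNode *N) {
  SDLoc DL(N); // copies N's DebugLoc and IROrder
  return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
                     N->getOperand(0));
}
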
1096
1097// Define inline functions from the SDValue class.
1098
1099inline SDValue::SDValue(SDNode *node, unsigned resno)
1100 : Node(node), ResNo(resno) {
1101 // Explicitly check for !ResNo to avoid use-after-free, because there are
1102 // callers that use SDValue(N, 0) with a deleted N to indicate successful
1103 // combines.
1104 assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&
1105 "Invalid result number for the given node!");
1106 assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
1107}
1108
1109inline unsigned SDValue::getOpcode() const {
1110 return Node->getOpcode();
27. Called C++ object pointer is null
1111}
1112
1113inline EVT SDValue::getValueType() const {
1114 return Node->getValueType(ResNo);
1115}
1116
1117inline unsigned SDValue::getNumOperands() const {
1118 return Node->getNumOperands();
1119}
1120
1121inline const SDValue &SDValue::getOperand(unsigned i) const {
1122 return Node->getOperand(i);
1123}
1124
1125inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
1126 return Node->getConstantOperandVal(i);
1127}
1128
1129inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
1130 return Node->getConstantOperandAPInt(i);
1131}
1132
1133inline bool SDValue::isTargetOpcode() const {
1134 return Node->isTargetOpcode();
1135}
1136
1137inline bool SDValue::isTargetMemoryOpcode() const {
1138 return Node->isTargetMemoryOpcode();
1139}
1140
1141inline bool SDValue::isMachineOpcode() const {
1142 return Node->isMachineOpcode();
1143}
1144
1145inline unsigned SDValue::getMachineOpcode() const {
1146 return Node->getMachineOpcode();
1147}
1148
1149inline bool SDValue::isUndef() const {
1150 return Node->isUndef();
1151}
1152
1153inline bool SDValue::use_empty() const {
1154 return !Node->hasAnyUseOfValue(ResNo);
1155}
1156
1157inline bool SDValue::hasOneUse() const {
1158 return Node->hasNUsesOfValue(1, ResNo);
1159}
1160
1161inline const DebugLoc &SDValue::getDebugLoc() const {
1162 return Node->getDebugLoc();
1163}
1164
1165inline void SDValue::dump() const {
1166 return Node->dump();
1167}
1168
1169inline void SDValue::dump(const SelectionDAG *G) const {
1170 return Node->dump(G);
1171}
1172
1173inline void SDValue::dumpr() const {
1174 return Node->dumpr();
1175}
1176
1177inline void SDValue::dumpr(const SelectionDAG *G) const {
1178 return Node->dumpr(G);
1179}
1180
1181// Define inline functions from the SDUse class.
1182
1183inline void SDUse::set(const SDValue &V) {
1184 if (Val.getNode()) removeFromList();
1185 Val = V;
1186 if (V.getNode()) V.getNode()->addUse(*this);
1187}
1188
1189inline void SDUse::setInitial(const SDValue &V) {
1190 Val = V;
1191 V.getNode()->addUse(*this);
1192}
1193
1194inline void SDUse::setNode(SDNode *N) {
1195 if (Val.getNode()) removeFromList();
1196 Val.setNode(N);
1197 if (N) N->addUse(*this);
1198}
1199
1200/// This class is used to form a handle around another node that
1201/// is persistent and is updated across invocations of replaceAllUsesWith on its
1202/// operand. This node should be directly created by end-users and not added to
1203/// the AllNodes list.
1204class HandleSDNode : public SDNode {
1205 SDUse Op;
1206
1207public:
1208 explicit HandleSDNode(SDValue X)
1209 : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
1210 // HandleSDNodes are never inserted into the DAG, so they won't be
1211 // auto-numbered. Use ID 65535 as a sentinel.
1212 PersistentId = 0xffff;
1213
1214 // Manually set up the operand list. This node type is special in that it's
1215 // always stack allocated and SelectionDAG does not manage its operands.
1216 // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not
1217 // be so special.
1218 Op.setUser(this);
1219 Op.setInitial(X);
1220 NumOperands = 1;
1221 OperandList = &Op;
1222 }
1223 ~HandleSDNode();
1224
1225 const SDValue &getValue() const { return Op; }
1226};
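
Illustrative sketch (not part of the analyzed source; the surrounding transformation is hypothetical): a stack-allocated HandleSDNode keeps an SDValue valid and up to date across DAG mutations such as ReplaceAllUsesWith, which is exactly the persistence the class comment above promises.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Hold on to Chain while a replacement runs, then read the (possibly
// updated) value back out of the handle.
static SDValue preserveAcrossRAUW(SelectionDAG &DAG, SDValue Chain,
                                  SDValue From, SDValue To) {
  HandleSDNode Handle(Chain);
  DAG.ReplaceAllUsesWith(From, To); // may rewrite the node Chain points at
  return Handle.getValue();
}
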
1227
1228class AddrSpaceCastSDNode : public SDNode {
1229private:
1230 unsigned SrcAddrSpace;
1231 unsigned DestAddrSpace;
1232
1233public:
1234 AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT,
1235 unsigned SrcAS, unsigned DestAS);
1236
1237 unsigned getSrcAddressSpace() const { return SrcAddrSpace; }
1238 unsigned getDestAddressSpace() const { return DestAddrSpace; }
1239
1240 static bool classof(const SDNode *N) {
1241 return N->getOpcode() == ISD::ADDRSPACECAST;
1242 }
1243};
1244
1245/// This is an abstract virtual class for memory operations.
1246class MemSDNode : public SDNode {
1247private:
1248 // VT of in-memory value.
1249 EVT MemoryVT;
1250
1251protected:
1252 /// Memory reference information.
1253 MachineMemOperand *MMO;
1254
1255public:
1256 MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs,
1257 EVT memvt, MachineMemOperand *MMO);
1258
1259 bool readMem() const { return MMO->isLoad(); }
1260 bool writeMem() const { return MMO->isStore(); }
1261
1262 /// Returns alignment and volatility of the memory access
1263 Align getOriginalAlign() const { return MMO->getBaseAlign(); }
1264 Align getAlign() const { return MMO->getAlign(); }
1265 // FIXME: Remove once transition to getAlign is over.
1266 unsigned getAlignment() const { return MMO->getAlign().value(); }
1267
1268 /// Return the SubclassData value, without HasDebugValue. This contains an
1269 /// encoding of the volatile flag, as well as bits used by subclasses. This
1270 /// function should only be used to compute a FoldingSetNodeID value.
1271 /// The HasDebugValue bit is masked out because CSE map needs to match
1272 /// nodes with debug info with nodes without debug info. The same applies to
1273 /// the isDivergent bit.
1274 unsigned getRawSubclassData() const {
1275 uint16_t Data;
1276 union {
1277 char RawSDNodeBits[sizeof(uint16_t)];
1278 SDNodeBitfields SDNodeBits;
1279 };
1280 memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
1281 SDNodeBits.HasDebugValue = 0;
1282 SDNodeBits.IsDivergent = false;
1283 memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
1284 return Data;
1285 }
1286
1287 bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
1288 bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
1289 bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
1290 bool isInvariant() const { return MemSDNodeBits.IsInvariant; }
1291
1292 // Returns the offset from the location of the access.
1293 int64_t getSrcValueOffset() const { return MMO->getOffset(); }
1294
1295 /// Returns the AA info that describes the dereference.
1296 AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }
1297
1298 /// Returns the Ranges that describes the dereference.
1299 const MDNode *getRanges() const { return MMO->getRanges(); }
1300
1301 /// Returns the synchronization scope ID for this memory operation.
1302 SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); }
1303
1304 /// Return the atomic ordering requirements for this memory operation. For
1305 /// cmpxchg atomic operations, return the atomic ordering requirements when
1306 /// store occurs.
1307 AtomicOrdering getSuccessOrdering() const {
1308 return MMO->getSuccessOrdering();
1309 }
1310
1311 /// Return a single atomic ordering that is at least as strong as both the
1312 /// success and failure orderings for an atomic operation. (For operations
1313 /// other than cmpxchg, this is equivalent to getSuccessOrdering().)
1314 AtomicOrdering getMergedOrdering() const { return MMO->getMergedOrdering(); }
1315
1316 /// Return true if the memory operation ordering is Unordered or higher.
1317 bool isAtomic() const { return MMO->isAtomic(); }
1318
1319 /// Returns true if the memory operation doesn't imply any ordering
1320 /// constraints on surrounding memory operations beyond the normal memory
1321 /// aliasing rules.
1322 bool isUnordered() const { return MMO->isUnordered(); }
1323
1324 /// Returns true if the memory operation is neither atomic nor volatile.
1325 bool isSimple() const { return !isAtomic() && !isVolatile(); }
1326
1327 /// Return the type of the in-memory value.
1328 EVT getMemoryVT() const { return MemoryVT; }
1329
1330 /// Return a MachineMemOperand object describing the memory
1331 /// reference performed by operation.
1332 MachineMemOperand *getMemOperand() const { return MMO; }
1333
1334 const MachinePointerInfo &getPointerInfo() const {
1335 return MMO->getPointerInfo();
1336 }
1337
1338 /// Return the address space for the associated pointer
1339 unsigned getAddressSpace() const {
1340 return getPointerInfo().getAddrSpace();
1341 }
1342
1343 /// Update this MemSDNode's MachineMemOperand information
1344 /// to reflect the alignment of NewMMO, if it has a greater alignment.
1345 /// This must only be used when the new alignment applies to all users of
1346 /// this MachineMemOperand.
1347 void refineAlignment(const MachineMemOperand *NewMMO) {
1348 MMO->refineAlignment(NewMMO);
1349 }
1350
1351 const SDValue &getChain() const { return getOperand(0); }
1352
1353 const SDValue &getBasePtr() const {
1354 switch (getOpcode()) {
1355 case ISD::STORE:
1356 case ISD::MSTORE:
1357 return getOperand(2);
1358 case ISD::MGATHER:
1359 case ISD::MSCATTER:
1360 return getOperand(3);
1361 default:
1362 return getOperand(1);
1363 }
1364 }
1365
1366 // Methods to support isa and dyn_cast
1367 static bool classof(const SDNode *N) {
1368 // For some targets, we lower some target intrinsics to a MemIntrinsicNode
1369 // with either an intrinsic or a target opcode.
1370 switch (N->getOpcode()) {
1371 case ISD::LOAD:
1372 case ISD::STORE:
1373 case ISD::PREFETCH:
1374 case ISD::ATOMIC_CMP_SWAP:
1375 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
1376 case ISD::ATOMIC_SWAP:
1377 case ISD::ATOMIC_LOAD_ADD:
1378 case ISD::ATOMIC_LOAD_SUB:
1379 case ISD::ATOMIC_LOAD_AND:
1380 case ISD::ATOMIC_LOAD_CLR:
1381 case ISD::ATOMIC_LOAD_OR:
1382 case ISD::ATOMIC_LOAD_XOR:
1383 case ISD::ATOMIC_LOAD_NAND:
1384 case ISD::ATOMIC_LOAD_MIN:
1385 case ISD::ATOMIC_LOAD_MAX:
1386 case ISD::ATOMIC_LOAD_UMIN:
1387 case ISD::ATOMIC_LOAD_UMAX:
1388 case ISD::ATOMIC_LOAD_FADD:
1389 case ISD::ATOMIC_LOAD_FSUB:
1390 case ISD::ATOMIC_LOAD:
1391 case ISD::ATOMIC_STORE:
1392 case ISD::MLOAD:
1393 case ISD::MSTORE:
1394 case ISD::MGATHER:
1395 case ISD::MSCATTER:
1396 return true;
1397 default:
1398 return N->isMemIntrinsic() || N->isTargetMemoryOpcode();
1399 }
1400 }
1401};
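
// Usage sketch (illustrative, not part of the header): the kind of filter a
// DAG combine applies before rewriting a memory access. `N` is assumed to be
// an SDNode* obtained elsewhere, and the helper name is made up.
static bool isRewritableMemAccess(const SDNode *N) {
  const auto *Mem = dyn_cast<MemSDNode>(N);
  if (!Mem)
    return false;
  // Only touch plain (non-atomic, non-volatile) accesses in address space 0.
  return Mem->isSimple() && Mem->getAddressSpace() == 0;
}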
1402
1403/// This is an SDNode representing atomic operations.
1404class AtomicSDNode : public MemSDNode {
1405public:
1406 AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
1407 EVT MemVT, MachineMemOperand *MMO)
1408 : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
1409 assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||
1410 MMO->isAtomic()) && "then why are we using an AtomicSDNode?");
1411 }
1412
1413 const SDValue &getBasePtr() const { return getOperand(1); }
1414 const SDValue &getVal() const { return getOperand(2); }
1415
1416 /// Returns true if this SDNode represents cmpxchg atomic operation, false
1417 /// otherwise.
1418 bool isCompareAndSwap() const {
1419 unsigned Op = getOpcode();
1420 return Op == ISD::ATOMIC_CMP_SWAP ||
1421 Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
1422 }
1423
1424 /// For cmpxchg atomic operations, return the atomic ordering requirements
1425 /// when store does not occur.
1426 AtomicOrdering getFailureOrdering() const {
1427 assert(isCompareAndSwap() && "Must be cmpxchg operation");
1428 return MMO->getFailureOrdering();
1429 }
1430
1431 // Methods to support isa and dyn_cast
1432 static bool classof(const SDNode *N) {
1433 return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
1434 N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
1435 N->getOpcode() == ISD::ATOMIC_SWAP ||
1436 N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
1437 N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
1438 N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
1439 N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
1440 N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
1441 N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
1442 N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
1443 N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
1444 N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
1445 N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
1446 N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
1447 N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
1448 N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
1449 N->getOpcode() == ISD::ATOMIC_LOAD ||
1450 N->getOpcode() == ISD::ATOMIC_STORE;
1451 }
1452};
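
// Usage sketch: recognizing a fully seq_cst compare-and-swap during lowering.
// The helper name is illustrative; AtomicOrdering comes from
// llvm/Support/AtomicOrdering.h, which is assumed to be available.
static bool isSeqCstCmpXchg(const SDNode *N) {
  const auto *A = dyn_cast<AtomicSDNode>(N);
  return A && A->isCompareAndSwap() &&
         A->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent &&
         A->getFailureOrdering() == AtomicOrdering::SequentiallyConsistent;
}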
1453
1454/// This SDNode is used for target intrinsics that touch
1455/// memory and need an associated MachineMemOperand. Its opcode may be
1456/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
1457/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
1458class MemIntrinsicSDNode : public MemSDNode {
1459public:
1460 MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
1461 SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO)
1462 : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) {
1463 SDNodeBits.IsMemIntrinsic = true;
1464 }
1465
1466 // Methods to support isa and dyn_cast
1467 static bool classof(const SDNode *N) {
1468 // We lower some target intrinsics to their target opcode early, so a
1469 // node with a target opcode can also be of this class.
1470 return N->isMemIntrinsic() ||
1471 N->getOpcode() == ISD::PREFETCH ||
1472 N->isTargetMemoryOpcode();
1473 }
1474};
1475
1476/// This SDNode is used to implement the code generator
1477/// support for the llvm IR shufflevector instruction. It combines elements
1478/// from two input vectors into a new input vector, with the selection and
1479/// ordering of elements determined by an array of integers, referred to as
1480/// the shuffle mask. For input vectors of width N, mask indices of 0..N-1
1481/// refer to elements from the LHS input, and indices from N to 2N-1 the RHS.
1482/// An index of -1 is treated as undef, such that the code generator may put
1483/// any value in the corresponding element of the result.
1484class ShuffleVectorSDNode : public SDNode {
1485 // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
1486 // is freed when the SelectionDAG object is destroyed.
1487 const int *Mask;
1488
1489protected:
1490 friend class SelectionDAG;
1491
1492 ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M)
1493 : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {}
1494
1495public:
1496 ArrayRef<int> getMask() const {
1497 EVT VT = getValueType(0);
1498 return makeArrayRef(Mask, VT.getVectorNumElements());
1499 }
1500
1501 int getMaskElt(unsigned Idx) const {
1502 assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!");
1503 return Mask[Idx];
1504 }
1505
1506 bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
1507
1508 int getSplatIndex() const {
1509 assert(isSplat() && "Cannot get splat index for non-splat!");
1510 EVT VT = getValueType(0);
1511 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
1512 if (Mask[i] >= 0)
1513 return Mask[i];
1514
1515 // We can choose any index value here and be correct because all elements
1516 // are undefined. Return 0 for better potential for callers to simplify.
1517 return 0;
1518 }
1519
1520 static bool isSplatMask(const int *Mask, EVT VT);
1521
1522 /// Change values in a shuffle permute mask assuming
1523 /// the two vector operands have swapped position.
1524 static void commuteMask(MutableArrayRef<int> Mask) {
1525 unsigned NumElems = Mask.size();
1526 for (unsigned i = 0; i != NumElems; ++i) {
1527 int idx = Mask[i];
1528 if (idx < 0)
1529 continue;
1530 else if (idx < (int)NumElems)
1531 Mask[i] = idx + NumElems;
1532 else
1533 Mask[i] = idx - NumElems;
1534 }
1535 }
1536
1537 static bool classof(const SDNode *N) {
1538 return N->getOpcode() == ISD::VECTOR_SHUFFLE;
1539 }
1540};
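
// Usage sketch: testing whether a shuffle is a splat of lane 0, e.g. when
// matching a broadcast pattern. The helper name is illustrative.
static bool isSplatOfLaneZero(const SDNode *N) {
  const auto *SVN = dyn_cast<ShuffleVectorSDNode>(N);
  return SVN && SVN->isSplat() && SVN->getSplatIndex() == 0;
}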
1541
1542class ConstantSDNode : public SDNode {
1543 friend class SelectionDAG;
1544
1545 const ConstantInt *Value;
1546
1547 ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT)
1548 : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(),
1549 getSDVTList(VT)),
1550 Value(val) {
1551 ConstantSDNodeBits.IsOpaque = isOpaque;
1552 }
1553
1554public:
1555 const ConstantInt *getConstantIntValue() const { return Value; }
1556 const APInt &getAPIntValue() const { return Value->getValue(); }
1557 uint64_t getZExtValue() const { return Value->getZExtValue(); }
1558 int64_t getSExtValue() const { return Value->getSExtValue(); }
1559 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) {
1560 return Value->getLimitedValue(Limit);
1561 }
1562 MaybeAlign getMaybeAlignValue() const { return Value->getMaybeAlignValue(); }
1563 Align getAlignValue() const { return Value->getAlignValue(); }
1564
1565 bool isOne() const { return Value->isOne(); }
1566 bool isNullValue() const { return Value->isZero(); }
1567 bool isAllOnesValue() const { return Value->isMinusOne(); }
1568 bool isMaxSignedValue() const { return Value->isMaxValue(true); }
1569 bool isMinSignedValue() const { return Value->isMinValue(true); }
1570
1571 bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; }
1572
1573 static bool classof(const SDNode *N) {
1574 return N->getOpcode() == ISD::Constant ||
1575 N->getOpcode() == ISD::TargetConstant;
1576 }
1577};
1578
1579uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
1580 return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
1581}
1582
1583const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
1584 return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
1585}
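
// Usage sketch: reading an immediate shift amount off operand 1 of a node,
// guarding the same assumption the two accessors above make (that the operand
// really is a ConstantSDNode). The helper name is illustrative.
static bool getConstantShiftAmount(const SDNode *N, uint64_t &Amt) {
  if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
    if (C->isOpaque())
      return false; // opaque constants should not be folded
    Amt = C->getZExtValue();
    return true;
  }
  return false;
}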
1586
1587class ConstantFPSDNode : public SDNode {
1588 friend class SelectionDAG;
1589
1590 const ConstantFP *Value;
1591
1592 ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT)
1593 : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0,
1594 DebugLoc(), getSDVTList(VT)),
1595 Value(val) {}
1596
1597public:
1598 const APFloat& getValueAPF() const { return Value->getValueAPF(); }
1599 const ConstantFP *getConstantFPValue() const { return Value; }
1600
1601 /// Return true if the value is positive or negative zero.
1602 bool isZero() const { return Value->isZero(); }
1603
1604 /// Return true if the value is a NaN.
1605 bool isNaN() const { return Value->isNaN(); }
1606
1607 /// Return true if the value is an infinity
1608 bool isInfinity() const { return Value->isInfinity(); }
1609
1610 /// Return true if the value is negative.
1611 bool isNegative() const { return Value->isNegative(); }
1612
1613 /// We don't rely on operator== working on double values, as
1614 /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
1615 /// As such, this method can be used to do an exact bit-for-bit comparison of
1616 /// two floating point values.
1617
1618 /// We leave the version with the double argument here because it's just so
1619 /// convenient to write "2.0" and the like. Without this function we'd
1620 /// have to duplicate its logic everywhere it's called.
1621 bool isExactlyValue(double V) const {
1622 return Value->getValueAPF().isExactlyValue(V);
1623 }
1624 bool isExactlyValue(const APFloat& V) const;
1625
1626 static bool isValueValidForType(EVT VT, const APFloat& Val);
1627
1628 static bool classof(const SDNode *N) {
1629 return N->getOpcode() == ISD::ConstantFP ||
1630 N->getOpcode() == ISD::TargetConstantFP;
1631 }
1632};
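
// Usage sketch: matching an FP immediate such as 2.0 when folding x * 2.0
// style patterns. `V` is assumed to be the SDValue of the candidate operand;
// the helper name is illustrative.
static bool isExactFPTwo(SDValue V) {
  const auto *CFP = dyn_cast<ConstantFPSDNode>(V);
  return CFP && !CFP->isNaN() && CFP->isExactlyValue(2.0);
}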
1633
1634/// Returns true if \p V is a constant integer zero.
1635bool isNullConstant(SDValue V);
1636
1637/// Returns true if \p V is an FP constant with a value of positive zero.
1638bool isNullFPConstant(SDValue V);
1639
1640/// Returns true if \p V is an integer constant with all bits set.
1641bool isAllOnesConstant(SDValue V);
1642
1643/// Returns true if \p V is a constant integer one.
1644bool isOneConstant(SDValue V);
1645
1646/// Return the non-bitcasted source operand of \p V if it exists.
1647/// If \p V is not a bitcasted value, it is returned as-is.
1648SDValue peekThroughBitcasts(SDValue V);
1649
1650/// Return the non-bitcasted and one-use source operand of \p V if it exists.
1651/// If \p V is not a bitcasted one-use value, it is returned as-is.
1652SDValue peekThroughOneUseBitcasts(SDValue V);
1653
1654/// Return the non-extracted vector source operand of \p V if it exists.
1655/// If \p V is not an extracted subvector, it is returned as-is.
1656SDValue peekThroughExtractSubvectors(SDValue V);
1657
1658/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
1659/// constant is canonicalized to be operand 1.
1660bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
1661
1662/// Returns the SDNode if it is a constant splat BuildVector or constant int.
1663ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
1664 bool AllowTruncation = false);
1665
1666/// Returns the SDNode if it is a demanded constant splat BuildVector or
1667/// constant int.
1668ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
1669 bool AllowUndefs = false,
1670 bool AllowTruncation = false);
1671
1672/// Returns the SDNode if it is a constant splat BuildVector or constant float.
1673ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
1674
1675/// Returns the SDNode if it is a demanded constant splat BuildVector or
1676/// constant float.
1677ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts,
1678 bool AllowUndefs = false);
1679
1680/// Return true if the value is a constant 0 integer or a splatted vector of
1681/// a constant 0 integer (with no undefs by default).
1682/// Build vector implicit truncation is not an issue for null values.
1683bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
1684
1685/// Return true if the value is a constant 1 integer or a splatted vector of a
1686/// constant 1 integer (with no undefs).
1687/// Does not permit build vector implicit truncation.
1688bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false);
1689
1690/// Return true if the value is a constant -1 integer or a splatted vector of a
1691/// constant -1 integer (with no undefs).
1692/// Does not permit build vector implicit truncation.
1693bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs = false);
1694
1695 /// Return true if \p V is either an integer or FP constant.
1696inline bool isIntOrFPConstant(SDValue V) {
1697 return isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V);
1698}
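
// Usage sketch: checking that an operand is a power-of-two constant splat, a
// pattern common in shift/multiply strength reduction. The helper name is
// illustrative; APInt::isPowerOf2 is assumed to be available.
static bool isPow2Splat(SDValue V) {
  if (ConstantSDNode *C = isConstOrConstSplat(V, /*AllowUndefs=*/false))
    return C->getAPIntValue().isPowerOf2();
  return false;
}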
1699
1700class GlobalAddressSDNode : public SDNode {
1701 friend class SelectionDAG;
1702
1703 const GlobalValue *TheGlobal;
1704 int64_t Offset;
1705 unsigned TargetFlags;
1706
1707 GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
1708 const GlobalValue *GA, EVT VT, int64_t o,
1709 unsigned TF);
1710
1711public:
1712 const GlobalValue *getGlobal() const { return TheGlobal; }
1713 int64_t getOffset() const { return Offset; }
1714 unsigned getTargetFlags() const { return TargetFlags; }
1715 // Return the address space this GlobalAddress belongs to.
1716 unsigned getAddressSpace() const;
1717
1718 static bool classof(const SDNode *N) {
1719 return N->getOpcode() == ISD::GlobalAddress ||
1720 N->getOpcode() == ISD::TargetGlobalAddress ||
1721 N->getOpcode() == ISD::GlobalTLSAddress ||
1722 N->getOpcode() == ISD::TargetGlobalTLSAddress;
1723 }
1724};
1725
1726class FrameIndexSDNode : public SDNode {
1727 friend class SelectionDAG;
1728
1729 int FI;
1730
1731 FrameIndexSDNode(int fi, EVT VT, bool isTarg)
1732 : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
1733 0, DebugLoc(), getSDVTList(VT)), FI(fi) {
1734 }
1735
1736public:
1737 int getIndex() const { return FI; }
1738
1739 static bool classof(const SDNode *N) {
1740 return N->getOpcode() == ISD::FrameIndex ||
1741 N->getOpcode() == ISD::TargetFrameIndex;
1742 }
1743};
1744
1745/// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate
1746 /// the offset and size that are started/ended in the underlying FrameIndex.
1747class LifetimeSDNode : public SDNode {
1748 friend class SelectionDAG;
1749 int64_t Size;
1750 int64_t Offset; // -1 if offset is unknown.
1751
1752 LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1753 SDVTList VTs, int64_t Size, int64_t Offset)
1754 : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
1755public:
1756 int64_t getFrameIndex() const {
1757 return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
1758 }
1759
1760 bool hasOffset() const { return Offset >= 0; }
1761 int64_t getOffset() const {
1762 assert(hasOffset() && "offset is unknown");
1763 return Offset;
1764 }
1765 int64_t getSize() const {
1766 assert(hasOffset() && "offset is unknown");
1767 return Size;
1768 }
1769
1770 // Methods to support isa and dyn_cast
1771 static bool classof(const SDNode *N) {
1772 return N->getOpcode() == ISD::LIFETIME_START ||
1773 N->getOpcode() == ISD::LIFETIME_END;
1774 }
1775};
1776
1777/// This SDNode is used for PSEUDO_PROBE values, which are the function guid and
1778 /// the index of the basic block being probed. A pseudo probe serves as a
1779 /// placeholder and will be removed at the end of compilation. It has no
1780 /// operands because we do not want instruction selection to deal with any.
1781class PseudoProbeSDNode : public SDNode {
1782 friend class SelectionDAG;
1783 uint64_t Guid;
1784 uint64_t Index;
1785 uint32_t Attributes;
1786
1787 PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl,
1788 SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr)
1789 : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index),
1790 Attributes(Attr) {}
1791
1792public:
1793 uint64_t getGuid() const { return Guid; }
1794 uint64_t getIndex() const { return Index; }
1795 uint32_t getAttributes() const { return Attributes; }
1796
1797 // Methods to support isa and dyn_cast
1798 static bool classof(const SDNode *N) {
1799 return N->getOpcode() == ISD::PSEUDO_PROBE;
1800 }
1801};
1802
1803class JumpTableSDNode : public SDNode {
1804 friend class SelectionDAG;
1805
1806 int JTI;
1807 unsigned TargetFlags;
1808
1809 JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF)
1810 : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
1811 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
1812 }
1813
1814public:
1815 int getIndex() const { return JTI; }
1816 unsigned getTargetFlags() const { return TargetFlags; }
1817
1818 static bool classof(const SDNode *N) {
1819 return N->getOpcode() == ISD::JumpTable ||
1820 N->getOpcode() == ISD::TargetJumpTable;
1821 }
1822};
1823
1824class ConstantPoolSDNode : public SDNode {
1825 friend class SelectionDAG;
1826
1827 union {
1828 const Constant *ConstVal;
1829 MachineConstantPoolValue *MachineCPVal;
1830 } Val;
1831 int Offset; // It's a MachineConstantPoolValue if top bit is set.
1832 Align Alignment; // Minimum alignment requirement of CP.
1833 unsigned TargetFlags;
1834
1835 ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
1836 Align Alignment, unsigned TF)
1837 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1838 DebugLoc(), getSDVTList(VT)),
1839 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1840 assert(Offset >= 0 && "Offset is too large");
1841 Val.ConstVal = c;
1842 }
1843
1844 ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, EVT VT, int o,
1845 Align Alignment, unsigned TF)
1846 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1847 DebugLoc(), getSDVTList(VT)),
1848 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1849 assert(Offset >= 0 && "Offset is too large");
1850 Val.MachineCPVal = v;
1851 Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
1852 }
1853
1854public:
1855 bool isMachineConstantPoolEntry() const {
1856 return Offset < 0;
1857 }
1858
1859 const Constant *getConstVal() const {
1860 assert(!isMachineConstantPoolEntry() && "Wrong constantpool type");
1861 return Val.ConstVal;
1862 }
1863
1864 MachineConstantPoolValue *getMachineCPVal() const {
1865 assert(isMachineConstantPoolEntry() && "Wrong constantpool type");
1866 return Val.MachineCPVal;
1867 }
1868
1869 int getOffset() const {
1870 return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
1871 }
1872
1873 // Return the alignment of this constant pool object, which is either 0 (for
1874 // default alignment) or the desired value.
1875 Align getAlign() const { return Alignment; }
1876 unsigned getTargetFlags() const { return TargetFlags; }
1877
1878 Type *getType() const;
1879
1880 static bool classof(const SDNode *N) {
1881 return N->getOpcode() == ISD::ConstantPool ||
1882 N->getOpcode() == ISD::TargetConstantPool;
1883 }
1884};
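
// Usage sketch: retrieving the underlying IR constant for a constant-pool
// entry, returning null for machine constant pool values. Illustrative helper
// name; target AsmPrinters do something along these lines.
static const Constant *getIRConstantOrNull(const ConstantPoolSDNode *CP) {
  // Machine constant pool entries have no IR-level Constant to return.
  return CP->isMachineConstantPoolEntry() ? nullptr : CP->getConstVal();
}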
1885
1886/// Completely target-dependent object reference.
1887class TargetIndexSDNode : public SDNode {
1888 friend class SelectionDAG;
1889
1890 unsigned TargetFlags;
1891 int Index;
1892 int64_t Offset;
1893
1894public:
1895 TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF)
1896 : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
1897 TargetFlags(TF), Index(Idx), Offset(Ofs) {}
1898
1899 unsigned getTargetFlags() const { return TargetFlags; }
1900 int getIndex() const { return Index; }
1901 int64_t getOffset() const { return Offset; }
1902
1903 static bool classof(const SDNode *N) {
1904 return N->getOpcode() == ISD::TargetIndex;
1905 }
1906};
1907
1908class BasicBlockSDNode : public SDNode {
1909 friend class SelectionDAG;
1910
1911 MachineBasicBlock *MBB;
1912
1913 /// Debug info is meaningful and potentially useful here, but we create
1914 /// blocks out of order when they're jumped to, which makes it a bit
1915 /// harder. Let's see if we need it first.
1916 explicit BasicBlockSDNode(MachineBasicBlock *mbb)
1917 : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
1918 {}
1919
1920public:
1921 MachineBasicBlock *getBasicBlock() const { return MBB; }
1922
1923 static bool classof(const SDNode *N) {
1924 return N->getOpcode() == ISD::BasicBlock;
1925 }
1926};
1927
1928/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
1929class BuildVectorSDNode : public SDNode {
1930public:
1931 // These are constructed as SDNodes and then cast to BuildVectorSDNodes.
1932 explicit BuildVectorSDNode() = delete;
1933
1934 /// Check if this is a constant splat, and if so, find the
1935 /// smallest element size that splats the vector. If MinSplatBits is
1936 /// nonzero, the element size must be at least that large. Note that the
1937 /// splat element may be the entire vector (i.e., a one element vector).
1938 /// Returns the splat element value in SplatValue. Any undefined bits in
1939 /// that value are zero, and the corresponding bits in the SplatUndef mask
1940 /// are set. The SplatBitSize value is set to the splat element size in
1941 /// bits. HasAnyUndefs is set to true if any bits in the vector are
1942 /// undefined. isBigEndian describes the endianness of the target.
1943 bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
1944 unsigned &SplatBitSize, bool &HasAnyUndefs,
1945 unsigned MinSplatBits = 0,
1946 bool isBigEndian = false) const;
1947
1948 /// Returns the demanded splatted value or a null value if this is not a
1949 /// splat.
1950 ///
1951 /// The DemandedElts mask indicates the elements that must be in the splat.
1952 /// If passed a non-null UndefElements bitvector, it will resize it to match
1953 /// the vector width and set the bits where elements are undef.
1954 SDValue getSplatValue(const APInt &DemandedElts,
1955 BitVector *UndefElements = nullptr) const;
1956
1957 /// Returns the splatted value or a null value if this is not a splat.
1958 ///
1959 /// If passed a non-null UndefElements bitvector, it will resize it to match
1960 /// the vector width and set the bits where elements are undef.
1961 SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
1962
1963 /// Find the shortest repeating sequence of values in the build vector.
1964 ///
1965 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
1966 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
1967 ///
1968 /// Currently this must be a power-of-2 build vector.
1969 /// The DemandedElts mask indicates the elements that must be present,
1970 /// undemanded elements in Sequence may be null (SDValue()). If passed a
1971 /// non-null UndefElements bitvector, it will resize it to match the original
1972 /// vector width and set the bits where elements are undef. If result is
1973 /// false, Sequence will be empty.
1974 bool getRepeatedSequence(const APInt &DemandedElts,
1975 SmallVectorImpl<SDValue> &Sequence,
1976 BitVector *UndefElements = nullptr) const;
1977
1978 /// Find the shortest repeating sequence of values in the build vector.
1979 ///
1980 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
1981 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
1982 ///
1983 /// Currently this must be a power-of-2 build vector.
1984 /// If passed a non-null UndefElements bitvector, it will resize it to match
1985 /// the original vector width and set the bits where elements are undef.
1986 /// If result is false, Sequence will be empty.
1987 bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
1988 BitVector *UndefElements = nullptr) const;
1989
1990 /// Returns the demanded splatted constant or null if this is not a constant
1991 /// splat.
1992 ///
1993 /// The DemandedElts mask indicates the elements that must be in the splat.
1994 /// If passed a non-null UndefElements bitvector, it will resize it to match
1995 /// the vector width and set the bits where elements are undef.
1996 ConstantSDNode *
1997 getConstantSplatNode(const APInt &DemandedElts,
1998 BitVector *UndefElements = nullptr) const;
1999
2000 /// Returns the splatted constant or null if this is not a constant
2001 /// splat.
2002 ///
2003 /// If passed a non-null UndefElements bitvector, it will resize it to match
2004 /// the vector width and set the bits where elements are undef.
2005 ConstantSDNode *
2006 getConstantSplatNode(BitVector *UndefElements = nullptr) const;
2007
2008 /// Returns the demanded splatted constant FP or null if this is not a
2009 /// constant FP splat.
2010 ///
2011 /// The DemandedElts mask indicates the elements that must be in the splat.
2012 /// If passed a non-null UndefElements bitvector, it will resize it to match
2013 /// the vector width and set the bits where elements are undef.
2014 ConstantFPSDNode *
2015 getConstantFPSplatNode(const APInt &DemandedElts,
2016 BitVector *UndefElements = nullptr) const;
2017
2018 /// Returns the splatted constant FP or null if this is not a constant
2019 /// FP splat.
2020 ///
2021 /// If passed a non-null UndefElements bitvector, it will resize it to match
2022 /// the vector width and set the bits where elements are undef.
2023 ConstantFPSDNode *
2024 getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;
2025
2026 /// If this is a constant FP splat and the splatted constant FP is an
2027 /// exact power or 2, return the log base 2 integer value. Otherwise,
2028 /// return -1.
2029 ///
2030 /// The BitWidth specifies the necessary bit precision.
2031 int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
2032 uint32_t BitWidth) const;
2033
2034 bool isConstant() const;
2035
2036 static bool classof(const SDNode *N) {
2037 return N->getOpcode() == ISD::BUILD_VECTOR;
2038 }
2039};
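
// Usage sketch: detecting a uniform constant build vector, the common entry
// point into the splat queries above. The helper name is illustrative.
static bool isUniformConstantVector(const SDNode *N) {
  const auto *BV = dyn_cast<BuildVectorSDNode>(N);
  if (!BV)
    return false;
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  return BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                             HasAnyUndefs) &&
         !HasAnyUndefs;
}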
2040
2041/// An SDNode that holds an arbitrary LLVM IR Value. This is
2042/// used when the SelectionDAG needs to make a simple reference to something
2043/// in the LLVM IR representation.
2044///
2045class SrcValueSDNode : public SDNode {
2046 friend class SelectionDAG;
2047
2048 const Value *V;
2049
2050 /// Create a SrcValue for a general value.
2051 explicit SrcValueSDNode(const Value *v)
2052 : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
2053
2054public:
2055 /// Return the contained Value.
2056 const Value *getValue() const { return V; }
2057
2058 static bool classof(const SDNode *N) {
2059 return N->getOpcode() == ISD::SRCVALUE;
2060 }
2061};
2062
2063class MDNodeSDNode : public SDNode {
2064 friend class SelectionDAG;
2065
2066 const MDNode *MD;
2067
2068 explicit MDNodeSDNode(const MDNode *md)
2069 : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
2070 {}
2071
2072public:
2073 const MDNode *getMD() const { return MD; }
2074
2075 static bool classof(const SDNode *N) {
2076 return N->getOpcode() == ISD::MDNODE_SDNODE;
2077 }
2078};
2079
2080class RegisterSDNode : public SDNode {
2081 friend class SelectionDAG;
2082
2083 Register Reg;
2084
2085 RegisterSDNode(Register reg, EVT VT)
2086 : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
2087
2088public:
2089 Register getReg() const { return Reg; }
2090
2091 static bool classof(const SDNode *N) {
2092 return N->getOpcode() == ISD::Register;
2093 }
2094};
2095
2096class RegisterMaskSDNode : public SDNode {
2097 friend class SelectionDAG;
2098
2099 // The memory for RegMask is not owned by the node.
2100 const uint32_t *RegMask;
2101
2102 RegisterMaskSDNode(const uint32_t *mask)
2103 : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
2104 RegMask(mask) {}
2105
2106public:
2107 const uint32_t *getRegMask() const { return RegMask; }
2108
2109 static bool classof(const SDNode *N) {
2110 return N->getOpcode() == ISD::RegisterMask;
2111 }
2112};
2113
2114class BlockAddressSDNode : public SDNode {
2115 friend class SelectionDAG;
2116
2117 const BlockAddress *BA;
2118 int64_t Offset;
2119 unsigned TargetFlags;
2120
2121 BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
2122 int64_t o, unsigned Flags)
2123 : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
2124 BA(ba), Offset(o), TargetFlags(Flags) {}
2125
2126public:
2127 const BlockAddress *getBlockAddress() const { return BA; }
2128 int64_t getOffset() const { return Offset; }
2129 unsigned getTargetFlags() const { return TargetFlags; }
2130
2131 static bool classof(const SDNode *N) {
2132 return N->getOpcode() == ISD::BlockAddress ||
2133 N->getOpcode() == ISD::TargetBlockAddress;
2134 }
2135};
2136
2137class LabelSDNode : public SDNode {
2138 friend class SelectionDAG;
2139
2140 MCSymbol *Label;
2141
2142 LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L)
2143 : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) {
2144 assert(LabelSDNode::classof(this) && "not a label opcode");
2145 }
2146
2147public:
2148 MCSymbol *getLabel() const { return Label; }
2149
2150 static bool classof(const SDNode *N) {
2151 return N->getOpcode() == ISD::EH_LABEL ||
2152 N->getOpcode() == ISD::ANNOTATION_LABEL;
2153 }
2154};
2155
2156class ExternalSymbolSDNode : public SDNode {
2157 friend class SelectionDAG;
2158
2159 const char *Symbol;
2160 unsigned TargetFlags;
2161
2162 ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT)
2163 : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0,
2164 DebugLoc(), getSDVTList(VT)),
2165 Symbol(Sym), TargetFlags(TF) {}
2166
2167public:
2168 const char *getSymbol() const { return Symbol; }
2169 unsigned getTargetFlags() const { return TargetFlags; }
2170
2171 static bool classof(const SDNode *N) {
2172 return N->getOpcode() == ISD::ExternalSymbol ||
2173 N->getOpcode() == ISD::TargetExternalSymbol;
2174 }
2175};
2176
2177class MCSymbolSDNode : public SDNode {
2178 friend class SelectionDAG;
2179
2180 MCSymbol *Symbol;
2181
2182 MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
2183 : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}
2184
2185public:
2186 MCSymbol *getMCSymbol() const { return Symbol; }
2187
2188 static bool classof(const SDNode *N) {
2189 return N->getOpcode() == ISD::MCSymbol;
2190 }
2191};
2192
2193class CondCodeSDNode : public SDNode {
2194 friend class SelectionDAG;
2195
2196 ISD::CondCode Condition;
2197
2198 explicit CondCodeSDNode(ISD::CondCode Cond)
2199 : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2200 Condition(Cond) {}
2201
2202public:
2203 ISD::CondCode get() const { return Condition; }
2204
2205 static bool classof(const SDNode *N) {
2206 return N->getOpcode() == ISD::CONDCODE;
2207 }
2208};
2209
2210/// This class is used to represent EVT's, which are used
2211/// to parameterize some operations.
2212class VTSDNode : public SDNode {
2213 friend class SelectionDAG;
2214
2215 EVT ValueType;
2216
2217 explicit VTSDNode(EVT VT)
2218 : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2219 ValueType(VT) {}
2220
2221public:
2222 EVT getVT() const { return ValueType; }
2223
2224 static bool classof(const SDNode *N) {
2225 return N->getOpcode() == ISD::VALUETYPE;
2226 }
2227};
2228
2229/// Base class for LoadSDNode and StoreSDNode
2230class LSBaseSDNode : public MemSDNode {
2231public:
2232 LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
2233 SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
2234 MachineMemOperand *MMO)
2235 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2236 LSBaseSDNodeBits.AddressingMode = AM;
2237 assert(getAddressingMode() == AM && "Value truncated");
2238 }
2239
2240 const SDValue &getOffset() const {
2241 return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
2242 }
2243
2244 /// Return the addressing mode for this load or store:
2245 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2246 ISD::MemIndexedMode getAddressingMode() const {
2247 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2248 }
2249
2250 /// Return true if this is a pre/post inc/dec load/store.
2251 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2252
2253 /// Return true if this is NOT a pre/post inc/dec load/store.
2254 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2255
2256 static bool classof(const SDNode *N) {
2257 return N->getOpcode() == ISD::LOAD ||
2258 N->getOpcode() == ISD::STORE;
2259 }
2260};
2261
2262/// This class is used to represent ISD::LOAD nodes.
2263class LoadSDNode : public LSBaseSDNode {
2264 friend class SelectionDAG;
2265
2266 LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2267 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
2268 MachineMemOperand *MMO)
2269 : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) {
2270 LoadSDNodeBits.ExtTy = ETy;
2271 assert(readMem() && "Load MachineMemOperand is not a load!");
2272 assert(!writeMem() && "Load MachineMemOperand is a store!");
2273 }
2274
2275public:
2276 /// Return whether this is a plain node,
2277 /// or one of the varieties of value-extending loads.
2278 ISD::LoadExtType getExtensionType() const {
2279 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2280 }
2281
2282 const SDValue &getBasePtr() const { return getOperand(1); }
2283 const SDValue &getOffset() const { return getOperand(2); }
2284
2285 static bool classof(const SDNode *N) {
2286 return N->getOpcode() == ISD::LOAD;
2287 }
2288};
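
// Usage sketch: the checks a combine typically performs before folding a
// load (illustrative helper name; compare ISD::isNormalLoad further down,
// which additionally ignores volatility).
static bool isFoldableLoad(const SDNode *N) {
  const auto *LD = dyn_cast<LoadSDNode>(N);
  return LD && LD->isSimple() && LD->isUnindexed() &&
         LD->getExtensionType() == ISD::NON_EXTLOAD;
}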
2289
2290/// This class is used to represent ISD::STORE nodes.
2291class StoreSDNode : public LSBaseSDNode {
2292 friend class SelectionDAG;
2293
2294 StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2295 ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
2296 MachineMemOperand *MMO)
2297 : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
2298 StoreSDNodeBits.IsTruncating = isTrunc;
2299 assert(!readMem() && "Store MachineMemOperand is a load!");
2300 assert(writeMem() && "Store MachineMemOperand is not a store!");
2301 }
2302
2303public:
2304 /// Return true if the op does a truncation before store.
2305 /// For integers this is the same as doing a TRUNCATE and storing the result.
2306 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2307 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2308 void setTruncatingStore(bool Truncating) {
2309 StoreSDNodeBits.IsTruncating = Truncating;
2310 }
2311
2312 const SDValue &getValue() const { return getOperand(1); }
2313 const SDValue &getBasePtr() const { return getOperand(2); }
2314 const SDValue &getOffset() const { return getOperand(3); }
2315
2316 static bool classof(const SDNode *N) {
2317 return N->getOpcode() == ISD::STORE;
2318 }
2319};
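
// Usage sketch: the matching checks for stores (illustrative helper name;
// compare ISD::isNormalStore below).
static bool isFoldableStore(const SDNode *N) {
  const auto *ST = dyn_cast<StoreSDNode>(N);
  return ST && ST->isSimple() && ST->isUnindexed() &&
         !ST->isTruncatingStore();
}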
2320
2321/// This base class is used to represent MLOAD and MSTORE nodes
2322class MaskedLoadStoreSDNode : public MemSDNode {
2323public:
2324 friend class SelectionDAG;
2325
2326 MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
2327 const DebugLoc &dl, SDVTList VTs,
2328 ISD::MemIndexedMode AM, EVT MemVT,
2329 MachineMemOperand *MMO)
2330 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2331 LSBaseSDNodeBits.AddressingMode = AM;
2332 assert(getAddressingMode() == AM && "Value truncated");
2333 }
2334
2335 // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru)
2336 // MaskedStoreSDNode (Chain, data, ptr, offset, mask)
2337 // Mask is a vector of i1 elements
2338 const SDValue &getOffset() const {
2339 return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
2340 }
2341 const SDValue &getMask() const {
2342 return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4);
2343 }
2344
2345 /// Return the addressing mode for this load or store:
2346 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2347 ISD::MemIndexedMode getAddressingMode() const {
2348 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2349 }
2350
2351 /// Return true if this is a pre/post inc/dec load/store.
2352 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2353
2354 /// Return true if this is NOT a pre/post inc/dec load/store.
2355 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2356
2357 static bool classof(const SDNode *N) {
2358 return N->getOpcode() == ISD::MLOAD ||
2359 N->getOpcode() == ISD::MSTORE;
2360 }
2361};
2362
2363/// This class is used to represent an MLOAD node
2364class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
2365public:
2366 friend class SelectionDAG;
2367
2368 MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2369 ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
2370 bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
2371 : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) {
2372 LoadSDNodeBits.ExtTy = ETy;
2373 LoadSDNodeBits.IsExpanding = IsExpanding;
2374 }
2375
2376 ISD::LoadExtType getExtensionType() const {
2377 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2378 }
2379
2380 const SDValue &getBasePtr() const { return getOperand(1); }
2381 const SDValue &getOffset() const { return getOperand(2); }
2382 const SDValue &getMask() const { return getOperand(3); }
2383 const SDValue &getPassThru() const { return getOperand(4); }
2384
2385 static bool classof(const SDNode *N) {
2386 return N->getOpcode() == ISD::MLOAD;
2387 }
2388
2389 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2390};
2391
2392/// This class is used to represent an MSTORE node
2393class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
2394public:
2395 friend class SelectionDAG;
2396
2397 MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2398 ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
2399 EVT MemVT, MachineMemOperand *MMO)
2400 : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) {
2401 StoreSDNodeBits.IsTruncating = isTrunc;
2402 StoreSDNodeBits.IsCompressing = isCompressing;
2403 }
2404
2405 /// Return true if the op does a truncation before store.
2406 /// For integers this is the same as doing a TRUNCATE and storing the result.
2407 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2408 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2409
2410 /// Returns true if the op does a compression to the vector before storing.
2411 /// The node contiguously stores the active elements (integers or floats)
2412 /// in src (those with their respective bit set in writemask k) to unaligned
2413 /// memory at base_addr.
2414 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2415
2416 const SDValue &getValue() const { return getOperand(1); }
2417 const SDValue &getBasePtr() const { return getOperand(2); }
2418 const SDValue &getOffset() const { return getOperand(3); }
2419 const SDValue &getMask() const { return getOperand(4); }
2420
2421 static bool classof(const SDNode *N) {
2422 return N->getOpcode() == ISD::MSTORE;
2423 }
2424};
2425
2426/// This is a base class used to represent
2427/// MGATHER and MSCATTER nodes
2428///
2429class MaskedGatherScatterSDNode : public MemSDNode {
2430public:
2431 friend class SelectionDAG;
2432
2433 MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
2434 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2435 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2436 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2437 LSBaseSDNodeBits.AddressingMode = IndexType;
2438 assert(getIndexType() == IndexType && "Value truncated");
2439 }
2440
2441 /// How is Index applied to BasePtr when computing addresses.
2442 ISD::MemIndexType getIndexType() const {
2443 return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2444 }
2445 void setIndexType(ISD::MemIndexType IndexType) {
2446 LSBaseSDNodeBits.AddressingMode = IndexType;
2447 }
2448 bool isIndexScaled() const {
2449 return (getIndexType() == ISD::SIGNED_SCALED) ||
2450 (getIndexType() == ISD::UNSIGNED_SCALED);
2451 }
2452 bool isIndexSigned() const {
2453 return (getIndexType() == ISD::SIGNED_SCALED) ||
2454 (getIndexType() == ISD::SIGNED_UNSCALED);
2455 }
2456
2457 // In both nodes the mask is Op2 and the base address is Op3:
2458 // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale)
2459 // MaskedScatterSDNode (Chain, value, mask, base, index, scale)
2460 // Mask is a vector of i1 elements
2461 const SDValue &getBasePtr() const { return getOperand(3); }
2462 const SDValue &getIndex() const { return getOperand(4); }
2463 const SDValue &getMask() const { return getOperand(2); }
2464 const SDValue &getScale() const { return getOperand(5); }
2465
2466 static bool classof(const SDNode *N) {
2467 return N->getOpcode() == ISD::MGATHER ||
2468 N->getOpcode() == ISD::MSCATTER;
2469 }
2470};
2471
2472/// This class is used to represent an MGATHER node
2473///
2474class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
2475public:
2476 friend class SelectionDAG;
2477
2478 MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2479 EVT MemVT, MachineMemOperand *MMO,
2480 ISD::MemIndexType IndexType, ISD::LoadExtType ETy)
2481 : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
2482 IndexType) {
2483 LoadSDNodeBits.ExtTy = ETy;
2484 }
2485
2486 const SDValue &getPassThru() const { return getOperand(1); }
2487
2488 ISD::LoadExtType getExtensionType() const {
2489 return ISD::LoadExtType(LoadSDNodeBits.ExtTy);
2490 }
2491
2492 static bool classof(const SDNode *N) {
2493 return N->getOpcode() == ISD::MGATHER;
2494 }
2495};
2496
2497/// This class is used to represent an MSCATTER node
2498///
2499class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
2500public:
2501 friend class SelectionDAG;
2502
2503 MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2504 EVT MemVT, MachineMemOperand *MMO,
2505 ISD::MemIndexType IndexType, bool IsTrunc)
2506 : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
2507 IndexType) {
2508 StoreSDNodeBits.IsTruncating = IsTrunc;
2509 }
2510
2511 /// Return true if the op does a truncation before store.
2512 /// For integers this is the same as doing a TRUNCATE and storing the result.
2513 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2514 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2515
2516 const SDValue &getValue() const { return getOperand(1); }
2517
2518 static bool classof(const SDNode *N) {
2519 return N->getOpcode() == ISD::MSCATTER;
2520 }
2521};
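
// Usage sketch: reading the operand layout shared by MGATHER and MSCATTER
// through the common base class. Illustrative; SDValue::getValueType and
// EVT::isVector are assumed to be available as usual.
static bool hasScaledSignedVectorIndex(const SDNode *N) {
  const auto *MGS = dyn_cast<MaskedGatherScatterSDNode>(N);
  return MGS && MGS->isIndexScaled() && MGS->isIndexSigned() &&
         MGS->getIndex().getValueType().isVector();
}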
2522
2523/// An SDNode that represents everything that will be needed
2524/// to construct a MachineInstr. These nodes are created during the
2525/// instruction selection proper phase.
2526///
2527/// Note that the only supported way to set the `memoperands` is by calling the
2528/// `SelectionDAG::setNodeMemRefs` function as the memory management happens
2529/// inside the DAG rather than in the node.
2530class MachineSDNode : public SDNode {
2531private:
2532 friend class SelectionDAG;
2533
2534 MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
2535 : SDNode(Opc, Order, DL, VTs) {}
2536
2537 // We use a pointer union between a single `MachineMemOperand` pointer and
2538 // a pointer to an array of `MachineMemOperand` pointers. This is null when
2539 // the number of these is zero, the single pointer variant used when the
2540 // number is one, and the array is used for larger numbers.
2541 //
2542 // The array is allocated via the `SelectionDAG`'s allocator and so will
2543 // always live until the DAG is cleaned up and doesn't require ownership here.
2544 //
2545 // We can't use something simpler like `TinyPtrVector` here because `SDNode`
2546 // subclasses aren't managed in a conforming C++ manner. See the comments on
2547 // `SelectionDAG::MorphNodeTo` which details what all goes on, but the
2548 // constraint here is that these don't manage memory with their constructor or
2549 // destructor and can be initialized to a good state even if they start off
2550 // uninitialized.
2551 PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {};
2552
2553 // Note that this could be folded into the above `MemRefs` member if doing so
2554 // is advantageous at some point. We don't need to store this in most cases.
2555 // However, at the moment this doesn't appear to make the allocation any
2556 // smaller and makes the code somewhat simpler to read.
2557 int NumMemRefs = 0;
2558
2559public:
2560 using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator;
2561
2562 ArrayRef<MachineMemOperand *> memoperands() const {
2563 // Special case the common cases.
2564 if (NumMemRefs == 0)
2565 return {};
2566 if (NumMemRefs == 1)
2567 return makeArrayRef(MemRefs.getAddrOfPtr1(), 1);
2568
2569 // Otherwise we have an actual array.
2570 return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
2571 }
2572 mmo_iterator memoperands_begin() const { return memoperands().begin(); }
2573 mmo_iterator memoperands_end() const { return memoperands().end(); }
2574 bool memoperands_empty() const { return memoperands().empty(); }
2575
2576 /// Clear out the memory reference descriptor list.
2577 void clearMemRefs() {
2578 MemRefs = nullptr;
2579 NumMemRefs = 0;
2580 }
2581
2582 static bool classof(const SDNode *N) {
2583 return N->isMachineOpcode();
2584 }
2585};
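
// Usage sketch: walking the memory references attached to a selected node
// (illustrative helper; real consumers live in scheduling and MI emission).
// MachineMemOperand::isVolatile is assumed available from MachineMemOperand.h.
static bool anyVolatileMemRef(const MachineSDNode *MN) {
  for (const MachineMemOperand *MMO : MN->memoperands())
    if (MMO->isVolatile())
      return true;
  return false;
}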
2586
2587/// An SDNode that records if a register contains a value that is guaranteed to
2588/// be aligned accordingly.
2589class AssertAlignSDNode : public SDNode {
2590 Align Alignment;
2591
2592public:
2593 AssertAlignSDNode(unsigned Order, const DebugLoc &DL, EVT VT, Align A)
2594 : SDNode(ISD::AssertAlign, Order, DL, getSDVTList(VT)), Alignment(A) {}
2595
2596 Align getAlign() const { return Alignment; }
2597
2598 static bool classof(const SDNode *N) {
2599 return N->getOpcode() == ISD::AssertAlign;
2600 }
2601};
2602
2603class SDNodeIterator {
2604 const SDNode *Node;
2605 unsigned Operand;
2606
2607 SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}
2608
2609public:
2610 using iterator_category = std::forward_iterator_tag;
2611 using value_type = SDNode;
2612 using difference_type = std::ptrdiff_t;
2613 using pointer = value_type *;
2614 using reference = value_type &;
2615
2616 bool operator==(const SDNodeIterator& x) const {
2617 return Operand == x.Operand;
2618 }
2619 bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }
2620
2621 pointer operator*() const {
2622 return Node->getOperand(Operand).getNode();
2623 }
2624 pointer operator->() const { return operator*(); }
2625
2626 SDNodeIterator& operator++() { // Preincrement
2627 ++Operand;
2628 return *this;
2629 }
2630 SDNodeIterator operator++(int) { // Postincrement
2631 SDNodeIterator tmp = *this; ++*this; return tmp;
2632 }
2633 size_t operator-(SDNodeIterator Other) const {
2634 assert(Node == Other.Node &&
2635 "Cannot compare iterators of two different nodes!");
2636 return Operand - Other.Operand;
2637 }
2638
2639 static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
2640 static SDNodeIterator end (const SDNode *N) {
2641 return SDNodeIterator(N, N->getNumOperands());
2642 }
2643
2644 unsigned getOperand() const { return Operand; }
2645 const SDNode *getNode() const { return Node; }
2646};
2647
2648template <> struct GraphTraits<SDNode*> {
2649 using NodeRef = SDNode *;
2650 using ChildIteratorType = SDNodeIterator;
2651
2652 static NodeRef getEntryNode(SDNode *N) { return N; }
2653
2654 static ChildIteratorType child_begin(NodeRef N) {
2655 return SDNodeIterator::begin(N);
2656 }
2657
2658 static ChildIteratorType child_end(NodeRef N) {
2659 return SDNodeIterator::end(N);
2660 }
2661};
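
// Usage sketch: hand-rolled iteration over a node's operand nodes with
// SDNodeIterator, which is what the GraphTraits specialization above exposes
// to LLVM's generic graph algorithms. The helper name is illustrative.
static bool hasConstantOperandNode(const SDNode *N) {
  for (SDNodeIterator I = SDNodeIterator::begin(N),
                      E = SDNodeIterator::end(N);
       I != E; ++I)
    if (isa<ConstantSDNode>(*I))
      return true;
  return false;
}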
2662
2663/// A representation of the largest SDNode, for use in sizeof().
2664///
2665/// This needs to be a union because the largest node differs on 32 bit systems
2666/// with 4 and 8 byte pointer alignment, respectively.
2667using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
2668 BlockAddressSDNode,
2669 GlobalAddressSDNode,
2670 PseudoProbeSDNode>;
2671
2672/// The SDNode class with the greatest alignment requirement.
2673using MostAlignedSDNode = GlobalAddressSDNode;
2674
2675namespace ISD {
2676
2677 /// Returns true if the specified node is a non-extending and unindexed load.
2678 inline bool isNormalLoad(const SDNode *N) {
2679 const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
2680 return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
2681 Ld->getAddressingMode() == ISD::UNINDEXED;
2682 }
2683
2684 /// Returns true if the specified node is a non-extending load.
2685 inline bool isNON_EXTLoad(const SDNode *N) {
2686 return isa<LoadSDNode>(N) &&
2687 cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
2688 }
2689
2690 /// Returns true if the specified node is an EXTLOAD.
2691 inline bool isEXTLoad(const SDNode *N) {
2692 return isa<LoadSDNode>(N) &&
2693 cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
2694 }
2695
2696 /// Returns true if the specified node is a SEXTLOAD.
2697 inline bool isSEXTLoad(const SDNode *N) {
2698 return isa<LoadSDNode>(N) &&
2699 cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
2700 }
2701
2702 /// Returns true if the specified node is a ZEXTLOAD.
2703 inline bool isZEXTLoad(const SDNode *N) {
2704 return isa<LoadSDNode>(N) &&
2705 cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
2706 }
2707
2708 /// Returns true if the specified node is an unindexed load.
2709 inline bool isUNINDEXEDLoad(const SDNode *N) {
2710 return isa<LoadSDNode>(N) &&
2711 cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2712 }
2713
2714 /// Returns true if the specified node is a non-truncating
2715 /// and unindexed store.
2716 inline bool isNormalStore(const SDNode *N) {
2717 const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
2718 return St && !St->isTruncatingStore() &&
2719 St->getAddressingMode() == ISD::UNINDEXED;
2720 }
2721
2722 /// Returns true if the specified node is an unindexed store.
2723 inline bool isUNINDEXEDStore(const SDNode *N) {
2724 return isa<StoreSDNode>(N) &&
2725 cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2726 }
2727
2728 /// Attempt to match a unary predicate against a scalar/splat constant or
2729 /// every element of a constant BUILD_VECTOR.
2730 /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
2731 bool matchUnaryPredicate(SDValue Op,
2732 std::function<bool(ConstantSDNode *)> Match,
2733 bool AllowUndefs = false);
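 
 // Usage sketch: matchUnaryPredicate with a lambda, the usual way a combine
 // asks "does every (splatted) constant element satisfy P?". Illustrative
 // helper name; with the default AllowUndefs=false no nullptr is passed.
 static bool allElementsNonZero(SDValue Op) {
   return ISD::matchUnaryPredicate(
       Op, [](ConstantSDNode *C) { return C && !C->isNullValue(); });
 }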
2734
2735 /// Attempt to match a binary predicate against a pair of scalar/splat
2736 /// constants or every element of a pair of constant BUILD_VECTORs.
2737 /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
2738 /// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match.
2739 bool matchBinaryPredicate(
2740 SDValue LHS, SDValue RHS,
2741 std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
2742 bool AllowUndefs = false, bool AllowTypeMismatch = false);
2743
2744 /// Returns true if the specified value is the overflow result from one
2745 /// of the overflow intrinsic nodes.
2746 inline bool isOverflowIntrOpRes(SDValue Op) {
2747 unsigned Opc = Op.getOpcode();
2748 return (Op.getResNo() == 1 &&
2749 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
2750 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
2751 }
2752
2753} // end namespace ISD
2754
2755} // end namespace llvm
2756
2757#endif // LLVM_CODEGEN_SELECTIONDAGNODES_H