Bug Summary

File: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Warning: line 12812, column 48
The result of the left shift is undefined due to shifting by '64', which is greater than or equal to the width of type 'unsigned long long'
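
As a hedged illustration of the class of bug this checker reports (this is not the code at line 12812, and the maskLowBits* helpers below are hypothetical names): in C++, shifting a 64-bit value by an amount greater than or equal to its bit width is undefined behaviour, so any shift count that can reach 64 must be handled before the shift is performed.

    // Hypothetical sketch of the flagged pattern and one common guard.
    #include <cassert>
    #include <cstdint>

    // Undefined behaviour when Bits == 64: the shift count equals the
    // bit width of unsigned long long.
    uint64_t maskLowBitsUB(unsigned Bits) {
      return (1ULL << Bits) - 1;
    }

    // Guarded version: treat the full-width case explicitly so the
    // shift count stays strictly below 64.
    uint64_t maskLowBitsSafe(unsigned Bits) {
      assert(Bits <= 64 && "shift amount out of range");
      return Bits == 64 ? ~0ULL : (1ULL << Bits) - 1;
    }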

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/build-llvm -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64 -I include -I /build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-command-line-argument -Wno-unknown-warning-option -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/build-llvm -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-10-19-144714-38752-1 -x c++ /build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64ISelLowering.h"
14#include "AArch64CallingConvention.h"
15#include "AArch64ExpandImm.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/ObjCARCUtil.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/Analysis.h"
35#include "llvm/CodeGen/CallingConvLower.h"
36#include "llvm/CodeGen/MachineBasicBlock.h"
37#include "llvm/CodeGen/MachineFrameInfo.h"
38#include "llvm/CodeGen/MachineFunction.h"
39#include "llvm/CodeGen/MachineInstr.h"
40#include "llvm/CodeGen/MachineInstrBuilder.h"
41#include "llvm/CodeGen/MachineMemOperand.h"
42#include "llvm/CodeGen/MachineRegisterInfo.h"
43#include "llvm/CodeGen/RuntimeLibcalls.h"
44#include "llvm/CodeGen/SelectionDAG.h"
45#include "llvm/CodeGen/SelectionDAGNodes.h"
46#include "llvm/CodeGen/TargetCallingConv.h"
47#include "llvm/CodeGen/TargetInstrInfo.h"
48#include "llvm/CodeGen/ValueTypes.h"
49#include "llvm/IR/Attributes.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugLoc.h"
53#include "llvm/IR/DerivedTypes.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/GetElementPtrTypeIterator.h"
56#include "llvm/IR/GlobalValue.h"
57#include "llvm/IR/IRBuilder.h"
58#include "llvm/IR/Instruction.h"
59#include "llvm/IR/Instructions.h"
60#include "llvm/IR/IntrinsicInst.h"
61#include "llvm/IR/Intrinsics.h"
62#include "llvm/IR/IntrinsicsAArch64.h"
63#include "llvm/IR/Module.h"
64#include "llvm/IR/OperandTraits.h"
65#include "llvm/IR/PatternMatch.h"
66#include "llvm/IR/Type.h"
67#include "llvm/IR/Use.h"
68#include "llvm/IR/Value.h"
69#include "llvm/MC/MCRegisterInfo.h"
70#include "llvm/Support/Casting.h"
71#include "llvm/Support/CodeGen.h"
72#include "llvm/Support/CommandLine.h"
73#include "llvm/Support/Compiler.h"
74#include "llvm/Support/Debug.h"
75#include "llvm/Support/ErrorHandling.h"
76#include "llvm/Support/KnownBits.h"
77#include "llvm/Support/MachineValueType.h"
78#include "llvm/Support/MathExtras.h"
79#include "llvm/Support/raw_ostream.h"
80#include "llvm/Target/TargetMachine.h"
81#include "llvm/Target/TargetOptions.h"
82#include <algorithm>
83#include <bitset>
84#include <cassert>
85#include <cctype>
86#include <cstdint>
87#include <cstdlib>
88#include <iterator>
89#include <limits>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace llvm::PatternMatch;
96
97#define DEBUG_TYPE "aarch64-lower"
98
99STATISTIC(NumTailCalls, "Number of tail calls");
100STATISTIC(NumShiftInserts, "Number of vector shift inserts");
101STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
102
103// FIXME: The necessary dtprel relocations don't seem to be supported
104// well in the GNU bfd and gold linkers at the moment. Therefore, by
105// default, for now, fall back to GeneralDynamic code generation.
106cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
107 "aarch64-elf-ldtls-generation", cl::Hidden,
108 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
109 cl::init(false));
110
111static cl::opt<bool>
112EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
113 cl::desc("Enable AArch64 logical imm instruction "
114 "optimization"),
115 cl::init(true));
116
117// Temporary option added for the purpose of testing functionality added
118// to DAGCombiner.cpp in D92230. It is expected that this can be removed
119// in future when both implementations will be based off MGATHER rather
120// than the GLD1 nodes added for the SVE gather load intrinsics.
121static cl::opt<bool>
122EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
123 cl::desc("Combine extends of AArch64 masked "
124 "gather intrinsics"),
125 cl::init(true));
126
127/// Value type used for condition codes.
128static const MVT MVT_CC = MVT::i32;
129
130static inline EVT getPackedSVEVectorVT(EVT VT) {
131 switch (VT.getSimpleVT().SimpleTy) {
132 default:
133 llvm_unreachable("unexpected element type for vector");
134 case MVT::i8:
135 return MVT::nxv16i8;
136 case MVT::i16:
137 return MVT::nxv8i16;
138 case MVT::i32:
139 return MVT::nxv4i32;
140 case MVT::i64:
141 return MVT::nxv2i64;
142 case MVT::f16:
143 return MVT::nxv8f16;
144 case MVT::f32:
145 return MVT::nxv4f32;
146 case MVT::f64:
147 return MVT::nxv2f64;
148 case MVT::bf16:
149 return MVT::nxv8bf16;
150 }
151}
152
153// NOTE: Currently there's only a need to return integer vector types. If this
154// changes then just add an extra "type" parameter.
155static inline EVT getPackedSVEVectorVT(ElementCount EC) {
156 switch (EC.getKnownMinValue()) {
157 default:
158 llvm_unreachable("unexpected element count for vector");
159 case 16:
160 return MVT::nxv16i8;
161 case 8:
162 return MVT::nxv8i16;
163 case 4:
164 return MVT::nxv4i32;
165 case 2:
166 return MVT::nxv2i64;
167 }
168}
169
170static inline EVT getPromotedVTForPredicate(EVT VT) {
171 assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
172 "Expected scalable predicate vector type!");
173 switch (VT.getVectorMinNumElements()) {
174 default:
175 llvm_unreachable("unexpected element count for vector");
176 case 2:
177 return MVT::nxv2i64;
178 case 4:
179 return MVT::nxv4i32;
180 case 8:
181 return MVT::nxv8i16;
182 case 16:
183 return MVT::nxv16i8;
184 }
185}
186
187/// Returns true if VT's elements occupy the lowest bit positions of its
188/// associated register class without any intervening space.
189///
190/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
191/// same register class, but only nxv8f16 can be treated as a packed vector.
192static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
193 assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
194 "Expected legal vector type!");
195 return VT.isFixedLengthVector() ||
196 VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
197}
198
199// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
200// predicate and end with a passthru value matching the result type.
201static bool isMergePassthruOpcode(unsigned Opc) {
202 switch (Opc) {
203 default:
204 return false;
205 case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
206 case AArch64ISD::BSWAP_MERGE_PASSTHRU:
207 case AArch64ISD::CTLZ_MERGE_PASSTHRU:
208 case AArch64ISD::CTPOP_MERGE_PASSTHRU:
209 case AArch64ISD::DUP_MERGE_PASSTHRU:
210 case AArch64ISD::ABS_MERGE_PASSTHRU:
211 case AArch64ISD::NEG_MERGE_PASSTHRU:
212 case AArch64ISD::FNEG_MERGE_PASSTHRU:
213 case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
214 case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
215 case AArch64ISD::FCEIL_MERGE_PASSTHRU:
216 case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
217 case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
218 case AArch64ISD::FRINT_MERGE_PASSTHRU:
219 case AArch64ISD::FROUND_MERGE_PASSTHRU:
220 case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
221 case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
222 case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
223 case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
224 case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
225 case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
226 case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
227 case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
228 case AArch64ISD::FSQRT_MERGE_PASSTHRU:
229 case AArch64ISD::FRECPX_MERGE_PASSTHRU:
230 case AArch64ISD::FABS_MERGE_PASSTHRU:
231 return true;
232 }
233}
234
235AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
236 const AArch64Subtarget &STI)
237 : TargetLowering(TM), Subtarget(&STI) {
238 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
239 // we have to make something up. Arbitrarily, choose ZeroOrOne.
240 setBooleanContents(ZeroOrOneBooleanContent);
241 // When comparing vectors the result sets the different elements in the
242 // vector to all-one or all-zero.
243 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
244
245 // Set up the register classes.
246 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
247 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
248
249 if (Subtarget->hasLS64()) {
250 addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
251 setOperationAction(ISD::LOAD, MVT::i64x8, Custom);
252 setOperationAction(ISD::STORE, MVT::i64x8, Custom);
253 }
254
255 if (Subtarget->hasFPARMv8()) {
256 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
257 addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
258 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
259 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
260 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
261 }
262
263 if (Subtarget->hasNEON()) {
264 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
265 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
266 // Someone set us up the NEON.
267 addDRTypeForNEON(MVT::v2f32);
268 addDRTypeForNEON(MVT::v8i8);
269 addDRTypeForNEON(MVT::v4i16);
270 addDRTypeForNEON(MVT::v2i32);
271 addDRTypeForNEON(MVT::v1i64);
272 addDRTypeForNEON(MVT::v1f64);
273 addDRTypeForNEON(MVT::v4f16);
274 if (Subtarget->hasBF16())
275 addDRTypeForNEON(MVT::v4bf16);
276
277 addQRTypeForNEON(MVT::v4f32);
278 addQRTypeForNEON(MVT::v2f64);
279 addQRTypeForNEON(MVT::v16i8);
280 addQRTypeForNEON(MVT::v8i16);
281 addQRTypeForNEON(MVT::v4i32);
282 addQRTypeForNEON(MVT::v2i64);
283 addQRTypeForNEON(MVT::v8f16);
284 if (Subtarget->hasBF16())
285 addQRTypeForNEON(MVT::v8bf16);
286 }
287
288 if (Subtarget->hasSVE()) {
289 // Add legal sve predicate types
290 addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
291 addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
292 addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
293 addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
294
295 // Add legal sve data types
296 addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
297 addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
298 addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
299 addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
300
301 addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
302 addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
303 addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
304 addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
305 addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
306 addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
307
308 if (Subtarget->hasBF16()) {
309 addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
310 addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
311 addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
312 }
313
314 if (Subtarget->useSVEForFixedLengthVectors()) {
315 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
316 if (useSVEForFixedLengthVectorVT(VT))
317 addRegisterClass(VT, &AArch64::ZPRRegClass);
318
319 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
320 if (useSVEForFixedLengthVectorVT(VT))
321 addRegisterClass(VT, &AArch64::ZPRRegClass);
322 }
323
324 for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
325 setOperationAction(ISD::SADDSAT, VT, Legal);
326 setOperationAction(ISD::UADDSAT, VT, Legal);
327 setOperationAction(ISD::SSUBSAT, VT, Legal);
328 setOperationAction(ISD::USUBSAT, VT, Legal);
329 setOperationAction(ISD::UREM, VT, Expand);
330 setOperationAction(ISD::SREM, VT, Expand);
331 setOperationAction(ISD::SDIVREM, VT, Expand);
332 setOperationAction(ISD::UDIVREM, VT, Expand);
333 }
334
335 for (auto VT :
336 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
337 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
338 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
339
340 for (auto VT :
341 { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
342 MVT::nxv2f64 }) {
343 setCondCodeAction(ISD::SETO, VT, Expand);
344 setCondCodeAction(ISD::SETOLT, VT, Expand);
345 setCondCodeAction(ISD::SETLT, VT, Expand);
346 setCondCodeAction(ISD::SETOLE, VT, Expand);
347 setCondCodeAction(ISD::SETLE, VT, Expand);
348 setCondCodeAction(ISD::SETULT, VT, Expand);
349 setCondCodeAction(ISD::SETULE, VT, Expand);
350 setCondCodeAction(ISD::SETUGE, VT, Expand);
351 setCondCodeAction(ISD::SETUGT, VT, Expand);
352 setCondCodeAction(ISD::SETUEQ, VT, Expand);
353 setCondCodeAction(ISD::SETUNE, VT, Expand);
354
355 setOperationAction(ISD::FREM, VT, Expand);
356 setOperationAction(ISD::FPOW, VT, Expand);
357 setOperationAction(ISD::FPOWI, VT, Expand);
358 setOperationAction(ISD::FCOS, VT, Expand);
359 setOperationAction(ISD::FSIN, VT, Expand);
360 setOperationAction(ISD::FSINCOS, VT, Expand);
361 setOperationAction(ISD::FEXP, VT, Expand);
362 setOperationAction(ISD::FEXP2, VT, Expand);
363 setOperationAction(ISD::FLOG, VT, Expand);
364 setOperationAction(ISD::FLOG2, VT, Expand);
365 setOperationAction(ISD::FLOG10, VT, Expand);
366 }
367 }
368
369 // Compute derived properties from the register classes
370 computeRegisterProperties(Subtarget->getRegisterInfo());
371
372 // Provide all sorts of operation actions
373 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
374 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
375 setOperationAction(ISD::SETCC, MVT::i32, Custom);
376 setOperationAction(ISD::SETCC, MVT::i64, Custom);
377 setOperationAction(ISD::SETCC, MVT::f16, Custom);
378 setOperationAction(ISD::SETCC, MVT::f32, Custom);
379 setOperationAction(ISD::SETCC, MVT::f64, Custom);
380 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
381 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
382 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
383 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
384 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
385 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
386 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
387 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
388 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
389 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
390 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
391 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
392 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
393 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
394 setOperationAction(ISD::SELECT, MVT::i32, Custom);
395 setOperationAction(ISD::SELECT, MVT::i64, Custom);
396 setOperationAction(ISD::SELECT, MVT::f16, Custom);
397 setOperationAction(ISD::SELECT, MVT::f32, Custom);
398 setOperationAction(ISD::SELECT, MVT::f64, Custom);
399 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
400 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
401 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
402 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
403 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
404 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
405 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
406
407 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
408 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
409 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
410
411 setOperationAction(ISD::FREM, MVT::f32, Expand);
412 setOperationAction(ISD::FREM, MVT::f64, Expand);
413 setOperationAction(ISD::FREM, MVT::f80, Expand);
414
415 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
416
417 // Custom lowering hooks are needed for XOR
418 // to fold it into CSINC/CSINV.
419 setOperationAction(ISD::XOR, MVT::i32, Custom);
420 setOperationAction(ISD::XOR, MVT::i64, Custom);
421
422 // Virtually no operation on f128 is legal, but LLVM can't expand them when
423 // there's a valid register class, so we need custom operations in most cases.
424 setOperationAction(ISD::FABS, MVT::f128, Expand);
425 setOperationAction(ISD::FADD, MVT::f128, LibCall);
426 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
427 setOperationAction(ISD::FCOS, MVT::f128, Expand);
428 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
429 setOperationAction(ISD::FMA, MVT::f128, Expand);
430 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
431 setOperationAction(ISD::FNEG, MVT::f128, Expand);
432 setOperationAction(ISD::FPOW, MVT::f128, Expand);
433 setOperationAction(ISD::FREM, MVT::f128, Expand);
434 setOperationAction(ISD::FRINT, MVT::f128, Expand);
435 setOperationAction(ISD::FSIN, MVT::f128, Expand);
436 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
437 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
438 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
439 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
440 setOperationAction(ISD::SETCC, MVT::f128, Custom);
441 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
442 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
443 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
444 setOperationAction(ISD::SELECT, MVT::f128, Custom);
445 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
446 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
447
448 // Lowering for many of the conversions is actually specified by the non-f128
449 // type. The LowerXXX function will be trivial when f128 isn't involved.
450 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
451 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
452 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
453 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
454 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
455 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
456 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
457 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
458 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
459 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
460 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
461 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
462 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
463 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
464 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
465 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
466 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
467 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
468 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
469 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
470 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
471 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
472 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
473 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
474 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
475 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
476 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
477 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
478 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
479 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
480
481 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
482 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
483 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
484 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
485
486 // Variable arguments.
487 setOperationAction(ISD::VASTART, MVT::Other, Custom);
488 setOperationAction(ISD::VAARG, MVT::Other, Custom);
489 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
490 setOperationAction(ISD::VAEND, MVT::Other, Expand);
491
492 // Variable-sized objects.
493 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
494 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
495
496 if (Subtarget->isTargetWindows())
497 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
498 else
499 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
500
501 // Constant pool entries
502 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
503
504 // BlockAddress
505 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
506
507 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
508 setOperationAction(ISD::ADDC, MVT::i32, Custom);
509 setOperationAction(ISD::ADDE, MVT::i32, Custom);
510 setOperationAction(ISD::SUBC, MVT::i32, Custom);
511 setOperationAction(ISD::SUBE, MVT::i32, Custom);
512 setOperationAction(ISD::ADDC, MVT::i64, Custom);
513 setOperationAction(ISD::ADDE, MVT::i64, Custom);
514 setOperationAction(ISD::SUBC, MVT::i64, Custom);
515 setOperationAction(ISD::SUBE, MVT::i64, Custom);
516
517 // AArch64 lacks both left-rotate and popcount instructions.
518 setOperationAction(ISD::ROTL, MVT::i32, Expand);
519 setOperationAction(ISD::ROTL, MVT::i64, Expand);
520 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
521 setOperationAction(ISD::ROTL, VT, Expand);
522 setOperationAction(ISD::ROTR, VT, Expand);
523 }
524
525 // AArch64 doesn't have i32 MULH{S|U}.
526 setOperationAction(ISD::MULHU, MVT::i32, Expand);
527 setOperationAction(ISD::MULHS, MVT::i32, Expand);
528
529 // AArch64 doesn't have {U|S}MUL_LOHI.
530 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
531 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
532
533 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
534 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
535 setOperationAction(ISD::CTPOP, MVT::i128, Custom);
536
537 setOperationAction(ISD::ABS, MVT::i32, Custom);
538 setOperationAction(ISD::ABS, MVT::i64, Custom);
539
540 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
541 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
542 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
543 setOperationAction(ISD::SDIVREM, VT, Expand);
544 setOperationAction(ISD::UDIVREM, VT, Expand);
545 }
546 setOperationAction(ISD::SREM, MVT::i32, Expand);
547 setOperationAction(ISD::SREM, MVT::i64, Expand);
548 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
549 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
550 setOperationAction(ISD::UREM, MVT::i32, Expand);
551 setOperationAction(ISD::UREM, MVT::i64, Expand);
552
553 // Custom lower Add/Sub/Mul with overflow.
554 setOperationAction(ISD::SADDO, MVT::i32, Custom);
555 setOperationAction(ISD::SADDO, MVT::i64, Custom);
556 setOperationAction(ISD::UADDO, MVT::i32, Custom);
557 setOperationAction(ISD::UADDO, MVT::i64, Custom);
558 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
559 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
560 setOperationAction(ISD::USUBO, MVT::i32, Custom);
561 setOperationAction(ISD::USUBO, MVT::i64, Custom);
562 setOperationAction(ISD::SMULO, MVT::i32, Custom);
563 setOperationAction(ISD::SMULO, MVT::i64, Custom);
564 setOperationAction(ISD::UMULO, MVT::i32, Custom);
565 setOperationAction(ISD::UMULO, MVT::i64, Custom);
566
567 setOperationAction(ISD::FSIN, MVT::f32, Expand);
568 setOperationAction(ISD::FSIN, MVT::f64, Expand);
569 setOperationAction(ISD::FCOS, MVT::f32, Expand);
570 setOperationAction(ISD::FCOS, MVT::f64, Expand);
571 setOperationAction(ISD::FPOW, MVT::f32, Expand);
572 setOperationAction(ISD::FPOW, MVT::f64, Expand);
573 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
574 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
575 if (Subtarget->hasFullFP16())
576 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
577 else
578 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
579
580 setOperationAction(ISD::FREM, MVT::f16, Promote);
581 setOperationAction(ISD::FREM, MVT::v4f16, Expand);
582 setOperationAction(ISD::FREM, MVT::v8f16, Expand);
583 setOperationAction(ISD::FPOW, MVT::f16, Promote);
584 setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
585 setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
586 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
587 setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
588 setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
589 setOperationAction(ISD::FCOS, MVT::f16, Promote);
590 setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
591 setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
592 setOperationAction(ISD::FSIN, MVT::f16, Promote);
593 setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
594 setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
595 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
596 setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
597 setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
598 setOperationAction(ISD::FEXP, MVT::f16, Promote);
599 setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
600 setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
601 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
602 setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
603 setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
604 setOperationAction(ISD::FLOG, MVT::f16, Promote);
605 setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
606 setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
607 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
608 setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
609 setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
610 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
611 setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
612 setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
613
614 if (!Subtarget->hasFullFP16()) {
615 setOperationAction(ISD::SELECT, MVT::f16, Promote);
616 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
617 setOperationAction(ISD::SETCC, MVT::f16, Promote);
618 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
619 setOperationAction(ISD::FADD, MVT::f16, Promote);
620 setOperationAction(ISD::FSUB, MVT::f16, Promote);
621 setOperationAction(ISD::FMUL, MVT::f16, Promote);
622 setOperationAction(ISD::FDIV, MVT::f16, Promote);
623 setOperationAction(ISD::FMA, MVT::f16, Promote);
624 setOperationAction(ISD::FNEG, MVT::f16, Promote);
625 setOperationAction(ISD::FABS, MVT::f16, Promote);
626 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
627 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
628 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
629 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
630 setOperationAction(ISD::FRINT, MVT::f16, Promote);
631 setOperationAction(ISD::FROUND, MVT::f16, Promote);
632 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
633 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
634 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
635 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
636 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
637 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
638
639 // promote v4f16 to v4f32 when that is known to be safe.
640 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
641 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
642 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
643 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
644 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
645 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
646 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
647 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
648
649 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
650 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
651 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
652 setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
653 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
654 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
655 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
656 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
657 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
658 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
659 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
660 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
661 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
662 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
663 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
664 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
665
666 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
667 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
668 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
669 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
670 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
671 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
672 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
673 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
674 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
675 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
676 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
677 setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
678 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
679 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
680 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
681 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
682 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
683 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
684 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
685 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
686 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
687 }
688
689 // AArch64 has implementations of a lot of rounding-like FP operations.
690 for (MVT Ty : {MVT::f32, MVT::f64}) {
691 setOperationAction(ISD::FFLOOR, Ty, Legal);
692 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
693 setOperationAction(ISD::FCEIL, Ty, Legal);
694 setOperationAction(ISD::FRINT, Ty, Legal);
695 setOperationAction(ISD::FTRUNC, Ty, Legal);
696 setOperationAction(ISD::FROUND, Ty, Legal);
697 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
698 setOperationAction(ISD::FMINNUM, Ty, Legal);
699 setOperationAction(ISD::FMAXNUM, Ty, Legal);
700 setOperationAction(ISD::FMINIMUM, Ty, Legal);
701 setOperationAction(ISD::FMAXIMUM, Ty, Legal);
702 setOperationAction(ISD::LROUND, Ty, Legal);
703 setOperationAction(ISD::LLROUND, Ty, Legal);
704 setOperationAction(ISD::LRINT, Ty, Legal);
705 setOperationAction(ISD::LLRINT, Ty, Legal);
706 }
707
708 if (Subtarget->hasFullFP16()) {
709 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
710 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
711 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
712 setOperationAction(ISD::FRINT, MVT::f16, Legal);
713 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
714 setOperationAction(ISD::FROUND, MVT::f16, Legal);
715 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
716 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
717 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
718 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
719 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
720 }
721
722 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
723
724 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
725 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
726
727 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
728 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
729 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
730 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
731 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
732
733 // Generate outline atomics library calls only if LSE was not specified for
734 // subtarget
735 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
736 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
737 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
738 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
739 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
740 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
741 setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
742 setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
743 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
744 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
745 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
746 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
747 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
748 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
749 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
750 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
751 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
752 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
753 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
754 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
755 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
756 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
757 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
758 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
759 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
760 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
761#define LCALLNAMES(A, B, N) \
762 setLibcallName(A##N##_RELAX, #B #N "_relax"); \
763 setLibcallName(A##N##_ACQ, #B #N "_acq"); \
764 setLibcallName(A##N##_REL, #B #N "_rel"); \
765 setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
766#define LCALLNAME4(A, B) \
767 LCALLNAMES(A, B, 1) \
768 LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
769#define LCALLNAME5(A, B) \
770 LCALLNAMES(A, B, 1) \
771 LCALLNAMES(A, B, 2) \
772 LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
773 LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
774 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
775 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
776 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
777 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
778 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
779#undef LCALLNAMES
780#undef LCALLNAME4
781#undef LCALLNAME5
782 }
783
784 // 128-bit loads and stores can be done without expanding
785 setOperationAction(ISD::LOAD, MVT::i128, Custom);
786 setOperationAction(ISD::STORE, MVT::i128, Custom);
787
788 // Aligned 128-bit loads and stores are single-copy atomic according to the
789 // v8.4a spec.
790 if (Subtarget->hasLSE2()) {
791 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
792 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
793 }
794
795 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
796 // custom lowering, as there are no un-paired non-temporal stores and
797 // legalization will break up 256 bit inputs.
798 setOperationAction(ISD::STORE, MVT::v32i8, Custom);
799 setOperationAction(ISD::STORE, MVT::v16i16, Custom);
800 setOperationAction(ISD::STORE, MVT::v16f16, Custom);
801 setOperationAction(ISD::STORE, MVT::v8i32, Custom);
802 setOperationAction(ISD::STORE, MVT::v8f32, Custom);
803 setOperationAction(ISD::STORE, MVT::v4f64, Custom);
804 setOperationAction(ISD::STORE, MVT::v4i64, Custom);
805
806 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
807 // This requires the Performance Monitors extension.
808 if (Subtarget->hasPerfMon())
809 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
810
811 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
812 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
813 // Issue __sincos_stret if available.
814 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
815 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
816 } else {
817 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
818 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
819 }
820
821 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
822 // MSVCRT doesn't have powi; fall back to pow
823 setLibcallName(RTLIB::POWI_F32, nullptr);
824 setLibcallName(RTLIB::POWI_F64, nullptr);
825 }
826
827 // Make floating-point constants legal for the large code model, so they don't
828 // become loads from the constant pool.
829 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
830 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
831 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
832 }
833
834 // AArch64 does not have floating-point extending loads, i1 sign-extending
835 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
836 for (MVT VT : MVT::fp_valuetypes()) {
837 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
838 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
839 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
840 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
841 }
842 for (MVT VT : MVT::integer_valuetypes())
843 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
844
845 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
846 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
847 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
848 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
849 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
850 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
851 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
852
853 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
854 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
855 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
856
857 // Indexed loads and stores are supported.
858 for (unsigned im = (unsigned)ISD::PRE_INC;
859 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
860 setIndexedLoadAction(im, MVT::i8, Legal);
861 setIndexedLoadAction(im, MVT::i16, Legal);
862 setIndexedLoadAction(im, MVT::i32, Legal);
863 setIndexedLoadAction(im, MVT::i64, Legal);
864 setIndexedLoadAction(im, MVT::f64, Legal);
865 setIndexedLoadAction(im, MVT::f32, Legal);
866 setIndexedLoadAction(im, MVT::f16, Legal);
867 setIndexedLoadAction(im, MVT::bf16, Legal);
868 setIndexedStoreAction(im, MVT::i8, Legal);
869 setIndexedStoreAction(im, MVT::i16, Legal);
870 setIndexedStoreAction(im, MVT::i32, Legal);
871 setIndexedStoreAction(im, MVT::i64, Legal);
872 setIndexedStoreAction(im, MVT::f64, Legal);
873 setIndexedStoreAction(im, MVT::f32, Legal);
874 setIndexedStoreAction(im, MVT::f16, Legal);
875 setIndexedStoreAction(im, MVT::bf16, Legal);
876 }
877
878 // Trap.
879 setOperationAction(ISD::TRAP, MVT::Other, Legal);
880 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
881 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
882
883 // We combine OR nodes for bitfield operations.
884 setTargetDAGCombine(ISD::OR);
885 // Try to create BICs for vector ANDs.
886 setTargetDAGCombine(ISD::AND);
887
888 // Vector add and sub nodes may conceal a high-half opportunity.
889 // Also, try to fold ADD into CSINC/CSINV..
890 setTargetDAGCombine(ISD::ADD);
891 setTargetDAGCombine(ISD::ABS);
892 setTargetDAGCombine(ISD::SUB);
893 setTargetDAGCombine(ISD::SRL);
894 setTargetDAGCombine(ISD::XOR);
895 setTargetDAGCombine(ISD::SINT_TO_FP);
896 setTargetDAGCombine(ISD::UINT_TO_FP);
897
898 // TODO: Do the same for FP_TO_*INT_SAT.
899 setTargetDAGCombine(ISD::FP_TO_SINT);
900 setTargetDAGCombine(ISD::FP_TO_UINT);
901 setTargetDAGCombine(ISD::FDIV);
902
903 // Try and combine setcc with csel
904 setTargetDAGCombine(ISD::SETCC);
905
906 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
907
908 setTargetDAGCombine(ISD::ANY_EXTEND);
909 setTargetDAGCombine(ISD::ZERO_EXTEND);
910 setTargetDAGCombine(ISD::SIGN_EXTEND);
911 setTargetDAGCombine(ISD::VECTOR_SPLICE);
912 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
913 setTargetDAGCombine(ISD::TRUNCATE);
914 setTargetDAGCombine(ISD::CONCAT_VECTORS);
915 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
916 setTargetDAGCombine(ISD::STORE);
917 if (Subtarget->supportsAddressTopByteIgnored())
918 setTargetDAGCombine(ISD::LOAD);
919
920 setTargetDAGCombine(ISD::MUL);
921
922 setTargetDAGCombine(ISD::SELECT);
923 setTargetDAGCombine(ISD::VSELECT);
924
925 setTargetDAGCombine(ISD::INTRINSIC_VOID);
926 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
927 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
928 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
929 setTargetDAGCombine(ISD::VECREDUCE_ADD);
930 setTargetDAGCombine(ISD::STEP_VECTOR);
931
932 setTargetDAGCombine(ISD::GlobalAddress);
933
934 // In case of strict alignment, avoid an excessive number of byte wide stores.
935 MaxStoresPerMemsetOptSize = 8;
936 MaxStoresPerMemset = Subtarget->requiresStrictAlign()
937 ? MaxStoresPerMemsetOptSize : 32;
938
939 MaxGluedStoresPerMemcpy = 4;
940 MaxStoresPerMemcpyOptSize = 4;
941 MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
942 ? MaxStoresPerMemcpyOptSize : 16;
943
944 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
945
946 MaxLoadsPerMemcmpOptSize = 4;
947 MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
948 ? MaxLoadsPerMemcmpOptSize : 8;
949
950 setStackPointerRegisterToSaveRestore(AArch64::SP);
951
952 setSchedulingPreference(Sched::Hybrid);
953
954 EnableExtLdPromotion = true;
955
956 // Set required alignment.
957 setMinFunctionAlignment(Align(4));
958 // Set preferred alignments.
959 setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
960 setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
961
962 // Only change the limit for entries in a jump table if specified by
963 // the sub target, but not at the command line.
964 unsigned MaxJT = STI.getMaximumJumpTableSize();
965 if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
966 setMaximumJumpTableSize(MaxJT);
967
968 setHasExtractBitsInsn(true);
969
970 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
971
972 if (Subtarget->hasNEON()) {
973 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
974 // silliness like this:
975 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
976 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
977 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
978 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
979 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
980 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
981 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
982 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
983 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
984 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
985 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
986 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
987 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
988 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
989 setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand);
990 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
991 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
992 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
993 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
994 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
995 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
996 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
997 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
998 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
999 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
1000 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
1001
1002 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
1003 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
1004 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
1005 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
1006 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
1007
1008 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v1i64, Expand);
1009 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v1i64, Expand);
1010
1011 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
1012
1013 // AArch64 doesn't have a direct vector ->f32 conversion instructions for
1014 // elements smaller than i32, so promote the input to i32 first.
1015 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
1016 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
1017 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
1018 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
1019 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
1020 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
1021
1022 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
1023 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1024 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1025 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
1026 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
1027 // Or, direct i32 -> f16 vector conversion. Set it so custom, so the
1028 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
1029 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
1030 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1031
1032 if (Subtarget->hasFullFP16()) {
1033 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1034 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1035 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
1036 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1037 } else {
1038 // when AArch64 doesn't have fullfp16 support, promote the input
1039 // to i32 first.
1040 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
1041 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
1042 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
1043 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
1044 }
1045
1046 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
1047 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
1048 setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
1049 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
1050 setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
1051 setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
1052 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1053 setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
1054 for (auto VT : {MVT::v1i64, MVT::v2i64}) {
1055 setOperationAction(ISD::UMAX, VT, Custom);
1056 setOperationAction(ISD::SMAX, VT, Custom);
1057 setOperationAction(ISD::UMIN, VT, Custom);
1058 setOperationAction(ISD::SMIN, VT, Custom);
1059 }
1060
1061 // AArch64 doesn't have MUL.2d:
1062 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
1063 // Custom handling for some quad-vector types to detect MULL.
1064 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
1065 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1066 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1067
1068 // Saturates
1069 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1070 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1071 setOperationAction(ISD::SADDSAT, VT, Legal);
1072 setOperationAction(ISD::UADDSAT, VT, Legal);
1073 setOperationAction(ISD::SSUBSAT, VT, Legal);
1074 setOperationAction(ISD::USUBSAT, VT, Legal);
1075 }
1076
1077 for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1078 MVT::v4i32}) {
1079 setOperationAction(ISD::ABDS, VT, Legal);
1080 setOperationAction(ISD::ABDU, VT, Legal);
1081 }
1082
1083 // Vector reductions
1084 for (MVT VT : { MVT::v4f16, MVT::v2f32,
1085 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1086 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1087 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1088 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1089
1090 setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
1091 }
1092 }
1093 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1094 MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1095 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1096 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1097 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1098 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1099 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1100 }
1101 setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
1102
1103 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
1104 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
1105 // Likewise, narrowing and extending vector loads/stores aren't handled
1106 // directly.
1107 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1108 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
1109
1110 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
1111 setOperationAction(ISD::MULHS, VT, Legal);
1112 setOperationAction(ISD::MULHU, VT, Legal);
1113 } else {
1114 setOperationAction(ISD::MULHS, VT, Expand);
1115 setOperationAction(ISD::MULHU, VT, Expand);
1116 }
1117 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1118 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1119
1120 setOperationAction(ISD::BSWAP, VT, Expand);
1121 setOperationAction(ISD::CTTZ, VT, Expand);
1122
1123 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1124 setTruncStoreAction(VT, InnerVT, Expand);
1125 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1126 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1127 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1128 }
1129 }
1130
1131 // AArch64 has implementations of a lot of rounding-like FP operations.
1132 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
1133 setOperationAction(ISD::FFLOOR, Ty, Legal);
1134 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1135 setOperationAction(ISD::FCEIL, Ty, Legal);
1136 setOperationAction(ISD::FRINT, Ty, Legal);
1137 setOperationAction(ISD::FTRUNC, Ty, Legal);
1138 setOperationAction(ISD::FROUND, Ty, Legal);
1139 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
1140 }
1141
1142 if (Subtarget->hasFullFP16()) {
1143 for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
1144 setOperationAction(ISD::FFLOOR, Ty, Legal);
1145 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1146 setOperationAction(ISD::FCEIL, Ty, Legal);
1147 setOperationAction(ISD::FRINT, Ty, Legal);
1148 setOperationAction(ISD::FTRUNC, Ty, Legal);
1149 setOperationAction(ISD::FROUND, Ty, Legal);
1150 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
1151 }
1152 }
1153
1154 if (Subtarget->hasSVE())
1155 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
1156
1157 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
1158
1159 setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1160 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1161 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1162 setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1163 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1164 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1165 }
1166
1167 if (Subtarget->hasSVE()) {
1168 for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
1169 setOperationAction(ISD::BITREVERSE, VT, Custom);
1170 setOperationAction(ISD::BSWAP, VT, Custom);
1171 setOperationAction(ISD::CTLZ, VT, Custom);
1172 setOperationAction(ISD::CTPOP, VT, Custom);
1173 setOperationAction(ISD::CTTZ, VT, Custom);
1174 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1175 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1176 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1177 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1178 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1179 setOperationAction(ISD::MGATHER, VT, Custom);
1180 setOperationAction(ISD::MSCATTER, VT, Custom);
1181 setOperationAction(ISD::MLOAD, VT, Custom);
1182 setOperationAction(ISD::MUL, VT, Custom);
1183 setOperationAction(ISD::MULHS, VT, Custom);
1184 setOperationAction(ISD::MULHU, VT, Custom);
1185 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1186 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1187 setOperationAction(ISD::SELECT, VT, Custom);
1188 setOperationAction(ISD::SETCC, VT, Custom);
1189 setOperationAction(ISD::SDIV, VT, Custom);
1190 setOperationAction(ISD::UDIV, VT, Custom);
1191 setOperationAction(ISD::SMIN, VT, Custom);
1192 setOperationAction(ISD::UMIN, VT, Custom);
1193 setOperationAction(ISD::SMAX, VT, Custom);
1194 setOperationAction(ISD::UMAX, VT, Custom);
1195 setOperationAction(ISD::SHL, VT, Custom);
1196 setOperationAction(ISD::SRL, VT, Custom);
1197 setOperationAction(ISD::SRA, VT, Custom);
1198 setOperationAction(ISD::ABS, VT, Custom);
1199 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1200 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1201 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1202 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1203 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1204 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1205 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1206 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1207
1208 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1209 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1210 setOperationAction(ISD::SELECT_CC, VT, Expand);
1211 setOperationAction(ISD::ROTL, VT, Expand);
1212 setOperationAction(ISD::ROTR, VT, Expand);
1213 }
1214
1215 // Illegal unpacked integer vector types.
1216 for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
1217 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1218 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1219 }
1220
1221 // Legalize unpacked bitcasts to REINTERPRET_CAST.
1222 for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
1223 MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
1224 setOperationAction(ISD::BITCAST, VT, Custom);
1225
1226 for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
1227 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1228 setOperationAction(ISD::SELECT, VT, Custom);
1229 setOperationAction(ISD::SETCC, VT, Custom);
1230 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1231 setOperationAction(ISD::TRUNCATE, VT, Custom);
1232 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1233 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1234 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1235
1236 setOperationAction(ISD::SELECT_CC, VT, Expand);
1237 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1238 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1239
1240 // There are no legal MVT::nxv16f## based types.
1241 if (VT != MVT::nxv16i1) {
1242 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1243 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1244 }
1245 }
1246
1247 // NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
1248 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
1249 MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1250 MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1251 setOperationAction(ISD::MLOAD, VT, Custom);
1252 setOperationAction(ISD::MSTORE, VT, Custom);
1253 setOperationAction(ISD::MGATHER, VT, Custom);
1254 setOperationAction(ISD::MSCATTER, VT, Custom);
1255 }
1256
1257 for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
1258 for (MVT InnerVT : MVT::fp_scalable_vector_valuetypes()) {
1259 // Avoid marking truncating FP stores as legal to prevent the
1260 // DAGCombiner from creating unsupported truncating stores.
1261 setTruncStoreAction(VT, InnerVT, Expand);
1262 // SVE does not have floating-point extending loads.
1263 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1264 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1265 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1266 }
1267 }
1268
1269 // SVE supports truncating stores of 64 and 128-bit vectors
1270 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
1271 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
1272 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
1273 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
1274 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
1275
1276 for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
1277 MVT::nxv4f32, MVT::nxv2f64}) {
1278 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1279 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1280 setOperationAction(ISD::MGATHER, VT, Custom);
1281 setOperationAction(ISD::MSCATTER, VT, Custom);
1282 setOperationAction(ISD::MLOAD, VT, Custom);
1283 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1284 setOperationAction(ISD::SELECT, VT, Custom);
1285 setOperationAction(ISD::FADD, VT, Custom);
1286 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1287 setOperationAction(ISD::FDIV, VT, Custom);
1288 setOperationAction(ISD::FMA, VT, Custom);
1289 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1290 setOperationAction(ISD::FMAXNUM, VT, Custom);
1291 setOperationAction(ISD::FMINIMUM, VT, Custom);
1292 setOperationAction(ISD::FMINNUM, VT, Custom);
1293 setOperationAction(ISD::FMUL, VT, Custom);
1294 setOperationAction(ISD::FNEG, VT, Custom);
1295 setOperationAction(ISD::FSUB, VT, Custom);
1296 setOperationAction(ISD::FCEIL, VT, Custom);
1297 setOperationAction(ISD::FFLOOR, VT, Custom);
1298 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1299 setOperationAction(ISD::FRINT, VT, Custom);
1300 setOperationAction(ISD::FROUND, VT, Custom);
1301 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1302 setOperationAction(ISD::FTRUNC, VT, Custom);
1303 setOperationAction(ISD::FSQRT, VT, Custom);
1304 setOperationAction(ISD::FABS, VT, Custom);
1305 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1306 setOperationAction(ISD::FP_ROUND, VT, Custom);
1307 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1308 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1309 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1310 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1311 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1312
1313 setOperationAction(ISD::SELECT_CC, VT, Expand);
1314 }
1315
1316 for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1317 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1318 setOperationAction(ISD::MGATHER, VT, Custom);
1319 setOperationAction(ISD::MSCATTER, VT, Custom);
1320 setOperationAction(ISD::MLOAD, VT, Custom);
1321 }
1322
1323 setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
1324
1325 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
1326 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
1327
1328 // NOTE: Currently this has to happen after computeRegisterProperties rather
1329 // than the preferred option of combining it with the addRegisterClass call.
1330 if (Subtarget->useSVEForFixedLengthVectors()) {
1331 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
1332 if (useSVEForFixedLengthVectorVT(VT))
1333 addTypeForFixedLengthSVE(VT);
1334 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
1335 if (useSVEForFixedLengthVectorVT(VT))
1336 addTypeForFixedLengthSVE(VT);
1337
1338 // 64bit results can mean a bigger than NEON input.
1339 for (auto VT : {MVT::v8i8, MVT::v4i16})
1340 setOperationAction(ISD::TRUNCATE, VT, Custom);
1341 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
1342
1343 // 128bit results imply a bigger than NEON input.
1344 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
1345 setOperationAction(ISD::TRUNCATE, VT, Custom);
1346 for (auto VT : {MVT::v8f16, MVT::v4f32})
1347 setOperationAction(ISD::FP_ROUND, VT, Custom);
1348
1349 // These operations are not supported on NEON but SVE can do them.
1350 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1351 setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
1352 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1353 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
1354 setOperationAction(ISD::MUL, MVT::v1i64, Custom);
1355 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1356 setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
1357 setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
1358 setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
1359 setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
1360 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
1361 setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
1362 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
1363 setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
1364 setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
1365 setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
1366 setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
1367 setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
1368 setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
1369 setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
1370 setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
1371 setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
1372 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
1373 setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
1374 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
1375 setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
1376 setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
1377 setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
1378 setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
1379 setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
1380 setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
1381 setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
1382 setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
1383 setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
1384 setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
1385 setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
1386 setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
1387 setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
1388
1389 // Int operations with no NEON support.
1390 for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1391 MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
1392 setOperationAction(ISD::BITREVERSE, VT, Custom);
1393 setOperationAction(ISD::CTTZ, VT, Custom);
1394 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1395 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1396 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1397 }
1398
1399 // FP operations with no NEON support.
1400 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
1401 MVT::v1f64, MVT::v2f64})
1402 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1403
1404 // Use SVE for vectors with more than 2 elements.
1405 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
1406 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1407 }
1408
1409 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64);
1410 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32);
1411 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16);
1412 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
1413 }
1414
1415 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
1416}
1417
1418void AArch64TargetLowering::addTypeForNEON(MVT VT) {
1419 assert(VT.isVector() && "VT should be a vector type");
1420
1421 if (VT.isFloatingPoint()) {
1422 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
1423 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
1424 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
1425 }
1426
1427 // Mark vector float intrinsics as expand.
1428 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
1429 setOperationAction(ISD::FSIN, VT, Expand);
1430 setOperationAction(ISD::FCOS, VT, Expand);
1431 setOperationAction(ISD::FPOW, VT, Expand);
1432 setOperationAction(ISD::FLOG, VT, Expand);
1433 setOperationAction(ISD::FLOG2, VT, Expand);
1434 setOperationAction(ISD::FLOG10, VT, Expand);
1435 setOperationAction(ISD::FEXP, VT, Expand);
1436 setOperationAction(ISD::FEXP2, VT, Expand);
1437 }
1438
1439 // But we do support custom-lowering for FCOPYSIGN.
1440 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
1441 ((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
1442 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1443
1444 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1445 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1446 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1447 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1448 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1449 setOperationAction(ISD::SRA, VT, Custom);
1450 setOperationAction(ISD::SRL, VT, Custom);
1451 setOperationAction(ISD::SHL, VT, Custom);
1452 setOperationAction(ISD::OR, VT, Custom);
1453 setOperationAction(ISD::SETCC, VT, Custom);
1454 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
1455
1456 setOperationAction(ISD::SELECT, VT, Expand);
1457 setOperationAction(ISD::SELECT_CC, VT, Expand);
1458 setOperationAction(ISD::VSELECT, VT, Expand);
1459 for (MVT InnerVT : MVT::all_valuetypes())
1460 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
1461
1462 // CNT supports only B element sizes, then use UADDLP to widen.
1463 if (VT != MVT::v8i8 && VT != MVT::v16i8)
1464 setOperationAction(ISD::CTPOP, VT, Custom);
1465
1466 setOperationAction(ISD::UDIV, VT, Expand);
1467 setOperationAction(ISD::SDIV, VT, Expand);
1468 setOperationAction(ISD::UREM, VT, Expand);
1469 setOperationAction(ISD::SREM, VT, Expand);
1470 setOperationAction(ISD::FREM, VT, Expand);
1471
1472 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1473 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1474 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
1475 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
1476
1477 if (!VT.isFloatingPoint())
1478 setOperationAction(ISD::ABS, VT, Legal);
1479
1480 // [SU][MIN|MAX] are available for all NEON types apart from i64.
1481 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
1482 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
1483 setOperationAction(Opcode, VT, Legal);
1484
1485 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
1486 if (VT.isFloatingPoint() &&
1487 VT.getVectorElementType() != MVT::bf16 &&
1488 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
1489 for (unsigned Opcode :
1490 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
1491 setOperationAction(Opcode, VT, Legal);
1492
1493 if (Subtarget->isLittleEndian()) {
1494 for (unsigned im = (unsigned)ISD::PRE_INC;
1495 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1496 setIndexedLoadAction(im, VT, Legal);
1497 setIndexedStoreAction(im, VT, Legal);
1498 }
1499 }
1500}
1501
1502void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
1503 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
1504
1505 // By default everything must be expanded.
1506 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1507 setOperationAction(Op, VT, Expand);
1508
1509 // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
1510 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1511
1512 if (VT.isFloatingPoint()) {
1513 setCondCodeAction(ISD::SETO, VT, Expand);
1514 setCondCodeAction(ISD::SETOLT, VT, Expand);
1515 setCondCodeAction(ISD::SETLT, VT, Expand);
1516 setCondCodeAction(ISD::SETOLE, VT, Expand);
1517 setCondCodeAction(ISD::SETLE, VT, Expand);
1518 setCondCodeAction(ISD::SETULT, VT, Expand);
1519 setCondCodeAction(ISD::SETULE, VT, Expand);
1520 setCondCodeAction(ISD::SETUGE, VT, Expand);
1521 setCondCodeAction(ISD::SETUGT, VT, Expand);
1522 setCondCodeAction(ISD::SETUEQ, VT, Expand);
1523 setCondCodeAction(ISD::SETUNE, VT, Expand);
1524 }
1525
1526 // Mark integer truncating stores as having custom lowering
1527 if (VT.isInteger()) {
1528 MVT InnerVT = VT.changeVectorElementType(MVT::i8);
1529 while (InnerVT != VT) {
1530 setTruncStoreAction(VT, InnerVT, Custom);
1531 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom);
1532 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom);
1533 InnerVT = InnerVT.changeVectorElementType(
1534 MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
1535 }
1536 }
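  // Editorial illustration (not in the analyzed source): assuming VT were
  // MVT::v4i32, the loop above visits InnerVT = v4i8 and then v4i16, marking
  // the (v4i32, v4i8) and (v4i32, v4i16) truncating stores and extending
  // loads as Custom before InnerVT reaches VT and the loop terminates.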
1537
1538 // Lower fixed length vector operations to scalable equivalents.
1539 setOperationAction(ISD::ABS, VT, Custom);
1540 setOperationAction(ISD::ADD, VT, Custom);
1541 setOperationAction(ISD::AND, VT, Custom);
1542 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1543 setOperationAction(ISD::BITCAST, VT, Custom);
1544 setOperationAction(ISD::BITREVERSE, VT, Custom);
1545 setOperationAction(ISD::BSWAP, VT, Custom);
1546 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1547 setOperationAction(ISD::CTLZ, VT, Custom);
1548 setOperationAction(ISD::CTPOP, VT, Custom);
1549 setOperationAction(ISD::CTTZ, VT, Custom);
1550 setOperationAction(ISD::FABS, VT, Custom);
1551 setOperationAction(ISD::FADD, VT, Custom);
1552 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1553 setOperationAction(ISD::FCEIL, VT, Custom);
1554 setOperationAction(ISD::FDIV, VT, Custom);
1555 setOperationAction(ISD::FFLOOR, VT, Custom);
1556 setOperationAction(ISD::FMA, VT, Custom);
1557 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1558 setOperationAction(ISD::FMAXNUM, VT, Custom);
1559 setOperationAction(ISD::FMINIMUM, VT, Custom);
1560 setOperationAction(ISD::FMINNUM, VT, Custom);
1561 setOperationAction(ISD::FMUL, VT, Custom);
1562 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1563 setOperationAction(ISD::FNEG, VT, Custom);
1564 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1565 setOperationAction(ISD::FP_ROUND, VT, Custom);
1566 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1567 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1568 setOperationAction(ISD::FRINT, VT, Custom);
1569 setOperationAction(ISD::FROUND, VT, Custom);
1570 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1571 setOperationAction(ISD::FSQRT, VT, Custom);
1572 setOperationAction(ISD::FSUB, VT, Custom);
1573 setOperationAction(ISD::FTRUNC, VT, Custom);
1574 setOperationAction(ISD::LOAD, VT, Custom);
1575 setOperationAction(ISD::MGATHER, VT, Custom);
1576 setOperationAction(ISD::MLOAD, VT, Custom);
1577 setOperationAction(ISD::MSCATTER, VT, Custom);
1578 setOperationAction(ISD::MSTORE, VT, Custom);
1579 setOperationAction(ISD::MUL, VT, Custom);
1580 setOperationAction(ISD::MULHS, VT, Custom);
1581 setOperationAction(ISD::MULHU, VT, Custom);
1582 setOperationAction(ISD::OR, VT, Custom);
1583 setOperationAction(ISD::SDIV, VT, Custom);
1584 setOperationAction(ISD::SELECT, VT, Custom);
1585 setOperationAction(ISD::SETCC, VT, Custom);
1586 setOperationAction(ISD::SHL, VT, Custom);
1587 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1588 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1589 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1590 setOperationAction(ISD::SMAX, VT, Custom);
1591 setOperationAction(ISD::SMIN, VT, Custom);
1592 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1593 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1594 setOperationAction(ISD::SRA, VT, Custom);
1595 setOperationAction(ISD::SRL, VT, Custom);
1596 setOperationAction(ISD::STORE, VT, Custom);
1597 setOperationAction(ISD::SUB, VT, Custom);
1598 setOperationAction(ISD::TRUNCATE, VT, Custom);
1599 setOperationAction(ISD::UDIV, VT, Custom);
1600 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1601 setOperationAction(ISD::UMAX, VT, Custom);
1602 setOperationAction(ISD::UMIN, VT, Custom);
1603 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1604 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1605 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1606 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1607 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1608 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1609 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1610 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1611 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1612 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1613 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1614 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1615 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1616 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1617 setOperationAction(ISD::VSELECT, VT, Custom);
1618 setOperationAction(ISD::XOR, VT, Custom);
1619 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1620}
1621
1622void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
1623 addRegisterClass(VT, &AArch64::FPR64RegClass);
1624 addTypeForNEON(VT);
1625}
1626
1627void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
1628 addRegisterClass(VT, &AArch64::FPR128RegClass);
1629 addTypeForNEON(VT);
1630}
1631
1632EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
1633 LLVMContext &C, EVT VT) const {
1634 if (!VT.isVector())
1635 return MVT::i32;
1636 if (VT.isScalableVector())
1637 return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
1638 return VT.changeVectorElementTypeToInteger();
1639}
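For reference, a sketch of what getSetCCResultType yields (editorial note; the example value types are assumptions chosen for illustration):

  //   i64     -> i32       scalar compares produce a 32-bit result
  //   v4f32   -> v4i32     fixed vectors keep their element count, integer elements
  //   nxv2f64 -> nxv2i1    scalable vectors get one predicate bit per element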
1640
1641static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
1642 const APInt &Demanded,
1643 TargetLowering::TargetLoweringOpt &TLO,
1644 unsigned NewOpc) {
1645 uint64_t OldImm = Imm, NewImm, Enc;
1646 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
1647
1648 // Return if the immediate is already all zeros, all ones, a bimm32 or a
1649 // bimm64.
1650 if (Imm == 0 || Imm == Mask ||
1651 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
1652 return false;
1653
1654 unsigned EltSize = Size;
1655 uint64_t DemandedBits = Demanded.getZExtValue();
1656
1657 // Clear bits that are not demanded.
1658 Imm &= DemandedBits;
1659
1660 while (true) {
1661 // The goal here is to set the non-demanded bits in a way that minimizes
1662 // the number of switching between 0 and 1. In order to achieve this goal,
1663 // we set the non-demanded bits to the value of the preceding demanded bits.
1664 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
1665 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
1666 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
1667 // The final result is 0b11000011.
1668 uint64_t NonDemandedBits = ~DemandedBits;
1669 uint64_t InvertedImm = ~Imm & DemandedBits;
1670 uint64_t RotatedImm =
1671 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
1672 NonDemandedBits;
1673 uint64_t Sum = RotatedImm + NonDemandedBits;
1674 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
1675 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
1676 NewImm = (Imm | Ones) & Mask;
1677
1678 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
1679 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
1680 // we halve the element size and continue the search.
1681 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
1682 break;
1683
1684 // We cannot shrink the element size any further if it is 2-bits.
1685 if (EltSize == 2)
1686 return false;
1687
1688 EltSize /= 2;
1689 Mask >>= EltSize;
1690 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
1691
1692 // Return if there is a mismatch in any of the demanded bits of Imm and Hi.
1693 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
1694 return false;
1695
1696 // Merge the upper and lower halves of Imm and DemandedBits.
1697 Imm |= Hi;
1698 DemandedBits |= DemandedBitsHi;
1699 }
1700
1701 ++NumOptimizedImms;
1702
1703 // Replicate the element across the register width.
1704 while (EltSize < Size) {
1705 NewImm |= NewImm << EltSize;
1706 EltSize *= 2;
1707 }
1708
1709 (void)OldImm;
1710 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
1711        "demanded bits should never be altered");
1712 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
1713
1714 // Create the new constant immediate node.
1715 EVT VT = Op.getValueType();
1716 SDLoc DL(Op);
1717 SDValue New;
1718
1719 // If the new constant immediate is all-zeros or all-ones, let the target
1720 // independent DAG combine optimize this node.
1721 if (NewImm == 0 || NewImm == OrigMask) {
1722 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
1723 TLO.DAG.getConstant(NewImm, DL, VT));
1724 // Otherwise, create a machine node so that target independent DAG combine
1725 // doesn't undo this optimization.
1726 } else {
1727 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
1728 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
1729 New = SDValue(
1730 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
1731 }
1732
1733 return TLO.CombineTo(Op, New);
1734}
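The branch-free arithmetic in the loop above can be hard to follow. Below is a standalone editorial sketch of the underlying idea (the helper name fillNonDemandedBits is made up for illustration; this is a plain reference loop rather than the production bit trick, and it is only checked against the worked example from the comment): copy every non-demanded bit from the nearest demanded bit below it.

#include <cassert>
#include <cstdint>

// Reference loop: every non-demanded bit takes the value of the closest
// demanded bit at a lower position, wrapping around to the top of the element.
static uint64_t fillNonDemandedBits(uint64_t Imm, uint64_t Demanded,
                                    unsigned EltSize) {
  // Seed with the most significant demanded bit so that a non-demanded bit 0
  // wraps around, mirroring the rotate used in the loop above.
  bool Last = false;
  for (int I = EltSize - 1; I >= 0; --I) {
    if (Demanded & (1ULL << I)) {
      Last = ((Imm >> I) & 1) != 0;
      break;
    }
  }
  uint64_t Out = 0;
  for (unsigned I = 0; I < EltSize; ++I) {
    if (Demanded & (1ULL << I))
      Last = ((Imm >> I) & 1) != 0; // demanded: take the bit and remember it
    // Non-demanded positions simply reuse the last demanded value.
    Out |= uint64_t(Last) << I;
  }
  return Out;
}

int main() {
  // Worked example from the comment: Imm = 0bx10xx0x1 with 'x' non-demanded;
  // the expected filled value is 0b11000011.
  assert(fillNonDemandedBits(/*Imm=*/0b01000001, /*Demanded=*/0b01100101,
                             /*EltSize=*/8) == 0b11000011);
  return 0;
}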
1735
1736bool AArch64TargetLowering::targetShrinkDemandedConstant(
1737 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1738 TargetLoweringOpt &TLO) const {
1739 // Delay this optimization to as late as possible.
1740 if (!TLO.LegalOps)
1741 return false;
1742
1743 if (!EnableOptimizeLogicalImm)
1744 return false;
1745
1746 EVT VT = Op.getValueType();
1747 if (VT.isVector())
1748 return false;
1749
1750 unsigned Size = VT.getSizeInBits();
1751 assert((Size == 32 || Size == 64) &&
1752        "i32 or i64 is expected after legalization.");
1753
1754 // Exit early if we demand all bits.
1755 if (DemandedBits.countPopulation() == Size)
1756 return false;
1757
1758 unsigned NewOpc;
1759 switch (Op.getOpcode()) {
1760 default:
1761 return false;
1762 case ISD::AND:
1763 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
1764 break;
1765 case ISD::OR:
1766 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1767 break;
1768 case ISD::XOR:
1769 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1770 break;
1771 }
1772 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1773 if (!C)
1774 return false;
1775 uint64_t Imm = C->getZExtValue();
1776 return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
1777}
1778
1779/// computeKnownBitsForTargetNode - Determine which of the bits specified in
1780/// Mask are known to be either zero or one and return them Known.
1781void AArch64TargetLowering::computeKnownBitsForTargetNode(
1782 const SDValue Op, KnownBits &Known,
1783 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
1784 switch (Op.getOpcode()) {
1785 default:
1786 break;
1787 case AArch64ISD::CSEL: {
1788 KnownBits Known2;
1789 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1790 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1791 Known = KnownBits::commonBits(Known, Known2);
1792 break;
1793 }
1794 case AArch64ISD::LOADgot:
1795 case AArch64ISD::ADDlow: {
1796 if (!Subtarget->isTargetILP32())
1797 break;
1798 // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
1799 Known.Zero = APInt::getHighBitsSet(64, 32);
1800 break;
1801 }
1802 case AArch64ISD::ASSERT_ZEXT_BOOL: {
1803 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1804 Known.Zero |= APInt(Known.getBitWidth(), 0xFE);
1805 break;
1806 }
1807 case ISD::INTRINSIC_W_CHAIN: {
1808 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1809 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1810 switch (IntID) {
1811 default: return;
1812 case Intrinsic::aarch64_ldaxr:
1813 case Intrinsic::aarch64_ldxr: {
1814 unsigned BitWidth = Known.getBitWidth();
1815 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1816 unsigned MemBits = VT.getScalarSizeInBits();
1817 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1818 return;
1819 }
1820 }
1821 break;
1822 }
1823 case ISD::INTRINSIC_WO_CHAIN:
1824 case ISD::INTRINSIC_VOID: {
1825 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1826 switch (IntNo) {
1827 default:
1828 break;
1829 case Intrinsic::aarch64_neon_umaxv:
1830 case Intrinsic::aarch64_neon_uminv: {
1831 // Figure out the datatype of the vector operand. The UMINV instruction
1832 // will zero extend the result, so we can mark as known zero all the
1833 // bits larger than the element datatype. 32-bit or larger doesn't need
1834 // this as those are legal types and will be handled by isel directly.
1835 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1836 unsigned BitWidth = Known.getBitWidth();
1837 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1838 assert(BitWidth >= 8 && "Unexpected width!");
1839 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1840 Known.Zero |= Mask;
1841 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1842 assert(BitWidth >= 16 && "Unexpected width!");
1843 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1844 Known.Zero |= Mask;
1845 }
1846 break;
1847 } break;
1848 }
1849 }
1850 }
1851}
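  // Editorial example of the reasoning above (assumed operand types): a umaxv
  // or uminv reduction over v8i8/v16i8 produces an i32 value of at most 255,
  // so bits 8..31 can be added to Known.Zero; for v4i16/v8i16 the cut-off
  // moves to bit 16, and 32-bit elements are already legal and never reach
  // this code.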
1852
1853MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1854 EVT) const {
1855 return MVT::i64;
1856}
1857
1858bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1859 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1860 bool *Fast) const {
1861 if (Subtarget->requiresStrictAlign())
1862 return false;
1863
1864 if (Fast) {
1865 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1866 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1867 // See comments in performSTORECombine() for more details about
1868 // these conditions.
1869
1870 // Code that uses clang vector extensions can mark that it
1871 // wants unaligned accesses to be treated as fast by
1872 // underspecifying alignment to be 1 or 2.
1873 Alignment <= 2 ||
1874
1875 // Disregard v2i64. Memcpy lowering produces those and splitting
1876 // them regresses performance on micro-benchmarks and olden/bh.
1877 VT == MVT::v2i64;
1878 }
1879 return true;
1880}
1881
1882// Same as above but handling LLTs instead.
1883bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1884 LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1885 bool *Fast) const {
1886 if (Subtarget->requiresStrictAlign())
1887 return false;
1888
1889 if (Fast) {
1890 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1891 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
1892 Ty.getSizeInBytes() != 16 ||
1893 // See comments in performSTORECombine() for more details about
1894 // these conditions.
1895
1896 // Code that uses clang vector extensions can mark that it
1897 // wants unaligned accesses to be treated as fast by
1898 // underspecifying alignment to be 1 or 2.
1899 Alignment <= 2 ||
1900
1901 // Disregard v2i64. Memcpy lowering produces those and splitting
1902 // them regresses performance on micro-benchmarks and olden/bh.
1903 Ty == LLT::fixed_vector(2, 64);
1904 }
1905 return true;
1906}
1907
1908FastISel *
1909AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1910 const TargetLibraryInfo *libInfo) const {
1911 return AArch64::createFastISel(funcInfo, libInfo);
1912}
1913
1914const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1915#define MAKE_CASE(V) \
1916 case V: \
1917 return #V;
1918 switch ((AArch64ISD::NodeType)Opcode) {
1919 case AArch64ISD::FIRST_NUMBER:
1920 break;
1921 MAKE_CASE(AArch64ISD::CALL)
1922 MAKE_CASE(AArch64ISD::ADRP)
1923 MAKE_CASE(AArch64ISD::ADR)
1924 MAKE_CASE(AArch64ISD::ADDlow)
1925 MAKE_CASE(AArch64ISD::LOADgot)
1926 MAKE_CASE(AArch64ISD::RET_FLAG)
1927 MAKE_CASE(AArch64ISD::BRCOND)
1928 MAKE_CASE(AArch64ISD::CSEL)
1929 MAKE_CASE(AArch64ISD::CSINV)
1930 MAKE_CASE(AArch64ISD::CSNEG)
1931 MAKE_CASE(AArch64ISD::CSINC)
1932 MAKE_CASE(AArch64ISD::THREAD_POINTER)
1933 MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
1934 MAKE_CASE(AArch64ISD::ADD_PRED)
1935 MAKE_CASE(AArch64ISD::MUL_PRED)
1936 MAKE_CASE(AArch64ISD::MULHS_PRED)
1937 MAKE_CASE(AArch64ISD::MULHU_PRED)
1938 MAKE_CASE(AArch64ISD::SDIV_PRED)
1939 MAKE_CASE(AArch64ISD::SHL_PRED)
1940 MAKE_CASE(AArch64ISD::SMAX_PRED)
1941 MAKE_CASE(AArch64ISD::SMIN_PRED)
1942 MAKE_CASE(AArch64ISD::SRA_PRED)
1943 MAKE_CASE(AArch64ISD::SRL_PRED)
1944 MAKE_CASE(AArch64ISD::SUB_PRED)
1945 MAKE_CASE(AArch64ISD::UDIV_PRED)
1946 MAKE_CASE(AArch64ISD::UMAX_PRED)
1947 MAKE_CASE(AArch64ISD::UMIN_PRED)
1948 MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
1949 MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
1950 MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
1951 MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
1952 MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
1953 MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
1954 MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
1955 MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
1956 MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
1957 MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
1958 MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
1959 MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
1960 MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
1961 MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
1962 MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
1963 MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
1964 MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
1965 MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
1966 MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
1967 MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
1968 MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
1969 MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
1970 MAKE_CASE(AArch64ISD::ADC)
1971 MAKE_CASE(AArch64ISD::SBC)
1972 MAKE_CASE(AArch64ISD::ADDS)
1973 MAKE_CASE(AArch64ISD::SUBS)
1974 MAKE_CASE(AArch64ISD::ADCS)
1975 MAKE_CASE(AArch64ISD::SBCS)
1976 MAKE_CASE(AArch64ISD::ANDS)
1977 MAKE_CASE(AArch64ISD::CCMP)
1978 MAKE_CASE(AArch64ISD::CCMN)
1979 MAKE_CASE(AArch64ISD::FCCMP)
1980 MAKE_CASE(AArch64ISD::FCMP)
1981 MAKE_CASE(AArch64ISD::STRICT_FCMP)
1982 MAKE_CASE(AArch64ISD::STRICT_FCMPE)
1983 MAKE_CASE(AArch64ISD::DUP)
1984 MAKE_CASE(AArch64ISD::DUPLANE8)
1985 MAKE_CASE(AArch64ISD::DUPLANE16)
1986 MAKE_CASE(AArch64ISD::DUPLANE32)
1987 MAKE_CASE(AArch64ISD::DUPLANE64)
1988 MAKE_CASE(AArch64ISD::MOVI)
1989 MAKE_CASE(AArch64ISD::MOVIshift)
1990 MAKE_CASE(AArch64ISD::MOVIedit)
1991 MAKE_CASE(AArch64ISD::MOVImsl)
1992 MAKE_CASE(AArch64ISD::FMOV)
1993 MAKE_CASE(AArch64ISD::MVNIshift)
1994 MAKE_CASE(AArch64ISD::MVNImsl)
1995 MAKE_CASE(AArch64ISD::BICi)
1996 MAKE_CASE(AArch64ISD::ORRi)
1997 MAKE_CASE(AArch64ISD::BSP)
1998 MAKE_CASE(AArch64ISD::EXTR)
1999 MAKE_CASE(AArch64ISD::ZIP1)
2000 MAKE_CASE(AArch64ISD::ZIP2)
2001 MAKE_CASE(AArch64ISD::UZP1)
2002 MAKE_CASE(AArch64ISD::UZP2)
2003 MAKE_CASE(AArch64ISD::TRN1)
2004 MAKE_CASE(AArch64ISD::TRN2)
2005 MAKE_CASE(AArch64ISD::REV16)
2006 MAKE_CASE(AArch64ISD::REV32)
2007 MAKE_CASE(AArch64ISD::REV64)
2008 MAKE_CASE(AArch64ISD::EXT)
2009 MAKE_CASE(AArch64ISD::SPLICE)
2010 MAKE_CASE(AArch64ISD::VSHL)
2011 MAKE_CASE(AArch64ISD::VLSHR)
2012 MAKE_CASE(AArch64ISD::VASHR)
2013 MAKE_CASE(AArch64ISD::VSLI)
2014 MAKE_CASE(AArch64ISD::VSRI)
2015 MAKE_CASE(AArch64ISD::CMEQ)
2016 MAKE_CASE(AArch64ISD::CMGE)
2017 MAKE_CASE(AArch64ISD::CMGT)
2018 MAKE_CASE(AArch64ISD::CMHI)
2019 MAKE_CASE(AArch64ISD::CMHS)
2020 MAKE_CASE(AArch64ISD::FCMEQ)
2021 MAKE_CASE(AArch64ISD::FCMGE)
2022 MAKE_CASE(AArch64ISD::FCMGT)
2023 MAKE_CASE(AArch64ISD::CMEQz)
2024 MAKE_CASE(AArch64ISD::CMGEz)
2025 MAKE_CASE(AArch64ISD::CMGTz)
2026 MAKE_CASE(AArch64ISD::CMLEz)
2027 MAKE_CASE(AArch64ISD::CMLTz)
2028 MAKE_CASE(AArch64ISD::FCMEQz)
2029 MAKE_CASE(AArch64ISD::FCMGEz)
2030 MAKE_CASE(AArch64ISD::FCMGTz)
2031 MAKE_CASE(AArch64ISD::FCMLEz)
2032 MAKE_CASE(AArch64ISD::FCMLTz)
2033 MAKE_CASE(AArch64ISD::SADDV)
2034 MAKE_CASE(AArch64ISD::UADDV)
2035 MAKE_CASE(AArch64ISD::SRHADD)
2036 MAKE_CASE(AArch64ISD::URHADD)
2037 MAKE_CASE(AArch64ISD::SHADD)
2038 MAKE_CASE(AArch64ISD::UHADD)
2039 MAKE_CASE(AArch64ISD::SDOT)
2040 MAKE_CASE(AArch64ISD::UDOT)
2041 MAKE_CASE(AArch64ISD::SMINV)
2042 MAKE_CASE(AArch64ISD::UMINV)
2043 MAKE_CASE(AArch64ISD::SMAXV)
2044 MAKE_CASE(AArch64ISD::UMAXV)
2045 MAKE_CASE(AArch64ISD::SADDV_PRED)
2046 MAKE_CASE(AArch64ISD::UADDV_PRED)
2047 MAKE_CASE(AArch64ISD::SMAXV_PRED)
2048 MAKE_CASE(AArch64ISD::UMAXV_PRED)
2049 MAKE_CASE(AArch64ISD::SMINV_PRED)
2050 MAKE_CASE(AArch64ISD::UMINV_PRED)
2051 MAKE_CASE(AArch64ISD::ORV_PRED)
2052 MAKE_CASE(AArch64ISD::EORV_PRED)
2053 MAKE_CASE(AArch64ISD::ANDV_PRED)
2054 MAKE_CASE(AArch64ISD::CLASTA_N)
2055 MAKE_CASE(AArch64ISD::CLASTB_N)
2056 MAKE_CASE(AArch64ISD::LASTA)
2057 MAKE_CASE(AArch64ISD::LASTB)
2058 MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
2059 MAKE_CASE(AArch64ISD::LS64_BUILD)
2060 MAKE_CASE(AArch64ISD::LS64_EXTRACT)
2061 MAKE_CASE(AArch64ISD::TBL)
2062 MAKE_CASE(AArch64ISD::FADD_PRED)
2063 MAKE_CASE(AArch64ISD::FADDA_PRED)
2064 MAKE_CASE(AArch64ISD::FADDV_PRED)
2065 MAKE_CASE(AArch64ISD::FDIV_PRED)
2066 MAKE_CASE(AArch64ISD::FMA_PRED)
2067 MAKE_CASE(AArch64ISD::FMAX_PRED)
2068 MAKE_CASE(AArch64ISD::FMAXV_PRED)
2069 MAKE_CASE(AArch64ISD::FMAXNM_PRED)
2070 MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
2071 MAKE_CASE(AArch64ISD::FMIN_PRED)
2072 MAKE_CASE(AArch64ISD::FMINV_PRED)
2073 MAKE_CASE(AArch64ISD::FMINNM_PRED)
2074 MAKE_CASE(AArch64ISD::FMINNMV_PRED)
2075 MAKE_CASE(AArch64ISD::FMUL_PRED)
2076 MAKE_CASE(AArch64ISD::FSUB_PRED)
2077 MAKE_CASE(AArch64ISD::BIC)
2078 MAKE_CASE(AArch64ISD::BIT)
2079 MAKE_CASE(AArch64ISD::CBZ)
2080 MAKE_CASE(AArch64ISD::CBNZ)
2081 MAKE_CASE(AArch64ISD::TBZ)
2082 MAKE_CASE(AArch64ISD::TBNZ)
2083 MAKE_CASE(AArch64ISD::TC_RETURN)
2084 MAKE_CASE(AArch64ISD::PREFETCH)
2085 MAKE_CASE(AArch64ISD::SITOF)
2086 MAKE_CASE(AArch64ISD::UITOF)
2087 MAKE_CASE(AArch64ISD::NVCAST)
2088 MAKE_CASE(AArch64ISD::MRS)
2089 MAKE_CASE(AArch64ISD::SQSHL_I)
2090 MAKE_CASE(AArch64ISD::UQSHL_I)
2091 MAKE_CASE(AArch64ISD::SRSHR_I)
2092 MAKE_CASE(AArch64ISD::URSHR_I)
2093 MAKE_CASE(AArch64ISD::SQSHLU_I)
2094 MAKE_CASE(AArch64ISD::WrapperLarge)
2095 MAKE_CASE(AArch64ISD::LD2post)
2096 MAKE_CASE(AArch64ISD::LD3post)
2097 MAKE_CASE(AArch64ISD::LD4post)
2098 MAKE_CASE(AArch64ISD::ST2post)
2099 MAKE_CASE(AArch64ISD::ST3post)
2100 MAKE_CASE(AArch64ISD::ST4post)
2101 MAKE_CASE(AArch64ISD::LD1x2post)
2102 MAKE_CASE(AArch64ISD::LD1x3post)
2103 MAKE_CASE(AArch64ISD::LD1x4post)
2104 MAKE_CASE(AArch64ISD::ST1x2post)
2105 MAKE_CASE(AArch64ISD::ST1x3post)
2106 MAKE_CASE(AArch64ISD::ST1x4post)
2107 MAKE_CASE(AArch64ISD::LD1DUPpost)
2108 MAKE_CASE(AArch64ISD::LD2DUPpost)
2109 MAKE_CASE(AArch64ISD::LD3DUPpost)
2110 MAKE_CASE(AArch64ISD::LD4DUPpost)
2111 MAKE_CASE(AArch64ISD::LD1LANEpost)
2112 MAKE_CASE(AArch64ISD::LD2LANEpost)
2113 MAKE_CASE(AArch64ISD::LD3LANEpost)
2114 MAKE_CASE(AArch64ISD::LD4LANEpost)
2115 MAKE_CASE(AArch64ISD::ST2LANEpost)
2116 MAKE_CASE(AArch64ISD::ST3LANEpost)
2117 MAKE_CASE(AArch64ISD::ST4LANEpost)
2118 MAKE_CASE(AArch64ISD::SMULL)
2119 MAKE_CASE(AArch64ISD::UMULL)
2120 MAKE_CASE(AArch64ISD::FRECPE)
2121 MAKE_CASE(AArch64ISD::FRECPS)
2122 MAKE_CASE(AArch64ISD::FRSQRTE)
2123 MAKE_CASE(AArch64ISD::FRSQRTS)
2124 MAKE_CASE(AArch64ISD::STG)
2125 MAKE_CASE(AArch64ISD::STZG)
2126 MAKE_CASE(AArch64ISD::ST2G)
2127 MAKE_CASE(AArch64ISD::STZ2G)
2128 MAKE_CASE(AArch64ISD::SUNPKHI)
2129 MAKE_CASE(AArch64ISD::SUNPKLO)
2130 MAKE_CASE(AArch64ISD::UUNPKHI)
2131 MAKE_CASE(AArch64ISD::UUNPKLO)
2132 MAKE_CASE(AArch64ISD::INSR)
2133 MAKE_CASE(AArch64ISD::PTEST)
2134 MAKE_CASE(AArch64ISD::PTRUE)
2135 MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
2136 MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
2137 MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
2138 MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
2139 MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
2140 MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
2141 MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
2142 MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
2143 MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
2144 MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
2145 MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
2146 MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
2147 MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
2148 MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
2149 MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
2150 MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
2151 MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
2152 MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
2153 MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
2154 MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
2155 MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
2156 MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
2157 MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
2158 MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
2159 MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
2160 MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
2161 MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
2162 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
2163 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
2164 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
2165 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
2166 MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
2167 MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
2168 MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
2169 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
2170 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
2171 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
2172 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
2173 MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
2174 MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
2175 MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
2176 MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
2177 MAKE_CASE(AArch64ISD::ST1_PRED)
2178 MAKE_CASE(AArch64ISD::SST1_PRED)
2179 MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
2180 MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
2181 MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
2182 MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
2183 MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
2184 MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
2185 MAKE_CASE(AArch64ISD::SSTNT1_PRED)
2186 MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
2187 MAKE_CASE(AArch64ISD::LDP)
2188 MAKE_CASE(AArch64ISD::STP)
2189 MAKE_CASE(AArch64ISD::STNP)
2190 MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
2191 MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
2192 MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
2193 MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
2194 MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
2195 MAKE_CASE(AArch64ISD::INDEX_VECTOR)
2196 MAKE_CASE(AArch64ISD::UADDLP)
2197 MAKE_CASE(AArch64ISD::CALL_RVMARKER)
2198 MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL)
2199 }
2200#undef MAKE_CASE
2201 return nullptr;
2202}
2203
2204MachineBasicBlock *
2205AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
2206 MachineBasicBlock *MBB) const {
2207 // We materialise the F128CSEL pseudo-instruction as some control flow and a
2208 // phi node:
2209
2210 // OrigBB:
2211 // [... previous instrs leading to comparison ...]
2212 // b.ne TrueBB
2213 // b EndBB
2214 // TrueBB:
2215 // ; Fallthrough
2216 // EndBB:
2217 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
2218
2219 MachineFunction *MF = MBB->getParent();
2220 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2221 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2222 DebugLoc DL = MI.getDebugLoc();
2223 MachineFunction::iterator It = ++MBB->getIterator();
2224
2225 Register DestReg = MI.getOperand(0).getReg();
2226 Register IfTrueReg = MI.getOperand(1).getReg();
2227 Register IfFalseReg = MI.getOperand(2).getReg();
2228 unsigned CondCode = MI.getOperand(3).getImm();
2229 bool NZCVKilled = MI.getOperand(4).isKill();
2230
2231 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
2232 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
2233 MF->insert(It, TrueBB);
2234 MF->insert(It, EndBB);
2235
2236 // Transfer rest of current basic-block to EndBB
2237 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
2238 MBB->end());
2239 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
2240
2241 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
2242 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
2243 MBB->addSuccessor(TrueBB);
2244 MBB->addSuccessor(EndBB);
2245
2246 // TrueBB falls through to the end.
2247 TrueBB->addSuccessor(EndBB);
2248
2249 if (!NZCVKilled) {
2250 TrueBB->addLiveIn(AArch64::NZCV);
2251 EndBB->addLiveIn(AArch64::NZCV);
2252 }
2253
2254 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
2255 .addReg(IfTrueReg)
2256 .addMBB(TrueBB)
2257 .addReg(IfFalseReg)
2258 .addMBB(MBB);
2259
2260 MI.eraseFromParent();
2261 return EndBB;
2262}
2263
2264MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
2265 MachineInstr &MI, MachineBasicBlock *BB) const {
2266 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
2267            BB->getParent()->getFunction().getPersonalityFn())) &&
2268        "SEH does not use catchret!");
2269 return BB;
2270}
2271
2272MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
2273 MachineInstr &MI, MachineBasicBlock *BB) const {
2274 switch (MI.getOpcode()) {
2275 default:
2276#ifndef NDEBUG
2277 MI.dump();
2278#endif
2279 llvm_unreachable("Unexpected instruction for custom inserter!")::llvm::llvm_unreachable_internal("Unexpected instruction for custom inserter!"
, "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2279)
;
2280
2281 case AArch64::F128CSEL:
2282 return EmitF128CSEL(MI, BB);
2283
2284 case TargetOpcode::STACKMAP:
2285 case TargetOpcode::PATCHPOINT:
2286 case TargetOpcode::STATEPOINT:
2287 return emitPatchPoint(MI, BB);
2288
2289 case AArch64::CATCHRET:
2290 return EmitLoweredCatchRet(MI, BB);
2291 }
2292}
2293
2294//===----------------------------------------------------------------------===//
2295// AArch64 Lowering private implementation.
2296//===----------------------------------------------------------------------===//
2297
2298//===----------------------------------------------------------------------===//
2299// Lowering Code
2300//===----------------------------------------------------------------------===//
2301
2302// Forward declarations of SVE fixed length lowering helpers
2303static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT);
2304static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
2305static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
2306static SDValue convertFixedMaskToScalableVector(SDValue Mask,
2307 SelectionDAG &DAG);
2308
2309/// isZerosVector - Check whether SDNode N is a zero-filled vector.
2310static bool isZerosVector(const SDNode *N) {
2311 // Look through a bit convert.
2312 while (N->getOpcode() == ISD::BITCAST)
2313 N = N->getOperand(0).getNode();
2314
2315 if (ISD::isConstantSplatVectorAllZeros(N))
2316 return true;
2317
2318 if (N->getOpcode() != AArch64ISD::DUP)
2319 return false;
2320
2321 auto Opnd0 = N->getOperand(0);
2322 auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
2323 auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
2324 return (CINT && CINT->isZero()) || (CFP && CFP->isZero());
2325}
2326
2327/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
2328/// CC
2329static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
2330 switch (CC) {
2331 default:
2332 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2332)
;
2333 case ISD::SETNE:
2334 return AArch64CC::NE;
2335 case ISD::SETEQ:
2336 return AArch64CC::EQ;
2337 case ISD::SETGT:
2338 return AArch64CC::GT;
2339 case ISD::SETGE:
2340 return AArch64CC::GE;
2341 case ISD::SETLT:
2342 return AArch64CC::LT;
2343 case ISD::SETLE:
2344 return AArch64CC::LE;
2345 case ISD::SETUGT:
2346 return AArch64CC::HI;
2347 case ISD::SETUGE:
2348 return AArch64CC::HS;
2349 case ISD::SETULT:
2350 return AArch64CC::LO;
2351 case ISD::SETULE:
2352 return AArch64CC::LS;
2353 }
2354}
2355
2356/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
2357static void changeFPCCToAArch64CC(ISD::CondCode CC,
2358 AArch64CC::CondCode &CondCode,
2359 AArch64CC::CondCode &CondCode2) {
2360 CondCode2 = AArch64CC::AL;
2361 switch (CC) {
2362 default:
2363 llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2363)
;
2364 case ISD::SETEQ:
2365 case ISD::SETOEQ:
2366 CondCode = AArch64CC::EQ;
2367 break;
2368 case ISD::SETGT:
2369 case ISD::SETOGT:
2370 CondCode = AArch64CC::GT;
2371 break;
2372 case ISD::SETGE:
2373 case ISD::SETOGE:
2374 CondCode = AArch64CC::GE;
2375 break;
2376 case ISD::SETOLT:
2377 CondCode = AArch64CC::MI;
2378 break;
2379 case ISD::SETOLE:
2380 CondCode = AArch64CC::LS;
2381 break;
2382 case ISD::SETONE:
2383 CondCode = AArch64CC::MI;
2384 CondCode2 = AArch64CC::GT;
2385 break;
2386 case ISD::SETO:
2387 CondCode = AArch64CC::VC;
2388 break;
2389 case ISD::SETUO:
2390 CondCode = AArch64CC::VS;
2391 break;
2392 case ISD::SETUEQ:
2393 CondCode = AArch64CC::EQ;
2394 CondCode2 = AArch64CC::VS;
2395 break;
2396 case ISD::SETUGT:
2397 CondCode = AArch64CC::HI;
2398 break;
2399 case ISD::SETUGE:
2400 CondCode = AArch64CC::PL;
2401 break;
2402 case ISD::SETLT:
2403 case ISD::SETULT:
2404 CondCode = AArch64CC::LT;
2405 break;
2406 case ISD::SETLE:
2407 case ISD::SETULE:
2408 CondCode = AArch64CC::LE;
2409 break;
2410 case ISD::SETNE:
2411 case ISD::SETUNE:
2412 CondCode = AArch64CC::NE;
2413 break;
2414 }
2415}
2416
2417/// Convert a DAG fp condition code to an AArch64 CC.
2418/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
2419/// should be AND'ed instead of OR'ed.
2420static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
2421 AArch64CC::CondCode &CondCode,
2422 AArch64CC::CondCode &CondCode2) {
2423 CondCode2 = AArch64CC::AL;
2424 switch (CC) {
2425 default:
2426 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2427 assert(CondCode2 == AArch64CC::AL);
2428 break;
2429 case ISD::SETONE:
2430 // (a one b)
2431 // == ((a olt b) || (a ogt b))
2432 // == ((a ord b) && (a une b))
2433 CondCode = AArch64CC::VC;
2434 CondCode2 = AArch64CC::NE;
2435 break;
2436 case ISD::SETUEQ:
2437 // (a ueq b)
2438 // == ((a uno b) || (a oeq b))
2439 // == ((a ule b) && (a uge b))
2440 CondCode = AArch64CC::PL;
2441 CondCode2 = AArch64CC::LE;
2442 break;
2443 }
2444}
2445
2446/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
2447/// CC usable with the vector instructions. Fewer operations are available
2448/// without a real NZCV register, so we have to use less efficient combinations
2449/// to get the same effect.
2450static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
2451 AArch64CC::CondCode &CondCode,
2452 AArch64CC::CondCode &CondCode2,
2453 bool &Invert) {
2454 Invert = false;
2455 switch (CC) {
2456 default:
2457 // Mostly the scalar mappings work fine.
2458 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2459 break;
2460 case ISD::SETUO:
2461 Invert = true;
2462 LLVM_FALLTHROUGH;
2463 case ISD::SETO:
2464 CondCode = AArch64CC::MI;
2465 CondCode2 = AArch64CC::GE;
2466 break;
2467 case ISD::SETUEQ:
2468 case ISD::SETULT:
2469 case ISD::SETULE:
2470 case ISD::SETUGT:
2471 case ISD::SETUGE:
2472 // All of the compare-mask comparisons are ordered, but we can switch
2473 // between the two by a double inversion. E.g. ULE == !OGT.
2474 Invert = true;
2475 changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
2476 CondCode, CondCode2);
2477 break;
2478 }
2479}
2480
2481static bool isLegalArithImmed(uint64_t C) {
2482 // Matches AArch64DAGToDAGISel::SelectArithImmed().
2483 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
2484 LLVM_DEBUG(dbgs() << "Is imm " << C
2485 << " legal: " << (IsLegal ? "yes\n" : "no\n"));
2486 return IsLegal;
2487}
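// Editorial aside, not part of AArch64ISelLowering.cpp: a standalone restatement
// of the rule checked just above -- AArch64 ADD/SUB/CMP immediates are a 12-bit
// unsigned value, optionally shifted left by 12 -- with a few concrete values.
#include <cstdint>

constexpr bool isLegalArithImmedSketch(uint64_t C) {
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}

static_assert(isLegalArithImmedSketch(4095), "0xFFF fits in 12 bits");
static_assert(isLegalArithImmedSketch(4096), "0x1000 is 1 << 12, the LSL #12 form");
static_assert(!isLegalArithImmedSketch(4097), "0x1001 would need both forms at once");
static_assert(!isLegalArithImmedSketch(0x1000000), "bit 24 set: too wide even when shifted");

int main() { return 0; }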
2488
2489 // Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
2490 // the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
2491 // can be set differently by this operation. It comes down to whether
2492 // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are, then
2493 // everything is fine. If not, then the optimization is wrong. Thus general
2494// comparisons are only valid if op2 != 0.
2495//
2496// So, finally, the only LLVM-native comparisons that don't mention C and V
2497// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
2498// the absence of information about op2.
2499static bool isCMN(SDValue Op, ISD::CondCode CC) {
2500 return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
2501 (CC == ISD::SETEQ || CC == ISD::SETNE);
2502}
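// Editorial aside, not part of AArch64ISelLowering.cpp: a concrete instance of
// the flag problem described above. With op2 == 0, "SUBS op1, (0 - op2)" and
// "ADDS op1, op2" compute the same result but a different carry flag, so only
// EQ/NE (which ignore C and V) are safe without knowing op2 != 0.
#include <cassert>
#include <cstdint>

// Carry as AArch64 defines it: SUBS sets C when no borrow occurs, ADDS sets C
// when the unsigned addition wraps.
static bool carryOfSubs(uint64_t a, uint64_t b) { return a >= b; }
static bool carryOfAdds(uint64_t a, uint64_t b) { return a + b < a; }

int main() {
  uint64_t op1 = 1, op2 = 0;
  bool cOfCmp = carryOfSubs(op1, uint64_t(0) - op2); // CMP op1, (0 - op2)
  bool cOfCmn = carryOfAdds(op1, op2);               // CMN op1, op2
  assert(cOfCmp != cOfCmn); // an unsigned predicate such as HS/LO would disagree
  return 0;
}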
2503
2504static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
2505 SelectionDAG &DAG, SDValue Chain,
2506 bool IsSignaling) {
2507 EVT VT = LHS.getValueType();
2508 assert(VT != MVT::f128);
2509 assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
2510 unsigned Opcode =
2511 IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
2512 return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
2513}
2514
2515static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2516 const SDLoc &dl, SelectionDAG &DAG) {
2517 EVT VT = LHS.getValueType();
2518 const bool FullFP16 =
2519 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2520
2521 if (VT.isFloatingPoint()) {
2522 assert(VT != MVT::f128);
2523 if (VT == MVT::f16 && !FullFP16) {
2524 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
2525 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
2526 VT = MVT::f32;
2527 }
2528 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
2529 }
2530
2531 // The CMP instruction is just an alias for SUBS, and representing it as
2532 // SUBS means that it's possible to get CSE with subtract operations.
2533 // A later phase can perform the optimization of setting the destination
2534 // register to WZR/XZR if it ends up being unused.
2535 unsigned Opcode = AArch64ISD::SUBS;
2536
2537 if (isCMN(RHS, CC)) {
2538 // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
2539 Opcode = AArch64ISD::ADDS;
2540 RHS = RHS.getOperand(1);
2541 } else if (isCMN(LHS, CC)) {
2542 // As we are looking for EQ/NE compares, the operands can be commuted; can
2543 // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
2544 Opcode = AArch64ISD::ADDS;
2545 LHS = LHS.getOperand(1);
2546 } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
2547 if (LHS.getOpcode() == ISD::AND) {
2548 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
2549 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
2550 // of the signed comparisons.
2551 const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
2552 DAG.getVTList(VT, MVT_CC),
2553 LHS.getOperand(0),
2554 LHS.getOperand(1));
2555 // Replace all users of (and X, Y) with newly generated (ands X, Y)
2556 DAG.ReplaceAllUsesWith(LHS, ANDSNode);
2557 return ANDSNode.getValue(1);
2558 } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
2559 // Use result of ANDS
2560 return LHS.getValue(1);
2561 }
2562 }
2563
2564 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
2565 .getValue(1);
2566}
2567
2568/// \defgroup AArch64CCMP CMP;CCMP matching
2569///
2570/// These functions deal with the formation of CMP;CCMP;... sequences.
2571/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
2572/// a comparison. They set the NZCV flags to a predefined value if their
2573 /// predicate is false. This allows expressing arbitrary conjunctions; for
2574 /// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))" can be
2575 /// expressed as:
2576/// cmp A
2577/// ccmp B, inv(CB), CA
2578/// check for CB flags
2579///
2580/// This naturally lets us implement chains of AND operations with SETCC
2581/// operands. And we can even implement some other situations by transforming
2582/// them:
2583/// - We can implement (NEG SETCC) i.e. negating a single comparison by
2584 /// negating the flags used in the CCMP/FCCMP operations.
2585/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
2586 /// by negating the flags we test for afterwards; i.e.,
2587 /// NEG (CMP CCMP CCMP ...) can be implemented.
2588/// - Note that we can only ever negate all previously processed results.
2589/// What we can not implement by flipping the flags to test is a negation
2590/// of two sub-trees (because the negation affects all sub-trees emitted so
2591/// far, so the 2nd sub-tree we emit would also affect the first).
2592/// With those tools we can implement some OR operations:
2593/// - (OR (SETCC A) (SETCC B)) can be implemented via:
2594/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
2595/// - After transforming OR to NEG/AND combinations we may be able to use NEG
2596/// elimination rules from earlier to implement the whole thing as a
2597/// CCMP/FCCMP chain.
2598///
2599 /// As a complete example:
2600 /// or (or (setCA (cmp A)) (setCB (cmp B)))
2601 /// (and (setCC (cmp C)) (setCD (cmp D)))
2602 /// can be reassociated to:
2603 /// or (and (setCC (cmp C)) (setCD (cmp D)))
2604 /// (or (setCA (cmp A)) (setCB (cmp B)))
2605 /// can be transformed to:
2606 /// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
2607 /// (and (not (setCA (cmp A))) (not (setCB (cmp B))))))
2608/// which can be implemented as:
2609/// cmp C
2610/// ccmp D, inv(CD), CC
2611/// ccmp A, CA, inv(CD)
2612/// ccmp B, CB, inv(CA)
2613/// check for CB flags
2614///
2615 /// A counterexample is "or (and A B) (and C D)", which translates to
2616 /// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))); we
2617 /// can only implement one of the inner (not) operations, but not both!
2618/// @{
2619
2620/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
2621static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
2622 ISD::CondCode CC, SDValue CCOp,
2623 AArch64CC::CondCode Predicate,
2624 AArch64CC::CondCode OutCC,
2625 const SDLoc &DL, SelectionDAG &DAG) {
2626 unsigned Opcode = 0;
2627 const bool FullFP16 =
2628 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2629
2630 if (LHS.getValueType().isFloatingPoint()) {
2631 assert(LHS.getValueType() != MVT::f128);
2632 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
2633 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
2634 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
2635 }
2636 Opcode = AArch64ISD::FCCMP;
2637 } else if (RHS.getOpcode() == ISD::SUB) {
2638 SDValue SubOp0 = RHS.getOperand(0);
2639 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2640 // See emitComparison() on why we can only do this for SETEQ and SETNE.
2641 Opcode = AArch64ISD::CCMN;
2642 RHS = RHS.getOperand(1);
2643 }
2644 }
2645 if (Opcode == 0)
2646 Opcode = AArch64ISD::CCMP;
2647
2648 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
2649 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
2650 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
2651 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
2652 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
2653}
2654
2655/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
2656/// expressed as a conjunction. See \ref AArch64CCMP.
2657/// \param CanNegate Set to true if we can negate the whole sub-tree just by
2658/// changing the conditions on the SETCC tests.
2659/// (this means we can call emitConjunctionRec() with
2660/// Negate==true on this sub-tree)
2661/// \param MustBeFirst Set to true if this subtree needs to be negated and we
2662/// cannot do the negation naturally. We are required to
2663/// emit the subtree first in this case.
2664 /// \param WillNegate Is true if we are called when the result of this
2665/// subexpression must be negated. This happens when the
2666/// outer expression is an OR. We can use this fact to know
2667/// that we have a double negation (or (or ...) ...) that
2668/// can be implemented for free.
2669static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
2670 bool &MustBeFirst, bool WillNegate,
2671 unsigned Depth = 0) {
2672 if (!Val.hasOneUse())
2673 return false;
2674 unsigned Opcode = Val->getOpcode();
2675 if (Opcode == ISD::SETCC) {
2676 if (Val->getOperand(0).getValueType() == MVT::f128)
2677 return false;
2678 CanNegate = true;
2679 MustBeFirst = false;
2680 return true;
2681 }
2682 // Protect against exponential runtime and stack overflow.
2683 if (Depth > 6)
2684 return false;
2685 if (Opcode == ISD::AND || Opcode == ISD::OR) {
2686 bool IsOR = Opcode == ISD::OR;
2687 SDValue O0 = Val->getOperand(0);
2688 SDValue O1 = Val->getOperand(1);
2689 bool CanNegateL;
2690 bool MustBeFirstL;
2691 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
2692 return false;
2693 bool CanNegateR;
2694 bool MustBeFirstR;
2695 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
2696 return false;
2697
2698 if (MustBeFirstL && MustBeFirstR)
2699 return false;
2700
2701 if (IsOR) {
2702 // For an OR expression we need to be able to naturally negate at least
2703 // one side or we cannot do the transformation at all.
2704 if (!CanNegateL && !CanNegateR)
2705 return false;
2706 // If the result of the OR will be negated and we can naturally negate
2707 // the leaves, then this sub-tree as a whole negates naturally.
2708 CanNegate = WillNegate && CanNegateL && CanNegateR;
2709 // If we cannot naturally negate the whole sub-tree, then this must be
2710 // emitted first.
2711 MustBeFirst = !CanNegate;
2712 } else {
2713 assert(Opcode == ISD::AND && "Must be OR or AND");
2714 // We cannot naturally negate an AND operation.
2715 CanNegate = false;
2716 MustBeFirst = MustBeFirstL || MustBeFirstR;
2717 }
2718 return true;
2719 }
2720 return false;
2721}
2722
2723/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
2724 /// of CCMP/FCCMP ops. See @ref AArch64CCMP.
2725 /// Tries to transform the given i1 producing node @p Val to a series of compare
2726/// and conditional compare operations. @returns an NZCV flags producing node
2727/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
2728 /// the transformation was not possible.
2729 /// \p Negate is true if we want this sub-tree to be negated just by changing
2730/// SETCC conditions.
2731static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
2732 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
2733 AArch64CC::CondCode Predicate) {
2734 // We're at a tree leaf, produce a conditional comparison operation.
2735 unsigned Opcode = Val->getOpcode();
2736 if (Opcode == ISD::SETCC) {
2737 SDValue LHS = Val->getOperand(0);
2738 SDValue RHS = Val->getOperand(1);
2739 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
2740 bool isInteger = LHS.getValueType().isInteger();
2741 if (Negate)
2742 CC = getSetCCInverse(CC, LHS.getValueType());
2743 SDLoc DL(Val);
2744 // Determine OutCC and handle FP special case.
2745 if (isInteger) {
2746 OutCC = changeIntCCToAArch64CC(CC);
2747 } else {
2748 assert(LHS.getValueType().isFloatingPoint());
2749 AArch64CC::CondCode ExtraCC;
2750 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
2751 // Some floating point conditions can't be tested with a single condition
2752 // code. Construct an additional comparison in this case.
2753 if (ExtraCC != AArch64CC::AL) {
2754 SDValue ExtraCmp;
2755 if (!CCOp.getNode())
2756 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
2757 else
2758 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
2759 ExtraCC, DL, DAG);
2760 CCOp = ExtraCmp;
2761 Predicate = ExtraCC;
2762 }
2763 }
2764
2765 // Produce a normal comparison if we are first in the chain
2766 if (!CCOp)
2767 return emitComparison(LHS, RHS, CC, DL, DAG);
2768 // Otherwise produce a ccmp.
2769 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
2770 DAG);
2771 }
2772 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
2773
2774 bool IsOR = Opcode == ISD::OR;
2775
2776 SDValue LHS = Val->getOperand(0);
2777 bool CanNegateL;
2778 bool MustBeFirstL;
2779 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
2780 assert(ValidL && "Valid conjunction/disjunction tree");
2781 (void)ValidL;
2782
2783 SDValue RHS = Val->getOperand(1);
2784 bool CanNegateR;
2785 bool MustBeFirstR;
2786 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
2787 assert(ValidR && "Valid conjunction/disjunction tree");
2788 (void)ValidR;
2789
2790 // Swap sub-tree that must come first to the right side.
2791 if (MustBeFirstL) {
2792 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
2793 std::swap(LHS, RHS);
2794 std::swap(CanNegateL, CanNegateR);
2795 std::swap(MustBeFirstL, MustBeFirstR);
2796 }
2797
2798 bool NegateR;
2799 bool NegateAfterR;
2800 bool NegateL;
2801 bool NegateAfterAll;
2802 if (Opcode == ISD::OR) {
2803 // Swap the sub-tree that we can negate naturally to the left.
2804 if (!CanNegateL) {
2805 assert(CanNegateR && "at least one side must be negatable");
2806 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
2807 assert(!Negate);
2808 std::swap(LHS, RHS);
2809 NegateR = false;
2810 NegateAfterR = true;
2811 } else {
2812 // Negate the left sub-tree if possible, otherwise negate the result.
2813 NegateR = CanNegateR;
2814 NegateAfterR = !CanNegateR;
2815 }
2816 NegateL = true;
2817 NegateAfterAll = !Negate;
2818 } else {
2819 assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
2820 assert(!Negate && "Valid conjunction/disjunction tree");
2821
2822 NegateL = false;
2823 NegateR = false;
2824 NegateAfterR = false;
2825 NegateAfterAll = false;
2826 }
2827
2828 // Emit sub-trees.
2829 AArch64CC::CondCode RHSCC;
2830 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
2831 if (NegateAfterR)
2832 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
2833 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
2834 if (NegateAfterAll)
2835 OutCC = AArch64CC::getInvertedCondCode(OutCC);
2836 return CmpL;
2837}
2838
2839 /// Emit expression as a conjunction (a series of CCMP/FCCMP ops).
2840/// In some cases this is even possible with OR operations in the expression.
2841/// See \ref AArch64CCMP.
2842/// \see emitConjunctionRec().
2843static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
2844 AArch64CC::CondCode &OutCC) {
2845 bool DummyCanNegate;
2846 bool DummyMustBeFirst;
2847 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
2848 return SDValue();
2849
2850 return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
2851}
2852
2853/// @}
2854
2855/// Returns how profitable it is to fold a comparison's operand's shift and/or
2856/// extension operations.
2857static unsigned getCmpOperandFoldingProfit(SDValue Op) {
2858 auto isSupportedExtend = [&](SDValue V) {
2859 if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
2860 return true;
2861
2862 if (V.getOpcode() == ISD::AND)
2863 if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
2864 uint64_t Mask = MaskCst->getZExtValue();
2865 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
2866 }
2867
2868 return false;
2869 };
2870
2871 if (!Op.hasOneUse())
2872 return 0;
2873
2874 if (isSupportedExtend(Op))
2875 return 1;
2876
2877 unsigned Opc = Op.getOpcode();
2878 if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
2879 if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2880 uint64_t Shift = ShiftCst->getZExtValue();
2881 if (isSupportedExtend(Op.getOperand(0)))
2882 return (Shift <= 4) ? 2 : 1;
2883 EVT VT = Op.getValueType();
2884 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
2885 return 1;
2886 }
2887
2888 return 0;
2889}
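// Editorial aside, not part of AArch64ISelLowering.cpp: why the masks
// 0xFF/0xFFFF/0xFFFFFFFF above are treated as extends -- ANDing with them is
// exactly a zero-extension of the low 8/16/32 bits, which the compare should be
// able to fold as a uxtb/uxth/uxtw extended-register operand.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t x = 0x123456789ABCDEF0ULL;
  assert((x & 0xFFULL) == uint64_t(uint8_t(x)));
  assert((x & 0xFFFFULL) == uint64_t(uint16_t(x)));
  assert((x & 0xFFFFFFFFULL) == uint64_t(uint32_t(x)));
  return 0;
}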
2890
2891static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2892 SDValue &AArch64cc, SelectionDAG &DAG,
2893 const SDLoc &dl) {
2894 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2895 EVT VT = RHS.getValueType();
2896 uint64_t C = RHSC->getZExtValue();
2897 if (!isLegalArithImmed(C)) {
2898 // Constant does not fit, try adjusting it by one?
2899 switch (CC) {
2900 default:
2901 break;
2902 case ISD::SETLT:
2903 case ISD::SETGE:
2904 if ((VT == MVT::i32 && C != 0x80000000 &&
2905 isLegalArithImmed((uint32_t)(C - 1))) ||
2906 (VT == MVT::i64 && C != 0x80000000ULL &&
2907 isLegalArithImmed(C - 1ULL))) {
2908 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2909 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2910 RHS = DAG.getConstant(C, dl, VT);
2911 }
2912 break;
2913 case ISD::SETULT:
2914 case ISD::SETUGE:
2915 if ((VT == MVT::i32 && C != 0 &&
2916 isLegalArithImmed((uint32_t)(C - 1))) ||
2917 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
2918 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2919 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2920 RHS = DAG.getConstant(C, dl, VT);
2921 }
2922 break;
2923 case ISD::SETLE:
2924 case ISD::SETGT:
2925 if ((VT == MVT::i32 && C != INT32_MAX &&
2926 isLegalArithImmed((uint32_t)(C + 1))) ||
2927 (VT == MVT::i64 && C != INT64_MAX &&
2928 isLegalArithImmed(C + 1ULL))) {
2929 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2930 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2931 RHS = DAG.getConstant(C, dl, VT);
2932 }
2933 break;
2934 case ISD::SETULE:
2935 case ISD::SETUGT:
2936 if ((VT == MVT::i32 && C != UINT32_MAX &&
2937 isLegalArithImmed((uint32_t)(C + 1))) ||
2938 (VT == MVT::i64 && C != UINT64_MAX &&
2939 isLegalArithImmed(C + 1ULL))) {
2940 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2941 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2942 RHS = DAG.getConstant(C, dl, VT);
2943 }
2944 break;
2945 }
2946 }
2947 }
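// Editorial aside, not part of AArch64ISelLowering.cpp: the "adjust the constant
// by one" rewrites above in concrete terms. 4097 is not encodable as an
// arithmetic immediate but 4096 (1 << 12) is, and for 32-bit signed x the
// predicate "x < 4097" is identical to "x <= 4096" (no wrap at this value,
// which is what the C != INT32_MAX style guards ensure in general).
#include <cassert>
#include <cstdint>

int main() {
  for (int64_t x = 4090; x <= 4100; ++x) // spot-check around the boundary
    assert((int32_t(x) < 4097) == (int32_t(x) <= 4096));
  return 0;
}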
2948
2949 // Comparisons are canonicalized so that the RHS operand is simpler than the
2950 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
2951 // can fold some shift+extend operations on the RHS operand, so swap the
2952 // operands if that can be done.
2953 //
2954 // For example:
2955 // lsl w13, w11, #1
2956 // cmp w13, w12
2957 // can be turned into:
2958 // cmp w12, w11, lsl #1
2959 if (!isa<ConstantSDNode>(RHS) ||
2960 !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
2961 SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
2962
2963 if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
2964 std::swap(LHS, RHS);
2965 CC = ISD::getSetCCSwappedOperands(CC);
2966 }
2967 }
2968
2969 SDValue Cmp;
2970 AArch64CC::CondCode AArch64CC;
2971 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
2972 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
2973
2974 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
2975 // For the i8 operand, the largest immediate is 255, so this can be easily
2976 // encoded in the compare instruction. For the i16 operand, however, the
2977 // largest immediate cannot be encoded in the compare.
2978 // Therefore, use a sign extending load and cmn to avoid materializing the
2979 // -1 constant. For example,
2980 // movz w1, #65535
2981 // ldrh w0, [x0, #0]
2982 // cmp w0, w1
2983 // >
2984 // ldrsh w0, [x0, #0]
2985 // cmn w0, #1
2986 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
2987 // if and only if (sext LHS) == (sext RHS). The checks are in place to
2988 // ensure both the LHS and RHS are truly zero extended and to make sure the
2989 // transformation is profitable.
2990 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
2991 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
2992 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
2993 LHS.getNode()->hasNUsesOfValue(1, 0)) {
2994 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
2995 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
2996 SDValue SExt =
2997 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
2998 DAG.getValueType(MVT::i16));
2999 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
3000 RHS.getValueType()),
3001 CC, dl, DAG);
3002 AArch64CC = changeIntCCToAArch64CC(CC);
3003 }
3004 }
3005
3006 if (!Cmp && (RHSC->isZero() || RHSC->isOne())) {
3007 if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
3008 if ((CC == ISD::SETNE) ^ RHSC->isZero())
3009 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
3010 }
3011 }
3012 }
3013
3014 if (!Cmp) {
3015 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
3016 AArch64CC = changeIntCCToAArch64CC(CC);
3017 }
3018 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
3019 return Cmp;
3020}
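// Editorial aside, not part of AArch64ISelLowering.cpp: the ldrsh/cmn rewrite
// in getAArch64Cmp above relies on zero- and sign-extension agreeing about
// equality: for a 16-bit payload, zext16(v) == 0xFFFF holds exactly when
// sext16(v) == -1. Exhaustive check over all 16-bit values.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t v = 0; v <= 0xFFFF; ++v) {
    uint32_t zext = v;          // what ldrh would produce
    int32_t sext = int16_t(v);  // what ldrsh would produce
    assert((zext == 0xFFFFu) == (sext == -1));
  }
  return 0;
}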
3021
3022static std::pair<SDValue, SDValue>
3023getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
3024 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
3025 "Unsupported value type");
3027 SDLoc DL(Op);
3028 SDValue LHS = Op.getOperand(0);
3029 SDValue RHS = Op.getOperand(1);
3030 unsigned Opc = 0;
3031 switch (Op.getOpcode()) {
3032 default:
3033 llvm_unreachable("Unknown overflow instruction!")::llvm::llvm_unreachable_internal("Unknown overflow instruction!"
, "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3033)
;
3034 case ISD::SADDO:
3035 Opc = AArch64ISD::ADDS;
3036 CC = AArch64CC::VS;
3037 break;
3038 case ISD::UADDO:
3039 Opc = AArch64ISD::ADDS;
3040 CC = AArch64CC::HS;
3041 break;
3042 case ISD::SSUBO:
3043 Opc = AArch64ISD::SUBS;
3044 CC = AArch64CC::VS;
3045 break;
3046 case ISD::USUBO:
3047 Opc = AArch64ISD::SUBS;
3048 CC = AArch64CC::LO;
3049 break;
3050 // Multiply needs a little bit extra work.
3051 case ISD::SMULO:
3052 case ISD::UMULO: {
3053 CC = AArch64CC::NE;
3054 bool IsSigned = Op.getOpcode() == ISD::SMULO;
3055 if (Op.getValueType() == MVT::i32) {
3056 // Extend to 64-bits, then perform a 64-bit multiply.
3057 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3058 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
3059 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
3060 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3061 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3062
3063 // Check that the result fits into a 32-bit integer.
3064 SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
3065 if (IsSigned) {
3066 // cmp xreg, wreg, sxtw
3067 SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
3068 Overflow =
3069 DAG.getNode(AArch64ISD::SUBS, DL, VTs, Mul, SExtMul).getValue(1);
3070 } else {
3071 // tst xreg, #0xffffffff00000000
3072 SDValue UpperBits = DAG.getConstant(0xFFFFFFFF00000000, DL, MVT::i64);
3073 Overflow =
3074 DAG.getNode(AArch64ISD::ANDS, DL, VTs, Mul, UpperBits).getValue(1);
3075 }
3076 break;
3077 }
3078 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
3079 // For the 64 bit multiply
3080 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3081 if (IsSigned) {
3082 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
3083 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
3084 DAG.getConstant(63, DL, MVT::i64));
3085 // It is important that LowerBits is last, otherwise the arithmetic
3086 // shift will not be folded into the compare (SUBS).
3087 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3088 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
3089 .getValue(1);
3090 } else {
3091 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
3092 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3093 Overflow =
3094 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
3095 DAG.getConstant(0, DL, MVT::i64),
3096 UpperBits).getValue(1);
3097 }
3098 break;
3099 }
3100 } // switch (...)
3101
3102 if (Opc) {
3103 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
3104
3105 // Emit the AArch64 operation with overflow check.
3106 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
3107 Overflow = Value.getValue(1);
3108 }
3109 return std::make_pair(Value, Overflow);
3110}
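// Editorial aside, not part of AArch64ISelLowering.cpp: the i32 UMULO lowering
// above widens to 64 bits and tests whether anything landed in the upper half;
// the same idea as plain C++.
#include <cassert>
#include <cstdint>

static bool umul32Overflows(uint32_t a, uint32_t b) {
  uint64_t wide = uint64_t(a) * uint64_t(b);
  return (wide & 0xFFFFFFFF00000000ULL) != 0; // "tst xreg, #0xffffffff00000000"
}

int main() {
  assert(!umul32Overflows(65535u, 65537u)); // 0xFFFFFFFF, still representable
  assert(umul32Overflows(65536u, 65536u));  // 0x100000000, does not fit in i32
  return 0;
}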
3111
3112SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
3113 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
3114 return LowerToScalableOp(Op, DAG);
3115
3116 SDValue Sel = Op.getOperand(0);
3117 SDValue Other = Op.getOperand(1);
3118 SDLoc dl(Sel);
3119
3120 // If the operand is an overflow checking operation, invert the condition
3121 // code and kill the Not operation. I.e., transform:
3122 // (xor (overflow_op_bool, 1))
3123 // -->
3124 // (csel 1, 0, invert(cc), overflow_op_bool)
3125 // ... which later gets transformed to just a cset instruction with an
3126 // inverted condition code, rather than a cset + eor sequence.
3127 if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
3128 // Only lower legal XALUO ops.
3129 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
3130 return SDValue();
3131
3132 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3133 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3134 AArch64CC::CondCode CC;
3135 SDValue Value, Overflow;
3136 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
3137 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3138 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
3139 CCVal, Overflow);
3140 }
3141 // If neither operand is a SELECT_CC, give up.
3142 if (Sel.getOpcode() != ISD::SELECT_CC)
3143 std::swap(Sel, Other);
3144 if (Sel.getOpcode() != ISD::SELECT_CC)
3145 return Op;
3146
3147 // The folding we want to perform is:
3148 // (xor x, (select_cc a, b, cc, 0, -1) )
3149 // -->
3150 // (csel x, (xor x, -1), cc ...)
3151 //
3152 // The latter will get matched to a CSINV instruction.
3153
3154 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
3155 SDValue LHS = Sel.getOperand(0);
3156 SDValue RHS = Sel.getOperand(1);
3157 SDValue TVal = Sel.getOperand(2);
3158 SDValue FVal = Sel.getOperand(3);
3159
3160 // FIXME: This could be generalized to non-integer comparisons.
3161 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
3162 return Op;
3163
3164 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
3165 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
3166
3167 // The values aren't constants; this isn't the pattern we're looking for.
3168 if (!CFVal || !CTVal)
3169 return Op;
3170
3171 // We can commute the SELECT_CC by inverting the condition. This
3172 // might be needed to make this fit into a CSINV pattern.
3173 if (CTVal->isAllOnes() && CFVal->isZero()) {
3174 std::swap(TVal, FVal);
3175 std::swap(CTVal, CFVal);
3176 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
3177 }
3178
3179 // If the constants line up, perform the transform!
3180 if (CTVal->isZero() && CFVal->isAllOnes()) {
3181 SDValue CCVal;
3182 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
3183
3184 FVal = Other;
3185 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
3186 DAG.getConstant(-1ULL, dl, Other.getValueType()));
3187
3188 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
3189 CCVal, Cmp);
3190 }
3191
3192 return Op;
3193}
3194
3195static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
3196 EVT VT = Op.getValueType();
3197
3198 // Let legalize expand this if it isn't a legal type yet.
3199 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3200 return SDValue();
3201
3202 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3203
3204 unsigned Opc;
3205 bool ExtraOp = false;
3206 switch (Op.getOpcode()) {
3207 default:
3208 llvm_unreachable("Invalid code")::llvm::llvm_unreachable_internal("Invalid code", "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3208)
;
3209 case ISD::ADDC:
3210 Opc = AArch64ISD::ADDS;
3211 break;
3212 case ISD::SUBC:
3213 Opc = AArch64ISD::SUBS;
3214 break;
3215 case ISD::ADDE:
3216 Opc = AArch64ISD::ADCS;
3217 ExtraOp = true;
3218 break;
3219 case ISD::SUBE:
3220 Opc = AArch64ISD::SBCS;
3221 ExtraOp = true;
3222 break;
3223 }
3224
3225 if (!ExtraOp)
3226 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
3227 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
3228 Op.getOperand(2));
3229}
3230
3231static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
3232 // Let legalize expand this if it isn't a legal type yet.
3233 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3234 return SDValue();
3235
3236 SDLoc dl(Op);
3237 AArch64CC::CondCode CC;
3238 // The actual operation that sets the overflow or carry flag.
3239 SDValue Value, Overflow;
3240 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
3241
3242 // We use 0 and 1 as false and true values.
3243 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3244 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3245
3246 // We use an inverted condition, because the conditional select is inverted
3247 // too. This will allow it to be selected to a single instruction:
3248 // CSINC Wd, WZR, WZR, invert(cond).
3249 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3250 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
3251 CCVal, Overflow);
3252
3253 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3254 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3255}
3256
3257// Prefetch operands are:
3258// 1: Address to prefetch
3259// 2: bool isWrite
3260// 3: int locality (0 = no locality ... 3 = extreme locality)
3261// 4: bool isDataCache
3262static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
3263 SDLoc DL(Op);
3264 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3265 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
3266 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3267
3268 bool IsStream = !Locality;
3269 // When the locality number is set
3270 if (Locality) {
3271 // The front-end should have filtered out the out-of-range values
3272 assert(Locality <= 3 && "Prefetch locality out-of-range");
3273 // The locality degree is the opposite of the cache speed.
3274 // Put the number the other way around.
3275 // The encoding starts at 0 for level 1
3276 Locality = 3 - Locality;
3277 }
3278
3279 // Build the mask value encoding the expected behavior.
3280 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
3281 (!IsData << 3) | // IsDataCache bit
3282 (Locality << 1) | // Cache level bits
3283 (unsigned)IsStream; // Stream bit
3284 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
3285 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
3286}
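// Editorial aside, not part of AArch64ISelLowering.cpp: the PrfOp immediate
// assembled above, restated standalone. One worked value: a data read prefetch
// with locality 3 (e.g. __builtin_prefetch(p, 0, 3)) encodes to 0, which should
// correspond to prfm's PLDL1KEEP operand; the exact mnemonic mapping is an
// assumption here, the bit layout mirrors the code above.
#include <cassert>

static unsigned encodePrfOp(bool IsWrite, unsigned Locality, bool IsData) {
  bool IsStream = (Locality == 0);
  if (Locality)
    Locality = 3 - Locality; // locality 3 -> level 1, locality 1 -> level 3
  return (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | unsigned(IsStream);
}

int main() {
  assert(encodePrfOp(/*IsWrite=*/false, /*Locality=*/3, /*IsData=*/true) == 0);
  assert(encodePrfOp(/*IsWrite=*/true, /*Locality=*/0, /*IsData=*/true) == 0x11);
  return 0;
}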
3287
3288SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
3289 SelectionDAG &DAG) const {
3290 EVT VT = Op.getValueType();
3291 if (VT.isScalableVector())
3292 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
3293
3294 if (useSVEForFixedLengthVectorVT(VT))
3295 return LowerFixedLengthFPExtendToSVE(Op, DAG);
3296
3297 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
3298 return SDValue();
3299}
3300
3301SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
3302 SelectionDAG &DAG) const {
3303 if (Op.getValueType().isScalableVector())
3304 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
3305
3306 bool IsStrict = Op->isStrictFPOpcode();
3307 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3308 EVT SrcVT = SrcVal.getValueType();
3309
3310 if (useSVEForFixedLengthVectorVT(SrcVT))
3311 return LowerFixedLengthFPRoundToSVE(Op, DAG);
3312
3313 if (SrcVT != MVT::f128) {
3314 // Expand cases where the input is a vector bigger than NEON.
3315 if (useSVEForFixedLengthVectorVT(SrcVT))
3316 return SDValue();
3317
3318 // It's legal except when f128 is involved
3319 return Op;
3320 }
3321
3322 return SDValue();
3323}
3324
3325SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
3326 SelectionDAG &DAG) const {
3327 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3328 // Any additional optimization in this function should be recorded
3329 // in the cost tables.
3330 EVT InVT = Op.getOperand(0).getValueType();
3331 EVT VT = Op.getValueType();
3332
3333 if (VT.isScalableVector()) {
3334 unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
3335 ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
3336 : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
3337 return LowerToPredicatedOp(Op, DAG, Opcode);
3338 }
3339
3340 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3341 return LowerFixedLengthFPToIntToSVE(Op, DAG);
3342
3343 unsigned NumElts = InVT.getVectorNumElements();
3344
3345 // f16 conversions are promoted to f32 when full fp16 is not supported.
3346 if (InVT.getVectorElementType() == MVT::f16 &&
3347 !Subtarget->hasFullFP16()) {
3348 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
3349 SDLoc dl(Op);
3350 return DAG.getNode(
3351 Op.getOpcode(), dl, Op.getValueType(),
3352 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
3353 }
3354
3355 uint64_t VTSize = VT.getFixedSizeInBits();
3356 uint64_t InVTSize = InVT.getFixedSizeInBits();
3357 if (VTSize < InVTSize) {
3358 SDLoc dl(Op);
3359 SDValue Cv =
3360 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
3361 Op.getOperand(0));
3362 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
3363 }
3364
3365 if (VTSize > InVTSize) {
3366 SDLoc dl(Op);
3367 MVT ExtVT =
3368 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
3369 VT.getVectorNumElements());
3370 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
3371 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
3372 }
3373
3374 // Type changing conversions are illegal.
3375 return Op;
3376}
3377
3378SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
3379 SelectionDAG &DAG) const {
3380 bool IsStrict = Op->isStrictFPOpcode();
3381 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3382
3383 if (SrcVal.getValueType().isVector())
3384 return LowerVectorFP_TO_INT(Op, DAG);
3385
3386 // f16 conversions are promoted to f32 when full fp16 is not supported.
3387 if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
3388 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3389 SDLoc dl(Op);
3390 return DAG.getNode(
3391 Op.getOpcode(), dl, Op.getValueType(),
3392 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
3393 }
3394
3395 if (SrcVal.getValueType() != MVT::f128) {
3396 // It's legal except when f128 is involved
3397 return Op;
3398 }
3399
3400 return SDValue();
3401}
3402
3403SDValue
3404AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
3405 SelectionDAG &DAG) const {
3406 // AArch64 FP-to-int conversions saturate to the destination element size, so
3407 // we can lower common saturating conversions to simple instructions.
3408 SDValue SrcVal = Op.getOperand(0);
3409 EVT SrcVT = SrcVal.getValueType();
3410 EVT DstVT = Op.getValueType();
3411 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3412
3413 uint64_t SrcElementWidth = SrcVT.getScalarSizeInBits();
3414 uint64_t DstElementWidth = DstVT.getScalarSizeInBits();
3415 uint64_t SatWidth = SatVT.getScalarSizeInBits();
3416 assert(SatWidth <= DstElementWidth &&
3417 "Saturation width cannot exceed result width");
3418
3419 // TODO: Consider lowering to SVE operations, as in LowerVectorFP_TO_INT.
3420 // Currently, the `llvm.fpto[su]i.sat.*` intrinsics don't accept scalable
3421 // types, so this is hard to reach.
3422 if (DstVT.isScalableVector())
3423 return SDValue();
3424
3425 EVT SrcElementVT = SrcVT.getVectorElementType();
3426
3427 // In the absence of FP16 support, promote f16 to f32 and saturate the result.
3428 if (SrcElementVT == MVT::f16 &&
3429 (!Subtarget->hasFullFP16() || DstElementWidth > 16)) {
3430 MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
3431 SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal);
3432 SrcVT = F32VT;
3433 SrcElementVT = MVT::f32;
3434 SrcElementWidth = 32;
3435 } else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
3436 SrcElementVT != MVT::f16)
3437 return SDValue();
3438
3439 SDLoc DL(Op);
3440 // Cases that we can emit directly.
3441 if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
3442 return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
3443 DAG.getValueType(DstVT.getScalarType()));
3444
3445 // Otherwise we emit a cvt that saturates to a higher BW, and saturate the
3446 // result. This is only valid if the legal cvt is larger than the saturate
3447 // width. For double, as we don't have MIN/MAX, it can be simpler to scalarize
3448 // (at least until sqxtn is selected).
3449 if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
3450 return SDValue();
3451
3452 EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
3453 SDValue NativeCvt = DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal,
3454 DAG.getValueType(IntVT.getScalarType()));
3455 SDValue Sat;
3456 if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
3457 SDValue MinC = DAG.getConstant(
3458 APInt::getSignedMaxValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
3459 IntVT);
3460 SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC);
3461 SDValue MaxC = DAG.getConstant(
3462 APInt::getSignedMinValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
3463 IntVT);
3464 Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
3465 } else {
3466 SDValue MinC = DAG.getConstant(
3467 APInt::getAllOnesValue(SatWidth).zextOrSelf(SrcElementWidth), DL,
3468 IntVT);
3469 Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
3470 }
3471
3472 return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
3473}
3474
3475SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
3476 SelectionDAG &DAG) const {
3477 // AArch64 FP-to-int conversions saturate to the destination register size, so
3478 // we can lower common saturating conversions to simple instructions.
3479 SDValue SrcVal = Op.getOperand(0);
3480 EVT SrcVT = SrcVal.getValueType();
3481
3482 if (SrcVT.isVector())
3483 return LowerVectorFP_TO_INT_SAT(Op, DAG);
3484
3485 EVT DstVT = Op.getValueType();
3486 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3487 uint64_t SatWidth = SatVT.getScalarSizeInBits();
3488 uint64_t DstWidth = DstVT.getScalarSizeInBits();
3489 assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
3490
3491 // In the absence of FP16 support, promote f16 to f32 and saturate the result.
3492 if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) {
3493 SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal);
3494 SrcVT = MVT::f32;
3495 } else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16)
3496 return SDValue();
3497
3498 SDLoc DL(Op);
3499 // Cases that we can emit directly.
3500 if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
3501 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
3502 DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
3503 return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
3504 DAG.getValueType(DstVT));
3505
3506 // Otherwise we emit a cvt that saturates to a higher BW, and saturate the
3507 // result. This is only valid if the legal cvt is larger than the saturate
3508 // width.
3509 if (DstWidth < SatWidth)
3510 return SDValue();
3511
3512 SDValue NativeCvt =
3513 DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, DAG.getValueType(DstVT));
3514 SDValue Sat;
3515 if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
3516 SDValue MinC = DAG.getConstant(
3517 APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth), DL, DstVT);
3518 SDValue Min = DAG.getNode(ISD::SMIN, DL, DstVT, NativeCvt, MinC);
3519 SDValue MaxC = DAG.getConstant(
3520 APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth), DL, DstVT);
3521 Sat = DAG.getNode(ISD::SMAX, DL, DstVT, Min, MaxC);
3522 } else {
3523 SDValue MinC = DAG.getConstant(
3524 APInt::getAllOnesValue(SatWidth).zextOrSelf(DstWidth), DL, DstVT);
3525 Sat = DAG.getNode(ISD::UMIN, DL, DstVT, NativeCvt, MinC);
3526 }
3527
3528 return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
3529}
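// Editorial aside, not part of AArch64ISelLowering.cpp: the "convert at the
// native width, then clamp" shape used by the saturating lowerings above,
// written out for a signed 8-bit saturation width applied to a 32-bit native
// conversion result.
#include <algorithm>
#include <cassert>
#include <cstdint>

static int32_t saturateToInt8(int32_t nativeCvt) {
  const int32_t minC = INT8_MIN, maxC = INT8_MAX; // signed min/max at SatWidth = 8
  return std::max(minC, std::min(nativeCvt, maxC)); // SMIN with max, then SMAX with min
}

int main() {
  assert(saturateToInt8(300) == 127);
  assert(saturateToInt8(-300) == -128);
  assert(saturateToInt8(42) == 42);
  return 0;
}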
3530
3531SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
3532 SelectionDAG &DAG) const {
3533 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3534 // Any additional optimization in this function should be recorded
3535 // in the cost tables.
3536 EVT VT = Op.getValueType();
3537 SDLoc dl(Op);
3538 SDValue In = Op.getOperand(0);
3539 EVT InVT = In.getValueType();
3540 unsigned Opc = Op.getOpcode();
3541 bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
3542
3543 if (VT.isScalableVector()) {
3544 if (InVT.getVectorElementType() == MVT::i1) {
3545 // We can't directly extend an SVE predicate; extend it first.
3546 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3547 EVT CastVT = getPromotedVTForPredicate(InVT);
3548 In = DAG.getNode(CastOpc, dl, CastVT, In);
3549 return DAG.getNode(Opc, dl, VT, In);
3550 }
3551
3552 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
3553 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
3554 return LowerToPredicatedOp(Op, DAG, Opcode);
3555 }
3556
3557 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3558 return LowerFixedLengthIntToFPToSVE(Op, DAG);
3559
3560 uint64_t VTSize = VT.getFixedSizeInBits();
3561 uint64_t InVTSize = InVT.getFixedSizeInBits();
3562 if (VTSize < InVTSize) {
3563 MVT CastVT =
3564 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
3565 InVT.getVectorNumElements());
3566 In = DAG.getNode(Opc, dl, CastVT, In);
3567 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
3568 }
3569
3570 if (VTSize > InVTSize) {
3571 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3572 EVT CastVT = VT.changeVectorElementTypeToInteger();
3573 In = DAG.getNode(CastOpc, dl, CastVT, In);
3574 return DAG.getNode(Opc, dl, VT, In);
3575 }
3576
3577 return Op;
3578}
3579
3580SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
3581 SelectionDAG &DAG) const {
3582 if (Op.getValueType().isVector())
3583 return LowerVectorINT_TO_FP(Op, DAG);
3584
3585 bool IsStrict = Op->isStrictFPOpcode();
3586 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3587
3588 // f16 conversions are promoted to f32 when full fp16 is not supported.
3589 if (Op.getValueType() == MVT::f16 &&
3590 !Subtarget->hasFullFP16()) {
3591 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3592 SDLoc dl(Op);
3593 return DAG.getNode(
3594 ISD::FP_ROUND, dl, MVT::f16,
3595 DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
3596 DAG.getIntPtrConstant(0, dl));
3597 }
3598
3599 // i128 conversions are libcalls.
3600 if (SrcVal.getValueType() == MVT::i128)
3601 return SDValue();
3602
3603 // Other conversions are legal, unless it's to the completely software-based
3604 // fp128.
3605 if (Op.getValueType() != MVT::f128)
3606 return Op;
3607 return SDValue();
3608}
3609
3610SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
3611 SelectionDAG &DAG) const {
3612 // For iOS, we want to call an alternative entry point: __sincos_stret,
3613 // which returns the values in two S / D registers.
3614 SDLoc dl(Op);
3615 SDValue Arg = Op.getOperand(0);
3616 EVT ArgVT = Arg.getValueType();
3617 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
3618
3619 ArgListTy Args;
3620 ArgListEntry Entry;
3621
3622 Entry.Node = Arg;
3623 Entry.Ty = ArgTy;
3624 Entry.IsSExt = false;
3625 Entry.IsZExt = false;
3626 Args.push_back(Entry);
3627
3628 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
3629 : RTLIB::SINCOS_STRET_F32;
3630 const char *LibcallName = getLibcallName(LC);
3631 SDValue Callee =
3632 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
3633
3634 StructType *RetTy = StructType::get(ArgTy, ArgTy);
3635 TargetLowering::CallLoweringInfo CLI(DAG);
3636 CLI.setDebugLoc(dl)
3637 .setChain(DAG.getEntryNode())
3638 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
3639
3640 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3641 return CallResult.first;
3642}
3643
3644static MVT getSVEContainerType(EVT ContentTy);
3645
3646SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
3647 SelectionDAG &DAG) const {
3648 EVT OpVT = Op.getValueType();
3649 EVT ArgVT = Op.getOperand(0).getValueType();
3650
3651 if (useSVEForFixedLengthVectorVT(OpVT))
3652 return LowerFixedLengthBitcastToSVE(Op, DAG);
3653
3654 if (OpVT.isScalableVector()) {
3655 if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
3656 assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
3657 "Expected int->fp bitcast!");
3658 SDValue ExtResult =
3659 DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
3660 Op.getOperand(0));
3661 return getSVESafeBitCast(OpVT, ExtResult, DAG);
3662 }
3663 return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
3664 }
3665
3666 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
3667 return SDValue();
3668
3669 assert(ArgVT == MVT::i16);
3670 SDLoc DL(Op);
3671
3672 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
3673 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
3674 return SDValue(
3675 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
3676 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
3677 0);
3678}
3679
3680static EVT getExtensionTo64Bits(const EVT &OrigVT) {
3681 if (OrigVT.getSizeInBits() >= 64)
3682 return OrigVT;
3683
3684 assert(OrigVT.isSimple() && "Expecting a simple value type");
3685
3686 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
3687 switch (OrigSimpleTy) {
3688 default: llvm_unreachable("Unexpected Vector Type");
3689 case MVT::v2i8:
3690 case MVT::v2i16:
3691 return MVT::v2i32;
3692 case MVT::v4i8:
3693 return MVT::v4i16;
3694 }
3695}
3696
3697static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
3698 const EVT &OrigTy,
3699 const EVT &ExtTy,
3700 unsigned ExtOpcode) {
3701 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
3702 // We expect the ExtTy to be 128 bits total. If the OrigTy is less than
3703 // 64 bits, we need to insert a new extension so that it will be 64 bits.
3704 assert(ExtTy.is128BitVector() && "Unexpected extension size");
3705 if (OrigTy.getSizeInBits() >= 64)
3706 return N;
3707
3708 // Must extend size to at least 64 bits to be used as an operand for VMULL.
3709 EVT NewVT = getExtensionTo64Bits(OrigTy);
3710
3711 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
3712}
3713
3714static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
3715 bool isSigned) {
3716 EVT VT = N->getValueType(0);
3717
3718 if (N->getOpcode() != ISD::BUILD_VECTOR)
3719 return false;
3720
3721 for (const SDValue &Elt : N->op_values()) {
3722 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
3723 unsigned EltSize = VT.getScalarSizeInBits();
3724 unsigned HalfSize = EltSize / 2;
3725 if (isSigned) {
3726 if (!isIntN(HalfSize, C->getSExtValue()))
3727 return false;
3728 } else {
3729 if (!isUIntN(HalfSize, C->getZExtValue()))
3730 return false;
3731 }
3732 continue;
3733 }
3734 return false;
3735 }
3736
3737 return true;
3738}
3739
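// Illustrative sketch, not part of the original file: isExtendedBUILD_VECTOR
// above asks whether every constant element of the BUILD_VECTOR fits in half
// of its element width, i.e. whether the vector could have been produced by
// sign- or zero-extending a half-width vector. The same predicate on a plain
// integer, assuming 0 < HalfBits < 64, looks like this (mirroring what
// llvm::isIntN / llvm::isUIntN compute):
static bool fitsInHalfWidthSketch(int64_t Val, unsigned EltBits, bool Signed) {
  unsigned HalfBits = EltBits / 2;
  if (Signed) {
    int64_t Min = -(int64_t(1) << (HalfBits - 1));
    int64_t Max = (int64_t(1) << (HalfBits - 1)) - 1;
    return Val >= Min && Val <= Max;   // e.g. 200 does not fit in i8 for v8i16
  }
  return uint64_t(Val) <= (uint64_t(1) << HalfBits) - 1; // e.g. 200 fits in u8
}
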
3740static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
3741 if (N->getOpcode() == ISD::SIGN_EXTEND ||
3742 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
3743 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
3744 N->getOperand(0)->getValueType(0),
3745 N->getValueType(0),
3746 N->getOpcode());
3747
3748 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
3749 EVT VT = N->getValueType(0);
3750 SDLoc dl(N);
3751 unsigned EltSize = VT.getScalarSizeInBits() / 2;
3752 unsigned NumElts = VT.getVectorNumElements();
3753 MVT TruncVT = MVT::getIntegerVT(EltSize);
3754 SmallVector<SDValue, 8> Ops;
3755 for (unsigned i = 0; i != NumElts; ++i) {
3756 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
3757 const APInt &CInt = C->getAPIntValue();
3758 // Element types smaller than 32 bits are not legal, so use i32 elements.
3759 // The values are implicitly truncated so sext vs. zext doesn't matter.
3760 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
3761 }
3762 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
3763}
3764
3765static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
3766 return N->getOpcode() == ISD::SIGN_EXTEND ||
3767 N->getOpcode() == ISD::ANY_EXTEND ||
3768 isExtendedBUILD_VECTOR(N, DAG, true);
3769}
3770
3771static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
3772 return N->getOpcode() == ISD::ZERO_EXTEND ||
3773 N->getOpcode() == ISD::ANY_EXTEND ||
3774 isExtendedBUILD_VECTOR(N, DAG, false);
3775}
3776
3777static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
3778 unsigned Opcode = N->getOpcode();
3779 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3780 SDNode *N0 = N->getOperand(0).getNode();
3781 SDNode *N1 = N->getOperand(1).getNode();
3782 return N0->hasOneUse() && N1->hasOneUse() &&
3783 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
3784 }
3785 return false;
3786}
3787
3788static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
3789 unsigned Opcode = N->getOpcode();
3790 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3791 SDNode *N0 = N->getOperand(0).getNode();
3792 SDNode *N1 = N->getOperand(1).getNode();
3793 return N0->hasOneUse() && N1->hasOneUse() &&
3794 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
3795 }
3796 return false;
3797}
3798
3799SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3800 SelectionDAG &DAG) const {
3801 // The rounding mode is in bits 23:22 of the FPCR.
3802 // The FPCR rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
3803 // The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3),
3804 // so that the shift and the AND get folded into a bitfield extract.
3805 SDLoc dl(Op);
3806
3807 SDValue Chain = Op.getOperand(0);
3808 SDValue FPCR_64 = DAG.getNode(
3809 ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
3810 {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
3811 Chain = FPCR_64.getValue(1);
3812 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
3813 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
3814 DAG.getConstant(1U << 22, dl, MVT::i32));
3815 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
3816 DAG.getConstant(22, dl, MVT::i32));
3817 SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
3818 DAG.getConstant(3, dl, MVT::i32));
3819 return DAG.getMergeValues({AND, Chain}, dl);
3820}
3821
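// Illustrative sketch, not part of the original file: the FLT_ROUNDS mapping
// computed above, written on a plain integer. It assumes, as the code does,
// that the rounding mode lives in FPCR bits [23:22]; adding 1 << 22 before the
// shift rotates the hardware encoding 0,1,2,3 into FLT_ROUNDS 1,2,3,0.
static unsigned fltRoundsFromFPCRSketch(uint64_t FPCR) {
  // (FPCR + (1 << 22)) >> 22 increments the two-bit RMode field; & 3 wraps it.
  return unsigned(((FPCR + (1u << 22)) >> 22) & 3);
}
// e.g. RMode == 3 (round toward zero) -> (3 + 1) & 3 == 0, FLT_ROUNDS "toward zero".
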
3822SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
3823 SelectionDAG &DAG) const {
3824 SDLoc DL(Op);
3825 SDValue Chain = Op->getOperand(0);
3826 SDValue RMValue = Op->getOperand(1);
3827
3828 // The rounding mode is in bits 23:22 of the FPCR.
3829 // The llvm.set.rounding argument value to the rounding mode in FPCR mapping
3830 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
3831 // (((arg - 1) & 3) << 22).
3832 //
3833 // The argument of llvm.set.rounding must be within the range [0, 3], so
3834 // NearestTiesToAway (4) is not handled here. It is the responsibility of the
3835 // code that generates llvm.set.rounding to ensure this condition.
3836
3837 // Calculate new value of FPCR[23:22].
3838 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
3839 DAG.getConstant(1, DL, MVT::i32));
3840 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
3841 DAG.getConstant(0x3, DL, MVT::i32));
3842 RMValue =
3843 DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
3844 DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
3845 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
3846
3847 // Get current value of FPCR.
3848 SDValue Ops[] = {
3849 Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
3850 SDValue FPCR =
3851 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
3852 Chain = FPCR.getValue(1);
3853 FPCR = FPCR.getValue(0);
3854
3855 // Put the new rounding mode into FPCR[23:22].
3856 const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
3857 FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
3858 DAG.getConstant(RMMask, DL, MVT::i64));
3859 FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
3860 SDValue Ops2[] = {
3861 Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
3862 FPCR};
3863 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
3864}
3865
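// Illustrative sketch, not part of the original file: the scalar arithmetic
// that LowerSET_ROUNDING performs on the FPCR value, assuming (as the code
// above does) that AArch64::RoundingBitsPos is 22 and the RMode mask is 3.
static uint64_t setRoundingSketch(uint64_t FPCR, unsigned LlvmRoundMode) {
  const unsigned RoundingBitsPos = 22;
  // llvm.set.rounding arguments 0,1,2,3 map to FPCR RMode values 3,0,1,2.
  uint64_t RMValue = uint64_t((LlvmRoundMode - 1) & 3) << RoundingBitsPos;
  const uint64_t RMMask = ~(uint64_t(3) << RoundingBitsPos); // clear old RMode
  return (FPCR & RMMask) | RMValue;                          // merge new RMode
}
// e.g. LlvmRoundMode == 1 (to nearest) writes RMode == 0 (round to nearest).
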
3866SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
3867 EVT VT = Op.getValueType();
3868
3869 // If SVE is available then i64 vector multiplications can also be made legal.
3870 bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
3871
3872 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
3873 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
3874
3875 // Multiplications are only custom-lowered for 128-bit vectors so that
3876 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
3877 assert(VT.is128BitVector() && VT.isInteger() &&
3878 "unexpected type for custom-lowering ISD::MUL");
3879 SDNode *N0 = Op.getOperand(0).getNode();
3880 SDNode *N1 = Op.getOperand(1).getNode();
3881 unsigned NewOpc = 0;
3882 bool isMLA = false;
3883 bool isN0SExt = isSignExtended(N0, DAG);
3884 bool isN1SExt = isSignExtended(N1, DAG);
3885 if (isN0SExt && isN1SExt)
3886 NewOpc = AArch64ISD::SMULL;
3887 else {
3888 bool isN0ZExt = isZeroExtended(N0, DAG);
3889 bool isN1ZExt = isZeroExtended(N1, DAG);
3890 if (isN0ZExt && isN1ZExt)
3891 NewOpc = AArch64ISD::UMULL;
3892 else if (isN1SExt || isN1ZExt) {
3893 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
3894 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
3895 if (isN1SExt && isAddSubSExt(N0, DAG)) {
3896 NewOpc = AArch64ISD::SMULL;
3897 isMLA = true;
3898 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
3899 NewOpc = AArch64ISD::UMULL;
3900 isMLA = true;
3901 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
3902 std::swap(N0, N1);
3903 NewOpc = AArch64ISD::UMULL;
3904 isMLA = true;
3905 }
3906 }
3907
3908 if (!NewOpc) {
3909 if (VT == MVT::v2i64)
3910 // Fall through to expand this. It is not legal.
3911 return SDValue();
3912 else
3913 // Other vector multiplications are legal.
3914 return Op;
3915 }
3916 }
3917
3918 // Legalize to a S/UMULL instruction
3919 SDLoc DL(Op);
3920 SDValue Op0;
3921 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
3922 if (!isMLA) {
3923 Op0 = skipExtensionForVectorMULL(N0, DAG);
3924 assert(Op0.getValueType().is64BitVector() &&
3925 Op1.getValueType().is64BitVector() &&
3926 "unexpected types for extended operands to VMULL");
3927 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
3928 }
3929 // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
3930 // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
3931 // This is beneficial for CPUs with accumulate forwarding such as Cortex-A53/A57.
3932 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
3933 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
3934 EVT Op1VT = Op1.getValueType();
3935 return DAG.getNode(N0->getOpcode(), DL, VT,
3936 DAG.getNode(NewOpc, DL, VT,
3937 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
3938 DAG.getNode(NewOpc, DL, VT,
3939 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
3940}
3941
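// Illustrative sketch, not part of the original file: the isMLA path above
// rewrites (ext A + ext B) * ext C as (S/UMULL A, C) + (S/UMULL B, C). Beyond
// the extension checks performed above, the rewrite rests on multiplication
// distributing over addition in wrapping integer arithmetic, which a scalar
// example demonstrates:
static bool mulDistributesSketch(uint64_t A, uint64_t B, uint64_t C) {
  // Both sides are evaluated modulo 2^64, so they agree even if A + B wraps.
  return (A + B) * C == A * C + B * C;   // always true
}
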
3942static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
3943 int Pattern) {
3944 return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
3945 DAG.getTargetConstant(Pattern, DL, MVT::i32));
3946}
3947
3948static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
3949 SDLoc DL(Op);
3950 EVT OutVT = Op.getValueType();
3951 SDValue InOp = Op.getOperand(1);
3952 EVT InVT = InOp.getValueType();
3953
3954 // Return the operand if the cast isn't changing type,
3955 // i.e. <n x 16 x i1> -> <n x 16 x i1>
3956 if (InVT == OutVT)
3957 return InOp;
3958
3959 SDValue Reinterpret =
3960 DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, InOp);
3961
3962 // If the argument converted to an svbool is a ptrue or a comparison, the
3963 // lanes introduced by the widening are zero by construction.
3964 switch (InOp.getOpcode()) {
3965 case AArch64ISD::SETCC_MERGE_ZERO:
3966 return Reinterpret;
3967 case ISD::INTRINSIC_WO_CHAIN:
3968 if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue)
3969 return Reinterpret;
3970 }
3971
3972 // Otherwise, zero the newly introduced lanes.
3973 SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
3974 SDValue MaskReinterpret =
3975 DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, Mask);
3976 return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
3977}
3978
3979SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3980 SelectionDAG &DAG) const {
3981 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3982 SDLoc dl(Op);
3983 switch (IntNo) {
3984 default: return SDValue(); // Don't custom lower most intrinsics.
3985 case Intrinsic::thread_pointer: {
3986 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3987 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
3988 }
3989 case Intrinsic::aarch64_neon_abs: {
3990 EVT Ty = Op.getValueType();
3991 if (Ty == MVT::i64) {
3992 SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
3993 Op.getOperand(1));
3994 Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
3995 return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
3996 } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
3997 return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
3998 } else {
3999 report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
4000 }
4001 }
4002 case Intrinsic::aarch64_neon_smax:
4003 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
4004 Op.getOperand(1), Op.getOperand(2));
4005 case Intrinsic::aarch64_neon_umax:
4006 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
4007 Op.getOperand(1), Op.getOperand(2));
4008 case Intrinsic::aarch64_neon_smin:
4009 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
4010 Op.getOperand(1), Op.getOperand(2));
4011 case Intrinsic::aarch64_neon_umin:
4012 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
4013 Op.getOperand(1), Op.getOperand(2));
4014
4015 case Intrinsic::aarch64_sve_sunpkhi:
4016 return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
4017 Op.getOperand(1));
4018 case Intrinsic::aarch64_sve_sunpklo:
4019 return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
4020 Op.getOperand(1));
4021 case Intrinsic::aarch64_sve_uunpkhi:
4022 return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
4023 Op.getOperand(1));
4024 case Intrinsic::aarch64_sve_uunpklo:
4025 return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
4026 Op.getOperand(1));
4027 case Intrinsic::aarch64_sve_clasta_n:
4028 return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
4029 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4030 case Intrinsic::aarch64_sve_clastb_n:
4031 return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
4032 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4033 case Intrinsic::aarch64_sve_lasta:
4034 return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
4035 Op.getOperand(1), Op.getOperand(2));
4036 case Intrinsic::aarch64_sve_lastb:
4037 return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
4038 Op.getOperand(1), Op.getOperand(2));
4039 case Intrinsic::aarch64_sve_rev:
4040 return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
4041 Op.getOperand(1));
4042 case Intrinsic::aarch64_sve_tbl:
4043 return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
4044 Op.getOperand(1), Op.getOperand(2));
4045 case Intrinsic::aarch64_sve_trn1:
4046 return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
4047 Op.getOperand(1), Op.getOperand(2));
4048 case Intrinsic::aarch64_sve_trn2:
4049 return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
4050 Op.getOperand(1), Op.getOperand(2));
4051 case Intrinsic::aarch64_sve_uzp1:
4052 return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
4053 Op.getOperand(1), Op.getOperand(2));
4054 case Intrinsic::aarch64_sve_uzp2:
4055 return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
4056 Op.getOperand(1), Op.getOperand(2));
4057 case Intrinsic::aarch64_sve_zip1:
4058 return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
4059 Op.getOperand(1), Op.getOperand(2));
4060 case Intrinsic::aarch64_sve_zip2:
4061 return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
4062 Op.getOperand(1), Op.getOperand(2));
4063 case Intrinsic::aarch64_sve_splice:
4064 return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
4065 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4066 case Intrinsic::aarch64_sve_ptrue:
4067 return getPTrue(DAG, dl, Op.getValueType(),
4068 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
4069 case Intrinsic::aarch64_sve_clz:
4070 return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
4071 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4072 case Intrinsic::aarch64_sve_cnt: {
4073 SDValue Data = Op.getOperand(3);
4074 // CTPOP only supports integer operands.
4075 if (Data.getValueType().isFloatingPoint())
4076 Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
4077 return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
4078 Op.getOperand(2), Data, Op.getOperand(1));
4079 }
4080 case Intrinsic::aarch64_sve_dupq_lane:
4081 return LowerDUPQLane(Op, DAG);
4082 case Intrinsic::aarch64_sve_convert_from_svbool:
4083 return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
4084 Op.getOperand(1));
4085 case Intrinsic::aarch64_sve_convert_to_svbool:
4086 return lowerConvertToSVBool(Op, DAG);
4087 case Intrinsic::aarch64_sve_fneg:
4088 return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
4089 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4090 case Intrinsic::aarch64_sve_frintp:
4091 return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
4092 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4093 case Intrinsic::aarch64_sve_frintm:
4094 return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
4095 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4096 case Intrinsic::aarch64_sve_frinti:
4097 return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
4098 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4099 case Intrinsic::aarch64_sve_frintx:
4100 return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
4101 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4102 case Intrinsic::aarch64_sve_frinta:
4103 return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
4104 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4105 case Intrinsic::aarch64_sve_frintn:
4106 return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
4107 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4108 case Intrinsic::aarch64_sve_frintz:
4109 return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
4110 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4111 case Intrinsic::aarch64_sve_ucvtf:
4112 return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
4113 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4114 Op.getOperand(1));
4115 case Intrinsic::aarch64_sve_scvtf:
4116 return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
4117 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4118 Op.getOperand(1));
4119 case Intrinsic::aarch64_sve_fcvtzu:
4120 return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
4121 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4122 Op.getOperand(1));
4123 case Intrinsic::aarch64_sve_fcvtzs:
4124 return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
4125 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4126 Op.getOperand(1));
4127 case Intrinsic::aarch64_sve_fsqrt:
4128 return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
4129 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4130 case Intrinsic::aarch64_sve_frecpx:
4131 return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
4132 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4133 case Intrinsic::aarch64_sve_fabs:
4134 return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4135 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4136 case Intrinsic::aarch64_sve_abs:
4137 return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4138 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4139 case Intrinsic::aarch64_sve_neg:
4140 return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
4141 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4142 case Intrinsic::aarch64_sve_insr: {
4143 SDValue Scalar = Op.getOperand(2);
4144 EVT ScalarTy = Scalar.getValueType();
4145 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
4146 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
4147
4148 return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
4149 Op.getOperand(1), Scalar);
4150 }
4151 case Intrinsic::aarch64_sve_rbit:
4152 return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
4153 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4154 Op.getOperand(1));
4155 case Intrinsic::aarch64_sve_revb:
4156 return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
4157 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4158 case Intrinsic::aarch64_sve_sxtb:
4159 return DAG.getNode(
4160 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4161 Op.getOperand(2), Op.getOperand(3),
4162 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4163 Op.getOperand(1));
4164 case Intrinsic::aarch64_sve_sxth:
4165 return DAG.getNode(
4166 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4167 Op.getOperand(2), Op.getOperand(3),
4168 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4169 Op.getOperand(1));
4170 case Intrinsic::aarch64_sve_sxtw:
4171 return DAG.getNode(
4172 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4173 Op.getOperand(2), Op.getOperand(3),
4174 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4175 Op.getOperand(1));
4176 case Intrinsic::aarch64_sve_uxtb:
4177 return DAG.getNode(
4178 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4179 Op.getOperand(2), Op.getOperand(3),
4180 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4181 Op.getOperand(1));
4182 case Intrinsic::aarch64_sve_uxth:
4183 return DAG.getNode(
4184 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4185 Op.getOperand(2), Op.getOperand(3),
4186 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4187 Op.getOperand(1));
4188 case Intrinsic::aarch64_sve_uxtw:
4189 return DAG.getNode(
4190 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4191 Op.getOperand(2), Op.getOperand(3),
4192 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4193 Op.getOperand(1));
4194
4195 case Intrinsic::localaddress: {
4196 const auto &MF = DAG.getMachineFunction();
4197 const auto *RegInfo = Subtarget->getRegisterInfo();
4198 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
4199 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
4200 Op.getSimpleValueType());
4201 }
4202
4203 case Intrinsic::eh_recoverfp: {
4204 // FIXME: This needs to be implemented to correctly handle highly aligned
4205 // stack objects. For now we simply return the incoming FP. Refer to D53541
4206 // for more details.
4207 SDValue FnOp = Op.getOperand(1);
4208 SDValue IncomingFPOp = Op.getOperand(2);
4209 GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
4210 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
4211 if (!Fn)
4212 report_fatal_error(
4213 "llvm.eh.recoverfp must take a function as the first argument");
4214 return IncomingFPOp;
4215 }
4216
4217 case Intrinsic::aarch64_neon_vsri:
4218 case Intrinsic::aarch64_neon_vsli: {
4219 EVT Ty = Op.getValueType();
4220
4221 if (!Ty.isVector())
4222 report_fatal_error("Unexpected type for aarch64_neon_vsli");
4223
4224 assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
4225
4226 bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
4227 unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
4228 return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
4229 Op.getOperand(3));
4230 }
4231
4232 case Intrinsic::aarch64_neon_srhadd:
4233 case Intrinsic::aarch64_neon_urhadd:
4234 case Intrinsic::aarch64_neon_shadd:
4235 case Intrinsic::aarch64_neon_uhadd: {
4236 bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4237 IntNo == Intrinsic::aarch64_neon_shadd);
4238 bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4239 IntNo == Intrinsic::aarch64_neon_urhadd);
4240 unsigned Opcode =
4241 IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
4242 : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
4243 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4244 Op.getOperand(2));
4245 }
4246 case Intrinsic::aarch64_neon_sabd:
4247 case Intrinsic::aarch64_neon_uabd: {
4248 unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU
4249 : ISD::ABDS;
4250 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4251 Op.getOperand(2));
4252 }
4253 case Intrinsic::aarch64_neon_uaddlp: {
4254 unsigned Opcode = AArch64ISD::UADDLP;
4255 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
4256 }
4257 case Intrinsic::aarch64_neon_sdot:
4258 case Intrinsic::aarch64_neon_udot:
4259 case Intrinsic::aarch64_sve_sdot:
4260 case Intrinsic::aarch64_sve_udot: {
4261 unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
4262 IntNo == Intrinsic::aarch64_sve_udot)
4263 ? AArch64ISD::UDOT
4264 : AArch64ISD::SDOT;
4265 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4266 Op.getOperand(2), Op.getOperand(3));
4267 }
4268 }
4269}
4270
4271bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
4272 if (VT.getVectorElementType() == MVT::i8 ||
4273 VT.getVectorElementType() == MVT::i16) {
4274 EltTy = MVT::i32;
4275 return true;
4276 }
4277 return false;
4278}
4279
4280bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
4281 if (VT.getVectorElementType() == MVT::i32 &&
4282 VT.getVectorElementCount().getKnownMinValue() >= 4 &&
4283 !VT.isFixedLengthVector())
4284 return true;
4285
4286 return false;
4287}
4288
4289bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
4290 return ExtVal.getValueType().isScalableVector() ||
4291 useSVEForFixedLengthVectorVT(ExtVal.getValueType(),
4292 /*OverrideNEON=*/true);
4293}
4294
4295unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
4296 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
4297 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
4298 AArch64ISD::GLD1_MERGE_ZERO},
4299 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
4300 AArch64ISD::GLD1_UXTW_MERGE_ZERO},
4301 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
4302 AArch64ISD::GLD1_MERGE_ZERO},
4303 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
4304 AArch64ISD::GLD1_SXTW_MERGE_ZERO},
4305 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
4306 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
4307 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
4308 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
4309 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4310 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
4311 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4312 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
4313 };
4314 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4315 return AddrModes.find(Key)->second;
4316}
4317
4318unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
4319 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
4320 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
4321 AArch64ISD::SST1_PRED},
4322 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
4323 AArch64ISD::SST1_UXTW_PRED},
4324 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
4325 AArch64ISD::SST1_PRED},
4326 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
4327 AArch64ISD::SST1_SXTW_PRED},
4328 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
4329 AArch64ISD::SST1_SCALED_PRED},
4330 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
4331 AArch64ISD::SST1_UXTW_SCALED_PRED},
4332 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4333 AArch64ISD::SST1_SCALED_PRED},
4334 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4335 AArch64ISD::SST1_SXTW_SCALED_PRED},
4336 };
4337 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4338 return AddrModes.find(Key)->second;
4339}
4340
4341unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
4342 switch (Opcode) {
4343 default:
4344 llvm_unreachable("unimplemented opcode");
4345 return Opcode;
4346 case AArch64ISD::GLD1_MERGE_ZERO:
4347 return AArch64ISD::GLD1S_MERGE_ZERO;
4348 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
4349 return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
4350 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
4351 return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
4352 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
4353 return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
4354 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
4355 return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
4356 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
4357 return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
4358 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
4359 return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
4360 }
4361}
4362
4363bool getGatherScatterIndexIsExtended(SDValue Index) {
4364 unsigned Opcode = Index.getOpcode();
4365 if (Opcode == ISD::SIGN_EXTEND_INREG)
4366 return true;
4367
4368 if (Opcode == ISD::AND) {
4369 SDValue Splat = Index.getOperand(1);
4370 if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
4371 return false;
4372 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
4373 if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
4374 return false;
4375 return true;
4376 }
4377
4378 return false;
4379}
4380
4381// If the base pointer of a masked gather or scatter is null, we
4382// may be able to swap BasePtr & Index and use the vector + register
4383// or vector + immediate addressing mode, e.g.
4384// VECTOR + REGISTER:
4385 // getelementptr nullptr, <vscale x N x T> (splat(%offset) + %indices)
4386 // -> getelementptr %offset, <vscale x N x T> %indices
4387 // VECTOR + IMMEDIATE:
4388 // getelementptr nullptr, <vscale x N x T> (splat(#x) + %indices)
4389// -> getelementptr #x, <vscale x N x T> %indices
4390void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
4391 unsigned &Opcode, bool IsGather,
4392 SelectionDAG &DAG) {
4393 if (!isNullConstant(BasePtr))
4394 return;
4395
4396 // FIXME: This will not match for fixed vector type codegen as the nodes in
4397 // question will have fixed<->scalable conversions around them. This should be
4398 // moved to a DAG combine or complex pattern so that it executes after all of
4399 // the fixed vector inserts and extracts have been removed. This deficiency
4400 // will result in a sub-optimal addressing mode being used, i.e. an ADD not
4401 // being folded into the scatter/gather.
4402 ConstantSDNode *Offset = nullptr;
4403 if (Index.getOpcode() == ISD::ADD)
4404 if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
4405 if (isa<ConstantSDNode>(SplatVal))
4406 Offset = cast<ConstantSDNode>(SplatVal);
4407 else {
4408 BasePtr = SplatVal;
4409 Index = Index->getOperand(0);
4410 return;
4411 }
4412 }
4413
4414 unsigned NewOp =
4415 IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
4416
4417 if (!Offset) {
4418 std::swap(BasePtr, Index);
4419 Opcode = NewOp;
4420 return;
4421 }
4422
4423 uint64_t OffsetVal = Offset->getZExtValue();
4424 unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
4425 auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
4426
4427 if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
4428 // Index is out of range for the immediate addressing mode
4429 BasePtr = ConstOffset;
4430 Index = Index->getOperand(0);
4431 return;
4432 }
4433
4434 // Immediate is in range
4435 Opcode = NewOp;
4436 BasePtr = Index->getOperand(0);
4437 Index = ConstOffset;
4438}
4439
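// Illustrative sketch, not part of the original file: the in-range test used
// above for the vector + immediate addressing mode. The immediate must address
// a whole element and lie within 31 elements of the base, which is exactly
// what the "% ScalarSizeInBytes" and "/ ScalarSizeInBytes > 31" checks encode.
static bool gatherImmInRangeSketch(uint64_t OffsetBytes, unsigned EltBytes) {
  return (OffsetBytes % EltBytes) == 0 && (OffsetBytes / EltBytes) <= 31;
}
// e.g. a 64-byte offset with 8-byte elements is element index 8: in range;
//      a 63-byte offset with 8-byte elements is not element-aligned: rejected.
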
4440SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
4441 SelectionDAG &DAG) const {
4442 SDLoc DL(Op);
4443 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
4444 assert(MGT && "Can only custom lower gather load nodes");
4445
4446 bool IsFixedLength = MGT->getMemoryVT().isFixedLengthVector();
4447
4448 SDValue Index = MGT->getIndex();
4449 SDValue Chain = MGT->getChain();
4450 SDValue PassThru = MGT->getPassThru();
4451 SDValue Mask = MGT->getMask();
4452 SDValue BasePtr = MGT->getBasePtr();
4453 ISD::LoadExtType ExtTy = MGT->getExtensionType();
4454
4455 ISD::MemIndexType IndexType = MGT->getIndexType();
4456 bool IsScaled =
4457 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4458 bool IsSigned =
4459 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4460 bool IdxNeedsExtend =
4461 getGatherScatterIndexIsExtended(Index) ||
4462 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4463 bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
4464
4465 EVT VT = PassThru.getSimpleValueType();
4466 EVT IndexVT = Index.getSimpleValueType();
4467 EVT MemVT = MGT->getMemoryVT();
4468 SDValue InputVT = DAG.getValueType(MemVT);
4469
4470 if (VT.getVectorElementType() == MVT::bf16 &&
4471 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4472 return SDValue();
4473
4474 if (IsFixedLength) {
4475 assert(Subtarget->useSVEForFixedLengthVectors() &&
4476 "Cannot lower when not using SVE for fixed vectors");
4477 if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
4478 IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
4479 MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
4480 } else {
4481 MemVT = getContainerForFixedLengthVector(DAG, MemVT);
4482 IndexVT = MemVT.changeTypeToInteger();
4483 }
4484 InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
4485 Mask = DAG.getNode(
4486 ISD::ZERO_EXTEND, DL,
4487 VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
4488 }
4489
4490 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
4491 PassThru = SDValue();
4492
4493 if (VT.isFloatingPoint() && !IsFixedLength) {
4494 // Handle FP data by using an integer gather and casting the result.
4495 if (PassThru) {
4496 EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4497 PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
4498 }
4499 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4500 }
4501
4502 SDVTList VTs = DAG.getVTList(IndexVT, MVT::Other);
4503
4504 if (getGatherScatterIndexIsExtended(Index))
4505 Index = Index.getOperand(0);
4506
4507 unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
4508 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4509 /*isGather=*/true, DAG);
4510
4511 if (ResNeedsSignExtend)
4512 Opcode = getSignExtendedGatherOpcode(Opcode);
4513
4514 if (IsFixedLength) {
4515 if (Index.getSimpleValueType().isFixedLengthVector())
4516 Index = convertToScalableVector(DAG, IndexVT, Index);
4517 if (BasePtr.getSimpleValueType().isFixedLengthVector())
4518 BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
4519 Mask = convertFixedMaskToScalableVector(Mask, DAG);
4520 }
4521
4522 SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT};
4523 SDValue Result = DAG.getNode(Opcode, DL, VTs, Ops);
4524 Chain = Result.getValue(1);
4525
4526 if (IsFixedLength) {
4527 Result = convertFromScalableVector(
4528 DAG, VT.changeVectorElementType(IndexVT.getVectorElementType()),
4529 Result);
4530 Result = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Result);
4531 Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
4532
4533 if (PassThru)
4534 Result = DAG.getSelect(DL, VT, MGT->getMask(), Result, PassThru);
4535 } else {
4536 if (PassThru)
4537 Result = DAG.getSelect(DL, IndexVT, Mask, Result, PassThru);
4538
4539 if (VT.isFloatingPoint())
4540 Result = getSVESafeBitCast(VT, Result, DAG);
4541 }
4542
4543 return DAG.getMergeValues({Result, Chain}, DL);
4544}
4545
4546SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
4547 SelectionDAG &DAG) const {
4548 SDLoc DL(Op);
4549 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
4550 assert(MSC && "Can only custom lower scatter store nodes");
4551
4552 bool IsFixedLength = MSC->getMemoryVT().isFixedLengthVector();
4553
4554 SDValue Index = MSC->getIndex();
4555 SDValue Chain = MSC->getChain();
4556 SDValue StoreVal = MSC->getValue();
4557 SDValue Mask = MSC->getMask();
4558 SDValue BasePtr = MSC->getBasePtr();
4559
4560 ISD::MemIndexType IndexType = MSC->getIndexType();
4561 bool IsScaled =
4562 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4563 bool IsSigned =
4564 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4565 bool NeedsExtend =
4566 getGatherScatterIndexIsExtended(Index) ||
4567 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4568
4569 EVT VT = StoreVal.getSimpleValueType();
4570 EVT IndexVT = Index.getSimpleValueType();
4571 SDVTList VTs = DAG.getVTList(MVT::Other);
4572 EVT MemVT = MSC->getMemoryVT();
4573 SDValue InputVT = DAG.getValueType(MemVT);
4574
4575 if (VT.getVectorElementType() == MVT::bf16 &&
4576 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4577 return SDValue();
4578
4579 if (IsFixedLength) {
4580 assert(Subtarget->useSVEForFixedLengthVectors() &&
4581 "Cannot lower when not using SVE for fixed vectors");
4582 if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
4583 IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
4584 MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
4585 } else {
4586 MemVT = getContainerForFixedLengthVector(DAG, MemVT);
4587 IndexVT = MemVT.changeTypeToInteger();
4588 }
4589 InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
4590
4591 StoreVal =
4592 DAG.getNode(ISD::BITCAST, DL, VT.changeTypeToInteger(), StoreVal);
4593 StoreVal = DAG.getNode(
4594 ISD::ANY_EXTEND, DL,
4595 VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
4596 StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
4597 Mask = DAG.getNode(
4598 ISD::ZERO_EXTEND, DL,
4599 VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
4600 } else if (VT.isFloatingPoint()) {
4601 // Handle FP data by casting the data so an integer scatter can be used.
4602 EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4603 StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
4604 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4605 }
4606
4607 if (getGatherScatterIndexIsExtended(Index))
4608 Index = Index.getOperand(0);
4609
4610 unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
4611 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4612 /*isGather=*/false, DAG);
4613
4614 if (IsFixedLength) {
4615 if (Index.getSimpleValueType().isFixedLengthVector())
4616 Index = convertToScalableVector(DAG, IndexVT, Index);
4617 if (BasePtr.getSimpleValueType().isFixedLengthVector())
4618 BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
4619 Mask = convertFixedMaskToScalableVector(Mask, DAG);
4620 }
4621
4622 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
4623 return DAG.getNode(Opcode, DL, VTs, Ops);
4624}
4625
4626SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
4627 SDLoc DL(Op);
4628 MaskedLoadSDNode *LoadNode = cast<MaskedLoadSDNode>(Op);
4629 assert(LoadNode && "Expected custom lowering of a masked load node");
4630 EVT VT = Op->getValueType(0);
4631
4632 if (useSVEForFixedLengthVectorVT(VT, true))
4633 return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
4634
4635 SDValue PassThru = LoadNode->getPassThru();
4636 SDValue Mask = LoadNode->getMask();
4637
4638 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
4639 return Op;
4640
4641 SDValue Load = DAG.getMaskedLoad(
4642 VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
4643 LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
4644 LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
4645 LoadNode->getExtensionType());
4646
4647 SDValue Result = DAG.getSelect(DL, VT, Mask, Load, PassThru);
4648
4649 return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
4650}
4651
4652// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
4653static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
4654 EVT VT, EVT MemVT,
4655 SelectionDAG &DAG) {
4656 assert(VT.isVector() && "VT should be a vector type");
4657 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
4658
4659 SDValue Value = ST->getValue();
4660
4661 // It first extends the promoted v4i16 to v8i16, truncates to v8i8, and extracts
4662 // the word lane which represents the v4i8 subvector. It optimizes the store
4663 // to:
4664 //
4665 // xtn v0.8b, v0.8h
4666 // str s0, [x0]
4667
4668 SDValue Undef = DAG.getUNDEF(MVT::i16);
4669 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
4670 {Undef, Undef, Undef, Undef});
4671
4672 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
4673 Value, UndefVec);
4674 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
4675
4676 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
4677 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4678 Trunc, DAG.getConstant(0, DL, MVT::i64));
4679
4680 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
4681 ST->getBasePtr(), ST->getMemOperand());
4682}
4683
4684 // Custom lowering for any store, vector or scalar, with or without a
4685 // truncate. Currently we only custom lower truncating stores from v4i16 to
4686 // v4i8 and volatile stores of i128.
4687SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
4688 SelectionDAG &DAG) const {
4689 SDLoc Dl(Op);
4690 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
4691 assert(StoreNode && "Can only custom lower store nodes");
4692
4693 SDValue Value = StoreNode->getValue();
4694
4695 EVT VT = Value.getValueType();
4696 EVT MemVT = StoreNode->getMemoryVT();
4697
4698 if (VT.isVector()) {
4699 if (useSVEForFixedLengthVectorVT(VT, true))
4700 return LowerFixedLengthVectorStoreToSVE(Op, DAG);
4701
4702 unsigned AS = StoreNode->getAddressSpace();
4703 Align Alignment = StoreNode->getAlign();
4704 if (Alignment < MemVT.getStoreSize() &&
4705 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
4706 StoreNode->getMemOperand()->getFlags(),
4707 nullptr)) {
4708 return scalarizeVectorStore(StoreNode, DAG);
4709 }
4710
4711 if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
4712 MemVT == MVT::v4i8) {
4713 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
4714 }
4715 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
4716 // the custom lowering, as there are no un-paired non-temporal stores and
4717 // legalization will break up 256 bit inputs.
4718 ElementCount EC = MemVT.getVectorElementCount();
4719 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
4720 EC.isKnownEven() &&
4721 ((MemVT.getScalarSizeInBits() == 8u ||
4722 MemVT.getScalarSizeInBits() == 16u ||
4723 MemVT.getScalarSizeInBits() == 32u ||
4724 MemVT.getScalarSizeInBits() == 64u))) {
4725 SDValue Lo =
4726 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4727 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4728 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
4729 SDValue Hi =
4730 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4731 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4732 StoreNode->getValue(),
4733 DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
4734 SDValue Result = DAG.getMemIntrinsicNode(
4735 AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
4736 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4737 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4738 return Result;
4739 }
4740 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
4741 return LowerStore128(Op, DAG);
4742 } else if (MemVT == MVT::i64x8) {
4743 SDValue Value = StoreNode->getValue();
4744 assert(Value->getValueType(0) == MVT::i64x8);
4745 SDValue Chain = StoreNode->getChain();
4746 SDValue Base = StoreNode->getBasePtr();
4747 EVT PtrVT = Base.getValueType();
4748 for (unsigned i = 0; i < 8; i++) {
4749 SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
4750 Value, DAG.getConstant(i, Dl, MVT::i32));
4751 SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
4752 DAG.getConstant(i * 8, Dl, PtrVT));
4753 Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
4754 StoreNode->getOriginalAlign());
4755 }
4756 return Chain;
4757 }
4758
4759 return SDValue();
4760}
4761
4762/// Lower atomic or volatile 128-bit stores to a single STP instruction.
4763SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
4764 SelectionDAG &DAG) const {
4765 MemSDNode *StoreNode = cast<MemSDNode>(Op);
4766 assert(StoreNode->getMemoryVT() == MVT::i128);
4767 assert(StoreNode->isVolatile() || StoreNode->isAtomic());
4768 assert(!StoreNode->isAtomic() ||
4769 StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
4770 StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
4771
4772 SDValue Value = StoreNode->getOpcode() == ISD::STORE
4773 ? StoreNode->getOperand(1)
4774 : StoreNode->getOperand(2);
4775 SDLoc DL(Op);
4776 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
4777 DAG.getConstant(0, DL, MVT::i64));
4778 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
4779 DAG.getConstant(1, DL, MVT::i64));
4780 SDValue Result = DAG.getMemIntrinsicNode(
4781 AArch64ISD::STP, DL, DAG.getVTList(MVT::Other),
4782 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4783 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4784 return Result;
4785}
4786
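// Illustrative sketch, not part of the original file: the lo/hi split feeding
// the STP above, written on a plain 128-bit integer instead of SDValues
// (assumes a compiler that provides __uint128_t, as Clang and GCC do).
static void splitI128Sketch(__uint128_t V, uint64_t &Lo, uint64_t &Hi) {
  Lo = uint64_t(V);        // EXTRACT_ELEMENT index 0: low 64 bits
  Hi = uint64_t(V >> 64);  // EXTRACT_ELEMENT index 1: high 64 bits
}
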
4787SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
4788 SelectionDAG &DAG) const {
4789 SDLoc DL(Op);
4790 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
4791 assert(LoadNode && "Expected custom lowering of a load node");
4792
4793 if (LoadNode->getMemoryVT() == MVT::i64x8) {
4794 SmallVector<SDValue, 8> Ops;
4795 SDValue Base = LoadNode->getBasePtr();
4796 SDValue Chain = LoadNode->getChain();
4797 EVT PtrVT = Base.getValueType();
4798 for (unsigned i = 0; i < 8; i++) {
4799 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
4800 DAG.getConstant(i * 8, DL, PtrVT));
4801 SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
4802 LoadNode->getPointerInfo(),
4803 LoadNode->getOriginalAlign());
4804 Ops.push_back(Part);
4805 Chain = SDValue(Part.getNode(), 1);
4806 }
4807 SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
4808 return DAG.getMergeValues({Loaded, Chain}, DL);
4809 }
4810
4811 // Custom lowering for extending v4i8 vector loads.
4812 EVT VT = Op->getValueType(0);
4813 assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
4814
4815 if (LoadNode->getMemoryVT() != MVT::v4i8)
4816 return SDValue();
4817
4818 unsigned ExtType;
4819 if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
4820 ExtType = ISD::SIGN_EXTEND;
4821 else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
4822 LoadNode->getExtensionType() == ISD::EXTLOAD)
4823 ExtType = ISD::ZERO_EXTEND;
4824 else
4825 return SDValue();
4826
4827 SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
4828 LoadNode->getBasePtr(), MachinePointerInfo());
4829 SDValue Chain = Load.getValue(1);
4830 SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
4831 SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
4832 SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
4833 Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
4834 DAG.getConstant(0, DL, MVT::i64));
4835 if (VT == MVT::v4i32)
4836 Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
4837 return DAG.getMergeValues({Ext, Chain}, DL);
4838}
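// Illustrative sketch, not from the LLVM sources: the v4i8 path above does one
// 32-bit (f32-sized) load and then widens each byte lane. A scalar C++ sketch
// of the zero-extending case, assuming little-endian lane order and an
// invented helper name:
#include <array>
#include <cstdint>
#include <cstring>
static std::array<uint16_t, 4> zextLoadV4i8Sketch(const uint8_t *P) {
  uint32_t Raw;
  std::memcpy(&Raw, P, sizeof(Raw));                         // single 32-bit load
  std::array<uint16_t, 4> Out;
  for (int i = 0; i < 4; ++i)                                // per-lane ZERO_EXTEND
    Out[i] = static_cast<uint16_t>((Raw >> (8 * i)) & 0xFF);
  return Out;
}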
4839
4840// Generate SUBS and CSEL for integer abs.
4841SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
4842 MVT VT = Op.getSimpleValueType();
4843
4844 if (VT.isVector())
4845 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
4846
4847 SDLoc DL(Op);
4848 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4849 Op.getOperand(0));
4850 // Generate SUBS & CSEL.
4851 SDValue Cmp =
4852 DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
4853 Op.getOperand(0), DAG.getConstant(0, DL, VT));
4854 return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
4855 DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
4856 Cmp.getValue(1));
4857}
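// Illustrative sketch, not from the LLVM sources: a scalar analogue of the
// SUBS+CSEL sequence above, where the PL ("plus or zero") condition keeps the
// original value and otherwise selects the negation (helper name invented):
#include <cstdint>
static int64_t absViaCselSketch(int64_t X) {
  // Negate through unsigned arithmetic to mirror the wrapping SUB in the DAG.
  int64_t Neg = static_cast<int64_t>(0ULL - static_cast<uint64_t>(X));
  bool PL = X >= 0;    // SUBS X, #0 sets flags; PL means "not negative"
  return PL ? X : Neg; // CSEL on the PL condition
}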
4858
4859SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
4860 SelectionDAG &DAG) const {
4861 LLVM_DEBUG(dbgs() << "Custom lowering: ");
4862 LLVM_DEBUG(Op.dump());
4863
4864 switch (Op.getOpcode()) {
4865 default:
4866 llvm_unreachable("unimplemented operand")::llvm::llvm_unreachable_internal("unimplemented operand", "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 4866)
;
4867 return SDValue();
4868 case ISD::BITCAST:
4869 return LowerBITCAST(Op, DAG);
4870 case ISD::GlobalAddress:
4871 return LowerGlobalAddress(Op, DAG);
4872 case ISD::GlobalTLSAddress:
4873 return LowerGlobalTLSAddress(Op, DAG);
4874 case ISD::SETCC:
4875 case ISD::STRICT_FSETCC:
4876 case ISD::STRICT_FSETCCS:
4877 return LowerSETCC(Op, DAG);
4878 case ISD::BR_CC:
4879 return LowerBR_CC(Op, DAG);
4880 case ISD::SELECT:
4881 return LowerSELECT(Op, DAG);
4882 case ISD::SELECT_CC:
4883 return LowerSELECT_CC(Op, DAG);
4884 case ISD::JumpTable:
4885 return LowerJumpTable(Op, DAG);
4886 case ISD::BR_JT:
4887 return LowerBR_JT(Op, DAG);
4888 case ISD::ConstantPool:
4889 return LowerConstantPool(Op, DAG);
4890 case ISD::BlockAddress:
4891 return LowerBlockAddress(Op, DAG);
4892 case ISD::VASTART:
4893 return LowerVASTART(Op, DAG);
4894 case ISD::VACOPY:
4895 return LowerVACOPY(Op, DAG);
4896 case ISD::VAARG:
4897 return LowerVAARG(Op, DAG);
4898 case ISD::ADDC:
4899 case ISD::ADDE:
4900 case ISD::SUBC:
4901 case ISD::SUBE:
4902 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
4903 case ISD::SADDO:
4904 case ISD::UADDO:
4905 case ISD::SSUBO:
4906 case ISD::USUBO:
4907 case ISD::SMULO:
4908 case ISD::UMULO:
4909 return LowerXALUO(Op, DAG);
4910 case ISD::FADD:
4911 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
4912 case ISD::FSUB:
4913 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
4914 case ISD::FMUL:
4915 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
4916 case ISD::FMA:
4917 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
4918 case ISD::FDIV:
4919 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
4920 case ISD::FNEG:
4921 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
4922 case ISD::FCEIL:
4923 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
4924 case ISD::FFLOOR:
4925 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
4926 case ISD::FNEARBYINT:
4927 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
4928 case ISD::FRINT:
4929 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
4930 case ISD::FROUND:
4931 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
4932 case ISD::FROUNDEVEN:
4933 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
4934 case ISD::FTRUNC:
4935 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
4936 case ISD::FSQRT:
4937 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
4938 case ISD::FABS:
4939 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
4940 case ISD::FP_ROUND:
4941 case ISD::STRICT_FP_ROUND:
4942 return LowerFP_ROUND(Op, DAG);
4943 case ISD::FP_EXTEND:
4944 return LowerFP_EXTEND(Op, DAG);
4945 case ISD::FRAMEADDR:
4946 return LowerFRAMEADDR(Op, DAG);
4947 case ISD::SPONENTRY:
4948 return LowerSPONENTRY(Op, DAG);
4949 case ISD::RETURNADDR:
4950 return LowerRETURNADDR(Op, DAG);
4951 case ISD::ADDROFRETURNADDR:
4952 return LowerADDROFRETURNADDR(Op, DAG);
4953 case ISD::CONCAT_VECTORS:
4954 return LowerCONCAT_VECTORS(Op, DAG);
4955 case ISD::INSERT_VECTOR_ELT:
4956 return LowerINSERT_VECTOR_ELT(Op, DAG);
4957 case ISD::EXTRACT_VECTOR_ELT:
4958 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4959 case ISD::BUILD_VECTOR:
4960 return LowerBUILD_VECTOR(Op, DAG);
4961 case ISD::VECTOR_SHUFFLE:
4962 return LowerVECTOR_SHUFFLE(Op, DAG);
4963 case ISD::SPLAT_VECTOR:
4964 return LowerSPLAT_VECTOR(Op, DAG);
4965 case ISD::EXTRACT_SUBVECTOR:
4966 return LowerEXTRACT_SUBVECTOR(Op, DAG);
4967 case ISD::INSERT_SUBVECTOR:
4968 return LowerINSERT_SUBVECTOR(Op, DAG);
4969 case ISD::SDIV:
4970 case ISD::UDIV:
4971 return LowerDIV(Op, DAG);
4972 case ISD::SMIN:
4973 case ISD::UMIN:
4974 case ISD::SMAX:
4975 case ISD::UMAX:
4976 return LowerMinMax(Op, DAG);
4977 case ISD::SRA:
4978 case ISD::SRL:
4979 case ISD::SHL:
4980 return LowerVectorSRA_SRL_SHL(Op, DAG);
4981 case ISD::SHL_PARTS:
4982 case ISD::SRL_PARTS:
4983 case ISD::SRA_PARTS:
4984 return LowerShiftParts(Op, DAG);
4985 case ISD::CTPOP:
4986 return LowerCTPOP(Op, DAG);
4987 case ISD::FCOPYSIGN:
4988 return LowerFCOPYSIGN(Op, DAG);
4989 case ISD::OR:
4990 return LowerVectorOR(Op, DAG);
4991 case ISD::XOR:
4992 return LowerXOR(Op, DAG);
4993 case ISD::PREFETCH:
4994 return LowerPREFETCH(Op, DAG);
4995 case ISD::SINT_TO_FP:
4996 case ISD::UINT_TO_FP:
4997 case ISD::STRICT_SINT_TO_FP:
4998 case ISD::STRICT_UINT_TO_FP:
4999 return LowerINT_TO_FP(Op, DAG);
5000 case ISD::FP_TO_SINT:
5001 case ISD::FP_TO_UINT:
5002 case ISD::STRICT_FP_TO_SINT:
5003 case ISD::STRICT_FP_TO_UINT:
5004 return LowerFP_TO_INT(Op, DAG);
5005 case ISD::FP_TO_SINT_SAT:
5006 case ISD::FP_TO_UINT_SAT:
5007 return LowerFP_TO_INT_SAT(Op, DAG);
5008 case ISD::FSINCOS:
5009 return LowerFSINCOS(Op, DAG);
5010 case ISD::FLT_ROUNDS_:
5011 return LowerFLT_ROUNDS_(Op, DAG);
5012 case ISD::SET_ROUNDING:
5013 return LowerSET_ROUNDING(Op, DAG);
5014 case ISD::MUL:
5015 return LowerMUL(Op, DAG);
5016 case ISD::MULHS:
5017 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED,
5018 /*OverrideNEON=*/true);
5019 case ISD::MULHU:
5020 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
5021 /*OverrideNEON=*/true);
5022 case ISD::INTRINSIC_WO_CHAIN:
5023 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
5024 case ISD::ATOMIC_STORE:
5025 if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
5026 assert(Subtarget->hasLSE2());
5027 return LowerStore128(Op, DAG);
5028 }
5029 return SDValue();
5030 case ISD::STORE:
5031 return LowerSTORE(Op, DAG);
5032 case ISD::MSTORE:
5033 return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
5034 case ISD::MGATHER:
5035 return LowerMGATHER(Op, DAG);
5036 case ISD::MSCATTER:
5037 return LowerMSCATTER(Op, DAG);
5038 case ISD::VECREDUCE_SEQ_FADD:
5039 return LowerVECREDUCE_SEQ_FADD(Op, DAG);
5040 case ISD::VECREDUCE_ADD:
5041 case ISD::VECREDUCE_AND:
5042 case ISD::VECREDUCE_OR:
5043 case ISD::VECREDUCE_XOR:
5044 case ISD::VECREDUCE_SMAX:
5045 case ISD::VECREDUCE_SMIN:
5046 case ISD::VECREDUCE_UMAX:
5047 case ISD::VECREDUCE_UMIN:
5048 case ISD::VECREDUCE_FADD:
5049 case ISD::VECREDUCE_FMAX:
5050 case ISD::VECREDUCE_FMIN:
5051 return LowerVECREDUCE(Op, DAG);
5052 case ISD::ATOMIC_LOAD_SUB:
5053 return LowerATOMIC_LOAD_SUB(Op, DAG);
5054 case ISD::ATOMIC_LOAD_AND:
5055 return LowerATOMIC_LOAD_AND(Op, DAG);
5056 case ISD::DYNAMIC_STACKALLOC:
5057 return LowerDYNAMIC_STACKALLOC(Op, DAG);
5058 case ISD::VSCALE:
5059 return LowerVSCALE(Op, DAG);
5060 case ISD::ANY_EXTEND:
5061 case ISD::SIGN_EXTEND:
5062 case ISD::ZERO_EXTEND:
5063 return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
5064 case ISD::SIGN_EXTEND_INREG: {
5065 // Only custom lower when ExtraVT has a legal byte based element type.
5066 EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5067 EVT ExtraEltVT = ExtraVT.getVectorElementType();
5068 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
5069 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
5070 return SDValue();
5071
5072 return LowerToPredicatedOp(Op, DAG,
5073 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
5074 }
5075 case ISD::TRUNCATE:
5076 return LowerTRUNCATE(Op, DAG);
5077 case ISD::MLOAD:
5078 return LowerMLOAD(Op, DAG);
5079 case ISD::LOAD:
5080 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
5081 return LowerFixedLengthVectorLoadToSVE(Op, DAG);
5082 return LowerLOAD(Op, DAG);
5083 case ISD::ADD:
5084 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
5085 case ISD::AND:
5086 return LowerToScalableOp(Op, DAG);
5087 case ISD::SUB:
5088 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
5089 case ISD::FMAXIMUM:
5090 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
5091 case ISD::FMAXNUM:
5092 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
5093 case ISD::FMINIMUM:
5094 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
5095 case ISD::FMINNUM:
5096 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
5097 case ISD::VSELECT:
5098 return LowerFixedLengthVectorSelectToSVE(Op, DAG);
5099 case ISD::ABS:
5100 return LowerABS(Op, DAG);
5101 case ISD::BITREVERSE:
5102 return LowerBitreverse(Op, DAG);
5103 case ISD::BSWAP:
5104 return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
5105 case ISD::CTLZ:
5106 return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
5107 /*OverrideNEON=*/true);
5108 case ISD::CTTZ:
5109 return LowerCTTZ(Op, DAG);
5110 case ISD::VECTOR_SPLICE:
5111 return LowerVECTOR_SPLICE(Op, DAG);
5112 }
5113}
5114
5115bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
5116 return !Subtarget->useSVEForFixedLengthVectors();
5117}
5118
5119bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
5120 EVT VT, bool OverrideNEON) const {
5121 if (!Subtarget->useSVEForFixedLengthVectors())
5122 return false;
5123
5124 if (!VT.isFixedLengthVector())
5125 return false;
5126
5127 // Don't use SVE for vectors we cannot scalarize if required.
5128 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
5129 // Fixed length predicates should be promoted to i8.
5130 // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
5131 case MVT::i1:
5132 default:
5133 return false;
5134 case MVT::i8:
5135 case MVT::i16:
5136 case MVT::i32:
5137 case MVT::i64:
5138 case MVT::f16:
5139 case MVT::f32:
5140 case MVT::f64:
5141 break;
5142 }
5143
5144 // All SVE implementations support NEON sized vectors.
5145 if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
5146 return true;
5147
5148 // Ensure NEON MVTs only belong to a single register class.
5149 if (VT.getFixedSizeInBits() <= 128)
5150 return false;
5151
5152 // Don't use SVE for types that don't fit.
5153 if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
5154 return false;
5155
5156 // TODO: Perhaps an artificial restriction, but worth having whilst getting
5157 // the base fixed length SVE support in place.
5158 if (!VT.isPow2VectorType())
5159 return false;
5160
5161 return true;
5162}
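// Worked example (not from the LLVM sources) of the checks above: with a
// minimum SVE vector width of 512 bits, v8i64 (512 bits, power-of-two, legal
// element type) is lowered via SVE, while v2i64 (128 bits, NEON-sized) and
// v3i64 (not a power of two) are not.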
5163
5164//===----------------------------------------------------------------------===//
5165// Calling Convention Implementation
5166//===----------------------------------------------------------------------===//
5167
5168/// Selects the correct CCAssignFn for a given CallingConvention value.
5169CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
5170 bool IsVarArg) const {
5171 switch (CC) {
5172 default:
5173 report_fatal_error("Unsupported calling convention.");
5174 case CallingConv::WebKit_JS:
5175 return CC_AArch64_WebKit_JS;
5176 case CallingConv::GHC:
5177 return CC_AArch64_GHC;
5178 case CallingConv::C:
5179 case CallingConv::Fast:
5180 case CallingConv::PreserveMost:
5181 case CallingConv::CXX_FAST_TLS:
5182 case CallingConv::Swift:
5183 case CallingConv::SwiftTail:
5184 case CallingConv::Tail:
5185 if (Subtarget->isTargetWindows() && IsVarArg)
5186 return CC_AArch64_Win64_VarArg;
5187 if (!Subtarget->isTargetDarwin())
5188 return CC_AArch64_AAPCS;
5189 if (!IsVarArg)
5190 return CC_AArch64_DarwinPCS;
5191 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
5192 : CC_AArch64_DarwinPCS_VarArg;
5193 case CallingConv::Win64:
5194 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
5195 case CallingConv::CFGuard_Check:
5196 return CC_AArch64_Win64_CFGuard_Check;
5197 case CallingConv::AArch64_VectorCall:
5198 case CallingConv::AArch64_SVE_VectorCall:
5199 return CC_AArch64_AAPCS;
5200 }
5201}
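// Worked example (not from the LLVM sources) of the selection above: a
// variadic C-convention call picks CC_AArch64_Win64_VarArg on Windows,
// CC_AArch64_AAPCS on other non-Darwin targets, and CC_AArch64_DarwinPCS_VarArg
// (or its ILP32 variant) on Darwin.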
5202
5203CCAssignFn *
5204AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
5205 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
5206 : RetCC_AArch64_AAPCS;
5207}
5208
5209SDValue AArch64TargetLowering::LowerFormalArguments(
5210 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
5211 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5212 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5213 MachineFunction &MF = DAG.getMachineFunction();
5214 MachineFrameInfo &MFI = MF.getFrameInfo();
5215 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
5216
5217 // Assign locations to all of the incoming arguments.
5218 SmallVector<CCValAssign, 16> ArgLocs;
5219 DenseMap<unsigned, SDValue> CopiedRegs;
5220 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
5221
5222 // At this point, Ins[].VT may already be promoted to i32. To correctly
5223 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5224 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5225 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
5226 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
5227 // LocVT.
5228 unsigned NumArgs = Ins.size();
5229 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
5230 unsigned CurArgIdx = 0;
5231 for (unsigned i = 0; i != NumArgs; ++i) {
5232 MVT ValVT = Ins[i].VT;
5233 if (Ins[i].isOrigArg()) {
5234 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
5235 CurArgIdx = Ins[i].getOrigArgIndex();
5236
5237 // Get type of the original argument.
5238 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
5239 /*AllowUnknown*/ true);
5240 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
5241 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5242 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5243 ValVT = MVT::i8;
5244 else if (ActualMVT == MVT::i16)
5245 ValVT = MVT::i16;
5246 }
5247 bool UseVarArgCC = false;
5248 if (IsWin64)
5249 UseVarArgCC = isVarArg;
5250 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
5251 bool Res =
5252 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
5253 assert(!Res && "Call operand has unhandled type");
5254 (void)Res;
5255 }
5256 SmallVector<SDValue, 16> ArgValues;
5257 unsigned ExtraArgLocs = 0;
5258 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5259 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5260
5261 if (Ins[i].Flags.isByVal()) {
5262 // Byval is used for HFAs in the PCS, but the system should work in a
5263 // non-compliant manner for larger structs.
5264 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5265 int Size = Ins[i].Flags.getByValSize();
5266 unsigned NumRegs = (Size + 7) / 8;
5267
5268 // FIXME: This works on big-endian for composite byvals, which are the common
5269 // case. It should also work for fundamental types too.
5270 unsigned FrameIdx =
5271 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
5272 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
5273 InVals.push_back(FrameIdxN);
5274
5275 continue;
5276 }
5277
5278 if (Ins[i].Flags.isSwiftAsync())
5279 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5280
5281 SDValue ArgValue;
5282 if (VA.isRegLoc()) {
5283 // Arguments stored in registers.
5284 EVT RegVT = VA.getLocVT();
5285 const TargetRegisterClass *RC;
5286
5287 if (RegVT == MVT::i32)
5288 RC = &AArch64::GPR32RegClass;
5289 else if (RegVT == MVT::i64)
5290 RC = &AArch64::GPR64RegClass;
5291 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
5292 RC = &AArch64::FPR16RegClass;
5293 else if (RegVT == MVT::f32)
5294 RC = &AArch64::FPR32RegClass;
5295 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
5296 RC = &AArch64::FPR64RegClass;
5297 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
5298 RC = &AArch64::FPR128RegClass;
5299 else if (RegVT.isScalableVector() &&
5300 RegVT.getVectorElementType() == MVT::i1)
5301 RC = &AArch64::PPRRegClass;
5302 else if (RegVT.isScalableVector())
5303 RC = &AArch64::ZPRRegClass;
5304 else
5305 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering")::llvm::llvm_unreachable_internal("RegVT not supported by FORMAL_ARGUMENTS Lowering"
, "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 5305)
;
5306
5307 // Transform the arguments in physical registers into virtual ones.
5308 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
5309 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
5310
5311 // If this is an 8, 16 or 32-bit value, it is really passed promoted
5312 // to 64 bits. Insert an assert[sz]ext to capture this, then
5313 // truncate to the right size.
5314 switch (VA.getLocInfo()) {
5315 default:
5316 llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 5316)
;
5317 case CCValAssign::Full:
5318 break;
5319 case CCValAssign::Indirect:
5320 assert(VA.getValVT().isScalableVector() &&
5321 "Only scalable vectors can be passed indirectly");
5322 break;
5323 case CCValAssign::BCvt:
5324 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
5325 break;
5326 case CCValAssign::AExt:
5327 case CCValAssign::SExt:
5328 case CCValAssign::ZExt:
5329 break;
5330 case CCValAssign::AExtUpper:
5331 ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
5332 DAG.getConstant(32, DL, RegVT));
5333 ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
5334 break;
5335 }
5336 } else { // VA.isRegLoc()
5337 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
5338 unsigned ArgOffset = VA.getLocMemOffset();
5339 unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
5340 ? VA.getLocVT().getSizeInBits()
5341 : VA.getValVT().getSizeInBits()) / 8;
5342
5343 uint32_t BEAlign = 0;
5344 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
5345 !Ins[i].Flags.isInConsecutiveRegs())
5346 BEAlign = 8 - ArgSize;
5347
5348 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
5349
5350 // Create load nodes to retrieve arguments from the stack.
5351 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5352
5353 // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
5354 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
5355 MVT MemVT = VA.getValVT();
5356
5357 switch (VA.getLocInfo()) {
5358 default:
5359 break;
5360 case CCValAssign::Trunc:
5361 case CCValAssign::BCvt:
5362 MemVT = VA.getLocVT();
5363 break;
5364 case CCValAssign::Indirect:
5365 assert(VA.getValVT().isScalableVector() &&
5366 "Only scalable vectors can be passed indirectly");
5367 MemVT = VA.getLocVT();
5368 break;
5369 case CCValAssign::SExt:
5370 ExtType = ISD::SEXTLOAD;
5371 break;
5372 case CCValAssign::ZExt:
5373 ExtType = ISD::ZEXTLOAD;
5374 break;
5375 case CCValAssign::AExt:
5376 ExtType = ISD::EXTLOAD;
5377 break;
5378 }
5379
5380 ArgValue =
5381 DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
5382 MachinePointerInfo::getFixedStack(MF, FI), MemVT);
5383 }
5384
5385 if (VA.getLocInfo() == CCValAssign::Indirect) {
5386 assert(VA.getValVT().isScalableVector() &&
5387 "Only scalable vectors can be passed indirectly");
5388
5389 uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
5390 unsigned NumParts = 1;
5391 if (Ins[i].Flags.isInConsecutiveRegs()) {
5392 assert(!Ins[i].Flags.isInConsecutiveRegsLast());
5393 while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
5394 ++NumParts;
5395 }
5396
5397 MVT PartLoad = VA.getValVT();
5398 SDValue Ptr = ArgValue;
5399
5400 // Ensure we generate all loads for each tuple part, whilst updating the
5401 // pointer after each load correctly using vscale.
5402 while (NumParts > 0) {
5403 ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
5404 InVals.push_back(ArgValue);
5405 NumParts--;
5406 if (NumParts > 0) {
5407 SDValue BytesIncrement = DAG.getVScale(
5408 DL, Ptr.getValueType(),
5409 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
5410 SDNodeFlags Flags;
5411 Flags.setNoUnsignedWrap(true);
5412 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5413 BytesIncrement, Flags);
5414 ExtraArgLocs++;
5415 i++;
5416 }
5417 }
5418 } else {
5419 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
5420 ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
5421 ArgValue, DAG.getValueType(MVT::i32));
5422
5423 // i1 arguments are zero-extended to i8 by the caller. Emit a
5424 // hint to reflect this.
5425 if (Ins[i].isOrigArg()) {
5426 Argument *OrigArg = MF.getFunction().getArg(Ins[i].getOrigArgIndex());
5427 if (OrigArg->getType()->isIntegerTy(1)) {
5428 if (!Ins[i].Flags.isZExt()) {
5429 ArgValue = DAG.getNode(AArch64ISD::ASSERT_ZEXT_BOOL, DL,
5430 ArgValue.getValueType(), ArgValue);
5431 }
5432 }
5433 }
5434
5435 InVals.push_back(ArgValue);
5436 }
5437 }
5438 assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
5439
5440 // varargs
5441 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5442 if (isVarArg) {
5443 if (!Subtarget->isTargetDarwin() || IsWin64) {
5444 // The AAPCS variadic function ABI is identical to the non-variadic
5445 // one. As a result there may be more arguments in registers and we should
5446 // save them for future reference.
5447 // Win64 variadic functions also pass arguments in registers, but all float
5448 // arguments are passed in integer registers.
5449 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
5450 }
5451
5452 // This will point to the next argument passed via stack.
5453 unsigned StackOffset = CCInfo.getNextStackOffset();
5454 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
5455 StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
5456 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
5457
5458 if (MFI.hasMustTailInVarArgFunc()) {
5459 SmallVector<MVT, 2> RegParmTypes;
5460 RegParmTypes.push_back(MVT::i64);
5461 RegParmTypes.push_back(MVT::f128);
5462 // Compute the set of forwarded registers. The rest are scratch.
5463 SmallVectorImpl<ForwardedRegister> &Forwards =
5464 FuncInfo->getForwardedMustTailRegParms();
5465 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
5466 CC_AArch64_AAPCS);
5467
5468 // Conservatively forward X8, since it might be used for aggregate return.
5469 if (!CCInfo.isAllocated(AArch64::X8)) {
5470 unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
5471 Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
5472 }
5473 }
5474 }
5475
5476 // On Windows, InReg pointers must be returned, so record the pointer in a
5477 // virtual register at the start of the function so it can be returned in the
5478 // epilogue.
5479 if (IsWin64) {
5480 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
5481 if (Ins[I].Flags.isInReg()) {
5482 assert(!FuncInfo->getSRetReturnReg());
5483
5484 MVT PtrTy = getPointerTy(DAG.getDataLayout());
5485 Register Reg =
5486 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
5487 FuncInfo->setSRetReturnReg(Reg);
5488
5489 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
5490 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
5491 break;
5492 }
5493 }
5494 }
5495
5496 unsigned StackArgSize = CCInfo.getNextStackOffset();
5497 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5498 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
5499 // This is a non-standard ABI so by fiat I say we're allowed to make full
5500 // use of the stack area to be popped, which must be aligned to 16 bytes in
5501 // any case:
5502 StackArgSize = alignTo(StackArgSize, 16);
5503
5504 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
5505 // a multiple of 16.
5506 FuncInfo->setArgumentStackToRestore(StackArgSize);
5507
5508 // This realignment carries over to the available bytes below. Our own
5509 // callers will guarantee the space is free by giving an aligned value to
5510 // CALLSEQ_START.
5511 }
5512 // Even if we're not expected to free up the space, it's useful to know how
5513 // much is there while considering tail calls (because we can reuse it).
5514 FuncInfo->setBytesInStackArgArea(StackArgSize);
5515
5516 if (Subtarget->hasCustomCallingConv())
5517 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
5518
5519 return Chain;
5520}
5521
5522void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
5523 SelectionDAG &DAG,
5524 const SDLoc &DL,
5525 SDValue &Chain) const {
5526 MachineFunction &MF = DAG.getMachineFunction();
5527 MachineFrameInfo &MFI = MF.getFrameInfo();
5528 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5529 auto PtrVT = getPointerTy(DAG.getDataLayout());
5530 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
5531
5532 SmallVector<SDValue, 8> MemOps;
5533
5534 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
5535 AArch64::X3, AArch64::X4, AArch64::X5,
5536 AArch64::X6, AArch64::X7 };
5537 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
5538 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
5539
5540 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
5541 int GPRIdx = 0;
5542 if (GPRSaveSize != 0) {
5543 if (IsWin64) {
5544 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
5545 if (GPRSaveSize & 15)
5546 // The extra size here, if triggered, will always be 8.
5547 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
5548 } else
5549 GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
5550
5551 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
5552
5553 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
5554 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
5555 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
5556 SDValue Store =
5557 DAG.getStore(Val.getValue(1), DL, Val, FIN,
5558 IsWin64 ? MachinePointerInfo::getFixedStack(
5559 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
5560 : MachinePointerInfo::getStack(MF, i * 8));
5561 MemOps.push_back(Store);
5562 FIN =
5563 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
5564 }
5565 }
5566 FuncInfo->setVarArgsGPRIndex(GPRIdx);
5567 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
5568
5569 if (Subtarget->hasFPARMv8() && !IsWin64) {
5570 static const MCPhysReg FPRArgRegs[] = {
5571 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
5572 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
5573 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
5574 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
5575
5576 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
5577 int FPRIdx = 0;
5578 if (FPRSaveSize != 0) {
5579 FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
5580
5581 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
5582
5583 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
5584 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
5585 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
5586
5587 SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
5588 MachinePointerInfo::getStack(MF, i * 16));
5589 MemOps.push_back(Store);
5590 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
5591 DAG.getConstant(16, DL, PtrVT));
5592 }
5593 }
5594 FuncInfo->setVarArgsFPRIndex(FPRIdx);
5595 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
5596 }
5597
5598 if (!MemOps.empty()) {
5599 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
5600 }
5601}
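// Worked example (not from the LLVM sources) of the sizing above: a variadic
// callee whose fixed arguments occupy X0-X2 has FirstVariadicGPR == 3, so
// GPRSaveSize = 8 * (8 - 3) = 40 bytes; on Win64 an extra 8-byte fixed object
// pads the save area up to the 16-byte-aligned 48 bytes.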
5602
5603/// LowerCallResult - Lower the result values of a call into the
5604/// appropriate copies out of appropriate physical registers.
5605SDValue AArch64TargetLowering::LowerCallResult(
5606 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5607 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5608 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
5609 SDValue ThisVal) const {
5610 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
5611 // Assign locations to each value returned by this call.
5612 SmallVector<CCValAssign, 16> RVLocs;
5613 DenseMap<unsigned, SDValue> CopiedRegs;
5614 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5615 *DAG.getContext());
5616 CCInfo.AnalyzeCallResult(Ins, RetCC);
5617
5618 // Copy all of the result registers out of their specified physreg.
5619 for (unsigned i = 0; i != RVLocs.size(); ++i) {
5620 CCValAssign VA = RVLocs[i];
5621
5622 // Pass 'this' value directly from the argument to return value, to avoid
5623 // reg unit interference
5624 if (i == 0 && isThisReturn) {
5625 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
5626 "unexpected return calling convention register assignment");
5627 InVals.push_back(ThisVal);
5628 continue;
5629 }
5630
5631 // Avoid copying a physreg twice since RegAllocFast is incompetent and only
5632 // allows one use of a physreg per block.
5633 SDValue Val = CopiedRegs.lookup(VA.getLocReg());
5634 if (!Val) {
5635 Val =
5636 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
5637 Chain = Val.getValue(1);
5638 InFlag = Val.getValue(2);
5639 CopiedRegs[VA.getLocReg()] = Val;
5640 }
5641
5642 switch (VA.getLocInfo()) {
5643 default:
5644 llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 5644)
;
5645 case CCValAssign::Full:
5646 break;
5647 case CCValAssign::BCvt:
5648 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5649 break;
5650 case CCValAssign::AExtUpper:
5651 Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
5652 DAG.getConstant(32, DL, VA.getLocVT()));
5653 LLVM_FALLTHROUGH;
5654 case CCValAssign::AExt:
5655 LLVM_FALLTHROUGH;
5656 case CCValAssign::ZExt:
5657 Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
5658 break;
5659 }
5660
5661 InVals.push_back(Val);
5662 }
5663
5664 return Chain;
5665}
5666
5667/// Return true if the calling convention is one that we can guarantee TCO for.
5668static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
5669 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
5670 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
5671}
5672
5673/// Return true if we might ever do TCO for calls with this calling convention.
5674static bool mayTailCallThisCC(CallingConv::ID CC) {
5675 switch (CC) {
5676 case CallingConv::C:
5677 case CallingConv::AArch64_SVE_VectorCall:
5678 case CallingConv::PreserveMost:
5679 case CallingConv::Swift:
5680 case CallingConv::SwiftTail:
5681 case CallingConv::Tail:
5682 case CallingConv::Fast:
5683 return true;
5684 default:
5685 return false;
5686 }
5687}
5688
5689bool AArch64TargetLowering::isEligibleForTailCallOptimization(
5690 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
5691 const SmallVectorImpl<ISD::OutputArg> &Outs,
5692 const SmallVectorImpl<SDValue> &OutVals,
5693 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
5694 if (!mayTailCallThisCC(CalleeCC))
5695 return false;
5696
5697 MachineFunction &MF = DAG.getMachineFunction();
5698 const Function &CallerF = MF.getFunction();
5699 CallingConv::ID CallerCC = CallerF.getCallingConv();
5700
5701 // Functions using the C or Fast calling convention that have an SVE signature
5702 // preserve more registers and should assume the SVE_VectorCall CC.
5703 // The check for matching callee-saved regs will determine whether it is
5704 // eligible for TCO.
5705 if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
5706 AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
5707 CallerCC = CallingConv::AArch64_SVE_VectorCall;
5708
5709 bool CCMatch = CallerCC == CalleeCC;
5710
5711 // When using the Windows calling convention on a non-windows OS, we want
5712 // to back up and restore X18 in such functions; we can't do a tail call
5713 // from those functions.
5714 if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
5715 CalleeCC != CallingConv::Win64)
5716 return false;
5717
5718 // Byval parameters hand the function a pointer directly into the stack area
5719 // we want to reuse during a tail call. Working around this *is* possible (see
5720 // X86) but less efficient and uglier in LowerCall.
5721 for (Function::const_arg_iterator i = CallerF.arg_begin(),
5722 e = CallerF.arg_end();
5723 i != e; ++i) {
5724 if (i->hasByValAttr())
5725 return false;
5726
5727 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
5728 // In this case, it is necessary to save/restore X0 in the callee. Tail
5729 // call opt interferes with this. So we disable tail call opt when the
5730 // caller has an argument with "inreg" attribute.
5731
5732 // FIXME: Check whether the callee also has an "inreg" argument.
5733 if (i->hasInRegAttr())
5734 return false;
5735 }
5736
5737 if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
5738 return CCMatch;
5739
5740 // Externally-defined functions with weak linkage should not be
5741 // tail-called on AArch64 when the OS does not support dynamic
5742 // pre-emption of symbols, as the AAELF spec requires normal calls
5743 // to undefined weak functions to be replaced with a NOP or jump to the
5744 // next instruction. The behaviour of branch instructions in this
5745 // situation (as used for tail calls) is implementation-defined, so we
5746 // cannot rely on the linker replacing the tail call with a return.
5747 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5748 const GlobalValue *GV = G->getGlobal();
5749 const Triple &TT = getTargetMachine().getTargetTriple();
5750 if (GV->hasExternalWeakLinkage() &&
5751 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
5752 return false;
5753 }
5754
5755 // Now we search for cases where we can use a tail call without changing the
5756 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
5757 // concept.
5758
5759 // I want anyone implementing a new calling convention to think long and hard
5760 // about this assert.
5761 assert((!isVarArg || CalleeCC == CallingConv::C) &&
5762 "Unexpected variadic calling convention");
5763
5764 LLVMContext &C = *DAG.getContext();
5765 if (isVarArg && !Outs.empty()) {
5766 // At least two cases here: if caller is fastcc then we can't have any
5767 // memory arguments (we'd be expected to clean up the stack afterwards). If
5768 // caller is C then we could potentially use its argument area.
5769
5770 // FIXME: for now we take the most conservative of these in both cases:
5771 // disallow all variadic memory operands.
5772 SmallVector<CCValAssign, 16> ArgLocs;
5773 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5774
5775 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
5776 for (const CCValAssign &ArgLoc : ArgLocs)
5777 if (!ArgLoc.isRegLoc())
5778 return false;
5779 }
5780
5781 // Check that the call results are passed in the same way.
5782 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
5783 CCAssignFnForCall(CalleeCC, isVarArg),
5784 CCAssignFnForCall(CallerCC, isVarArg)))
5785 return false;
5786 // The callee has to preserve all registers the caller needs to preserve.
5787 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5788 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5789 if (!CCMatch) {
5790 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5791 if (Subtarget->hasCustomCallingConv()) {
5792 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
5793 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
5794 }
5795 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5796 return false;
5797 }
5798
5799 // Nothing more to check if the callee is taking no arguments
5800 if (Outs.empty())
5801 return true;
5802
5803 SmallVector<CCValAssign, 16> ArgLocs;
5804 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5805
5806 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
5807
5808 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5809
5810 // If any of the arguments is passed indirectly, it must be SVE, so the
5811 // 'getBytesInStackArgArea' is not sufficient to determine whether we need to
5812 // allocate space on the stack. That is why we check this explicitly here:
5813 // if any argument is passed indirectly, the call cannot be a tailcall.
5814 if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
5815 assert((A.getLocInfo() != CCValAssign::Indirect ||
5816 A.getValVT().isScalableVector()) &&
5817 "Expected value to be scalable");
5818 return A.getLocInfo() == CCValAssign::Indirect;
5819 }))
5820 return false;
5821
5822 // If the stack arguments for this call do not fit into our own save area then
5823 // the call cannot be made tail.
5824 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
5825 return false;
5826
5827 const MachineRegisterInfo &MRI = MF.getRegInfo();
5828 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5829 return false;
5830
5831 return true;
5832}
5833
5834SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
5835 SelectionDAG &DAG,
5836 MachineFrameInfo &MFI,
5837 int ClobberedFI) const {
5838 SmallVector<SDValue, 8> ArgChains;
5839 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
5840 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
5841
5842 // Include the original chain at the beginning of the list. When this is
5843 // used by target LowerCall hooks, this helps legalize find the
5844 // CALLSEQ_BEGIN node.
5845 ArgChains.push_back(Chain);
5846
5847 // Add a chain value for each stack argument corresponding
5848 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
5849 UE = DAG.getEntryNode().getNode()->use_end();
5850 U != UE; ++U)
5851 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
5852 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
5853 if (FI->getIndex() < 0) {
5854 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
5855 int64_t InLastByte = InFirstByte;
5856 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
5857
5858 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
5859 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
5860 ArgChains.push_back(SDValue(L, 1));
5861 }
5862
5863 // Build a tokenfactor for all the chains.
5864 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
5865}
5866
5867bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
5868 bool TailCallOpt) const {
5869 return (CallCC == CallingConv::Fast && TailCallOpt) ||
5870 CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
5871}
5872
5873// Check if the value is zero-extended from i1 to i8
5874static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
5875 unsigned SizeInBits = Arg.getValueType().getSizeInBits();
5876 if (SizeInBits < 8)
5877 return false;
5878
5879 APInt LowBits(SizeInBits, 0xFF);
5880 APInt RequredZero(SizeInBits, 0xFE);
5881 KnownBits Bits = DAG.computeKnownBits(Arg, LowBits, 4);
5882 bool ZExtBool = (Bits.Zero & RequredZero) == RequredZero;
5883 return ZExtBool;
5884}
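// Illustrative sketch, not from the LLVM sources: the known-bits query above
// accepts an argument only when every bit other than bit 0 is known to be
// zero, i.e. it is already a valid zero-extended i1. The same condition on a
// concrete byte (helper name invented):
#include <cstdint>
static bool isZExtBoolByteSketch(uint8_t V) {
  return (V & 0xFEu) == 0; // only 0 and 1 pass, matching the 0xFE mask above
}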
5885
5886/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
5887/// and add input and output parameter nodes.
5888SDValue
5889AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
5890 SmallVectorImpl<SDValue> &InVals) const {
5891 SelectionDAG &DAG = CLI.DAG;
5892 SDLoc &DL = CLI.DL;
5893 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
5894 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
5895 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
5896 SDValue Chain = CLI.Chain;
5897 SDValue Callee = CLI.Callee;
5898 bool &IsTailCall = CLI.IsTailCall;
5899 CallingConv::ID CallConv = CLI.CallConv;
5900 bool IsVarArg = CLI.IsVarArg;
5901
5902 MachineFunction &MF = DAG.getMachineFunction();
5903 MachineFunction::CallSiteInfo CSInfo;
5904 bool IsThisReturn = false;
5905
5906 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5907 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5908 bool IsSibCall = false;
5909 bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CallConv);
5910
5911 // Check callee args/returns for SVE registers and set calling convention
5912 // accordingly.
5913 if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
5914 bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
5915 return Out.VT.isScalableVector();
5916 });
5917 bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
5918 return In.VT.isScalableVector();
5919 });
5920
5921 if (CalleeInSVE || CalleeOutSVE)
5922 CallConv = CallingConv::AArch64_SVE_VectorCall;
5923 }
5924
5925 if (IsTailCall) {
5926 // Check if it's really possible to do a tail call.
5927 IsTailCall = isEligibleForTailCallOptimization(
5928 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
5929
5930 // A sibling call is one where we're under the usual C ABI and not planning
5931 // to change that but can still do a tail call:
5932 if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail &&
5933 CallConv != CallingConv::SwiftTail)
5934 IsSibCall = true;
5935
5936 if (IsTailCall)
5937 ++NumTailCalls;
5938 }
5939
5940 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
5941 report_fatal_error("failed to perform tail call elimination on a call "
5942 "site marked musttail");
5943
5944 // Analyze operands of the call, assigning locations to each operand.
5945 SmallVector<CCValAssign, 16> ArgLocs;
5946 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5947
5948 if (IsVarArg) {
5949 // Handle fixed and variable vector arguments differently.
5950 // Variable vector arguments always go into memory.
5951 unsigned NumArgs = Outs.size();
5952
5953 for (unsigned i = 0; i != NumArgs; ++i) {
5954 MVT ArgVT = Outs[i].VT;
5955 if (!Outs[i].IsFixed && ArgVT.isScalableVector())
5956 report_fatal_error("Passing SVE types to variadic functions is "
5957 "currently not supported");
5958
5959 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5960 bool UseVarArgCC = !Outs[i].IsFixed;
5961 // On Windows, the fixed arguments in a vararg call are passed in GPRs
5962 // too, so use the vararg CC to force them to integer registers.
5963 if (IsCalleeWin64)
5964 UseVarArgCC = true;
5965 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
5966 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
5967 assert(!Res && "Call operand has unhandled type");
5968 (void)Res;
5969 }
5970 } else {
5971 // At this point, Outs[].VT may already be promoted to i32. To correctly
5972 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5973 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5974 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
5975 // we use a special version of AnalyzeCallOperands to pass in ValVT and
5976 // LocVT.
5977 unsigned NumArgs = Outs.size();
5978 for (unsigned i = 0; i != NumArgs; ++i) {
5979 MVT ValVT = Outs[i].VT;
5980 // Get type of the original argument.
5981 EVT ActualVT = getValueType(DAG.getDataLayout(),
5982 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
5983 /*AllowUnknown*/ true);
5984 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
5985 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5986 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5987 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5988 ValVT = MVT::i8;
5989 else if (ActualMVT == MVT::i16)
5990 ValVT = MVT::i16;
5991
5992 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
5993 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
5994 assert(!Res && "Call operand has unhandled type");
5995 (void)Res;
5996 }
5997 }
5998
5999 // Get a count of how many bytes are to be pushed on the stack.
6000 unsigned NumBytes = CCInfo.getNextStackOffset();
6001
6002 if (IsSibCall) {
6003 // Since we're not changing the ABI to make this a tail call, the memory
6004 // operands are already available in the caller's incoming argument space.
6005 NumBytes = 0;
6006 }
6007
6008 // FPDiff is the byte offset of the call's argument area from the callee's.
6009 // Stores to callee stack arguments will be placed in FixedStackSlots offset
6010 // by this amount for a tail call. In a sibling call it must be 0 because the
6011 // caller will deallocate the entire stack and the callee still expects its
6012 // arguments to begin at SP+0. Completely unused for non-tail calls.
6013 int FPDiff = 0;
6014
6015 if (IsTailCall && !IsSibCall) {
6016 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
6017
6018 // Since callee will pop argument stack as a tail call, we must keep the
6019 // popped size 16-byte aligned.
6020 NumBytes = alignTo(NumBytes, 16);
6021
6022 // FPDiff will be negative if this tail call requires more space than we
6023 // would automatically have in our incoming argument space. Positive if we
6024 // can actually shrink the stack.
6025 FPDiff = NumReusableBytes - NumBytes;
6026
6027 // Update the required reserved area if this is the tail call requiring the
6028 // most argument stack space.
6029 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
6030 FuncInfo->setTailCallReservedStack(-FPDiff);
6031
6032 // The stack pointer must be 16-byte aligned at all times it's used for a
6033 // memory operation, which in practice means at *all* times and in
6034 // particular across call boundaries. Therefore our own arguments started at
6035 // a 16-byte aligned SP and the delta applied for the tail call should
6036 // satisfy the same constraint.
6037 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
6038 }
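// Worked example (not from the LLVM sources) of FPDiff above: if the caller
// reserved 32 bytes of incoming argument space and this tail call needs 48
// bytes after 16-byte alignment, FPDiff = 32 - 48 = -16, so 16 extra bytes are
// recorded via setTailCallReservedStack.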
6039
6040 // Adjust the stack pointer for the new arguments...
6041 // These operations are automatically eliminated by the prolog/epilog pass
6042 if (!IsSibCall)
6043 Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL);
6044
6045 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
6046 getPointerTy(DAG.getDataLayout()));
6047
6048 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6049 SmallSet<unsigned, 8> RegsUsed;
6050 SmallVector<SDValue, 8> MemOpChains;
6051 auto PtrVT = getPointerTy(DAG.getDataLayout());
6052
6053 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
6054 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
6055 for (const auto &F : Forwards) {
6056 SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
6057 RegsToPass.emplace_back(F.PReg, Val);
6058 }
6059 }
6060
6061 // Walk the register/memloc assignments, inserting copies/loads.
6062 unsigned ExtraArgLocs = 0;
6063 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
6064 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
6065 SDValue Arg = OutVals[i];
6066 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6067
6068 // Promote the value if needed.
6069 switch (VA.getLocInfo()) {
6070 default:
6071 llvm_unreachable("Unknown loc info!");
6072 case CCValAssign::Full:
6073 break;
6074 case CCValAssign::SExt:
6075 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
6076 break;
6077 case CCValAssign::ZExt:
6078 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
6079 break;
6080 case CCValAssign::AExt:
6081 if (Outs[i].ArgVT == MVT::i1) {
6082 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
6083 //
6084 // Check if we actually have to do this, because the value may
6085 // already be zero-extended.
6086 //
6087 // We cannot just emit a (zext i8 (trunc (assert-zext i8)))
6088 // and rely on DAGCombiner to fold this, because the following
6089 // (anyext i32) is combined with (zext i8) in DAG.getNode:
6090 //
6091 // (ext (zext x)) -> (zext x)
6092 //
6093 // This will give us (zext i32), which we cannot remove, so
6094 // try to check this beforehand.
6095 if (!checkZExtBool(Arg, DAG)) {
6096 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
6097 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
6098 }
6099 }
6100 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
6101 break;
6102 case CCValAssign::AExtUpper:
6103 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
6104 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
6105 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
6106 DAG.getConstant(32, DL, VA.getLocVT()));
6107 break;
6108 case CCValAssign::BCvt:
6109 Arg = DAG.getBitcast(VA.getLocVT(), Arg);
6110 break;
6111 case CCValAssign::Trunc:
6112 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
6113 break;
6114 case CCValAssign::FPExt:
6115 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
6116 break;
6117 case CCValAssign::Indirect:
6118 assert(VA.getValVT().isScalableVector() &&
6119 "Only scalable vectors can be passed indirectly");
6120
6121 uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
6122 uint64_t PartSize = StoreSize;
6123 unsigned NumParts = 1;
6124 if (Outs[i].Flags.isInConsecutiveRegs()) {
6125 assert(!Outs[i].Flags.isInConsecutiveRegsLast());
6126 while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
6127 ++NumParts;
6128 StoreSize *= NumParts;
6129 }
6130
6131 MachineFrameInfo &MFI = MF.getFrameInfo();
6132 Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
6133 Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
6134 int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
6135 MFI.setStackID(FI, TargetStackID::ScalableVector);
6136
6137 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
6138 SDValue Ptr = DAG.getFrameIndex(
6139 FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
6140 SDValue SpillSlot = Ptr;
6141
6142 // Ensure we generate all stores for each tuple part, whilst updating the
6143 // pointer after each store correctly using vscale.
6144 while (NumParts) {
6145 Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
6146 NumParts--;
6147 if (NumParts > 0) {
6148 SDValue BytesIncrement = DAG.getVScale(
6149 DL, Ptr.getValueType(),
6150 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
6151 SDNodeFlags Flags;
6152 Flags.setNoUnsignedWrap(true);
6153
6154 MPI = MachinePointerInfo(MPI.getAddrSpace());
6155 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
6156 BytesIncrement, Flags);
6157 ExtraArgLocs++;
6158 i++;
6159 }
6160 }
6161
6162 Arg = SpillSlot;
6163 break;
6164 }
6165
6166 if (VA.isRegLoc()) {
6167 if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
6168 Outs[0].VT == MVT::i64) {
6169 assert(VA.getLocVT() == MVT::i64 &&
6170 "unexpected calling convention register assignment");
6171 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
6172 "unexpected use of 'returned'");
6173 IsThisReturn = true;
6174 }
6175 if (RegsUsed.count(VA.getLocReg())) {
6176 // If this register has already been used then we're trying to pack
6177 // parts of an [N x i32] into an X-register. The extension type will
6178 // take care of putting the two halves in the right place but we have to
6179 // combine them.
6180 SDValue &Bits =
6181 llvm::find_if(RegsToPass,
6182 [=](const std::pair<unsigned, SDValue> &Elt) {
6183 return Elt.first == VA.getLocReg();
6184 })
6185 ->second;
6186 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
6187 // Call site info is used for function's parameter entry value
6188 // tracking. For now we track only simple cases when parameter
6189 // is transferred through whole register.
6190 llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
6191 return ArgReg.Reg == VA.getLocReg();
6192 });
6193 } else {
6194 RegsToPass.emplace_back(VA.getLocReg(), Arg);
6195 RegsUsed.insert(VA.getLocReg());
6196 const TargetOptions &Options = DAG.getTarget().Options;
6197 if (Options.EmitCallSiteInfo)
6198 CSInfo.emplace_back(VA.getLocReg(), i);
6199 }
6200 } else {
6201 assert(VA.isMemLoc());
6202
6203 SDValue DstAddr;
6204 MachinePointerInfo DstInfo;
6205
6206 // FIXME: This works on big-endian for composite byvals, which are the
6207 common case. It should also work for fundamental types.
6208 uint32_t BEAlign = 0;
6209 unsigned OpSize;
6210 if (VA.getLocInfo() == CCValAssign::Indirect ||
6211 VA.getLocInfo() == CCValAssign::Trunc)
6212 OpSize = VA.getLocVT().getFixedSizeInBits();
6213 else
6214 OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
6215 : VA.getValVT().getSizeInBits();
6216 OpSize = (OpSize + 7) / 8;
6217 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
6218 !Flags.isInConsecutiveRegs()) {
6219 if (OpSize < 8)
6220 BEAlign = 8 - OpSize;
6221 }
6222 unsigned LocMemOffset = VA.getLocMemOffset();
6223 int32_t Offset = LocMemOffset + BEAlign;
6224 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
6225 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
6226
6227 if (IsTailCall) {
6228 Offset = Offset + FPDiff;
6229 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
6230
6231 DstAddr = DAG.getFrameIndex(FI, PtrVT);
6232 DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
6233
6234 // Make sure any stack arguments overlapping with where we're storing
6235 // are loaded before this eventual operation. Otherwise they'll be
6236 // clobbered.
6237 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
6238 } else {
6239 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
6240
6241 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
6242 DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
6243 }
6244
6245 if (Outs[i].Flags.isByVal()) {
6246 SDValue SizeNode =
6247 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
6248 SDValue Cpy = DAG.getMemcpy(
6249 Chain, DL, DstAddr, Arg, SizeNode,
6250 Outs[i].Flags.getNonZeroByValAlign(),
6251 /*isVol = */ false, /*AlwaysInline = */ false,
6252 /*isTailCall = */ false, DstInfo, MachinePointerInfo());
6253
6254 MemOpChains.push_back(Cpy);
6255 } else {
6256 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
6257 // promoted to a legal register type i32, we should truncate Arg back to
6258 // i1/i8/i16.
6259 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
6260 VA.getValVT() == MVT::i16)
6261 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
6262
6263 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
6264 MemOpChains.push_back(Store);
6265 }
6266 }
6267 }
6268
6269 if (!MemOpChains.empty())
6270 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
6271
6272 // Build a sequence of copy-to-reg nodes chained together with token chain
6273 // and flag operands which copy the outgoing args into the appropriate regs.
6274 SDValue InFlag;
6275 for (auto &RegToPass : RegsToPass) {
6276 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
6277 RegToPass.second, InFlag);
6278 InFlag = Chain.getValue(1);
6279 }
6280
6281 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
6282 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
6283 // node so that legalize doesn't hack it.
6284 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
6285 auto GV = G->getGlobal();
6286 unsigned OpFlags =
6287 Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
6288 if (OpFlags & AArch64II::MO_GOT) {
6289 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
6290 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
6291 } else {
6292 const GlobalValue *GV = G->getGlobal();
6293 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
6294 }
6295 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
6296 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
6297 Subtarget->isTargetMachO()) {
6298 const char *Sym = S->getSymbol();
6299 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
6300 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
6301 } else {
6302 const char *Sym = S->getSymbol();
6303 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
6304 }
6305 }
6306
6307 // We don't usually want to end the call-sequence here because we would tidy
6308 // the frame up *after* the call, however in the ABI-changing tail-call case
6309 // we've carefully laid out the parameters so that when sp is reset they'll be
6310 // in the correct location.
6311 if (IsTailCall && !IsSibCall) {
6312 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
6313 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
6314 InFlag = Chain.getValue(1);
6315 }
6316
6317 std::vector<SDValue> Ops;
6318 Ops.push_back(Chain);
6319 Ops.push_back(Callee);
6320
6321 if (IsTailCall) {
6322 // Each tail call may have to adjust the stack by a different amount, so
6323 // this information must travel along with the operation for eventual
6324 // consumption by emitEpilogue.
6325 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
6326 }
6327
6328 // Add argument registers to the end of the list so that they are known live
6329 // into the call.
6330 for (auto &RegToPass : RegsToPass)
6331 Ops.push_back(DAG.getRegister(RegToPass.first,
6332 RegToPass.second.getValueType()));
6333
6334 // Add a register mask operand representing the call-preserved registers.
6335 const uint32_t *Mask;
6336 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
6337 if (IsThisReturn) {
6338 // For 'this' returns, use the X0-preserving mask if applicable
6339 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
6340 if (!Mask) {
6341 IsThisReturn = false;
6342 Mask = TRI->getCallPreservedMask(MF, CallConv);
6343 }
6344 } else
6345 Mask = TRI->getCallPreservedMask(MF, CallConv);
6346
6347 if (Subtarget->hasCustomCallingConv())
6348 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
6349
6350 if (TRI->isAnyArgRegReserved(MF))
6351 TRI->emitReservedArgRegCallError(MF);
6352
6353 assert(Mask && "Missing call preserved mask for calling convention");
6354 Ops.push_back(DAG.getRegisterMask(Mask));
6355
6356 if (InFlag.getNode())
6357 Ops.push_back(InFlag);
6358
6359 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6360
6361 // If we're doing a tail call, use a TC_RETURN here rather than an
6362 // actual call instruction.
6363 if (IsTailCall) {
6364 MF.getFrameInfo().setHasTailCall();
6365 SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
6366 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
6367 return Ret;
6368 }
6369
6370 unsigned CallOpc = AArch64ISD::CALL;
6371 // Calls with operand bundle "clang.arc.attachedcall" are special. They should
6372 // be expanded to the call, directly followed by a special marker sequence.
6373 // Use the CALL_RVMARKER to do that.
6374 if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
6375 assert(!IsTailCall &&
6376 "tail calls cannot be marked with clang.arc.attachedcall");
6377 CallOpc = AArch64ISD::CALL_RVMARKER;
6378 }
6379
6380 // Returns a chain and a flag for retval copy to use.
6381 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
6382 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
6383 InFlag = Chain.getValue(1);
6384 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
6385
6386 uint64_t CalleePopBytes =
6387 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
6388
6389 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
6390 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
6391 InFlag, DL);
6392 if (!Ins.empty())
6393 InFlag = Chain.getValue(1);
6394
6395 // Handle result values, copying them out of physregs into vregs that we
6396 // return.
6397 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
6398 InVals, IsThisReturn,
6399 IsThisReturn ? OutVals[0] : SDValue());
6400}
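
// A minimal standalone sketch (not part of the original file) of the tail-call
// stack bookkeeping in LowerCall above: the popped argument size is kept
// 16-byte aligned and FPDiff compares the caller's reusable incoming-argument
// area with what the tail call needs. The helper names alignTo16 and
// computeFPDiff are introduced here purely for illustration.
#include <cstdint>

// Round a byte count up to the next multiple of 16; the popped argument area
// must stay 16-byte aligned across a tail call.
constexpr uint64_t alignTo16(uint64_t Bytes) { return (Bytes + 15) & ~uint64_t(15); }

// Negative FPDiff means the tail call needs more reserved stack than the
// caller's incoming argument area provides; positive means the stack could
// shrink. NumReusableBytes is assumed to already be 16-byte aligned, as the
// incoming argument area is.
constexpr int64_t computeFPDiff(uint64_t NumReusableBytes, uint64_t NumBytes) {
  return static_cast<int64_t>(NumReusableBytes) -
         static_cast<int64_t>(alignTo16(NumBytes));
}

// Example: a 32-byte incoming argument area and a tail call needing 40 bytes
// of outgoing arguments must grow the reserved area by 16 bytes.
static_assert(alignTo16(40) == 48, "popped size is kept 16-byte aligned");
static_assert(computeFPDiff(32, 40) == -16, "FPDiff example");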
6401
6402bool AArch64TargetLowering::CanLowerReturn(
6403 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
6404 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
6405 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
6406 SmallVector<CCValAssign, 16> RVLocs;
6407 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6408 return CCInfo.CheckReturn(Outs, RetCC);
6409}
6410
6411SDValue
6412AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6413 bool isVarArg,
6414 const SmallVectorImpl<ISD::OutputArg> &Outs,
6415 const SmallVectorImpl<SDValue> &OutVals,
6416 const SDLoc &DL, SelectionDAG &DAG) const {
6417 auto &MF = DAG.getMachineFunction();
6418 auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
6419
6420 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
6421 SmallVector<CCValAssign, 16> RVLocs;
6422 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
6423 CCInfo.AnalyzeReturn(Outs, RetCC);
6424
6425 // Copy the result values into the output registers.
6426 SDValue Flag;
6427 SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
6428 SmallSet<unsigned, 4> RegsUsed;
6429 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
6430 ++i, ++realRVLocIdx) {
6431 CCValAssign &VA = RVLocs[i];
6432 assert(VA.isRegLoc() && "Can only return in registers!");
6433 SDValue Arg = OutVals[realRVLocIdx];
6434
6435 switch (VA.getLocInfo()) {
6436 default:
6437 llvm_unreachable("Unknown loc info!");
6438 case CCValAssign::Full:
6439 if (Outs[i].ArgVT == MVT::i1) {
6440 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
6441 // value. This is strictly redundant on Darwin (which uses "zeroext
6442 // i1"), but will be optimised out before ISel.
6443 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
6444 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
6445 }
6446 break;
6447 case CCValAssign::BCvt:
6448 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
6449 break;
6450 case CCValAssign::AExt:
6451 case CCValAssign::ZExt:
6452 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
6453 break;
6454 case CCValAssign::AExtUpper:
6455 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
6456 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
6457 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
6458 DAG.getConstant(32, DL, VA.getLocVT()));
6459 break;
6460 }
6461
6462 if (RegsUsed.count(VA.getLocReg())) {
6463 SDValue &Bits =
6464 llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
6465 return Elt.first == VA.getLocReg();
6466 })->second;
6467 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
6468 } else {
6469 RetVals.emplace_back(VA.getLocReg(), Arg);
6470 RegsUsed.insert(VA.getLocReg());
6471 }
6472 }
6473
6474 SmallVector<SDValue, 4> RetOps(1, Chain);
6475 for (auto &RetVal : RetVals) {
6476 Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
6477 Flag = Chain.getValue(1);
6478 RetOps.push_back(
6479 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
6480 }
6481
6482 // Windows AArch64 ABIs require that for returning structs by value we copy
6483 // the sret argument into X0 for the return.
6484 // We saved the argument into a virtual register in the entry block,
6485 // so now we copy the value out and into X0.
6486 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
6487 SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
6488 getPointerTy(MF.getDataLayout()));
6489
6490 unsigned RetValReg = AArch64::X0;
6491 Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
6492 Flag = Chain.getValue(1);
6493
6494 RetOps.push_back(
6495 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
6496 }
6497
6498 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
6499 const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF);
6500 if (I) {
6501 for (; *I; ++I) {
6502 if (AArch64::GPR64RegClass.contains(*I))
6503 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6504 else if (AArch64::FPR64RegClass.contains(*I))
6505 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6506 else
6507 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6508 }
6509 }
6510
6511 RetOps[0] = Chain; // Update chain.
6512
6513 // Add the flag if we have it.
6514 if (Flag.getNode())
6515 RetOps.push_back(Flag);
6516
6517 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
6518}
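
// A standalone illustration (introduced here, not original code) of two
// conventions handled in LowerReturn and in LowerCall above: packing the two
// halves of an [2 x i32] into one X register via the AExtUpper shift-by-32
// followed by an OR, and the AAPCS requirement that an i1 be zero-extended so
// that only bit 0 of the byte is significant. Helper names are assumptions.
#include <cstdint>

// The first element lands in bits [31:0], the second element is shifted into
// bits [63:32], and the reused register combines the halves with OR.
constexpr uint64_t packI32Pair(uint32_t Lo, uint32_t Hi) {
  return static_cast<uint64_t>(Lo) | (static_cast<uint64_t>(Hi) << 32);
}
static_assert(packI32Pair(0x11111111u, 0x22222222u) == 0x2222222211111111ull,
              "AExtUpper places the second i32 in the upper half");

// AAPCS i1: the producer hands over the value zero-extended, so only bit 0 of
// the i8 may be set.
constexpr uint8_t zeroExtendI1(uint8_t V) { return V & 1; }
static_assert(zeroExtendI1(0xFF) == 1, "only bit 0 of an i1 is significant");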
6519
6520//===----------------------------------------------------------------------===//
6521// Other Lowering Code
6522//===----------------------------------------------------------------------===//
6523
6524SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
6525 SelectionDAG &DAG,
6526 unsigned Flag) const {
6527 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
6528 N->getOffset(), Flag);
6529}
6530
6531SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
6532 SelectionDAG &DAG,
6533 unsigned Flag) const {
6534 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
6535}
6536
6537SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
6538 SelectionDAG &DAG,
6539 unsigned Flag) const {
6540 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
6541 N->getOffset(), Flag);
6542}
6543
6544SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
6545 SelectionDAG &DAG,
6546 unsigned Flag) const {
6547 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
6548}
6549
6550// (loadGOT sym)
6551template <class NodeTy>
6552SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
6553 unsigned Flags) const {
6554 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
6555 SDLoc DL(N);
6556 EVT Ty = getPointerTy(DAG.getDataLayout());
6557 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
6558 // FIXME: Once remat is capable of dealing with instructions with register
6559 // operands, expand this into two nodes instead of using a wrapper node.
6560 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
6561}
6562
6563// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
6564template <class NodeTy>
6565SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
6566 unsigned Flags) const {
6567 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
6568 SDLoc DL(N);
6569 EVT Ty = getPointerTy(DAG.getDataLayout());
6570 const unsigned char MO_NC = AArch64II::MO_NC;
6571 return DAG.getNode(
6572 AArch64ISD::WrapperLarge, DL, Ty,
6573 getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
6574 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
6575 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
6576 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
6577}
6578
6579// (addlow (adrp %hi(sym)) %lo(sym))
6580template <class NodeTy>
6581SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
6582 unsigned Flags) const {
6583 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
6584 SDLoc DL(N);
6585 EVT Ty = getPointerTy(DAG.getDataLayout());
6586 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
6587 SDValue Lo = getTargetNode(N, Ty, DAG,
6588 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
6589 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
6590 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
6591}
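
// A small model (illustrative only) of the (addlow (adrp %hi(sym)) %lo(sym))
// pattern built by getAddr above: ADRP materializes the symbol's 4KiB page and
// the :lo12: add restores the offset within that page. Helper names are
// introduced for the example.
#include <cstdint>

constexpr uint64_t pageOf(uint64_t Addr) { return Addr & ~uint64_t(0xFFF); }
constexpr uint64_t pageOffsetOf(uint64_t Addr) { return Addr & uint64_t(0xFFF); }
static_assert(pageOf(0x123456789ABCull) + pageOffsetOf(0x123456789ABCull) ==
                  0x123456789ABCull,
              "adrp page base plus lo12 offset recovers the full address");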
6592
6593// (adr sym)
6594template <class NodeTy>
6595SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
6596 unsigned Flags) const {
6597 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
6598 SDLoc DL(N);
6599 EVT Ty = getPointerTy(DAG.getDataLayout());
6600 SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
6601 return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
6602}
6603
6604SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
6605 SelectionDAG &DAG) const {
6606 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
6607 const GlobalValue *GV = GN->getGlobal();
6608 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
6609
6610 if (OpFlags != AArch64II::MO_NO_FLAG)
6611 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
6612 "unexpected offset in global node");
6613
6614 // This also catches the large code model case for Darwin, and tiny code
6615 // model with got relocations.
6616 if ((OpFlags & AArch64II::MO_GOT) != 0) {
6617 return getGOT(GN, DAG, OpFlags);
6618 }
6619
6620 SDValue Result;
6621 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
6622 Result = getAddrLarge(GN, DAG, OpFlags);
6623 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
6624 Result = getAddrTiny(GN, DAG, OpFlags);
6625 } else {
6626 Result = getAddr(GN, DAG, OpFlags);
6627 }
6628 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6629 SDLoc DL(GN);
6630 if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
6631 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
6632 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
6633 return Result;
6634}
6635
6636/// Convert a TLS address reference into the correct sequence of loads
6637/// and calls to compute the variable's address (for Darwin, currently) and
6638/// return an SDValue containing the final node.
6639
6640/// Darwin only has one TLS scheme which must be capable of dealing with the
6641/// fully general situation, in the worst case. This means:
6642/// + "extern __thread" declaration.
6643/// + Defined in a possibly unknown dynamic library.
6644///
6645/// The general system is that each __thread variable has a [3 x i64] descriptor
6646/// which contains information used by the runtime to calculate the address. The
6647/// only part of this the compiler needs to know about is the first xword, which
6648/// contains a function pointer that must be called with the address of the
6649/// entire descriptor in "x0".
6650///
6651/// Since this descriptor may be in a different unit, in general even the
6652/// descriptor must be accessed via an indirect load. The "ideal" code sequence
6653/// is:
6654/// adrp x0, _var@TLVPPAGE
6655/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
6656/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
6657/// ; the function pointer
6658/// blr x1 ; Uses descriptor address in x0
6659/// ; Address of _var is now in x0.
6660///
6661/// If the address of _var's descriptor *is* known to the linker, then it can
6662/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
6663/// a slight efficiency gain.
6664SDValue
6665AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
6666 SelectionDAG &DAG) const {
6667 assert(Subtarget->isTargetDarwin() &&
6668 "This function expects a Darwin target");
6669
6670 SDLoc DL(Op);
6671 MVT PtrVT = getPointerTy(DAG.getDataLayout());
6672 MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
6673 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
6674
6675 SDValue TLVPAddr =
6676 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
6677 SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
6678
6679 // The first entry in the descriptor is a function pointer that we must call
6680 // to obtain the address of the variable.
6681 SDValue Chain = DAG.getEntryNode();
6682 SDValue FuncTLVGet = DAG.getLoad(
6683 PtrMemVT, DL, Chain, DescAddr,
6684 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
6685 Align(PtrMemVT.getSizeInBits() / 8),
6686 MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
6687 Chain = FuncTLVGet.getValue(1);
6688
6689 // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
6690 FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
6691
6692 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
6693 MFI.setAdjustsStack(true);
6694
6695 // TLS calls preserve all registers except those that absolutely must be
6696 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
6697 // silly).
6698 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
6699 const uint32_t *Mask = TRI->getTLSCallPreservedMask();
6700 if (Subtarget->hasCustomCallingConv())
6701 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
6702
6703 // Finally, we can make the call. This is just a degenerate version of a
6704 // normal AArch64 call node: x0 takes the address of the descriptor, and
6705 // returns the address of the variable in this thread.
6706 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
6707 Chain =
6708 DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
6709 Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
6710 DAG.getRegisterMask(Mask), Chain.getValue(1));
6711 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
6712}
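
// A minimal model (not original code) of the Darwin TLV descriptor layout
// described in the comment above LowerDarwinGlobalTLSAddress: three xwords,
// the first of which is a resolver that must be called with the descriptor's
// own address as its argument. The field names below are assumptions made for
// the illustration; the lowering only relies on "the first entry is the
// function pointer".
#include <cstdint>

struct TLVDescriptorModel {
  void *(*Thunk)(TLVDescriptorModel *); // entry 0: resolver function pointer
  uint64_t Opaque1;                     // entry 1: runtime-private data
  uint64_t Opaque2;                     // entry 2: runtime-private data
};

// The C-level equivalent of "ldr x1, [x0]; blr x1" with the descriptor
// address already in x0: call the first entry, passing the descriptor itself.
inline void *resolveTLVModel(TLVDescriptorModel *Desc) { return Desc->Thunk(Desc); }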
6713
6714/// Convert a thread-local variable reference into a sequence of instructions to
6715/// compute the variable's address for the local exec TLS model of ELF targets.
6716/// The sequence depends on the maximum TLS area size.
6717SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
6718 SDValue ThreadBase,
6719 const SDLoc &DL,
6720 SelectionDAG &DAG) const {
6721 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6722 SDValue TPOff, Addr;
6723
6724 switch (DAG.getTarget().Options.TLSSize) {
6725 default:
6726 llvm_unreachable("Unexpected TLS size");
6727
6728 case 12: {
6729 // mrs x0, TPIDR_EL0
6730 // add x0, x0, :tprel_lo12:a
6731 SDValue Var = DAG.getTargetGlobalAddress(
6732 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
6733 return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
6734 Var,
6735 DAG.getTargetConstant(0, DL, MVT::i32)),
6736 0);
6737 }
6738
6739 case 24: {
6740 // mrs x0, TPIDR_EL0
6741 // add x0, x0, :tprel_hi12:a
6742 // add x0, x0, :tprel_lo12_nc:a
6743 SDValue HiVar = DAG.getTargetGlobalAddress(
6744 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6745 SDValue LoVar = DAG.getTargetGlobalAddress(
6746 GV, DL, PtrVT, 0,
6747 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6748 Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
6749 HiVar,
6750 DAG.getTargetConstant(0, DL, MVT::i32)),
6751 0);
6752 return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
6753 LoVar,
6754 DAG.getTargetConstant(0, DL, MVT::i32)),
6755 0);
6756 }
6757
6758 case 32: {
6759 // mrs x1, TPIDR_EL0
6760 // movz x0, #:tprel_g1:a
6761 // movk x0, #:tprel_g0_nc:a
6762 // add x0, x1, x0
6763 SDValue HiVar = DAG.getTargetGlobalAddress(
6764 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
6765 SDValue LoVar = DAG.getTargetGlobalAddress(
6766 GV, DL, PtrVT, 0,
6767 AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
6768 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
6769 DAG.getTargetConstant(16, DL, MVT::i32)),
6770 0);
6771 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
6772 DAG.getTargetConstant(0, DL, MVT::i32)),
6773 0);
6774 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
6775 }
6776
6777 case 48: {
6778 // mrs x1, TPIDR_EL0
6779 // movz x0, #:tprel_g2:a
6780 // movk x0, #:tprel_g1_nc:a
6781 // movk x0, #:tprel_g0_nc:a
6782 // add x0, x1, x0
6783 SDValue HiVar = DAG.getTargetGlobalAddress(
6784 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
6785 SDValue MiVar = DAG.getTargetGlobalAddress(
6786 GV, DL, PtrVT, 0,
6787 AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
6788 SDValue LoVar = DAG.getTargetGlobalAddress(
6789 GV, DL, PtrVT, 0,
6790 AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
6791 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
6792 DAG.getTargetConstant(32, DL, MVT::i32)),
6793 0);
6794 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
6795 DAG.getTargetConstant(16, DL, MVT::i32)),
6796 0);
6797 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
6798 DAG.getTargetConstant(0, DL, MVT::i32)),
6799 0);
6800 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
6801 }
6802 }
6803}
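
// An illustrative model (helper names introduced here) of the MOVZ/MOVK
// sequences in LowerELFTLSLocalExec above: for the 32- and 48-bit TLS sizes
// each instruction contributes one 16-bit chunk of the thread-pointer offset,
// and the final address is TPIDR_EL0 plus that composed offset.
#include <cstdint>

constexpr uint64_t composeTPOff32(uint16_t G1, uint16_t G0) {
  // movz x0, #:tprel_g1:a ; movk x0, #:tprel_g0_nc:a
  return (static_cast<uint64_t>(G1) << 16) | G0;
}
constexpr uint64_t composeTPOff48(uint16_t G2, uint16_t G1, uint16_t G0) {
  // movz x0, #:tprel_g2:a ; movk x0, #:tprel_g1_nc:a ; movk x0, #:tprel_g0_nc:a
  return (static_cast<uint64_t>(G2) << 32) |
         (static_cast<uint64_t>(G1) << 16) | G0;
}
static_assert(composeTPOff32(0x1234, 0x5678) == 0x12345678ull, "two chunks");
static_assert(composeTPOff48(0x1, 0x2345, 0x6789) == 0x123456789ull, "three chunks");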
6804
6805/// When accessing thread-local variables under either the general-dynamic or
6806/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
6807/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
6808/// is a function pointer to carry out the resolution.
6809///
6810/// The sequence is:
6811/// adrp x0, :tlsdesc:var
6812/// ldr x1, [x0, #:tlsdesc_lo12:var]
6813/// add x0, x0, #:tlsdesc_lo12:var
6814/// .tlsdesccall var
6815/// blr x1
6816/// (TPIDR_EL0 offset now in x0)
6817///
6818/// The above sequence must be produced unscheduled, to enable the linker to
6819/// optimize/relax this sequence.
6820/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
6821/// above sequence, and expanded really late in the compilation flow, to ensure
6822/// the sequence is produced as per above.
6823SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
6824 const SDLoc &DL,
6825 SelectionDAG &DAG) const {
6826 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6827
6828 SDValue Chain = DAG.getEntryNode();
6829 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6830
6831 Chain =
6832 DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
6833 SDValue Glue = Chain.getValue(1);
6834
6835 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
6836}
6837
6838SDValue
6839AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
6840 SelectionDAG &DAG) const {
6841 assert(Subtarget->isTargetELF() && "This function expects an ELF target");
6842
6843 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
6844
6845 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
6846
6847 if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
6848 if (Model == TLSModel::LocalDynamic)
6849 Model = TLSModel::GeneralDynamic;
6850 }
6851
6852 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
6853 Model != TLSModel::LocalExec)
6854 report_fatal_error("ELF TLS only supported in small memory model or "
6855 "in local exec TLS model");
6856 // Different choices can be made for the maximum size of the TLS area for a
6857 // module. For the small address model, the default TLS size is 16MiB and the
6858 // maximum TLS size is 4GiB.
6859 // FIXME: add tiny and large code model support for TLS access models other
6860 // than local exec. We currently generate the same code as small for tiny,
6861 // which may be larger than needed.
6862
6863 SDValue TPOff;
6864 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6865 SDLoc DL(Op);
6866 const GlobalValue *GV = GA->getGlobal();
6867
6868 SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
6869
6870 if (Model == TLSModel::LocalExec) {
6871 return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
6872 } else if (Model == TLSModel::InitialExec) {
6873 TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
6874 TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
6875 } else if (Model == TLSModel::LocalDynamic) {
6876 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
6877 // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
6878 // the beginning of the module's TLS region, followed by a DTPREL offset
6879 // calculation.
6880
6881 // These accesses will need deduplicating if there's more than one.
6882 AArch64FunctionInfo *MFI =
6883 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
6884 MFI->incNumLocalDynamicTLSAccesses();
6885
6886 // The call needs a relocation too for linker relaxation. It doesn't make
6887 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
6888 // the address.
6889 SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
6890 AArch64II::MO_TLS);
6891
6892 // Now we can calculate the offset from TPIDR_EL0 to this module's
6893 // thread-local area.
6894 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
6895
6896 // Now use :dtprel_whatever: operations to calculate this variable's offset
6897 // in its thread-storage area.
6898 SDValue HiVar = DAG.getTargetGlobalAddress(
6899 GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6900 SDValue LoVar = DAG.getTargetGlobalAddress(
6901 GV, DL, MVT::i64, 0,
6902 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6903
6904 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
6905 DAG.getTargetConstant(0, DL, MVT::i32)),
6906 0);
6907 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
6908 DAG.getTargetConstant(0, DL, MVT::i32)),
6909 0);
6910 } else if (Model == TLSModel::GeneralDynamic) {
6911 // The call needs a relocation too for linker relaxation. It doesn't make
6912 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
6913 // the address.
6914 SDValue SymAddr =
6915 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
6916
6917 // Finally we can make a call to calculate the offset from tpidr_el0.
6918 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
6919 } else
6920 llvm_unreachable("Unsupported ELF TLS access model");
6921
6922 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
6923}
6924
6925SDValue
6926AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
6927 SelectionDAG &DAG) const {
6928 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
6929
6930 SDValue Chain = DAG.getEntryNode();
6931 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6932 SDLoc DL(Op);
6933
6934 SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
6935
6936 // Load the ThreadLocalStoragePointer from the TEB
6937 // A pointer to the TLS array is located at offset 0x58 from the TEB.
6938 SDValue TLSArray =
6939 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
6940 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
6941 Chain = TLSArray.getValue(1);
6942
6943 // Load the TLS index from the C runtime;
6944 // This does the same as getAddr(), but without having a GlobalAddressSDNode.
6945 // This also does the same as LOADgot, but using a generic i32 load,
6946 // while LOADgot only loads i64.
6947 SDValue TLSIndexHi =
6948 DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
6949 SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
6950 "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6951 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
6952 SDValue TLSIndex =
6953 DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
6954 TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
6955 Chain = TLSIndex.getValue(1);
6956
6957 // The pointer to the thread's TLS data area is found at the TLS index,
6958 // scaled by 8, as an offset into the TLSArray.
6959 TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
6960 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
6961 DAG.getConstant(3, DL, PtrVT));
6962 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
6963 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
6964 MachinePointerInfo());
6965 Chain = TLS.getValue(1);
6966
6967 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
6968 const GlobalValue *GV = GA->getGlobal();
6969 SDValue TGAHi = DAG.getTargetGlobalAddress(
6970 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6971 SDValue TGALo = DAG.getTargetGlobalAddress(
6972 GV, DL, PtrVT, 0,
6973 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6974
6975 // Add the offset from the start of the .tls section (section base).
6976 SDValue Addr =
6977 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
6978 DAG.getTargetConstant(0, DL, MVT::i32)),
6979 0);
6980 Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
6981 return Addr;
6982}
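
// A standalone model (all names are assumptions for the illustration) of the
// Windows TLS walk performed above: the TEB (live in x18) holds the
// ThreadLocalStoragePointer at offset 0x58, that array is indexed by the
// module's _tls_index scaled by 8 (one pointer per module), and the variable
// lives at a fixed offset from that module's TLS block.
#include <cstdint>

inline void *windowsTLSAddressModel(const char *TEB, uint32_t TLSIndex,
                                    uint64_t VarOffsetInBlock) {
  // ldr TLSArray, [TEB, #0x58]
  char *const *TLSArray = *reinterpret_cast<char *const *const *>(TEB + 0x58);
  // ldr Block, [TLSArray, TLSIndex, lsl #3]
  char *ModuleTLSBlock = TLSArray[TLSIndex];
  // add Addr, Block, #<offset of the variable from the .tls section base>
  return ModuleTLSBlock + VarOffsetInBlock;
}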
6983
6984SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
6985 SelectionDAG &DAG) const {
6986 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
6987 if (DAG.getTarget().useEmulatedTLS())
6988 return LowerToTLSEmulatedModel(GA, DAG);
6989
6990 if (Subtarget->isTargetDarwin())
6991 return LowerDarwinGlobalTLSAddress(Op, DAG);
6992 if (Subtarget->isTargetELF())
6993 return LowerELFGlobalTLSAddress(Op, DAG);
6994 if (Subtarget->isTargetWindows())
6995 return LowerWindowsGlobalTLSAddress(Op, DAG);
6996
6997 llvm_unreachable("Unexpected platform trying to use TLS");
6998}
6999
7000// Looks through \param Val to determine the bit that can be used to
7001// check the sign of the value. It returns the unextended value and
7002// the sign bit position.
7003std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) {
7004 if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG)
7005 return {Val.getOperand(0),
7006 cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
7007 1};
7008
7009 if (Val.getOpcode() == ISD::SIGN_EXTEND)
7010 return {Val.getOperand(0),
7011 Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
7012
7013 return {Val, Val.getValueSizeInBits() - 1};
7014}
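
// Why looking through a sign extension is safe for sign tests (illustrative
// sketch, helper name introduced here): after sign-extending an i32 to i64 the
// value is negative exactly when bit 31 of the original i32 is set, so a
// TB(N)Z on that bit of the unextended value tests the sign of the wide value.
#include <cstdint>

constexpr bool signBitSet(uint64_t Value, uint64_t SignBitPos) {
  return ((Value >> SignBitPos) & 1) != 0;
}
static_assert(signBitSet(static_cast<uint32_t>(-5), 31), "negative i32: bit 31 set");
static_assert(!signBitSet(5u, 31), "non-negative i32: bit 31 clear");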
7015
7016SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
7017 SDValue Chain = Op.getOperand(0);
7018 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
7019 SDValue LHS = Op.getOperand(2);
7020 SDValue RHS = Op.getOperand(3);
7021 SDValue Dest = Op.getOperand(4);
7022 SDLoc dl(Op);
7023
7024 MachineFunction &MF = DAG.getMachineFunction();
7025 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
7026 // will not be produced, as they are conditional branch instructions that do
7027 // not set flags.
7028 bool ProduceNonFlagSettingCondBr =
7029 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
7030
7031 // Handle f128 first, since lowering it will result in comparing the return
7032 // value of a libcall against zero, which is just what the rest of LowerBR_CC
7033 // is expecting to deal with.
7034 if (LHS.getValueType() == MVT::f128) {
7035 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
7036
7037 // If softenSetCCOperands returned a scalar, we need to compare the result
7038 // against zero to select between true and false values.
7039 if (!RHS.getNode()) {
7040 RHS = DAG.getConstant(0, dl, LHS.getValueType());
7041 CC = ISD::SETNE;
7042 }
7043 }
7044
7045 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
7046 // instruction.
7047 if (ISD::isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
7048 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
7049 // Only lower legal XALUO ops.
7050 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
7051 return SDValue();
7052
7053 // The actual operation with overflow check.
7054 AArch64CC::CondCode OFCC;
7055 SDValue Value, Overflow;
7056 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
7057
7058 if (CC == ISD::SETNE)
7059 OFCC = getInvertedCondCode(OFCC);
7060 SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
7061
7062 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
7063 Overflow);
7064 }
7065
7066 if (LHS.getValueType().isInteger()) {
7067 assert((LHS.getValueType() == RHS.getValueType()) &&
7068 (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
7069
7070 // If the RHS of the comparison is zero, we can potentially fold this
7071 // to a specialized branch.
7072 const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
7073 if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
7074 if (CC == ISD::SETEQ) {
7075 // See if we can use a TBZ to fold in an AND as well.
7076 // TBZ has a smaller branch displacement than CBZ. If the offset is
7077 // out of bounds, a late MI-layer pass rewrites branches.
7078 // 403.gcc is an example that hits this case.
7079 if (LHS.getOpcode() == ISD::AND &&
7080 isa<ConstantSDNode>(LHS.getOperand(1)) &&
7081 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
7082 SDValue Test = LHS.getOperand(0);
7083 uint64_t Mask = LHS.getConstantOperandVal(1);
7084 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
7085 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
7086 Dest);
7087 }
7088
7089 return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
7090 } else if (CC == ISD::SETNE) {
7091 // See if we can use a TBZ to fold in an AND as well.
7092 // TBZ has a smaller branch displacement than CBZ. If the offset is
7093 // out of bounds, a late MI-layer pass rewrites branches.
7094 // 403.gcc is an example that hits this case.
7095 if (LHS.getOpcode() == ISD::AND &&
7096 isa<ConstantSDNode>(LHS.getOperand(1)) &&
7097 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
7098 SDValue Test = LHS.getOperand(0);
7099 uint64_t Mask = LHS.getConstantOperandVal(1);
7100 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
7101 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
7102 Dest);
7103 }
7104
7105 return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
7106 } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
7107 // Don't combine AND since emitComparison converts the AND to an ANDS
7108 // (a.k.a. TST) and the test in the test bit and branch instruction
7109 // becomes redundant. This would also increase register pressure.
7110 uint64_t SignBitPos;
7111 std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
7112 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
7113 DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
7114 }
7115 }
7116 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
7117 LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
7118 // Don't combine AND since emitComparison converts the AND to an ANDS
7119 // (a.k.a. TST) and the test in the test bit and branch instruction
7120 // becomes redundant. This would also increase register pressure.
7121 uint64_t SignBitPos;
7122 std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
7123 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
7124 DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
7125 }
7126
7127 SDValue CCVal;
7128 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
7129 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
7130 Cmp);
7131 }
7132
7133 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 ||
7134 LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
7135
7136 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
7137 // clean. Some of them require two branches to implement.
7138 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
7139 AArch64CC::CondCode CC1, CC2;
7140 changeFPCCToAArch64CC(CC, CC1, CC2);
7141 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
7142 SDValue BR1 =
7143 DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
7144 if (CC2 != AArch64CC::AL) {
7145 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
7146 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
7147 Cmp);
7148 }
7149
7150 return BR1;
7151}
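
// A sketch (names introduced here) of the TBZ/TBNZ fold used in LowerBR_CC
// above: when (X & Mask) is compared against zero and Mask is a power of two,
// the comparison reduces to testing the single bit at position log2(Mask),
// which is exactly what TB(N)Z encodes.
#include <cstdint>

constexpr bool takenCBZForm(uint64_t X, uint64_t Mask) {
  return (X & Mask) == 0;          // compare-the-AND-against-zero form
}
constexpr bool takenTBZForm(uint64_t X, unsigned BitPos) {
  return ((X >> BitPos) & 1) == 0; // single-bit test form
}
static_assert(takenCBZForm(0x40, 0x8) == takenTBZForm(0x40, 3),
              "a power-of-two AND mask folds to a test of bit log2(Mask)");
static_assert(takenCBZForm(0x48, 0x8) == takenTBZForm(0x48, 3),
              "and the fold also holds when the bit is set");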
7152
7153SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
7154 SelectionDAG &DAG) const {
7155 EVT VT = Op.getValueType();
7156 SDLoc DL(Op);
7157
7158 SDValue In1 = Op.getOperand(0);
7159 SDValue In2 = Op.getOperand(1);
7160 EVT SrcVT = In2.getValueType();
7161
7162 if (VT.isScalableVector()) {
7163 if (VT != SrcVT)
7164 return SDValue();
7165
7166 // copysign(x,y) -> (y & SIGN_MASK) | (x & ~SIGN_MASK)
7167 //
7168 // A possible alternative sequence involves using FNEG_MERGE_PASSTHRU;
7169 // maybe useful for copysign operations with mismatched VTs.
7170 //
7171 // IntVT here is chosen so it's a legal type with the same element width
7172 // as the input.
7173 EVT IntVT =
7174 getPackedSVEVectorVT(VT.getVectorElementType().changeTypeToInteger());
7175 unsigned NumBits = VT.getScalarSizeInBits();
7176 SDValue SignMask = DAG.getConstant(APInt::getSignMask(NumBits), DL, IntVT);
7177 SDValue InvSignMask = DAG.getNOT(DL, SignMask, IntVT);
7178 SDValue Sign = DAG.getNode(ISD::AND, DL, IntVT, SignMask,
7179 getSVESafeBitCast(IntVT, In2, DAG));
7180 SDValue Magnitude = DAG.getNode(ISD::AND, DL, IntVT, InvSignMask,
7181 getSVESafeBitCast(IntVT, In1, DAG));
7182 SDValue IntResult = DAG.getNode(ISD::OR, DL, IntVT, Sign, Magnitude);
7183 return getSVESafeBitCast(VT, IntResult, DAG);
7184 }
7185
7186 if (SrcVT.bitsLT(VT))
7187 In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
7188 else if (SrcVT.bitsGT(VT))
7189 In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
7190
7191 EVT VecVT;
7192 uint64_t EltMask;
7193 SDValue VecVal1, VecVal2;
7194
7195 auto setVecVal = [&] (int Idx) {
7196 if (!VT.isVector()) {
7197 VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
7198 DAG.getUNDEF(VecVT), In1);
7199 VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
7200 DAG.getUNDEF(VecVT), In2);
7201 } else {
7202 VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
7203 VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
7204 }
7205 };
7206
7207 if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
7208 VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
7209 EltMask = 0x80000000ULL;
7210 setVecVal(AArch64::ssub);
7211 } else if (VT == MVT::f64 || VT == MVT::v2f64) {
7212 VecVT = MVT::v2i64;
7213
7214 // We want to materialize a mask with the high bit set, but the AdvSIMD
7215 // immediate moves cannot materialize that in a single instruction for
7216 // 64-bit elements. Instead, materialize zero and then negate it.
7217 EltMask = 0;
7218
7219 setVecVal(AArch64::dsub);
7220 } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
7221 VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
7222 EltMask = 0x8000ULL;
7223 setVecVal(AArch64::hsub);
7224 } else {
7225 llvm_unreachable("Invalid type for copysign!")::llvm::llvm_unreachable_internal("Invalid type for copysign!"
, "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 7225)
;
7226 }
7227
7228 SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
7229
7230 // If we couldn't materialize the mask above, then the mask vector will be
7231 // the zero vector, and we need to negate it here.
7232 if (VT == MVT::f64 || VT == MVT::v2f64) {
7233 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
7234 BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
7235 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
7236 }
7237
7238 SDValue Sel =
7239 DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
7240
7241 if (VT == MVT::f16)
7242 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
7243 if (VT == MVT::f32)
7244 return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
7245 else if (VT == MVT::f64)
7246 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
7247 else
7248 return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
7249}
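The scalable-vector path above expands copysign as (y & SIGN_MASK) | (x & ~SIGN_MASK). A minimal scalar sketch of that bit-select, assuming IEEE-754 binary64 layout (helper name is illustrative, not part of this file):

#include <cstdint>
#include <cstring>

static double copysignBitSelect(double X, double Y) {
  uint64_t XI, YI;
  std::memcpy(&XI, &X, sizeof(XI));
  std::memcpy(&YI, &Y, sizeof(YI));
  const uint64_t SignMask = 1ULL << 63;               // sign bit of a binary64 value
  uint64_t RI = (YI & SignMask) | (XI & ~SignMask);   // y's sign, x's magnitude
  double R;
  std::memcpy(&R, &RI, sizeof(R));
  return R;
}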
7250
7251SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
7252 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
7253 Attribute::NoImplicitFloat))
7254 return SDValue();
7255
7256 if (!Subtarget->hasNEON())
7257 return SDValue();
7258
7259 // While there is no integer popcount instruction, it can
7260 // be more efficiently lowered to the following sequence that uses
7261 // AdvSIMD registers/instructions as long as the copies to/from
7262 // the AdvSIMD registers are cheap.
7263 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
7264 // CNT V0.8B, V0.8B // 8xbyte pop-counts
7265 // ADDV B0, V0.8B // sum 8xbyte pop-counts
7266 // UMOV X0, V0.B[0] // copy byte result back to integer reg
7267 SDValue Val = Op.getOperand(0);
7268 SDLoc DL(Op);
7269 EVT VT = Op.getValueType();
7270
7271 if (VT == MVT::i32 || VT == MVT::i64) {
7272 if (VT == MVT::i32)
7273 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
7274 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
7275
7276 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
7277 SDValue UaddLV = DAG.getNode(
7278 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
7279 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
7280
7281 if (VT == MVT::i64)
7282 UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
7283 return UaddLV;
7284 } else if (VT == MVT::i128) {
7285 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Val);
7286
7287 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
7288 SDValue UaddLV = DAG.getNode(
7289 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
7290 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
7291
7292 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
7293 }
7294
7295 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
7296 return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
7297
7298 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
7299 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
7300 "Unexpected type for custom ctpop lowering");
7301
7302 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
7303 Val = DAG.getBitcast(VT8Bit, Val);
7304 Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
7305
7306 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
7307 unsigned EltSize = 8;
7308 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
7309 while (EltSize != VT.getScalarSizeInBits()) {
7310 EltSize *= 2;
7311 NumElts /= 2;
7312 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
7313 Val = DAG.getNode(
7314 ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
7315 DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
7316 }
7317
7318 return Val;
7319}
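A scalar sketch of the scalar-integer path above (per-byte pop-counts followed by a horizontal add, as in the FMOV/CNT/ADDV/UMOV sequence); names are illustrative only:

#include <cstdint>

static unsigned popcount64Model(uint64_t V) {
  unsigned Sum = 0;
  for (int I = 0; I < 8; ++I) {
    uint8_t Byte = (V >> (8 * I)) & 0xff;
    unsigned Cnt = 0;
    for (int B = 0; B < 8; ++B)
      Cnt += (Byte >> B) & 1;   // per-byte pop-count (CNT)
    Sum += Cnt;                 // horizontal sum (ADDV / UADDLV)
  }
  return Sum;
}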
7320
7321SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
7322 EVT VT = Op.getValueType();
7323 assert(VT.isScalableVector() ||
7324 useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true));
7325
7326 SDLoc DL(Op);
7327 SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
7328 return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
7329}
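The lowering above relies on the identity cttz(x) == ctlz(bitreverse(x)). A small scalar sketch of that identity (assumes the GCC/Clang __builtin_clz intrinsic; helper name is illustrative):

#include <cstdint>

static unsigned cttz32Model(uint32_t X) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((X >> I) & 1u) << (31 - I);                 // RBIT: reverse all 32 bits
  return R == 0 ? 32u : (unsigned)__builtin_clz(R);   // CLZ of the reversed value
}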
7330
7331SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
7332 SelectionDAG &DAG) const {
7333
7334 EVT VT = Op.getValueType();
7335 SDLoc DL(Op);
7336 unsigned Opcode = Op.getOpcode();
7337 ISD::CondCode CC;
7338 switch (Opcode) {
7339 default:
7340 llvm_unreachable("Wrong instruction")::llvm::llvm_unreachable_internal("Wrong instruction", "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 7340)
;
7341 case ISD::SMAX:
7342 CC = ISD::SETGT;
7343 break;
7344 case ISD::SMIN:
7345 CC = ISD::SETLT;
7346 break;
7347 case ISD::UMAX:
7348 CC = ISD::SETUGT;
7349 break;
7350 case ISD::UMIN:
7351 CC = ISD::SETULT;
7352 break;
7353 }
7354
7355 if (VT.isScalableVector() ||
7356 useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) {
7357 switch (Opcode) {
7358 default:
7359 llvm_unreachable("Wrong instruction")::llvm::llvm_unreachable_internal("Wrong instruction", "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 7359)
;
7360 case ISD::SMAX:
7361 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
7362 /*OverrideNEON=*/true);
7363 case ISD::SMIN:
7364 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
7365 /*OverrideNEON=*/true);
7366 case ISD::UMAX:
7367 return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
7368 /*OverrideNEON=*/true);
7369 case ISD::UMIN:
7370 return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
7371 /*OverrideNEON=*/true);
7372 }
7373 }
7374
7375 SDValue Op0 = Op.getOperand(0);
7376 SDValue Op1 = Op.getOperand(1);
7377 SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
7378 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
7379}
7380
7381SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
7382 SelectionDAG &DAG) const {
7383 EVT VT = Op.getValueType();
7384
7385 if (VT.isScalableVector() ||
7386 useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
7387 return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
7388 true);
7389
7390 SDLoc DL(Op);
7391 SDValue REVB;
7392 MVT VST;
7393
7394 switch (VT.getSimpleVT().SimpleTy) {
7395 default:
7396 llvm_unreachable("Invalid type for bitreverse!")::llvm::llvm_unreachable_internal("Invalid type for bitreverse!"
, "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 7396)
;
7397
7398 case MVT::v2i32: {
7399 VST = MVT::v8i8;
7400 REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
7401
7402 break;
7403 }
7404
7405 case MVT::v4i32: {
7406 VST = MVT::v16i8;
7407 REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
7408
7409 break;
7410 }
7411
7412 case MVT::v1i64: {
7413 VST = MVT::v8i8;
7414 REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
7415
7416 break;
7417 }
7418
7419 case MVT::v2i64: {
7420 VST = MVT::v16i8;
7421 REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
7422
7423 break;
7424 }
7425 }
7426
7427 return DAG.getNode(AArch64ISD::NVCAST, DL, VT,
7428 DAG.getNode(ISD::BITREVERSE, DL, VST, REVB));
7429}
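For NEON the code above splits a wide-element bit reverse into a byte-order reverse within each element (REV32/REV64) followed by a byte-wise bit reverse. A scalar sketch for one 32-bit lane (assumes the GCC/Clang __builtin_bswap32 intrinsic; helper name is illustrative):

#include <cstdint>

static uint32_t bitreverse32Model(uint32_t X) {
  uint32_t Swapped = __builtin_bswap32(X);     // REV32 over the lane's bytes
  uint32_t R = 0;
  for (int ByteIdx = 0; ByteIdx < 4; ++ByteIdx) {
    uint8_t B = (Swapped >> (8 * ByteIdx)) & 0xff;
    uint8_t RB = 0;
    for (int Bit = 0; Bit < 8; ++Bit)
      RB |= ((B >> Bit) & 1u) << (7 - Bit);    // bit reverse within each byte
    R |= (uint32_t)RB << (8 * ByteIdx);
  }
  return R;
}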
7430
7431SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
7432
7433 if (Op.getValueType().isVector())
7434 return LowerVSETCC(Op, DAG);
7435
7436 bool IsStrict = Op->isStrictFPOpcode();
7437 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
7438 unsigned OpNo = IsStrict ? 1 : 0;
7439 SDValue Chain;
7440 if (IsStrict)
7441 Chain = Op.getOperand(0);
7442 SDValue LHS = Op.getOperand(OpNo + 0);
7443 SDValue RHS = Op.getOperand(OpNo + 1);
7444 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
7445 SDLoc dl(Op);
7446
7447 // We chose ZeroOrOneBooleanContents, so use zero and one.
7448 EVT VT = Op.getValueType();
7449 SDValue TVal = DAG.getConstant(1, dl, VT);
7450 SDValue FVal = DAG.getConstant(0, dl, VT);
7451
7452 // Handle f128 first, since one possible outcome is a normal integer
7453 // comparison which gets picked up by the next if statement.
7454 if (LHS.getValueType() == MVT::f128) {
7455 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
7456 IsSignaling);
7457
7458 // If softenSetCCOperands returned a scalar, use it.
7459 if (!RHS.getNode()) {
7460 assert(LHS.getValueType() == Op.getValueType() &&
7461 "Unexpected setcc expansion!");
7462 return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
7463 }
7464 }
7465
7466 if (LHS.getValueType().isInteger()) {
7467 SDValue CCVal;
7468 SDValue Cmp = getAArch64Cmp(
7469 LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
7470
7471 // Note that we inverted the condition above, so we reverse the order of
7472 // the true and false operands here. This will allow the setcc to be
7473 // matched to a single CSINC instruction.
7474 SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
7475 return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
7476 }
7477
7478 // Now we know we're dealing with FP values.
7479 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
7480 LHS.getValueType() == MVT::f64);
7481
7482 // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
7483 // and do the comparison.
7484 SDValue Cmp;
7485 if (IsStrict)
7486 Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
7487 else
7488 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
7489
7490 AArch64CC::CondCode CC1, CC2;
7491 changeFPCCToAArch64CC(CC, CC1, CC2);
7492 SDValue Res;
7493 if (CC2 == AArch64CC::AL) {
7494 changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
7495 CC2);
7496 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
7497
7498 // Note that we inverted the condition above, so we reverse the order of
7499 // the true and false operands here. This will allow the setcc to be
7500 // matched to a single CSINC instruction.
7501 Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
7502 } else {
7503 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
7504 // totally clean. Some of them require two CSELs to implement. As is in
7505 // this case, we emit the first CSEL and then emit a second using the output
7506 // of the first as the RHS. We're effectively OR'ing the two CC's together.
7507
7508 // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
7509 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
7510 SDValue CS1 =
7511 DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
7512
7513 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
7514 Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
7515 }
7516 return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
7517}
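The integer path above inverts the condition and swaps the zero/one operands so the CSEL can match a single CSINC. A scalar model of why that works, assuming CSINC Rd, Rn, Rm, cc computes cc ? Rn : Rm + 1 (helper names are illustrative):

static int csincModel(int Rn, int Rm, bool CondHolds) {
  return CondHolds ? Rn : Rm + 1;   // CSINC semantics
}

static int setccModel(bool Cond) {
  // "Cond ? 1 : 0" expressed with the inverted condition and swapped operands:
  return csincModel(/*Rn=*/0, /*Rm=*/0, /*CondHolds=*/!Cond);
}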
7518
7519SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
7520 SDValue RHS, SDValue TVal,
7521 SDValue FVal, const SDLoc &dl,
7522 SelectionDAG &DAG) const {
7523 // Handle f128 first, because it will result in a comparison of some RTLIB
7524 // call result against zero.
7525 if (LHS.getValueType() == MVT::f128) {
7526 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
7527
7528 // If softenSetCCOperands returned a scalar, we need to compare the result
7529 // against zero to select between true and false values.
7530 if (!RHS.getNode()) {
7531 RHS = DAG.getConstant(0, dl, LHS.getValueType());
7532 CC = ISD::SETNE;
7533 }
7534 }
7535
7536 // Also handle f16, for which we need to do a f32 comparison.
7537 if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
7538 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
7539 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
7540 }
7541
7542 // Next, handle integers.
7543 if (LHS.getValueType().isInteger()) {
7544 assert((LHS.getValueType() == RHS.getValueType()) &&
7545 (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
7546
7547 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
7548 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
7549 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
7550 // Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
7551 // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
7552 // supported types.
7553 if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal &&
7554 CTVal->isOne() && CFVal->isAllOnes() &&
7555 LHS.getValueType() == TVal.getValueType()) {
7556 EVT VT = LHS.getValueType();
7557 SDValue Shift =
7558 DAG.getNode(ISD::SRA, dl, VT, LHS,
7559 DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
7560 return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
7561 }
7562
7563 unsigned Opcode = AArch64ISD::CSEL;
7564
7565 // If both the TVal and the FVal are constants, see if we can swap them in
7566 // order to form a CSINV or CSINC out of them.
7567 if (CTVal && CFVal && CTVal->isAllOnes() && CFVal->isZero()) {
7568 std::swap(TVal, FVal);
7569 std::swap(CTVal, CFVal);
7570 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7571 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) {
7572 std::swap(TVal, FVal);
7573 std::swap(CTVal, CFVal);
7574 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7575 } else if (TVal.getOpcode() == ISD::XOR) {
7576 // If TVal is a NOT we want to swap TVal and FVal so that we can match
7577 // with a CSINV rather than a CSEL.
7578 if (isAllOnesConstant(TVal.getOperand(1))) {
7579 std::swap(TVal, FVal);
7580 std::swap(CTVal, CFVal);
7581 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7582 }
7583 } else if (TVal.getOpcode() == ISD::SUB) {
7584 // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
7585 // that we can match with a CSNEG rather than a CSEL.
7586 if (isNullConstant(TVal.getOperand(0))) {
7587 std::swap(TVal, FVal);
7588 std::swap(CTVal, CFVal);
7589 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7590 }
7591 } else if (CTVal && CFVal) {
7592 const int64_t TrueVal = CTVal->getSExtValue();
7593 const int64_t FalseVal = CFVal->getSExtValue();
7594 bool Swap = false;
7595
7596 // If both TVal and FVal are constants, see if FVal is the
7597 // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
7598 // instead of a CSEL in that case.
7599 if (TrueVal == ~FalseVal) {
7600 Opcode = AArch64ISD::CSINV;
7601 } else if (FalseVal > std::numeric_limits<int64_t>::min() &&
7602 TrueVal == -FalseVal) {
7603 Opcode = AArch64ISD::CSNEG;
7604 } else if (TVal.getValueType() == MVT::i32) {
7605 // If our operands are only 32-bit wide, make sure we use 32-bit
7606 // arithmetic for the check whether we can use CSINC. This ensures that
7607 // the addition in the check will wrap around properly in case there is
7608 // an overflow (which would not be the case if we do the check with
7609 // 64-bit arithmetic).
7610 const uint32_t TrueVal32 = CTVal->getZExtValue();
7611 const uint32_t FalseVal32 = CFVal->getZExtValue();
7612
7613 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
7614 Opcode = AArch64ISD::CSINC;
7615
7616 if (TrueVal32 > FalseVal32) {
7617 Swap = true;
7618 }
7619 }
7620 // 64-bit check whether we can use CSINC.
7621 } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
7622 Opcode = AArch64ISD::CSINC;
7623
7624 if (TrueVal > FalseVal) {
7625 Swap = true;
7626 }
7627 }
7628
7629 // Swap TVal and FVal if necessary.
7630 if (Swap) {
7631 std::swap(TVal, FVal);
7632 std::swap(CTVal, CFVal);
7633 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7634 }
7635
7636 if (Opcode != AArch64ISD::CSEL) {
7637 // Drop FVal since we can get its value by simply inverting/negating
7638 // TVal.
7639 FVal = TVal;
7640 }
7641 }
7642
7643 // Avoid materializing a constant when possible by reusing a known value in
7644 // a register. However, don't perform this optimization if the known value
7645 // is one, zero or negative one in the case of a CSEL. We can always
7646 // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
7647 // FVal, respectively.
7648 ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
7649 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
7650 !RHSVal->isZero() && !RHSVal->isAllOnes()) {
7651 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
7652 // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
7653 // "a != C ? x : a" to avoid materializing C.
7654 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
7655 TVal = LHS;
7656 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
7657 FVal = LHS;
7658 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
7659 assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
7660 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
7661 // avoid materializing C.
7662 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
7663 if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
7664 Opcode = AArch64ISD::CSINV;
7665 TVal = LHS;
7666 FVal = DAG.getConstant(0, dl, FVal.getValueType());
7667 }
7668 }
7669
7670 SDValue CCVal;
7671 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
7672 EVT VT = TVal.getValueType();
7673 return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
7674 }
7675
7676 // Now we know we're dealing with FP values.
7677 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
7678 LHS.getValueType() == MVT::f64);
7679 assert(LHS.getValueType() == RHS.getValueType());
7680 EVT VT = TVal.getValueType();
7681 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
7682
7683 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
7684 // clean. Some of them require two CSELs to implement.
7685 AArch64CC::CondCode CC1, CC2;
7686 changeFPCCToAArch64CC(CC, CC1, CC2);
7687
7688 if (DAG.getTarget().Options.UnsafeFPMath) {
7689 // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
7690 // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
7691 ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
7692 if (RHSVal && RHSVal->isZero()) {
7693 ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
7694 ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
7695
7696 if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
7697 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
7698 TVal = LHS;
7699 else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
7700 CFVal && CFVal->isZero() &&
7701 FVal.getValueType() == LHS.getValueType())
7702 FVal = LHS;
7703 }
7704 }
7705
7706 // Emit first, and possibly only, CSEL.
7707 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
7708 SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
7709
7710 // If we need a second CSEL, emit it, using the output of the first as the
7711 // RHS. We're effectively OR'ing the two CC's together.
7712 if (CC2 != AArch64CC::AL) {
7713 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
7714 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
7715 }
7716
7717 // Otherwise, return the output of the first CSEL.
7718 return CS1;
7719}
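The sign-pattern transform above turns (SELECT_CC setgt, lhs, -1, 1, -1) into (OR (ASR lhs, N-1), 1). A scalar sketch for i32, assuming >> on negative values is an arithmetic shift (true on common implementations):

#include <cstdint>

static int32_t signSelectModel(int32_t X) {
  // X > -1 ? 1 : -1  ==>  (X >> 31) | 1
  return (X >> 31) | 1;   // 0|1 = 1 when X >= 0, -1|1 = -1 when X < 0
}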
7720
7721SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
7722 SelectionDAG &DAG) const {
7723 EVT Ty = Op.getValueType();
7724 auto Idx = Op.getConstantOperandAPInt(2);
7725
7726 // This will select to an EXT instruction, which has a maximum immediate
7727 // value of 255, hence 2048-bits is the maximum value we can lower.
7728 if (Idx.sge(-1) && Idx.slt(2048 / Ty.getVectorElementType().getSizeInBits()))
7729 return Op;
7730
7731 return SDValue();
7732}
7733
7734SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
7735 SelectionDAG &DAG) const {
7736 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7737 SDValue LHS = Op.getOperand(0);
7738 SDValue RHS = Op.getOperand(1);
7739 SDValue TVal = Op.getOperand(2);
7740 SDValue FVal = Op.getOperand(3);
7741 SDLoc DL(Op);
7742 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
7743}
7744
7745SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
7746 SelectionDAG &DAG) const {
7747 SDValue CCVal = Op->getOperand(0);
7748 SDValue TVal = Op->getOperand(1);
7749 SDValue FVal = Op->getOperand(2);
7750 SDLoc DL(Op);
7751
7752 EVT Ty = Op.getValueType();
7753 if (Ty.isScalableVector()) {
7754 SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);
7755 MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount());
7756 SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);
7757 return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
7758 }
7759
7760 if (useSVEForFixedLengthVectorVT(Ty)) {
7761 // FIXME: Ideally this would be the same as above using i1 types, however
7762 // for the moment we can't deal with fixed i1 vector types properly, so
7763 // instead extend the predicate to a result type sized integer vector.
7764 MVT SplatValVT = MVT::getIntegerVT(Ty.getScalarSizeInBits());
7765 MVT PredVT = MVT::getVectorVT(SplatValVT, Ty.getVectorElementCount());
7766 SDValue SplatVal = DAG.getSExtOrTrunc(CCVal, DL, SplatValVT);
7767 SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, SplatVal);
7768 return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
7769 }
7770
7771 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
7772 // instruction.
7773 if (ISD::isOverflowIntrOpRes(CCVal)) {
7774 // Only lower legal XALUO ops.
7775 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
7776 return SDValue();
7777
7778 AArch64CC::CondCode OFCC;
7779 SDValue Value, Overflow;
7780 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
7781 SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
7782
7783 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
7784 CCVal, Overflow);
7785 }
7786
7787 // Lower it the same way as we would lower a SELECT_CC node.
7788 ISD::CondCode CC;
7789 SDValue LHS, RHS;
7790 if (CCVal.getOpcode() == ISD::SETCC) {
7791 LHS = CCVal.getOperand(0);
7792 RHS = CCVal.getOperand(1);
7793 CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
7794 } else {
7795 LHS = CCVal;
7796 RHS = DAG.getConstant(0, DL, CCVal.getValueType());
7797 CC = ISD::SETNE;
7798 }
7799 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
7800}
7801
7802SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
7803 SelectionDAG &DAG) const {
7804 // Jump table entries as PC relative offsets. No additional tweaking
7805 // is necessary here. Just get the address of the jump table.
7806 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
7807
7808 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
7809 !Subtarget->isTargetMachO()) {
7810 return getAddrLarge(JT, DAG);
7811 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
7812 return getAddrTiny(JT, DAG);
7813 }
7814 return getAddr(JT, DAG);
7815}
7816
7817SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
7818 SelectionDAG &DAG) const {
7819 // Jump table entries as PC relative offsets. No additional tweaking
7820 // is necessary here. Just get the address of the jump table.
7821 SDLoc DL(Op);
7822 SDValue JT = Op.getOperand(1);
7823 SDValue Entry = Op.getOperand(2);
7824 int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
7825
7826 auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
7827 AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
7828
7829 SDNode *Dest =
7830 DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
7831 Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
7832 return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
7833 SDValue(Dest, 0));
7834}
7835
7836SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
7837 SelectionDAG &DAG) const {
7838 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
7839
7840 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
7841 // Use the GOT for the large code model on iOS.
7842 if (Subtarget->isTargetMachO()) {
7843 return getGOT(CP, DAG);
7844 }
7845 return getAddrLarge(CP, DAG);
7846 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
7847 return getAddrTiny(CP, DAG);
7848 } else {
7849 return getAddr(CP, DAG);
7850 }
7851}
7852
7853SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
7854 SelectionDAG &DAG) const {
7855 BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
7856 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
7857 !Subtarget->isTargetMachO()) {
7858 return getAddrLarge(BA, DAG);
7859 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
7860 return getAddrTiny(BA, DAG);
7861 }
7862 return getAddr(BA, DAG);
7863}
7864
7865SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
7866 SelectionDAG &DAG) const {
7867 AArch64FunctionInfo *FuncInfo =
7868 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
7869
7870 SDLoc DL(Op);
7871 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
7872 getPointerTy(DAG.getDataLayout()));
7873 FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
7874 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7875 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
7876 MachinePointerInfo(SV));
7877}
7878
7879SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
7880 SelectionDAG &DAG) const {
7881 AArch64FunctionInfo *FuncInfo =
7882 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
7883
7884 SDLoc DL(Op);
7885 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
7886 ? FuncInfo->getVarArgsGPRIndex()
7887 : FuncInfo->getVarArgsStackIndex(),
7888 getPointerTy(DAG.getDataLayout()));
7889 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7890 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
7891 MachinePointerInfo(SV));
7892}
7893
7894SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
7895 SelectionDAG &DAG) const {
7896 // The layout of the va_list struct is specified in the AArch64 Procedure Call
7897 // Standard, section B.3.
7898 MachineFunction &MF = DAG.getMachineFunction();
7899 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
7900 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
7901 auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
7902 auto PtrVT = getPointerTy(DAG.getDataLayout());
7903 SDLoc DL(Op);
7904
7905 SDValue Chain = Op.getOperand(0);
7906 SDValue VAList = Op.getOperand(1);
7907 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7908 SmallVector<SDValue, 4> MemOps;
7909
7910 // void *__stack at offset 0
7911 unsigned Offset = 0;
7912 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
7913 Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT);
7914 MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
7915 MachinePointerInfo(SV), Align(PtrSize)));
7916
7917 // void *__gr_top at offset 8 (4 on ILP32)
7918 Offset += PtrSize;
7919 int GPRSize = FuncInfo->getVarArgsGPRSize();
7920 if (GPRSize > 0) {
7921 SDValue GRTop, GRTopAddr;
7922
7923 GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7924 DAG.getConstant(Offset, DL, PtrVT));
7925
7926 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
7927 GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
7928 DAG.getConstant(GPRSize, DL, PtrVT));
7929 GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT);
7930
7931 MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
7932 MachinePointerInfo(SV, Offset),
7933 Align(PtrSize)));
7934 }
7935
7936 // void *__vr_top at offset 16 (8 on ILP32)
7937 Offset += PtrSize;
7938 int FPRSize = FuncInfo->getVarArgsFPRSize();
7939 if (FPRSize > 0) {
7940 SDValue VRTop, VRTopAddr;
7941 VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7942 DAG.getConstant(Offset, DL, PtrVT));
7943
7944 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
7945 VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
7946 DAG.getConstant(FPRSize, DL, PtrVT));
7947 VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT);
7948
7949 MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
7950 MachinePointerInfo(SV, Offset),
7951 Align(PtrSize)));
7952 }
7953
7954 // int __gr_offs at offset 24 (12 on ILP32)
7955 Offset += PtrSize;
7956 SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7957 DAG.getConstant(Offset, DL, PtrVT));
7958 MemOps.push_back(
7959 DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
7960 GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
7961
7962 // int __vr_offs at offset 28 (16 on ILP32)
7963 Offset += 4;
7964 SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7965 DAG.getConstant(Offset, DL, PtrVT));
7966 MemOps.push_back(
7967 DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
7968 VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
7969
7970 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
7971}
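The stores above populate the AAPCS64 va_list described in the Procedure Call Standard, section B.3. For reference, the equivalent C view on LP64, matching the offsets 0/8/16/24/28 used in the code:

struct AAPCS64VaList {
  void *__stack;   // offset 0:  next stacked argument
  void *__gr_top;  // offset 8:  end of the saved general-purpose register area
  void *__vr_top;  // offset 16: end of the saved FP/SIMD register area
  int __gr_offs;   // offset 24: negative offset from __gr_top to the next GPR slot
  int __vr_offs;   // offset 28: negative offset from __vr_top to the next FP/SIMD slot
};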
7972
7973SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
7974 SelectionDAG &DAG) const {
7975 MachineFunction &MF = DAG.getMachineFunction();
7976
7977 if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
7978 return LowerWin64_VASTART(Op, DAG);
7979 else if (Subtarget->isTargetDarwin())
7980 return LowerDarwin_VASTART(Op, DAG);
7981 else
7982 return LowerAAPCS_VASTART(Op, DAG);
7983}
7984
7985SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
7986 SelectionDAG &DAG) const {
7987 // AAPCS has three pointers and two ints (= 32 bytes), Darwin has a single
7988 // pointer.
7989 SDLoc DL(Op);
7990 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
7991 unsigned VaListSize =
7992 (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
7993 ? PtrSize
7994 : Subtarget->isTargetILP32() ? 20 : 32;
7995 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
7996 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
7997
7998 return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
7999 DAG.getConstant(VaListSize, DL, MVT::i32),
8000 Align(PtrSize), false, false, false,
8001 MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
8002}
8003
8004SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
8005 assert(Subtarget->isTargetDarwin() &&
8006 "automatic va_arg instruction only works on Darwin");
8007
8008 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
8009 EVT VT = Op.getValueType();
8010 SDLoc DL(Op);
8011 SDValue Chain = Op.getOperand(0);
8012 SDValue Addr = Op.getOperand(1);
8013 MaybeAlign Align(Op.getConstantOperandVal(3));
8014 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
8015 auto PtrVT = getPointerTy(DAG.getDataLayout());
8016 auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
8017 SDValue VAList =
8018 DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
8019 Chain = VAList.getValue(1);
8020 VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
8021
8022 if (VT.isScalableVector())
8023 report_fatal_error("Passing SVE types to variadic functions is "
8024 "currently not supported");
8025
8026 if (Align && *Align > MinSlotSize) {
8027 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
8028 DAG.getConstant(Align->value() - 1, DL, PtrVT));
8029 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
8030 DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
8031 }
8032
8033 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
8034 unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
8035
8036 // Scalar integer and FP values smaller than 64 bits are implicitly extended
8037 // up to 64 bits. At the very least, we have to increase the striding of the
8038 // vaargs list to match this, and for FP values we need to introduce
8039 // FP_ROUND nodes as well.
8040 if (VT.isInteger() && !VT.isVector())
8041 ArgSize = std::max(ArgSize, MinSlotSize);
8042 bool NeedFPTrunc = false;
8043 if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
8044 ArgSize = 8;
8045 NeedFPTrunc = true;
8046 }
8047
8048 // Increment the pointer, VAList, to the next vaarg
8049 SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
8050 DAG.getConstant(ArgSize, DL, PtrVT));
8051 VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);
8052
8053 // Store the incremented VAList to the legalized pointer
8054 SDValue APStore =
8055 DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
8056
8057 // Load the actual argument out of the pointer VAList
8058 if (NeedFPTrunc) {
8059 // Load the value as an f64.
8060 SDValue WideFP =
8061 DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
8062 // Round the value down to an f32.
8063 SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
8064 DAG.getIntPtrConstant(1, DL));
8065 SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
8066 // Merge the rounded value with the chain output of the load.
8067 return DAG.getMergeValues(Ops, DL);
8068 }
8069
8070 return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
8071}
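The over-alignment handling above rounds the va_list pointer up with an add-and-mask. A scalar sketch of that round-up, assuming Align is a power of two (helper name is illustrative):

#include <cstdint>

static uint64_t alignUpModel(uint64_t Ptr, uint64_t Align) {
  return (Ptr + Align - 1) & ~(Align - 1);   // same as masking with -(int64_t)Align
}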
8072
8073SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
8074 SelectionDAG &DAG) const {
8075 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8076 MFI.setFrameAddressIsTaken(true);
8077
8078 EVT VT = Op.getValueType();
8079 SDLoc DL(Op);
8080 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
8081 SDValue FrameAddr =
8082 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
8083 while (Depth--)
8084 FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
8085 MachinePointerInfo());
8086
8087 if (Subtarget->isTargetILP32())
8088 FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
8089 DAG.getValueType(VT));
8090
8091 return FrameAddr;
8092}
8093
8094SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
8095 SelectionDAG &DAG) const {
8096 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8097
8098 EVT VT = getPointerTy(DAG.getDataLayout());
8099 SDLoc DL(Op);
8100 int FI = MFI.CreateFixedObject(4, 0, false);
8101 return DAG.getFrameIndex(FI, VT);
8102}
8103
8104#define GET_REGISTER_MATCHER
8105#include "AArch64GenAsmMatcher.inc"
8106
8107// FIXME? Maybe this could be a TableGen attribute on some registers and
8108// this table could be generated automatically from RegInfo.
8109Register AArch64TargetLowering::
8110getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
8111 Register Reg = MatchRegisterName(RegName);
8112 if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
8113 const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
8114 unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
8115 if (!Subtarget->isXRegisterReserved(DwarfRegNum))
8116 Reg = 0;
8117 }
8118 if (Reg)
8119 return Reg;
8120 report_fatal_error(Twine("Invalid register name \""
8121 + StringRef(RegName) + "\"."));
8122}
8123
8124SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
8125 SelectionDAG &DAG) const {
8126 DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
8127
8128 EVT VT = Op.getValueType();
8129 SDLoc DL(Op);
8130
8131 SDValue FrameAddr =
8132 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
8133 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
8134
8135 return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
8136}
8137
8138SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
8139 SelectionDAG &DAG) const {
8140 MachineFunction &MF = DAG.getMachineFunction();
8141 MachineFrameInfo &MFI = MF.getFrameInfo();
8142 MFI.setReturnAddressIsTaken(true);
8143
8144 EVT VT = Op.getValueType();
8145 SDLoc DL(Op);
8146 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
8147 SDValue ReturnAddress;
8148 if (Depth) {
8149 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
8150 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
8151 ReturnAddress = DAG.getLoad(
8152 VT, DL, DAG.getEntryNode(),
8153 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo());
8154 } else {
8155 // Return LR, which contains the return address. Mark it an implicit
8156 // live-in.
8157 unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
8158 ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
8159 }
8160
8161 // The XPACLRI instruction assembles to a hint-space instruction before
8162 // Armv8.3-A, so it can be safely used on any pre-Armv8.3-A architecture.
8163 // On Armv8.3-A and onwards XPACI is available, so use
8164 // that instead.
8165 SDNode *St;
8166 if (Subtarget->hasPAuth()) {
8167 St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
8168 } else {
8169 // XPACLRI operates on LR therefore we must move the operand accordingly.
8170 SDValue Chain =
8171 DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
8172 St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
8173 }
8174 return SDValue(St, 0);
8175}
8176
8177/// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which returns two
8178/// i32 values and take a 2 x i32 value to shift plus a shift amount.
8179SDValue AArch64TargetLowering::LowerShiftParts(SDValue Op,
8180 SelectionDAG &DAG) const {
8181 SDValue Lo, Hi;
8182 expandShiftParts(Op.getNode(), Lo, Hi, DAG);
8183 return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
8184}
8185
8186bool AArch64TargetLowering::isOffsetFoldingLegal(
8187 const GlobalAddressSDNode *GA) const {
8188 // Offsets are folded in the DAG combine rather than here so that we can
8189 // intelligently choose an offset based on the uses.
8190 return false;
8191}
8192
8193bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8194 bool OptForSize) const {
8195 bool IsLegal = false;
8196 // We can materialize #0.0 as fmov $Rd, XZR for 64-bit, 32-bit cases, and
8197 // the 16-bit case when the target has full fp16 support.
8198 // FIXME: We should be able to handle f128 as well with a clever lowering.
8199 const APInt ImmInt = Imm.bitcastToAPInt();
8200 if (VT == MVT::f64)
8201 IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
8202 else if (VT == MVT::f32)
8203 IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
8204 else if (VT == MVT::f16 && Subtarget->hasFullFP16())
8205 IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
8206 // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
8207 // generate that fmov.
8208
8209 // If we cannot materialize the immediate for fmov, check if the
8210 // value can be encoded as the immediate operand of a logical instruction.
8211 // The immediate value will be created with either MOVZ, MOVN, or ORR.
8212 if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
8213 // The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
8214 // however the mov+fmov sequence is always better because of the reduced
8215 // cache pressure. The timings are still the same if you consider
8216 // movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
8217 // movw+movk is fused). So we limit ourselves to at most 2 instructions.
8218 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
8219 AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
8220 Insn);
8221 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
8222 IsLegal = Insn.size() <= Limit;
8223 }
8224
8225 LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
8226 << " imm value: "; Imm.dump(););
8227 return IsLegal;
8228}
8229
8230//===----------------------------------------------------------------------===//
8231// AArch64 Optimization Hooks
8232//===----------------------------------------------------------------------===//
8233
8234static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
8235 SDValue Operand, SelectionDAG &DAG,
8236 int &ExtraSteps) {
8237 EVT VT = Operand.getValueType();
8238 if (ST->hasNEON() &&
8239 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
8240 VT == MVT::f32 || VT == MVT::v1f32 ||
8241 VT == MVT::v2f32 || VT == MVT::v4f32)) {
8242 if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
8243 // For the reciprocal estimates, convergence is quadratic, so the number
8244 // of digits is doubled after each iteration. In ARMv8, the accuracy of
8245 // the initial estimate is 2^-8. Thus the number of extra steps to refine
8246 // the result for float (23 mantissa bits) is 2 and for double (52
8247 // mantissa bits) is 3.
8248 ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
8249
8250 return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
8251 }
8252
8253 return SDValue();
8254}
8255
8256SDValue
8257AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
8258 const DenormalMode &Mode) const {
8259 SDLoc DL(Op);
8260 EVT VT = Op.getValueType();
8261 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8262 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
8263 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
8264}
8265
8266SDValue
8267AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
8268 SelectionDAG &DAG) const {
8269 return Op;
8270}
8271
8272SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
8273 SelectionDAG &DAG, int Enabled,
8274 int &ExtraSteps,
8275 bool &UseOneConst,
8276 bool Reciprocal) const {
8277 if (Enabled == ReciprocalEstimate::Enabled ||
8278 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
8279 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
8280 DAG, ExtraSteps)) {
8281 SDLoc DL(Operand);
8282 EVT VT = Operand.getValueType();
8283
8284 SDNodeFlags Flags;
8285 Flags.setAllowReassociation(true);
8286
8287 // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
8288 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
8289 for (int i = ExtraSteps; i > 0; --i) {
8290 SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
8291 Flags);
8292 Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
8293 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
8294 }
8295 if (!Reciprocal)
8296 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
8297
8298 ExtraSteps = 0;
8299 return Estimate;
8300 }
8301
8302 return SDValue();
8303}
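A scalar sketch of the refinement loop above, modelling FRSQRTS(a, b) as (3 - a*b)/2; each step is the FMUL + FRSQRTS + FMUL triple emitted per iteration (names are illustrative):

static double rsqrtRefineModel(double X, double E, int ExtraSteps) {
  for (int I = 0; I < ExtraSteps; ++I) {
    double Step = (3.0 - X * (E * E)) * 0.5;   // FRSQRTS(X, E*E)
    E *= Step;                                 // next estimate of 1/sqrt(X)
  }
  return E;   // multiplied by X afterwards when a plain sqrt is wanted
}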
8304
8305SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
8306 SelectionDAG &DAG, int Enabled,
8307 int &ExtraSteps) const {
8308 if (Enabled == ReciprocalEstimate::Enabled)
8309 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
8310 DAG, ExtraSteps)) {
8311 SDLoc DL(Operand);
8312 EVT VT = Operand.getValueType();
8313
8314 SDNodeFlags Flags;
8315 Flags.setAllowReassociation(true);
8316
8317 // Newton reciprocal iteration: E * (2 - X * E)
8318 // AArch64 reciprocal iteration instruction: (2 - M * N)
8319 for (int i = ExtraSteps; i > 0; --i) {
8320 SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
8321 Estimate, Flags);
8322 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
8323 }
8324
8325 ExtraSteps = 0;
8326 return Estimate;
8327 }
8328
8329 return SDValue();
8330}
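Likewise, a scalar sketch of the loop above, modelling FRECPS(a, b) as 2 - a*b, so each Newton step computes E *= (2 - X*E) (name is illustrative):

static double recipRefineModel(double X, double E, int ExtraSteps) {
  for (int I = 0; I < ExtraSteps; ++I)
    E *= (2.0 - X * E);   // FRECPS(X, E) followed by FMUL
  return E;               // refined estimate of 1/X
}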
8331
8332//===----------------------------------------------------------------------===//
8333// AArch64 Inline Assembly Support
8334//===----------------------------------------------------------------------===//
8335
8336// Table of Constraints
8337// TODO: This is the current set of constraints supported by ARM for the
8338// compiler, not all of them may make sense.
8339//
8340// r - A general register
8341// w - An FP/SIMD register of some size in the range v0-v31
8342// x - An FP/SIMD register of some size in the range v0-v15
8343// I - Constant that can be used with an ADD instruction
8344// J - Constant that can be used with a SUB instruction
8345// K - Constant that can be used with a 32-bit logical instruction
8346// L - Constant that can be used with a 64-bit logical instruction
8347// M - Constant that can be used as a 32-bit MOV immediate
8348// N - Constant that can be used as a 64-bit MOV immediate
8349// Q - A memory reference with base register and no offset
8350// S - A symbolic address
8351// Y - Floating point constant zero
8352// Z - Integer constant zero
8353//
8354// Note that general register operands will be output using their 64-bit x
8355// register name, whatever the size of the variable, unless the asm operand
8356// is prefixed by the %w modifier. Floating-point and SIMD register operands
8357// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
8358// %q modifier.
8359const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
8360 // At this point, we have to lower this constraint to something else, so we
8361 // lower it to an "r" or "w". However, by doing this we will force the result
8362 // to be in register, while the X constraint is much more permissive.
8363 //
8364 // Although we are correct (we are free to emit anything, without
8365 // constraints), we might break use cases that would expect us to be more
8366 // efficient and emit something else.
8367 if (!Subtarget->hasFPARMv8())
8368 return "r";
8369
8370 if (ConstraintVT.isFloatingPoint())
8371 return "w";
8372
8373 if (ConstraintVT.isVector() &&
8374 (ConstraintVT.getSizeInBits() == 64 ||
8375 ConstraintVT.getSizeInBits() == 128))
8376 return "w";
8377
8378 return "r";
8379}
8380
8381enum PredicateConstraint {
8382 Upl,
8383 Upa,
8384 Invalid
8385};
8386
8387static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
8388 PredicateConstraint P = PredicateConstraint::Invalid;
8389 if (Constraint == "Upa")
8390 P = PredicateConstraint::Upa;
8391 if (Constraint == "Upl")
8392 P = PredicateConstraint::Upl;
8393 return P;
8394}
8395
8396/// getConstraintType - Given a constraint letter, return the type of
8397/// constraint it is for this target.
8398AArch64TargetLowering::ConstraintType
8399AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
8400 if (Constraint.size() == 1) {
8401 switch (Constraint[0]) {
8402 default:
8403 break;
8404 case 'x':
8405 case 'w':
8406 case 'y':
8407 return C_RegisterClass;
8408 // An address with a single base register. Due to the way we
8409 // currently handle addresses it is the same as 'r'.
8410 case 'Q':
8411 return C_Memory;
8412 case 'I':
8413 case 'J':
8414 case 'K':
8415 case 'L':
8416 case 'M':
8417 case 'N':
8418 case 'Y':
8419 case 'Z':
8420 return C_Immediate;
8421 case 'z':
8422 case 'S': // A symbolic address
8423 return C_Other;
8424 }
8425 } else if (parsePredicateConstraint(Constraint) !=
8426 PredicateConstraint::Invalid)
8427 return C_RegisterClass;
8428 return TargetLowering::getConstraintType(Constraint);
8429}
8430
8431/// Examine constraint type and operand type and determine a weight value.
8432/// This object must already have been set up with the operand type
8433/// and the current alternative constraint selected.
8434TargetLowering::ConstraintWeight
8435AArch64TargetLowering::getSingleConstraintMatchWeight(
8436 AsmOperandInfo &info, const char *constraint) const {
8437 ConstraintWeight weight = CW_Invalid;
8438 Value *CallOperandVal = info.CallOperandVal;
8439 // If we don't have a value, we can't do a match,
8440 // but allow it at the lowest weight.
8441 if (!CallOperandVal)
8442 return CW_Default;
8443 Type *type = CallOperandVal->getType();
8444 // Look at the constraint type.
8445 switch (*constraint) {
8446 default:
8447 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
8448 break;
8449 case 'x':
8450 case 'w':
8451 case 'y':
8452 if (type->isFloatingPointTy() || type->isVectorTy())
8453 weight = CW_Register;
8454 break;
8455 case 'z':
8456 weight = CW_Constant;
8457 break;
8458 case 'U':
8459 if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid)
8460 weight = CW_Register;
8461 break;
8462 }
8463 return weight;
8464}
8465
8466std::pair<unsigned, const TargetRegisterClass *>
8467AArch64TargetLowering::getRegForInlineAsmConstraint(
8468 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8469 if (Constraint.size() == 1) {
8470 switch (Constraint[0]) {
8471 case 'r':
8472 if (VT.isScalableVector())
8473 return std::make_pair(0U, nullptr);
8474 if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
8475 return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
8476 if (VT.getFixedSizeInBits() == 64)
8477 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
8478 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
8479 case 'w': {
8480 if (!Subtarget->hasFPARMv8())
8481 break;
8482 if (VT.isScalableVector()) {
8483 if (VT.getVectorElementType() != MVT::i1)
8484 return std::make_pair(0U, &AArch64::ZPRRegClass);
8485 return std::make_pair(0U, nullptr);
8486 }
8487 uint64_t VTSize = VT.getFixedSizeInBits();
8488 if (VTSize == 16)
8489 return std::make_pair(0U, &AArch64::FPR16RegClass);
8490 if (VTSize == 32)
8491 return std::make_pair(0U, &AArch64::FPR32RegClass);
8492 if (VTSize == 64)
8493 return std::make_pair(0U, &AArch64::FPR64RegClass);
8494 if (VTSize == 128)
8495 return std::make_pair(0U, &AArch64::FPR128RegClass);
8496 break;
8497 }
8498 // The instructions that this constraint is designed for can
8499 // only take 128-bit registers so just use that regclass.
8500 case 'x':
8501 if (!Subtarget->hasFPARMv8())
8502 break;
8503 if (VT.isScalableVector())
8504 return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
8505 if (VT.getSizeInBits() == 128)
8506 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
8507 break;
8508 case 'y':
8509 if (!Subtarget->hasFPARMv8())
8510 break;
8511 if (VT.isScalableVector())
8512 return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
8513 break;
8514 }
8515 } else {
8516 PredicateConstraint PC = parsePredicateConstraint(Constraint);
8517 if (PC != PredicateConstraint::Invalid) {
8518 if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
8519 return std::make_pair(0U, nullptr);
8520 bool restricted = (PC == PredicateConstraint::Upl);
8521 return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
8522 : std::make_pair(0U, &AArch64::PPRRegClass);
8523 }
8524 }
8525 if (StringRef("{cc}").equals_insensitive(Constraint))
8526 return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
8527
8528 // Use the default implementation in TargetLowering to convert the register
8529 // constraint into a member of a register class.
8530 std::pair<unsigned, const TargetRegisterClass *> Res;
8531 Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8532
8533 // Not found as a standard register?
8534 if (!Res.second) {
8535 unsigned Size = Constraint.size();
8536 if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
8537 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
8538 int RegNo;
8539 bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
8540 if (!Failed && RegNo >= 0 && RegNo <= 31) {
8541 // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
8542 // By default we'll emit v0-v31 for this unless there's a modifier where
8543 // we'll emit the correct register as well.
8544 if (VT != MVT::Other && VT.getSizeInBits() == 64) {
8545 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
8546 Res.second = &AArch64::FPR64RegClass;
8547 } else {
8548 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
8549 Res.second = &AArch64::FPR128RegClass;
8550 }
8551 }
8552 }
8553 }
8554
8555 if (Res.second && !Subtarget->hasFPARMv8() &&
8556 !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
8557 !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
8558 return std::make_pair(0U, nullptr);
8559
8560 return Res;
8561}
8562
8563EVT AArch64TargetLowering::getAsmOperandValueType(const DataLayout &DL,
8564 llvm::Type *Ty,
8565 bool AllowUnknown) const {
8566 if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
8567 return EVT(MVT::i64x8);
8568
8569 return TargetLowering::getAsmOperandValueType(DL, Ty, AllowUnknown);
8570}
8571
8572/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
8573/// vector. If it is invalid, don't add anything to Ops.
8574void AArch64TargetLowering::LowerAsmOperandForConstraint(
8575 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
8576 SelectionDAG &DAG) const {
8577 SDValue Result;
8578
8579 // Currently only support length 1 constraints.
8580 if (Constraint.length() != 1)
8581 return;
8582
8583 char ConstraintLetter = Constraint[0];
8584 switch (ConstraintLetter) {
8585 default:
8586 break;
8587
8588 // This set of constraints deals with valid constants for various instructions.
8589 // Validate and return a target constant for them if we can.
8590 case 'z': {
8591 // 'z' maps to xzr or wzr so it needs an input of 0.
8592 if (!isNullConstant(Op))
8593 return;
8594
8595 if (Op.getValueType() == MVT::i64)
8596 Result = DAG.getRegister(AArch64::XZR, MVT::i64);
8597 else
8598 Result = DAG.getRegister(AArch64::WZR, MVT::i32);
8599 break;
8600 }
8601 case 'S': {
8602 // An absolute symbolic address or label reference.
8603 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
8604 Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
8605 GA->getValueType(0));
8606 } else if (const BlockAddressSDNode *BA =
8607 dyn_cast<BlockAddressSDNode>(Op)) {
8608 Result =
8609 DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
8610 } else
8611 return;
8612 break;
8613 }
8614
8615 case 'I':
8616 case 'J':
8617 case 'K':
8618 case 'L':
8619 case 'M':
8620 case 'N':
8621 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
8622 if (!C)
8623 return;
8624
8625 // Grab the value and do some validation.
8626 uint64_t CVal = C->getZExtValue();
8627 switch (ConstraintLetter) {
8628 // The I constraint applies only to simple ADD or SUB immediate operands:
8629 // i.e. 0 to 4095 with optional shift by 12
8630 // The J constraint applies only to ADD or SUB immediates that would be
8631 // valid when negated, i.e. if [an add pattern] were to be output as a SUB
8632 // instruction [or vice versa], in other words -1 to -4095 with optional
8633 // left shift by 12.
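 // Illustrative values (not from the original source): 0, 4095 and
 // (4095 << 12) all satisfy 'I', while -1 and -(4095 << 12) satisfy 'J'
 // because their negations are valid ADD immediates.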
8634 case 'I':
8635 if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
8636 break;
8637 return;
8638 case 'J': {
8639 uint64_t NVal = -C->getSExtValue();
8640 if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
8641 CVal = C->getSExtValue();
8642 break;
8643 }
8644 return;
8645 }
8646 // The K and L constraints apply *only* to logical immediates, including
8647 // what used to be the MOVI alias for ORR (though the MOVI alias has now
8648 // been removed and MOV should be used). So these constraints have to
8649 // distinguish between bit patterns that are valid 32-bit or 64-bit
8650 // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
8651 // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
8652 // versa.
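 // Illustrative values: 0xaaaaaaaa satisfies 'K' and 0xaaaaaaaaaaaaaaaa
 // satisfies 'L', while 0 and all-ones bit patterns are not encodable as
 // logical immediates and so satisfy neither constraint.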
8653 case 'K':
8654 if (AArch64_AM::isLogicalImmediate(CVal, 32))
8655 break;
8656 return;
8657 case 'L':
8658 if (AArch64_AM::isLogicalImmediate(CVal, 64))
8659 break;
8660 return;
8661 // The M and N constraints are a superset of K and L respectively, for use
8662 // with the MOV (immediate) alias. As well as the logical immediates they
8663 // also match 32 or 64-bit immediates that can be loaded either using a
8664 // *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
8665 // (M) or 64-bit 0x1234000000000000 (N) etc.
8666 // As a note, some of this code is liberally stolen from the asm parser.
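 // Illustrative mappings for the examples above: 0x12340000 and 0x00001234
 // each need a single 32-bit MOVZ, 0xffffedca a single MOVN (of 0x1235), and
 // 0x1234000000000000 a single 64-bit MOVZ with a shift of 48.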
8667 case 'M': {
8668 if (!isUInt<32>(CVal))
8669 return;
8670 if (AArch64_AM::isLogicalImmediate(CVal, 32))
8671 break;
8672 if ((CVal & 0xFFFF) == CVal)
8673 break;
8674 if ((CVal & 0xFFFF0000ULL) == CVal)
8675 break;
8676 uint64_t NCVal = ~(uint32_t)CVal;
8677 if ((NCVal & 0xFFFFULL) == NCVal)
8678 break;
8679 if ((NCVal & 0xFFFF0000ULL) == NCVal)
8680 break;
8681 return;
8682 }
8683 case 'N': {
8684 if (AArch64_AM::isLogicalImmediate(CVal, 64))
8685 break;
8686 if ((CVal & 0xFFFFULL) == CVal)
8687 break;
8688 if ((CVal & 0xFFFF0000ULL) == CVal)
8689 break;
8690 if ((CVal & 0xFFFF00000000ULL) == CVal)
8691 break;
8692 if ((CVal & 0xFFFF000000000000ULL) == CVal)
8693 break;
8694 uint64_t NCVal = ~CVal;
8695 if ((NCVal & 0xFFFFULL) == NCVal)
8696 break;
8697 if ((NCVal & 0xFFFF0000ULL) == NCVal)
8698 break;
8699 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
8700 break;
8701 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
8702 break;
8703 return;
8704 }
8705 default:
8706 return;
8707 }
8708
8709 // All assembler immediates are 64-bit integers.
8710 Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
8711 break;
8712 }
8713
8714 if (Result.getNode()) {
8715 Ops.push_back(Result);
8716 return;
8717 }
8718
8719 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8720}
8721
8722//===----------------------------------------------------------------------===//
8723// AArch64 Advanced SIMD Support
8724//===----------------------------------------------------------------------===//
8725
8726/// WidenVector - Given a value in the V64 register class, produce the
8727/// equivalent value in the V128 register class.
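/// For example, a v2f32 value becomes the low half of a v4f32, with the upper
/// lanes left undefined.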
8728static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
8729 EVT VT = V64Reg.getValueType();
8730 unsigned NarrowSize = VT.getVectorNumElements();
8731 MVT EltTy = VT.getVectorElementType().getSimpleVT();
8732 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
8733 SDLoc DL(V64Reg);
8734
8735 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
8736 V64Reg, DAG.getConstant(0, DL, MVT::i64));
8737}
8738
8739/// getExtFactor - Determine the adjustment factor for the position when
8740/// generating an "extract from vector registers" instruction.
8741static unsigned getExtFactor(SDValue &V) {
8742 EVT EltType = V.getValueType().getVectorElementType();
8743 return EltType.getSizeInBits() / 8;
8744}
8745
8746/// NarrowVector - Given a value in the V128 register class, produce the
8747/// equivalent value in the V64 register class.
8748static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
8749 EVT VT = V128Reg.getValueType();
8750 unsigned WideSize = VT.getVectorNumElements();
8751 MVT EltTy = VT.getVectorElementType().getSimpleVT();
8752 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
8753 SDLoc DL(V128Reg);
8754
8755 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
8756}
8757
8758// Gather data to see if the operation can be modelled as a
8759// shuffle in combination with VEXTs.
8760SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
8761 SelectionDAG &DAG) const {
8762 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8763 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
8764 SDLoc dl(Op);
8765 EVT VT = Op.getValueType();
8766 assert(!VT.isScalableVector() &&
8767 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
8768 unsigned NumElts = VT.getVectorNumElements();
8769
8770 struct ShuffleSourceInfo {
8771 SDValue Vec;
8772 unsigned MinElt;
8773 unsigned MaxElt;
8774
8775 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8776 // be compatible with the shuffle we intend to construct. As a result
8777 // ShuffleVec will be some sliding window into the original Vec.
8778 SDValue ShuffleVec;
8779
8780 // Code should guarantee that element i in Vec starts at element "WindowBase
8781 // + i * WindowScale in ShuffleVec".
8782 int WindowBase;
8783 int WindowScale;
8784
8785 ShuffleSourceInfo(SDValue Vec)
8786 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
8787 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
8788
8789 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8790 };
8791
8792 // First gather all vectors used as an immediate source for this BUILD_VECTOR
8793 // node.
8794 SmallVector<ShuffleSourceInfo, 2> Sources;
8795 for (unsigned i = 0; i < NumElts; ++i) {
8796 SDValue V = Op.getOperand(i);
8797 if (V.isUndef())
8798 continue;
8799 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
8800 !isa<ConstantSDNode>(V.getOperand(1))) {
8801 LLVM_DEBUG(
8802 dbgs() << "Reshuffle failed: "
8803 "a shuffle can only come from building a vector from "
8804 "various elements of other vectors, provided their "
8805 "indices are constant\n");
8806 return SDValue();
8807 }
8808
8809 // Add this element source to the list if it's not already there.
8810 SDValue SourceVec = V.getOperand(0);
8811 auto Source = find(Sources, SourceVec);
8812 if (Source == Sources.end())
8813 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8814
8815 // Update the minimum and maximum lane number seen.
8816 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
8817 Source->MinElt = std::min(Source->MinElt, EltNo);
8818 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8819 }
8820
8821 if (Sources.size() > 2) {
8822 LLVM_DEBUG(
8823 dbgs() << "Reshuffle failed: currently only do something sane when at "
8824 "most two source vectors are involved\n");
8825 return SDValue();
8826 }
8827
8828 // Find out the smallest element size among result and two sources, and use
8829 // it as element size to build the shuffle_vector.
8830 EVT SmallestEltTy = VT.getVectorElementType();
8831 for (auto &Source : Sources) {
8832 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8833 if (SrcEltTy.bitsLT(SmallestEltTy)) {
8834 SmallestEltTy = SrcEltTy;
8835 }
8836 }
8837 unsigned ResMultiplier =
8838 VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits();
8839 uint64_t VTSize = VT.getFixedSizeInBits();
8840 NumElts = VTSize / SmallestEltTy.getFixedSizeInBits();
8841 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8842
8843 // If the source vector is too wide or too narrow, we may nevertheless be able
8844 // to construct a compatible shuffle either by concatenating it with UNDEF or
8845 // extracting a suitable range of elements.
8846 for (auto &Src : Sources) {
8847 EVT SrcVT = Src.ShuffleVec.getValueType();
8848
8849 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8850 if (SrcVTSize == VTSize)
8851 continue;
8852
8853 // This stage of the search produces a source with the same element type as
8854 // the original, but with a total width matching the BUILD_VECTOR output.
8855 EVT EltVT = SrcVT.getVectorElementType();
8856 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8857 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8858
8859 if (SrcVTSize < VTSize) {
8860 assert(2 * SrcVTSize == VTSize);
8861 // We can pad out the smaller vector for free, so if it's part of a
8862 // shuffle...
8863 Src.ShuffleVec =
8864 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8865 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8866 continue;
8867 }
8868
8869 if (SrcVTSize != 2 * VTSize) {
8870 LLVM_DEBUG(
8871 dbgs() << "Reshuffle failed: result vector too small to extract\n");
8872 return SDValue();
8873 }
8874
8875 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8876 LLVM_DEBUG(
8877 dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
8878 return SDValue();
8879 }
8880
8881 if (Src.MinElt >= NumSrcElts) {
8882 // The extraction can just take the second half
8883 Src.ShuffleVec =
8884 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8885 DAG.getConstant(NumSrcElts, dl, MVT::i64));
8886 Src.WindowBase = -NumSrcElts;
8887 } else if (Src.MaxElt < NumSrcElts) {
8888 // The extraction can just take the first half
8889 Src.ShuffleVec =
8890 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8891 DAG.getConstant(0, dl, MVT::i64));
8892 } else {
8893 // An actual VEXT is needed
8894 SDValue VEXTSrc1 =
8895 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8896 DAG.getConstant(0, dl, MVT::i64));
8897 SDValue VEXTSrc2 =
8898 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8899 DAG.getConstant(NumSrcElts, dl, MVT::i64));
8900 unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
8901
8902 if (!SrcVT.is64BitVector()) {
8903 LLVM_DEBUG(
8904 dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
8905 "for SVE vectors.");
8906 return SDValue();
8907 }
8908
8909 Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
8910 VEXTSrc2,
8911 DAG.getConstant(Imm, dl, MVT::i32));
8912 Src.WindowBase = -Src.MinElt;
8913 }
8914 }
8915
8916 // Another possible incompatibility occurs from the vector element types. We
8917 // can fix this by bitcasting the source vectors to the same type we intend
8918 // for the shuffle.
8919 for (auto &Src : Sources) {
8920 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8921 if (SrcEltTy == SmallestEltTy)
8922 continue;
8923 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8924 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
8925 Src.WindowScale =
8926 SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
8927 Src.WindowBase *= Src.WindowScale;
8928 }
8929
8930 // Final sanity check before we try to actually produce a shuffle.
8931 LLVM_DEBUG(for (auto Src
8932 : Sources)
8933 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8934
8935 // The stars all align, our next step is to produce the mask for the shuffle.
8936 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8937 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8938 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8939 SDValue Entry = Op.getOperand(i);
8940 if (Entry.isUndef())
8941 continue;
8942
8943 auto Src = find(Sources, Entry.getOperand(0));
8944 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8945
8946 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8947 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8948 // segment.
8949 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8950 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8951 VT.getScalarSizeInBits());
8952 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8953
8954 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8955 // starting at the appropriate offset.
8956 int *LaneMask = &Mask[i * ResMultiplier];
8957
8958 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8959 ExtractBase += NumElts * (Src - Sources.begin());
8960 for (int j = 0; j < LanesDefined; ++j)
8961 LaneMask[j] = ExtractBase + j;
8962 }
8963
8964 // Final check before we try to produce nonsense...
8965 if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
8966 LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
8967 return SDValue();
8968 }
8969
8970 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8971 for (unsigned i = 0; i < Sources.size(); ++i)
8972 ShuffleOps[i] = Sources[i].ShuffleVec;
8973
8974 SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8975 ShuffleOps[1], Mask);
8976 SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
8977
8978 LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
8979 dbgs() << "Reshuffle, creating node: "; V.dump(););
8980
8981 return V;
8982}
8983
8984// check if an EXT instruction can handle the shuffle mask when the
8985// vector sources of the shuffle are the same.
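// For example (illustrative), with a single v8i8 source the mask
// <3, 4, 5, 6, 7, 0, 1, 2> is accepted with Imm == 3: the indices increase by
// one and wrap around the end of the vector.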
8986static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
8987 unsigned NumElts = VT.getVectorNumElements();
8988
8989 // Assume that the first shuffle index is not UNDEF. Fail if it is.
8990 if (M[0] < 0)
8991 return false;
8992
8993 Imm = M[0];
8994
8995 // If this is a VEXT shuffle, the immediate value is the index of the first
8996 // element. The other shuffle indices must be the successive elements after
8997 // the first one.
8998 unsigned ExpectedElt = Imm;
8999 for (unsigned i = 1; i < NumElts; ++i) {
9000 // Increment the expected index. If it wraps around, just follow it
9001 // back to index zero and keep going.
9002 ++ExpectedElt;
9003 if (ExpectedElt == NumElts)
9004 ExpectedElt = 0;
9005
9006 if (M[i] < 0)
9007 continue; // ignore UNDEF indices
9008 if (ExpectedElt != static_cast<unsigned>(M[i]))
9009 return false;
9010 }
9011
9012 return true;
9013}
9014
9015/// Check if a vector shuffle corresponds to a DUP instruction with a larger
9016/// element width than the vector lane type. If that is the case the function
9017/// returns true and writes the value of the DUP instruction lane operand into
9018/// DupLaneOp
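/// For example (illustrative), for v8i16 with BlockSize == 64 the mask
/// <4, 5, 6, 7, 4, 5, 6, 7> duplicates the second 64-bit block, so DupLaneOp
/// is set to 1.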
9019static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize,
9020 unsigned &DupLaneOp) {
9021 assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
9022 "Only possible block sizes for wide DUP are: 16, 32, 64");
9023
9024 if (BlockSize <= VT.getScalarSizeInBits())
9025 return false;
9026 if (BlockSize % VT.getScalarSizeInBits() != 0)
9027 return false;
9028 if (VT.getSizeInBits() % BlockSize != 0)
9029 return false;
9030
9031 size_t SingleVecNumElements = VT.getVectorNumElements();
9032 size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits();
9033 size_t NumBlocks = VT.getSizeInBits() / BlockSize;
9034
9035 // We are looking for masks like
9036 // [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element
9037 // might be replaced by 'undefined'. BlockElts will eventually contain
9038 // lane indices of the duplicated block (i.e. [0, 1], [2, 3] and [4, 5, 6, 7]
9039 // for the above examples)
9040 SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
9041 for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
9042 for (size_t I = 0; I < NumEltsPerBlock; I++) {
9043 int Elt = M[BlockIndex * NumEltsPerBlock + I];
9044 if (Elt < 0)
9045 continue;
9046 // For now we don't support shuffles that use the second operand
9047 if ((unsigned)Elt >= SingleVecNumElements)
9048 return false;
9049 if (BlockElts[I] < 0)
9050 BlockElts[I] = Elt;
9051 else if (BlockElts[I] != Elt)
9052 return false;
9053 }
9054
9055 // We found a candidate block (possibly with some undefs). It must be a
9056 // sequence of consecutive integers starting with a value divisible by
9057 // NumEltsPerBlock with some values possibly replaced by undef-s.
9058
9059 // Find first non-undef element
9060 auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
9061 assert(FirstRealEltIter != BlockElts.end() &&
9062 "Shuffle with all-undefs must have been caught by previous cases, "
9063 "e.g. isSplat()");
9064 if (FirstRealEltIter == BlockElts.end()) {
9065 DupLaneOp = 0;
9066 return true;
9067 }
9068
9069 // Index of FirstRealElt in BlockElts
9070 size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
9071
9072 if ((unsigned)*FirstRealEltIter < FirstRealIndex)
9073 return false;
9074 // BlockElts[0] must have the following value if it isn't undef:
9075 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
9076
9077 // Check the first element
9078 if (Elt0 % NumEltsPerBlock != 0)
9079 return false;
9080 // Check that the sequence indeed consists of consecutive integers (modulo
9081 // undefs)
9082 for (size_t I = 0; I < NumEltsPerBlock; I++)
9083 if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
9084 return false;
9085
9086 DupLaneOp = Elt0 / NumEltsPerBlock;
9087 return true;
9088}
9089
9090// check if an EXT instruction can handle the shuffle mask when the
9091// vector sources of the shuffle are different.
9092static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
9093 unsigned &Imm) {
9094 // Look for the first non-undef element.
9095 const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
9096
9097 // Benefit from APInt to handle overflow when calculating the expected element.
9098 unsigned NumElts = VT.getVectorNumElements();
9099 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
9100 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
9101 // The following shuffle indices must be the successive elements after the
9102 // first real element.
9103 const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
9104 [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
9105 if (FirstWrongElt != M.end())
9106 return false;
9107
9108 // The index of an EXT is the first element if it is not UNDEF.
9109 // Watch out for the beginning UNDEFs. The EXT index should be the expected
9110 // value of the first element. E.g.
9111 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
9112 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
9113 // ExpectedElt is the last mask index plus 1.
9114 Imm = ExpectedElt.getZExtValue();
9115
9116 // There are two different cases that require reversing the input vectors.
9117 // For example, for vector <4 x i32> we have the following cases,
9118 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
9119 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
9120 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
9121 // the two input vectors to be reversed.
9122 if (Imm < NumElts)
9123 ReverseEXT = true;
9124 else
9125 Imm -= NumElts;
9126
9127 return true;
9128}
9129
9130/// isREVMask - Check if a vector shuffle corresponds to a REV
9131/// instruction with the specified blocksize. (The order of the elements
9132/// within each block of the vector is reversed.)
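/// For example (illustrative), for v8i16 with BlockSize == 32 the mask
/// <1, 0, 3, 2, 5, 4, 7, 6> matches: each pair of 16-bit lanes is swapped
/// within its 32-bit block.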
9133static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
9134 assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
9135 "Only possible block sizes for REV are: 16, 32, 64");
9136
9137 unsigned EltSz = VT.getScalarSizeInBits();
9138 if (EltSz == 64)
9139 return false;
9140
9141 unsigned NumElts = VT.getVectorNumElements();
9142 unsigned BlockElts = M[0] + 1;
9143 // If the first shuffle index is UNDEF, be optimistic.
9144 if (M[0] < 0)
9145 BlockElts = BlockSize / EltSz;
9146
9147 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
9148 return false;
9149
9150 for (unsigned i = 0; i < NumElts; ++i) {
9151 if (M[i] < 0)
9152 continue; // ignore UNDEF indices
9153 if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
9154 return false;
9155 }
9156
9157 return true;
9158}
9159
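// Illustrative masks for v4i32 (not from the original source): ZIP1/ZIP2
// correspond to <0, 4, 1, 5> / <2, 6, 3, 7>, UZP1/UZP2 to <0, 2, 4, 6> /
// <1, 3, 5, 7>, and TRN1/TRN2 to <0, 4, 2, 6> / <1, 5, 3, 7>; WhichResult
// selects between the "1" and "2" forms in the helpers below.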
9160static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9161 unsigned NumElts = VT.getVectorNumElements();
9162 if (NumElts % 2 != 0)
9163 return false;
9164 WhichResult = (M[0] == 0 ? 0 : 1);
9165 unsigned Idx = WhichResult * NumElts / 2;
9166 for (unsigned i = 0; i != NumElts; i += 2) {
9167 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
9168 (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
9169 return false;
9170 Idx += 1;
9171 }
9172
9173 return true;
9174}
9175
9176static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9177 unsigned NumElts = VT.getVectorNumElements();
9178 WhichResult = (M[0] == 0 ? 0 : 1);
9179 for (unsigned i = 0; i != NumElts; ++i) {
9180 if (M[i] < 0)
9181 continue; // ignore UNDEF indices
9182 if ((unsigned)M[i] != 2 * i + WhichResult)
9183 return false;
9184 }
9185
9186 return true;
9187}
9188
9189static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9190 unsigned NumElts = VT.getVectorNumElements();
9191 if (NumElts % 2 != 0)
9192 return false;
9193 WhichResult = (M[0] == 0 ? 0 : 1);
9194 for (unsigned i = 0; i < NumElts; i += 2) {
9195 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
9196 (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
9197 return false;
9198 }
9199 return true;
9200}
9201
9202/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
9203/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
9204/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
9205static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9206 unsigned NumElts = VT.getVectorNumElements();
9207 if (NumElts % 2 != 0)
9208 return false;
9209 WhichResult = (M[0] == 0 ? 0 : 1);
9210 unsigned Idx = WhichResult * NumElts / 2;
9211 for (unsigned i = 0; i != NumElts; i += 2) {
9212 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
9213 (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
9214 return false;
9215 Idx += 1;
9216 }
9217
9218 return true;
9219}
9220
9221/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
9222/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
9223/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
9224static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9225 unsigned Half = VT.getVectorNumElements() / 2;
9226 WhichResult = (M[0] == 0 ? 0 : 1);
9227 for (unsigned j = 0; j != 2; ++j) {
9228 unsigned Idx = WhichResult;
9229 for (unsigned i = 0; i != Half; ++i) {
9230 int MIdx = M[i + j * Half];
9231 if (MIdx >= 0 && (unsigned)MIdx != Idx)
9232 return false;
9233 Idx += 2;
9234 }
9235 }
9236
9237 return true;
9238}
9239
9240/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
9241/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
9242/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
9243static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9244 unsigned NumElts = VT.getVectorNumElements();
9245 if (NumElts % 2 != 0)
9246 return false;
9247 WhichResult = (M[0] == 0 ? 0 : 1);
9248 for (unsigned i = 0; i < NumElts; i += 2) {
9249 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
9250 (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
9251 return false;
9252 }
9253 return true;
9254}
9255
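// Illustrative example (not from the original source): with 4 input elements
// the mask <0, 1, 6, 3> matches the LHS everywhere except lane 2, which reads
// element 6 (RHS lane 2), so DstIsLeft is true and Anomaly is 2, the shape of
// a single INS.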
9256static bool isINSMask(ArrayRef<int> M, int NumInputElements,
9257 bool &DstIsLeft, int &Anomaly) {
9258 if (M.size() != static_cast<size_t>(NumInputElements))
9259 return false;
9260
9261 int NumLHSMatch = 0, NumRHSMatch = 0;
9262 int LastLHSMismatch = -1, LastRHSMismatch = -1;
9263
9264 for (int i = 0; i < NumInputElements; ++i) {
9265 if (M[i] == -1) {
9266 ++NumLHSMatch;
9267 ++NumRHSMatch;
9268 continue;
9269 }
9270
9271 if (M[i] == i)
9272 ++NumLHSMatch;
9273 else
9274 LastLHSMismatch = i;
9275
9276 if (M[i] == i + NumInputElements)
9277 ++NumRHSMatch;
9278 else
9279 LastRHSMismatch = i;
9280 }
9281
9282 if (NumLHSMatch == NumInputElements - 1) {
9283 DstIsLeft = true;
9284 Anomaly = LastLHSMismatch;
9285 return true;
9286 } else if (NumRHSMatch == NumInputElements - 1) {
9287 DstIsLeft = false;
9288 Anomaly = LastRHSMismatch;
9289 return true;
9290 }
9291
9292 return false;
9293}
9294
9295static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
9296 if (VT.getSizeInBits() != 128)
9297 return false;
9298
9299 unsigned NumElts = VT.getVectorNumElements();
9300
9301 for (int I = 0, E = NumElts / 2; I != E; I++) {
9302 if (Mask[I] != I)
9303 return false;
9304 }
9305
9306 int Offset = NumElts / 2;
9307 for (int I = NumElts / 2, E = NumElts; I != E; I++) {
9308 if (Mask[I] != I + SplitLHS * Offset)
9309 return false;
9310 }
9311
9312 return true;
9313}
9314
9315static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
9316 SDLoc DL(Op);
9317 EVT VT = Op.getValueType();
9318 SDValue V0 = Op.getOperand(0);
9319 SDValue V1 = Op.getOperand(1);
9320 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
9321
9322 if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
9323 VT.getVectorElementType() != V1.getValueType().getVectorElementType())
9324 return SDValue();
9325
9326 bool SplitV0 = V0.getValueSizeInBits() == 128;
9327
9328 if (!isConcatMask(Mask, VT, SplitV0))
9329 return SDValue();
9330
9331 EVT CastVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
9332 if (SplitV0) {
9333 V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
9334 DAG.getConstant(0, DL, MVT::i64));
9335 }
9336 if (V1.getValueSizeInBits() == 128) {
9337 V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
9338 DAG.getConstant(0, DL, MVT::i64));
9339 }
9340 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
9341}
9342
9343/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9344/// the specified operations to build the shuffle.
9345static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9346 SDValue RHS, SelectionDAG &DAG,
9347 const SDLoc &dl) {
9348 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9349 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
9350 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
9351
9352 enum {
9353 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9354 OP_VREV,
9355 OP_VDUP0,
9356 OP_VDUP1,
9357 OP_VDUP2,
9358 OP_VDUP3,
9359 OP_VEXT1,
9360 OP_VEXT2,
9361 OP_VEXT3,
9362 OP_VUZPL, // VUZP, left result
9363 OP_VUZPR, // VUZP, right result
9364 OP_VZIPL, // VZIP, left result
9365 OP_VZIPR, // VZIP, right result
9366 OP_VTRNL, // VTRN, left result
9367 OP_VTRNR // VTRN, right result
9368 };
9369
9370 if (OpNum == OP_COPY) {
9371 if (LHSID == (1 * 9 + 2) * 9 + 3)
9372 return LHS;
9373 assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
9374 return RHS;
9375 }
9376
9377 SDValue OpLHS, OpRHS;
9378 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9379 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9380 EVT VT = OpLHS.getValueType();
9381
9382 switch (OpNum) {
9383 default:
9384 llvm_unreachable("Unknown shuffle opcode!");
9385 case OP_VREV:
9386 // VREV divides the vector in half and swaps within the half.
9387 if (VT.getVectorElementType() == MVT::i32 ||
9388 VT.getVectorElementType() == MVT::f32)
9389 return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
9390 // vrev <4 x i16> -> REV32
9391 if (VT.getVectorElementType() == MVT::i16 ||
9392 VT.getVectorElementType() == MVT::f16 ||
9393 VT.getVectorElementType() == MVT::bf16)
9394 return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
9395 // vrev <4 x i8> -> REV16
9396 assert(VT.getVectorElementType() == MVT::i8);
9397 return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
9398 case OP_VDUP0:
9399 case OP_VDUP1:
9400 case OP_VDUP2:
9401 case OP_VDUP3: {
9402 EVT EltTy = VT.getVectorElementType();
9403 unsigned Opcode;
9404 if (EltTy == MVT::i8)
9405 Opcode = AArch64ISD::DUPLANE8;
9406 else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
9407 Opcode = AArch64ISD::DUPLANE16;
9408 else if (EltTy == MVT::i32 || EltTy == MVT::f32)
9409 Opcode = AArch64ISD::DUPLANE32;
9410 else if (EltTy == MVT::i64 || EltTy == MVT::f64)
9411 Opcode = AArch64ISD::DUPLANE64;
9412 else
9413 llvm_unreachable("Invalid vector element type?");
9414
9415 if (VT.getSizeInBits() == 64)
9416 OpLHS = WidenVector(OpLHS, DAG);
9417 SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
9418 return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
9419 }
9420 case OP_VEXT1:
9421 case OP_VEXT2:
9422 case OP_VEXT3: {
9423 unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
9424 return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
9425 DAG.getConstant(Imm, dl, MVT::i32));
9426 }
9427 case OP_VUZPL:
9428 return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
9429 OpRHS);
9430 case OP_VUZPR:
9431 return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
9432 OpRHS);
9433 case OP_VZIPL:
9434 return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
9435 OpRHS);
9436 case OP_VZIPR:
9437 return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
9438 OpRHS);
9439 case OP_VTRNL:
9440 return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
9441 OpRHS);
9442 case OP_VTRNR:
9443 return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
9444 OpRHS);
9445 }
9446}
9447
9448static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
9449 SelectionDAG &DAG) {
9450 // Check to see if we can use the TBL instruction.
9451 SDValue V1 = Op.getOperand(0);
9452 SDValue V2 = Op.getOperand(1);
9453 SDLoc DL(Op);
9454
9455 EVT EltVT = Op.getValueType().getVectorElementType();
9456 unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
9457
9458 SmallVector<SDValue, 8> TBLMask;
9459 for (int Val : ShuffleMask) {
9460 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
9461 unsigned Offset = Byte + Val * BytesPerElt;
9462 TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
9463 }
9464 }
9465
9466 MVT IndexVT = MVT::v8i8;
9467 unsigned IndexLen = 8;
9468 if (Op.getValueSizeInBits() == 128) {
9469 IndexVT = MVT::v16i8;
9470 IndexLen = 16;
9471 }
9472
9473 SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
9474 SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
9475
9476 SDValue Shuffle;
9477 if (V2.getNode()->isUndef()) {
9478 if (IndexLen == 8)
9479 V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
9480 Shuffle = DAG.getNode(
9481 ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
9482 DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
9483 DAG.getBuildVector(IndexVT, DL,
9484 makeArrayRef(TBLMask.data(), IndexLen)));
9485 } else {
9486 if (IndexLen == 8) {
9487 V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
9488 Shuffle = DAG.getNode(
9489 ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
9490 DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
9491 DAG.getBuildVector(IndexVT, DL,
9492 makeArrayRef(TBLMask.data(), IndexLen)));
9493 } else {
9494 // FIXME: We cannot, for the moment, emit a TBL2 instruction because we
9495 // cannot currently represent the register constraints on the input
9496 // table registers.
9497 // Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
9498 // DAG.getBuildVector(IndexVT, DL, &TBLMask[0],
9499 // IndexLen));
9500 Shuffle = DAG.getNode(
9501 ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
9502 DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
9503 V2Cst, DAG.getBuildVector(IndexVT, DL,
9504 makeArrayRef(TBLMask.data(), IndexLen)));
9505 }
9506 }
9507 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
9508}
9509
9510static unsigned getDUPLANEOp(EVT EltType) {
9511 if (EltType == MVT::i8)
9512 return AArch64ISD::DUPLANE8;
9513 if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
9514 return AArch64ISD::DUPLANE16;
9515 if (EltType == MVT::i32 || EltType == MVT::f32)
9516 return AArch64ISD::DUPLANE32;
9517 if (EltType == MVT::i64 || EltType == MVT::f64)
9518 return AArch64ISD::DUPLANE64;
9519
9520 llvm_unreachable("Invalid vector element type?");
9521}
9522
9523static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
9524 unsigned Opcode, SelectionDAG &DAG) {
9525 // Try to eliminate a bitcasted extract subvector before a DUPLANE.
9526 auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
9527 // Match: dup (bitcast (extract_subv X, C)), LaneC
9528 if (BitCast.getOpcode() != ISD::BITCAST ||
9529 BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
9530 return false;
9531
9532 // The extract index must align in the destination type. That may not
9533 // happen if the bitcast is from narrow to wide type.
9534 SDValue Extract = BitCast.getOperand(0);
9535 unsigned ExtIdx = Extract.getConstantOperandVal(1);
9536 unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
9537 unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
9538 unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
9539 if (ExtIdxInBits % CastedEltBitWidth != 0)
9540 return false;
9541
9542 // Update the lane value by offsetting with the scaled extract index.
9543 LaneC += ExtIdxInBits / CastedEltBitWidth;
9544
9545 // Determine the casted vector type of the wide vector input.
9546 // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
9547 // Examples:
9548 // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
9549 // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
9550 unsigned SrcVecNumElts =
9551 Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
9552 CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
9553 SrcVecNumElts);
9554 return true;
9555 };
9556 MVT CastVT;
9557 if (getScaledOffsetDup(V, Lane, CastVT)) {
9558 V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
9559 } else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9560 // The lane is incremented by the index of the extract.
9561 // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
9562 auto VecVT = V.getOperand(0).getValueType();
9563 if (VecVT.isFixedLengthVector() && VecVT.getFixedSizeInBits() <= 128) {
9564 Lane += V.getConstantOperandVal(1);
9565 V = V.getOperand(0);
9566 }
9567 } else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
9568 // The lane is decremented if we are splatting from the 2nd operand.
9569 // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
9570 unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
9571 Lane -= Idx * VT.getVectorNumElements() / 2;
9572 V = WidenVector(V.getOperand(Idx), DAG);
9573 } else if (VT.getSizeInBits() == 64) {
9574 // Widen the operand to 128-bit register with undef.
9575 V = WidenVector(V, DAG);
9576 }
9577 return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
9578}
9579
9580// Return true if we can get a new shuffle mask by checking the parameter mask
9581// array to test whether every two adjacent mask values are consecutive and
9582// start from an even number.
9583static bool isWideTypeMask(ArrayRef<int> M, EVT VT,
9584 SmallVectorImpl<int> &NewMask) {
9585 unsigned NumElts = VT.getVectorNumElements();
9586 if (NumElts % 2 != 0)
9587 return false;
9588
9589 NewMask.clear();
9590 for (unsigned i = 0; i < NumElts; i += 2) {
9591 int M0 = M[i];
9592 int M1 = M[i + 1];
9593
9594 // If both elements are undef, new mask is undef too.
9595 if (M0 == -1 && M1 == -1) {
9596 NewMask.push_back(-1);
9597 continue;
9598 }
9599
9600 if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
9601 NewMask.push_back(M1 / 2);
9602 continue;
9603 }
9604
9605 if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
9606 NewMask.push_back(M0 / 2);
9607 continue;
9608 }
9609
9610 NewMask.clear();
9611 return false;
9612 }
9613
9614 assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
9615 return true;
9616}
9617
9618// Try to widen element type to get a new mask value for a better permutation
9619// sequence, so that we can use NEON shuffle instructions, such as zip1/2,
9620// UZP1/2, TRN1/2, REV, INS, etc.
9621// For example:
9622// shufflevector <4 x i32> %a, <4 x i32> %b,
9623// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
9624// is equivalent to:
9625// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
9626// Finally, we can get:
9627// mov v0.d[0], v1.d[1]
9628static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
9629 SDLoc DL(Op);
9630 EVT VT = Op.getValueType();
9631 EVT ScalarVT = VT.getVectorElementType();
9632 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
9633 SDValue V0 = Op.getOperand(0);
9634 SDValue V1 = Op.getOperand(1);
9635 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
9636
9637 // If combining adjacent elements, like two i16's -> i32, two i32's -> i64 ...
9638 // We need to make sure the wider element type is legal. Thus, ElementSize
9639 // should not be larger than 32 bits, and the i1 type should also be excluded.
9640 if (ElementSize > 32 || ElementSize == 1)
9641 return SDValue();
9642
9643 SmallVector<int, 8> NewMask;
9644 if (isWideTypeMask(Mask, VT, NewMask)) {
9645 MVT NewEltVT = VT.isFloatingPoint()
9646 ? MVT::getFloatingPointVT(ElementSize * 2)
9647 : MVT::getIntegerVT(ElementSize * 2);
9648 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
9649 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
9650 V0 = DAG.getBitcast(NewVT, V0);
9651 V1 = DAG.getBitcast(NewVT, V1);
9652 return DAG.getBitcast(VT,
9653 DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
9654 }
9655 }
9656
9657 return SDValue();
9658}
9659
9660SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9661 SelectionDAG &DAG) const {
9662 SDLoc dl(Op);
9663 EVT VT = Op.getValueType();
9664
9665 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
9666
9667 if (useSVEForFixedLengthVectorVT(VT))
9668 return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
9669
9670 // Convert shuffles that are directly supported on NEON to target-specific
9671 // DAG nodes, instead of keeping them as shuffles and matching them again
9672 // during code selection. This is more efficient and avoids the possibility
9673 // of inconsistencies between legalization and selection.
9674 ArrayRef<int> ShuffleMask = SVN->getMask();
9675
9676 SDValue V1 = Op.getOperand(0);
9677 SDValue V2 = Op.getOperand(1);
9678
9679  assert(V1.getValueType() == VT && "Unexpected VECTOR_SHUFFLE type!");
9680  assert(ShuffleMask.size() == VT.getVectorNumElements() &&
9681         "Unexpected VECTOR_SHUFFLE mask size!");
9682
9683 if (SVN->isSplat()) {
9684 int Lane = SVN->getSplatIndex();
9685 // If this is undef splat, generate it via "just" vdup, if possible.
9686 if (Lane == -1)
9687 Lane = 0;
9688
9689 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
9690 return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
9691 V1.getOperand(0));
9692 // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
9693 // constant. If so, we can just reference the lane's definition directly.
9694 if (V1.getOpcode() == ISD::BUILD_VECTOR &&
9695 !isa<ConstantSDNode>(V1.getOperand(Lane)))
9696 return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
9697
9698 // Otherwise, duplicate from the lane of the input vector.
9699 unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
9700 return constructDup(V1, Lane, dl, VT, Opcode, DAG);
9701 }
9702
9703 // Check if the mask matches a DUP for a wider element
9704 for (unsigned LaneSize : {64U, 32U, 16U}) {
9705 unsigned Lane = 0;
9706 if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
9707 unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
9708 : LaneSize == 32 ? AArch64ISD::DUPLANE32
9709 : AArch64ISD::DUPLANE16;
9710 // Cast V1 to an integer vector with required lane size
9711 MVT NewEltTy = MVT::getIntegerVT(LaneSize);
9712 unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
9713 MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount);
9714 V1 = DAG.getBitcast(NewVecTy, V1);
9715 // Construct the DUP instruction
9716 V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
9717 // Cast back to the original type
9718 return DAG.getBitcast(VT, V1);
9719 }
9720 }
9721
9722 if (isREVMask(ShuffleMask, VT, 64))
9723 return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
9724 if (isREVMask(ShuffleMask, VT, 32))
9725 return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
9726 if (isREVMask(ShuffleMask, VT, 16))
9727 return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
9728
9729 if (((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) ||
9730 (VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) &&
9731 ShuffleVectorInst::isReverseMask(ShuffleMask)) {
9732 SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1);
9733 return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev,
9734 DAG.getConstant(8, dl, MVT::i32));
9735 }
9736
9737 bool ReverseEXT = false;
9738 unsigned Imm;
9739 if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
9740 if (ReverseEXT)
9741 std::swap(V1, V2);
9742 Imm *= getExtFactor(V1);
9743 return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
9744 DAG.getConstant(Imm, dl, MVT::i32));
9745 } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
9746 Imm *= getExtFactor(V1);
9747 return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
9748 DAG.getConstant(Imm, dl, MVT::i32));
9749 }
9750
9751 unsigned WhichResult;
9752 if (isZIPMask(ShuffleMask, VT, WhichResult)) {
9753 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
9754 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
9755 }
9756 if (isUZPMask(ShuffleMask, VT, WhichResult)) {
9757 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
9758 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
9759 }
9760 if (isTRNMask(ShuffleMask, VT, WhichResult)) {
9761 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
9762 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
9763 }
9764
9765 if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
9766 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
9767 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
9768 }
9769 if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
9770 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
9771 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
9772 }
9773 if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
9774 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
9775 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
9776 }
9777
9778 if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG))
9779 return Concat;
9780
9781 bool DstIsLeft;
9782 int Anomaly;
9783 int NumInputElements = V1.getValueType().getVectorNumElements();
9784 if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
9785 SDValue DstVec = DstIsLeft ? V1 : V2;
9786 SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
9787
9788 SDValue SrcVec = V1;
9789 int SrcLane = ShuffleMask[Anomaly];
9790 if (SrcLane >= NumInputElements) {
9791 SrcVec = V2;
9792 SrcLane -= VT.getVectorNumElements();
9793 }
9794 SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
9795
9796 EVT ScalarVT = VT.getVectorElementType();
9797
9798 if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger())
9799 ScalarVT = MVT::i32;
9800
9801 return DAG.getNode(
9802 ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
9803 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
9804 DstLaneV);
9805 }
9806
9807 if (SDValue NewSD = tryWidenMaskForShuffle(Op, DAG))
9808 return NewSD;
9809
9810 // If the shuffle is not directly supported and it has 4 elements, use
9811 // the PerfectShuffle-generated table to synthesize it from other shuffles.
9812 unsigned NumElts = VT.getVectorNumElements();
9813 if (NumElts == 4) {
9814 unsigned PFIndexes[4];
9815 for (unsigned i = 0; i != 4; ++i) {
9816 if (ShuffleMask[i] < 0)
9817 PFIndexes[i] = 8;
9818 else
9819 PFIndexes[i] = ShuffleMask[i];
9820 }
9821
9822 // Compute the index in the perfect shuffle table.
9823 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
9824 PFIndexes[2] * 9 + PFIndexes[3];
9825 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9826 unsigned Cost = (PFEntry >> 30);
9827
9828 if (Cost <= 4)
9829 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9830 }
9831
9832 return GenerateTBL(Op, ShuffleMask, DAG);
9833}
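
The perfect-shuffle fallback above packs the four mask entries into a base-9 table index (digits 0-7 for lanes of the two input vectors, 8 for undef) and reads the cost from the top two bits of the table entry. A minimal sketch of that packing, assuming a hypothetical helper perfectShuffleIndex and a made-up entry value (no real PerfectShuffleTable lookup is performed):

// Illustrative sketch of the base-9 perfect-shuffle index computation.
#include <array>
#include <cassert>

static unsigned perfectShuffleIndex(const std::array<int, 4> &Mask) {
  unsigned Index = 0;
  for (int M : Mask) {
    unsigned Digit = (M < 0) ? 8u : unsigned(M); // 0..7 = source lane, 8 = undef
    Index = Index * 9 + Digit;                   // base-9 packing
  }
  return Index;
}

int main() {
  // Mask <1, 3, undef, 0> maps to digits 1,3,8,0 -> 1*729 + 3*81 + 8*9 + 0.
  const std::array<int, 4> Mask = {1, 3, -1, 0};
  assert(perfectShuffleIndex(Mask) == 1044);
  // The cost of a table entry lives in its top two bits (entry is made up).
  unsigned FakeEntry = (3u << 30) | 0x12345u;
  assert((FakeEntry >> 30) == 3);
  return 0;
}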
9834
9835SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
9836 SelectionDAG &DAG) const {
9837 SDLoc dl(Op);
9838 EVT VT = Op.getValueType();
9839 EVT ElemVT = VT.getScalarType();
9840 SDValue SplatVal = Op.getOperand(0);
9841
9842 if (useSVEForFixedLengthVectorVT(VT))
9843 return LowerToScalableOp(Op, DAG);
9844
9845 // Extend input splat value where needed to fit into a GPR (32b or 64b only)
9846 // FPRs don't have this restriction.
9847 switch (ElemVT.getSimpleVT().SimpleTy) {
9848 case MVT::i1: {
9849 // The only legal i1 vectors are SVE vectors, so we can use SVE-specific
9850 // lowering code.
9851 if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
9852 if (ConstVal->isZero())
9853 return SDValue(DAG.getMachineNode(AArch64::PFALSE, dl, VT), 0);
9854 if (ConstVal->isOne())
9855 return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
9856 }
9857 // The general case of i1. There isn't any natural way to do this,
9858 // so we use some trickery with whilelo.
9859 SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
9860 SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
9861 DAG.getValueType(MVT::i1));
9862 SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl,
9863 MVT::i64);
9864 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID,
9865 DAG.getConstant(0, dl, MVT::i64), SplatVal);
9866 }
9867 case MVT::i8:
9868 case MVT::i16:
9869 case MVT::i32:
9870 SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
9871 break;
9872 case MVT::i64:
9873 SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
9874 break;
9875 case MVT::f16:
9876 case MVT::bf16:
9877 case MVT::f32:
9878 case MVT::f64:
9879 // Fine as is
9880 break;
9881 default:
9882 report_fatal_error("Unsupported SPLAT_VECTOR input operand type");
9883 }
9884
9885 return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
9886}
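
For the general i1 splat, the code above sign-extends the bit to i64 and hands it to whilelo, so a zero bound gives an all-false predicate and an all-ones bound gives an all-true one. A small sketch modelling that behaviour, assuming WHILELO's "element i is active while start + i is unsigned-less-than the bound" semantics and an arbitrary vector length of 4 (both assumptions, not taken from this file):

// Illustrative model of the whilelo trick used for splatting a general i1.
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<bool> whilelo(uint64_t Start, uint64_t Bound, unsigned VL) {
  std::vector<bool> P(VL);
  for (unsigned i = 0; i < VL; ++i)
    P[i] = (Start + i) < Bound;   // assumed unsigned "while lower" semantics
  return P;
}

int main() {
  // The i1 splat value is sign-extended to i64: 0 stays 0, 1 becomes ~0ULL.
  assert(whilelo(0, 0, 4) == std::vector<bool>({false, false, false, false}));
  assert(whilelo(0, ~0ULL, 4) == std::vector<bool>({true, true, true, true}));
  return 0;
}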
9887
9888SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
9889 SelectionDAG &DAG) const {
9890 SDLoc DL(Op);
9891
9892 EVT VT = Op.getValueType();
9893 if (!isTypeLegal(VT) || !VT.isScalableVector())
9894 return SDValue();
9895
9896 // Current lowering only supports the SVE-ACLE types.
9897 if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
9898 return SDValue();
9899
9900 // The DUPQ operation is independent of element type, so normalise to i64s.
9901 SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
9902 SDValue Idx128 = Op.getOperand(2);
9903
9904 // DUPQ can be used when idx is in range.
9905 auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
9906 if (CIdx && (CIdx->getZExtValue() <= 3)) {
9907 SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
9908 SDNode *DUPQ =
9909 DAG.getMachineNode(AArch64::DUP_ZZI_Q, DL, MVT::nxv2i64, V, CI);
9910 return DAG.getNode(ISD::BITCAST, DL, VT, SDValue(DUPQ, 0));
9911 }
9912
9913 // The ACLE says this must produce the same result as:
9914 // svtbl(data, svadd_x(svptrue_b64(),
9915 // svand_x(svptrue_b64(), svindex_u64(0, 1), 1),
9916 // index * 2))
9917 SDValue One = DAG.getConstant(1, DL, MVT::i64);
9918 SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);
9919
9920 // create the vector 0,1,0,1,...
9921 SDValue SV = DAG.getStepVector(DL, MVT::nxv2i64);
9922 SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);
9923
9924 // create the vector idx64,idx64+1,idx64,idx64+1,...
9925 SDValue Idx64 = DAG.getNode(ISD::ADD, DL, MVT::i64, Idx128, Idx128);
9926 SDValue SplatIdx64 = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Idx64);
9927 SDValue ShuffleMask = DAG.getNode(ISD::ADD, DL, MVT::nxv2i64, SV, SplatIdx64);
9928
9929 // create the vector Val[idx64],Val[idx64+1],Val[idx64],Val[idx64+1],...
9930 SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
9931 return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
9932}
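
The TBL fallback above builds its index vector as (stepvector & 1) + idx*2, i.e. idx64, idx64+1 repeating, which selects both i64 halves of the requested 128-bit lane. A small sketch of that index computation (the helper dupqTblMask and the lane count of 4 are illustrative assumptions):

// Illustrative sketch of the DUPQ-via-TBL index pattern.
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<uint64_t> dupqTblMask(uint64_t Idx128, unsigned NumI64Lanes) {
  std::vector<uint64_t> Mask(NumI64Lanes);
  uint64_t Idx64 = Idx128 * 2;          // each 128-bit lane holds two i64s
  for (unsigned i = 0; i < NumI64Lanes; ++i)
    Mask[i] = (i & 1) + Idx64;          // idx64, idx64+1, idx64, idx64+1, ...
  return Mask;
}

int main() {
  // Duplicating 128-bit lane 5 of an i64 vector yields indices 10,11,10,11.
  std::vector<uint64_t> Expected{10, 11, 10, 11};
  assert(dupqTblMask(5, 4) == Expected);
  return 0;
}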
9933
9934
9935static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
9936 APInt &UndefBits) {
9937 EVT VT = BVN->getValueType(0);
9938 APInt SplatBits, SplatUndef;
9939 unsigned SplatBitSize;
9940 bool HasAnyUndefs;
9941 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
9942 unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
9943
9944 for (unsigned i = 0; i < NumSplats; ++i) {
9945 CnstBits <<= SplatBitSize;
9946 UndefBits <<= SplatBitSize;
9947 CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
9948 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
9949 }
9950
9951 return true;
9952 }
9953
9954 return false;
9955}
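
resolveBuildVector above replicates the recognised splat across the full vector width by repeatedly shifting the accumulated bits and OR-ing in another copy of the splat. A simplified sketch with plain 64-bit integers in place of APInt (the helper replicateSplat is an illustrative assumption, and it guards the degenerate full-width case to avoid an undefined shift):

// Illustrative sketch of splat replication into a vector-wide bit pattern.
#include <cassert>
#include <cstdint>

static uint64_t replicateSplat(uint64_t SplatBits, unsigned SplatBitSize,
                               unsigned VTBits) {
  if (SplatBitSize >= VTBits)
    return SplatBits;                    // already full width; nothing to do
  uint64_t Result = 0;
  for (unsigned i = 0; i < VTBits / SplatBitSize; ++i)
    Result = (Result << SplatBitSize) | SplatBits;  // shift in one more copy
  return Result;
}

int main() {
  // A v4i16 build_vector splatting 0x00ff resolves to 0x00ff00ff00ff00ff.
  assert(replicateSplat(0x00ff, 16, 64) == 0x00ff00ff00ff00ffULL);
  return 0;
}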
9956
9957// Try 64-bit splatted SIMD immediate.
9958static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9959 const APInt &Bits) {
9960 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9961 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9962 EVT VT = Op.getValueType();
9963 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
9964
9965 if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
9966 Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
9967
9968 SDLoc dl(Op);
9969 SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9970 DAG.getConstant(Value, dl, MVT::i32));
9971 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9972 }
9973 }
9974
9975 return SDValue();
9976}
9977
9978// Try 32-bit splatted SIMD immediate.
9979static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9980 const APInt &Bits,
9981 const SDValue *LHS = nullptr) {
9982 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9983 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9984 EVT VT = Op.getValueType();
9985 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
9986 bool isAdvSIMDModImm = false;
9987 uint64_t Shift;
9988
9989 if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) {
9990 Value = AArch64_AM::encodeAdvSIMDModImmType1(Value);
9991 Shift = 0;
9992 }
9993 else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
9994 Value = AArch64_AM::encodeAdvSIMDModImmType2(Value);
9995 Shift = 8;
9996 }
9997 else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
9998 Value = AArch64_AM::encodeAdvSIMDModImmType3(Value);
9999 Shift = 16;
10000 }
10001 else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
10002 Value = AArch64_AM::encodeAdvSIMDModImmType4(Value);
10003 Shift = 24;
10004 }
10005
10006 if (isAdvSIMDModImm) {
10007 SDLoc dl(Op);
10008 SDValue Mov;
10009
10010 if (LHS)
10011 Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
10012 DAG.getConstant(Value, dl, MVT::i32),
10013 DAG.getConstant(Shift, dl, MVT::i32));
10014 else
10015 Mov = DAG.getNode(NewOp, dl, MovTy,
10016 DAG.getConstant(Value, dl, MVT::i32),
10017 DAG.getConstant(Shift, dl, MVT::i32));
10018
10019 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
10020 }
10021 }
10022
10023 return SDValue();
10024}
10025
10026// Try 16-bit splatted SIMD immediate.
10027static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
10028 const APInt &Bits,
10029 const SDValue *LHS = nullptr) {
10030 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
10031 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
10032 EVT VT = Op.getValueType();
10033 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
10034 bool isAdvSIMDModImm = false;
10035 uint64_t Shift;
10036
10037 if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) {
10038 Value = AArch64_AM::encodeAdvSIMDModImmType5(Value);
10039 Shift = 0;
10040 }
10041 else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
10042 Value = AArch64_AM::encodeAdvSIMDModImmType6(Value);
10043 Shift = 8;
10044 }
10045
10046 if (isAdvSIMDModImm) {
10047 SDLoc dl(Op);
10048 SDValue Mov;
10049
10050 if (LHS)
10051 Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
10052 DAG.getConstant(Value, dl, MVT::i32),
10053 DAG.getConstant(Shift, dl, MVT::i32));
10054 else
10055 Mov = DAG.getNode(NewOp, dl, MovTy,
10056 DAG.getConstant(Value, dl, MVT::i32),
10057 DAG.getConstant(Shift, dl, MVT::i32));
10058
10059 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
10060 }
10061 }
10062
10063 return SDValue();
10064}
10065
10066// Try 32-bit splatted SIMD immediate with shifted ones.
10067static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
10068 SelectionDAG &DAG, const APInt &Bits) {
10069 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
10070 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
10071 EVT VT = Op.getValueType();
10072 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
10073 bool isAdvSIMDModImm = false;
10074 uint64_t Shift;
10075
10076 if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
10077 Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
10078 Shift = 264;
10079 }
10080 else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
10081 Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
10082 Shift = 272;
10083 }
10084
10085 if (isAdvSIMDModImm) {
10086 SDLoc dl(Op);
10087 SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
10088 DAG.getConstant(Value, dl, MVT::i32),
10089 DAG.getConstant(Shift, dl, MVT::i32));
10090 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
10091 }
10092 }
10093
10094 return SDValue();
10095}
10096
10097// Try 8-bit splatted SIMD immediate.
10098static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
10099 const APInt &Bits) {
10100 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
10101 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
10102 EVT VT = Op.getValueType();
10103 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
10104
10105 if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
10106 Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
10107
10108 SDLoc dl(Op);
10109 SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
10110 DAG.getConstant(Value, dl, MVT::i32));
10111 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
10112 }
10113 }
10114
10115 return SDValue();
10116}
10117
10118// Try FP splatted SIMD immediate.
10119static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
10120 const APInt &Bits) {
10121 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
10122 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
10123 EVT VT = Op.getValueType();
10124 bool isWide = (VT.getSizeInBits() == 128);
10125 MVT MovTy;
10126 bool isAdvSIMDModImm = false;
10127
10128 if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
10129 Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
10130 MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
10131 }
10132 else if (isWide &&
10133 (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
10134 Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
10135 MovTy = MVT::v2f64;
10136 }
10137
10138 if (isAdvSIMDModImm) {
10139 SDLoc dl(Op);
10140 SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
10141 DAG.getConstant(Value, dl, MVT::i32));
10142 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
10143 }
10144 }
10145
10146 return SDValue();
10147}
10148
10149// Specialized code to quickly find if PotentialBVec is a BuildVector that
10150// consists of only the same constant int value, returned in reference arg
10151// ConstVal
10152static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
10153 uint64_t &ConstVal) {
10154 BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
10155 if (!Bvec)
10156 return false;
10157 ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
10158 if (!FirstElt)
10159 return false;
10160 EVT VT = Bvec->getValueType(0);
10161 unsigned NumElts = VT.getVectorNumElements();
10162 for (unsigned i = 1; i < NumElts; ++i)
10163 if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
10164 return false;
10165 ConstVal = FirstElt->getZExtValue();
10166 return true;
10167}
10168
10169static unsigned getIntrinsicID(const SDNode *N) {
10170 unsigned Opcode = N->getOpcode();
10171 switch (Opcode) {
10172 default:
10173 return Intrinsic::not_intrinsic;
10174 case ISD::INTRINSIC_WO_CHAIN: {
10175 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
10176 if (IID < Intrinsic::num_intrinsics)
10177 return IID;
10178 return Intrinsic::not_intrinsic;
10179 }
10180 }
10181}
10182
10183// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
10184// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
10185 // BUILD_VECTOR with constant element C1, C2 is a constant, and:
10186// - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
10187// - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
10188// The (or (lsl Y, C2), (and X, BvecC1)) case is also handled.
10189static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
10190 EVT VT = N->getValueType(0);
10191
10192 if (!VT.isVector())
10193 return SDValue();
10194
10195 SDLoc DL(N);
10196
10197 SDValue And;
10198 SDValue Shift;
10199
10200 SDValue FirstOp = N->getOperand(0);
10201 unsigned FirstOpc = FirstOp.getOpcode();
10202 SDValue SecondOp = N->getOperand(1);
10203 unsigned SecondOpc = SecondOp.getOpcode();
10204
10205 // Is one of the operands an AND or a BICi? The AND may have been optimised to
10206 // a BICi in order to use an immediate instead of a register.
10207 // Is the other operand a shl or a lshr? This will have been turned into:
10208 // AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift.
10209 if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) &&
10210 (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR)) {
10211 And = FirstOp;
10212 Shift = SecondOp;
10213
10214 } else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) &&
10215 (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR)) {
10216 And = SecondOp;
10217 Shift = FirstOp;
10218 } else
10219 return SDValue();
10220
10221 bool IsAnd = And.getOpcode() == ISD::AND;
10222 bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR;
10223
10224 // Is the shift amount constant?
10225 ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
10226 if (!C2node)
10227 return SDValue();
10228
10229 uint64_t C1;
10230 if (IsAnd) {
10231 // Is the and mask vector all constant?
10232 if (!isAllConstantBuildVector(And.getOperand(1), C1))
10233 return SDValue();
10234 } else {
10235 // Reconstruct the corresponding AND immediate from the two BICi immediates.
10236 ConstantSDNode *C1nodeImm = dyn_cast<ConstantSDNode>(And.getOperand(1));
10237 ConstantSDNode *C1nodeShift = dyn_cast<ConstantSDNode>(And.getOperand(2));
10238    assert(C1nodeImm && C1nodeShift);
10239 C1 = ~(C1nodeImm->getZExtValue() << C1nodeShift->getZExtValue());
10240 }
10241
10242 // Is C1 == ~(Ones(ElemSizeInBits) << C2) or
10243 // C1 == ~(Ones(ElemSizeInBits) >> C2), taking into account
10244 // how much one can shift elements of a particular size?
10245 uint64_t C2 = C2node->getZExtValue();
10246 unsigned ElemSizeInBits = VT.getScalarSizeInBits();
10247 if (C2 > ElemSizeInBits)
10248 return SDValue();
10249
10250 APInt C1AsAPInt(ElemSizeInBits, C1);
10251 APInt RequiredC1 = IsShiftRight ? APInt::getHighBitsSet(ElemSizeInBits, C2)
10252 : APInt::getLowBitsSet(ElemSizeInBits, C2);
10253 if (C1AsAPInt != RequiredC1)
10254 return SDValue();
10255
10256 SDValue X = And.getOperand(0);
10257 SDValue Y = Shift.getOperand(0);
10258
10259 unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
10260 SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
10261
10262  LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
10263  LLVM_DEBUG(N->dump(&DAG));
10264  LLVM_DEBUG(dbgs() << "into: \n");
10265  LLVM_DEBUG(ResultSLI->dump(&DAG));
10266
10267 ++NumShiftInserts;
10268 return ResultSLI;
10269}
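
The S[LR]I formation above only fires when the AND mask exactly complements the bits the shift will write: the low C2 bits must be set for the SLI (shift-left) case and the high C2 bits for the SRI (shift-right) case. A small worked example for 8-bit elements and a hypothetical shift amount of 3:

// Illustrative check of the required AND masks for SLI/SRI formation.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned C2 = 3;                          // hypothetical shift amount
  // SLI: (or (and X, C1), (shl Y, C2)) needs C1 == ~(Ones(8) << C2),
  // i.e. only the low C2 bits of each 8-bit element are set.
  uint8_t RequiredSLI = uint8_t(~(0xFFu << C2));
  assert(RequiredSLI == 0x07);
  // SRI: (or (and X, C1), (lshr Y, C2)) needs C1 == ~(Ones(8) >> C2),
  // i.e. only the high C2 bits of each 8-bit element are set.
  uint8_t RequiredSRI = uint8_t(~(0xFFu >> C2));
  assert(RequiredSRI == 0xE0);
  return 0;
}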
10270
10271SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
10272 SelectionDAG &DAG) const {
10273 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
10274 return LowerToScalableOp(Op, DAG);
10275
10276 // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
10277 if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
10278 return Res;
10279
10280 EVT VT = Op.getValueType();
10281
10282 SDValue LHS = Op.getOperand(0);
10283 BuildVectorSDNode *BVN =
10284 dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
10285 if (!BVN) {
10286 // OR commutes, so try swapping the operands.
10287 LHS = Op.getOperand(1);
10288 BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
10289 }
10290 if (!BVN)
10291 return Op;
10292
10293 APInt DefBits(VT.getSizeInBits(), 0);
10294 APInt UndefBits(VT.getSizeInBits(), 0);
10295 if (resolveBuildVector(BVN, DefBits, UndefBits)) {
10296 SDValue NewOp;
10297
10298 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
10299 DefBits, &LHS)) ||
10300 (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
10301 DefBits, &LHS)))
10302 return NewOp;
10303
10304 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
10305 UndefBits, &LHS)) ||
10306 (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
10307 UndefBits, &LHS)))
10308 return NewOp;
10309 }
10310
10311 // We can always fall back to a non-immediate OR.
10312 return Op;
10313}
10314
10315// Normalize the operands of BUILD_VECTOR. The value of constant operands will
10316// be truncated to fit element width.
10317static SDValue NormalizeBuildVector(SDValue Op,
10318 SelectionDAG &DAG) {
10319  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
10320 SDLoc dl(Op);
10321 EVT VT = Op.getValueType();
10322  EVT EltTy = VT.getVectorElementType();
10323
10324 if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
10325 return Op;
10326
10327 SmallVector<SDValue, 16> Ops;
10328 for (SDValue Lane : Op->ops()) {
10329 // For integer vectors, type legalization would have promoted the
10330 // operands already. Otherwise, if Op is a floating-point splat
10331 // (with operands cast to integers), then the only possibilities
10332 // are constants and UNDEFs.
10333 if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
10334 APInt LowBits(EltTy.getSizeInBits(),
10335 CstLane->getZExtValue());
10336 Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
10337 } else if (Lane.getNode()->isUndef()) {
10338 Lane = DAG.getUNDEF(MVT::i32);
10339 } else {
10340       assert(Lane.getValueType() == MVT::i32 &&
10341              "Unexpected BUILD_VECTOR operand type");
10342 }
10343 Ops.push_back(Lane);
10344 }
10345 return DAG.getBuildVector(VT, dl, Ops);
10346}
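
NormalizeBuildVector above truncates each constant lane to the element width before rebuilding the vector, since type legalization may have left wider promoted constants in the operands. A minimal sketch of that truncation with plain integers (the helper truncateToEltWidth is an illustrative assumption):

// Illustrative sketch of constant-lane truncation to the element width.
#include <cassert>
#include <cstdint>

static uint32_t truncateToEltWidth(uint64_t LaneValue, unsigned EltBits) {
  uint64_t Mask = (EltBits >= 64) ? ~0ULL : ((1ULL << EltBits) - 1);
  return uint32_t(LaneValue & Mask);   // keep only the low EltBits bits
}

int main() {
  // A promoted i8 lane holding 0x1234 is normalised to 0x34.
  assert(truncateToEltWidth(0x1234, 8) == 0x34);
  // i16 lanes keep their low 16 bits.
  assert(truncateToEltWidth(0xdeadbeef, 16) == 0xbeef);
  return 0;
}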
10347
10348static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
10349 EVT VT = Op.getValueType();
10350
10351 APInt DefBits(VT.getSizeInBits(), 0);
10352 APInt UndefBits(VT.getSizeInBits(), 0);
10353 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
10354 if (resolveBuildVector(BVN, DefBits, UndefBits)) {
10355 SDValue NewOp;
10356 if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
10357 (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10358 (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
10359 (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10360 (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
10361 (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
10362 return NewOp;
10363
10364 DefBits = ~DefBits;
10365 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
10366 (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
10367 (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
10368 return NewOp;
10369
10370 DefBits = UndefBits;
10371 if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
10372 (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10373 (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
10374 (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10375 (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
10376 (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
10377 return NewOp;
10378
10379 DefBits = ~UndefBits;
10380 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
10381 (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
10382 (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
10383 return NewOp;
10384 }
10385
10386 return SDValue();
10387}
10388
10389SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
10390 SelectionDAG &DAG) const {
10391 EVT VT = Op.getValueType();
10392
10393 // Try to build a simple constant vector.
10394 Op = NormalizeBuildVector(Op, DAG);
10395 if (VT.isInteger()) {
10396 // Certain vector constants, used to express things like logical NOT and
10397 // arithmetic NEG, are passed through unmodified. This allows special
10398 // patterns for these operations to match, which will lower these constants
10399 // to whatever is proven necessary.
10400 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
10401 if (BVN->isConstant())
10402 if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
10403 unsigned BitSize = VT.getVectorElementType().getSizeInBits();
10404 APInt Val(BitSize,
10405 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
10406 if (Val.isZero() || Val.isAllOnes())
10407 return Op;
10408 }
10409 }
10410
10411 if (SDValue V = ConstantBuildVector(Op, DAG))
10412 return V;
10413
10414 // Scan through the operands to find some interesting properties we can
10415 // exploit:
10416 // 1) If only one value is used, we can use a DUP, or
10417 // 2) if only the low element is not undef, we can just insert that, or
10418 // 3) if only one constant value is used (w/ some non-constant lanes),
10419 // we can splat the constant value into the whole vector then fill
10420 // in the non-constant lanes.
10421 // 4) FIXME: If different constant values are used, but we can intelligently
10422 // select the values we'll be overwriting for the non-constant
10423 // lanes such that we can directly materialize the vector
10424 // some other way (MOVI, e.g.), we can be sneaky.
10425 // 5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
10426 SDLoc dl(Op);
10427 unsigned NumElts = VT.getVectorNumElements();
10428 bool isOnlyLowElement = true;
10429 bool usesOnlyOneValue = true;
10430 bool usesOnlyOneConstantValue = true;
10431 bool isConstant = true;
10432 bool AllLanesExtractElt = true;
10433 unsigned NumConstantLanes = 0;
10434 unsigned NumDifferentLanes = 0;
10435 unsigned NumUndefLanes = 0;
10436 SDValue Value;
10437 SDValue ConstantValue;
10438 for (unsigned i = 0; i < NumElts; ++i) {
10439 SDValue V = Op.getOperand(i);
10440 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
10441 AllLanesExtractElt = false;
10442 if (V.isUndef()) {
10443 ++NumUndefLanes;
10444 continue;
10445 }
10446 if (i > 0)
10447 isOnlyLowElement = false;
10448 if (!isIntOrFPConstant(V))
10449 isConstant = false;
10450
10451 if (isIntOrFPConstant(V)) {
10452 ++NumConstantLanes;
10453 if (!ConstantValue.getNode())
10454 ConstantValue = V;
10455 else if (ConstantValue != V)
10456 usesOnlyOneConstantValue = false;
10457 }
10458
10459 if (!Value.getNode())
10460 Value = V;
10461 else if (V != Value) {
10462 usesOnlyOneValue = false;
10463 ++NumDifferentLanes;
10464 }
10465 }
10466
10467 if (!Value.getNode()) {
10468    LLVM_DEBUG(
10469        dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
10470 return DAG.getUNDEF(VT);
10471 }
10472
10473 // Convert BUILD_VECTOR where all elements but the lowest are undef into
10474 // SCALAR_TO_VECTOR, except for when we have a single-element constant vector
10475 // as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
10476 if (isOnlyLowElement && !(NumElts == 1 && isIntOrFPConstant(Value))) {
10477    LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
10478                         "SCALAR_TO_VECTOR node\n");
10479 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
10480 }
10481
10482 if (AllLanesExtractElt) {
10483 SDNode *Vector = nullptr;
10484 bool Even = false;
10485 bool Odd = false;
10486 // Check whether the extract elements match the Even pattern <0,2,4,...> or
10487 // the Odd pattern <1,3,5,...>.
10488 for (unsigned i = 0; i < NumElts; ++i) {
10489 SDValue V = Op.getOperand(i);
10490 const SDNode *N = V.getNode();
10491 if (!isa<ConstantSDNode>(N->getOperand(1)))
10492 break;
10493 SDValue N0 = N->getOperand(0);
10494
10495 // All elements are extracted from the same vector.
10496 if (!Vector) {
10497 Vector = N0.getNode();
10498 // Check that the type of EXTRACT_VECTOR_ELT matches the type of
10499 // BUILD_VECTOR.
10500 if (VT.getVectorElementType() !=
10501 N0.getValueType().getVectorElementType())
10502 break;
10503 } else if (Vector != N0.getNode()) {
10504 Odd = false;
10505 Even = false;
10506 break;
10507 }
10508
10509 // Extracted values are either at Even indices <0,2,4,...> or at Odd
10510 // indices <1,3,5,...>.
10511 uint64_t Val = N->getConstantOperandVal(1);
10512 if (Val == 2 * i) {
10513 Even = true;
10514 continue;
10515 }
10516 if (Val - 1 == 2 * i) {
10517 Odd = true;
10518 continue;
10519 }
10520
10521 // Something does not match: abort.
10522 Odd = false;
10523 Even = false;
10524 break;
10525 }
10526 if (Even || Odd) {
10527 SDValue LHS =
10528 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
10529 DAG.getConstant(0, dl, MVT::i64));
10530 SDValue RHS =
10531 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
10532 DAG.getConstant(NumElts, dl, MVT::i64));
10533
10534 if (Even && !Odd)
10535 return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), LHS,
10536 RHS);
10537 if (Odd && !Even)
10538 return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), LHS,
10539 RHS);
10540 }
10541 }
10542
10543 // Use DUP for non-constant splats. For f32 constant splats, reduce to
10544 // i32 and try again.
10545 if (usesOnlyOneValue) {
10546 if (!isConstant) {
10547 if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
10548 Value.getValueType() != VT) {
10549        LLVM_DEBUG(
10550            dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
10551 return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
10552 }
10553
10554 // This is actually a DUPLANExx operation, which keeps everything vectory.
10555
10556 SDValue Lane = Value.getOperand(1);
10557 Value = Value.getOperand(0);
10558 if (Value.getValueSizeInBits() == 64) {
10559        LLVM_DEBUG(
10560            dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
10561                      "widening it\n");
10562 Value = WidenVector(Value, DAG);
10563 }
10564
10565 unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
10566 return DAG.getNode(Opcode, dl, VT, Value, Lane);
10567 }
10568
10569 if (VT.getVectorElementType().isFloatingPoint()) {
10570 SmallVector<SDValue, 8> Ops;
10571 EVT EltTy = VT.getVectorElementType();
10572      assert((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
10573              EltTy == MVT::f64) && "Unsupported floating-point vector type");
10574      LLVM_DEBUG(
10575          dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
10576                    "BITCASTS, and try again\n");
10577 MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
10578 for (unsigned i = 0; i < NumElts; ++i)
10579 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
10580 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
10581 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
10582      LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
10583                 Val.dump(););
10584 Val = LowerBUILD_VECTOR(Val, DAG);
10585 if (Val.getNode())
10586 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
10587 }
10588 }
10589
10590 // If we need to insert a small number of different non-constant elements and
10591 // the vector width is sufficiently large, prefer using DUP with the common
10592 // value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
10593 // skip the constant lane handling below.
10594 bool PreferDUPAndInsert =
10595 !isConstant && NumDifferentLanes >= 1 &&
10596 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
10597 NumDifferentLanes >= NumConstantLanes;
10598
10599 // If there was only one constant value used and for more than one lane,
10600 // start by splatting that value, then replace the non-constant lanes. This
10601 // is better than the default, which will perform a separate initialization
10602 // for each lane.
10603 if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
10604 // Firstly, try to materialize the splat constant.
10605 SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
10606 Val = ConstantBuildVector(Vec, DAG);
10607 if (!Val) {
10608 // Otherwise, materialize the constant and splat it.
10609 Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
10610 DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
10611 }
10612
10613 // Now insert the non-constant lanes.
10614 for (unsigned i = 0; i < NumElts; ++i) {
10615 SDValue V = Op.getOperand(i);
10616 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
10617 if (!isIntOrFPConstant(V))
10618 // Note that type legalization likely mucked about with the VT of the
10619 // source operand, so we may have to convert it here before inserting.
10620 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
10621 }
10622 return Val;
10623 }
10624
10625 // This will generate a load from the constant pool.
10626 if (isConstant) {
10627    LLVM_DEBUG(
10628        dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
10629                  "expansion\n");
10630 return SDValue();
10631 }
10632
10633 // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
10634 if (NumElts >= 4) {
10635 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
10636 return shuffle;
10637 }
10638
10639 if (PreferDUPAndInsert) {
10640 // First, build a constant vector with the common element.
10641 SmallVector<SDValue, 8> Ops(NumElts, Value);
10642 SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
10643 // Next, insert the elements that do not match the common value.
10644 for (unsigned I = 0; I < NumElts; ++I)
10645 if (Op.getOperand(I) != Value)
10646 NewVector =
10647 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
10648 Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
10649
10650 return NewVector;
10651 }
10652
10653 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
10654 // know the default expansion would otherwise fall back on something even
10655 // worse. For a vector with one or two non-undef values, that's
10656 // scalar_to_vector for the elements followed by a shuffle (provided the
10657 // shuffle is valid for the target) and materialization element by element
10658 // on the stack followed by a load for everything else.
10659 if (!isConstant && !usesOnlyOneValue) {
10660      LLVM_DEBUG(
10661          dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
10662                    "of INSERT_VECTOR_ELT\n");
10663
10664 SDValue Vec = DAG.getUNDEF(VT);
10665 SDValue Op0 = Op.getOperand(0);
10666 unsigned i = 0;
10667
10668 // Use SCALAR_TO_VECTOR for lane zero to
10669 // a) Avoid a RMW dependency on the full vector register, and
10670 // b) Allow the register coalescer to fold away the copy if the
10671 // value is already in an S or D register, and we're forced to emit an
10672 // INSERT_SUBREG that we can't fold anywhere.
10673 //
10674 // We also allow types like i8 and i16 which are illegal scalar but legal
10675 // vector element types. After type-legalization the inserted value is
10676 // extended (i32) and it is safe to cast them to the vector type by ignoring
10677 // the upper bits of the lowest lane (e.g. v8i8, v4i16).
10678 if (!Op0.isUndef()) {
10679      LLVM_DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
10680 Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
10681 ++i;
10682 }
10683    LLVM_DEBUG(if (i < NumElts) dbgs()
10684                   << "Creating nodes for the other vector elements:\n";);
10685 for (; i < NumElts; ++i) {
10686 SDValue V = Op.getOperand(i);
10687 if (V.isUndef())
10688 continue;
10689 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
10690 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
10691 }
10692 return Vec;
10693 }
10694
10695  LLVM_DEBUG(
10696      dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
10697                "better alternative\n");
10698 return SDValue();
10699}
10700
10701SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
10702 SelectionDAG &DAG) const {
10703 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
10704 return LowerFixedLengthConcatVectorsToSVE(Op, DAG);
10705
10706  assert(Op.getValueType().isScalableVector() &&
10707         isTypeLegal(Op.getValueType()) &&
10708         "Expected legal scalable vector type!");
10709
10710 if (isTypeLegal(Op.getOperand(0).getValueType())) {
10711 unsigned NumOperands = Op->getNumOperands();
10712    assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
10713           "Unexpected number of operands in CONCAT_VECTORS");
10714
10715 if (NumOperands == 2)
10716 return Op;
10717
10718 // Concat each pair of subvectors and pack into the lower half of the array.
10719 SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
10720 while (ConcatOps.size() > 1) {
10721 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
10722 SDValue V1 = ConcatOps[I];
10723 SDValue V2 = ConcatOps[I + 1];
10724 EVT SubVT = V1.getValueType();
10725 EVT PairVT = SubVT.getDoubleNumVectorElementsVT(*DAG.getContext());
10726 ConcatOps[I / 2] =
10727 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), PairVT, V1, V2);
10728 }
10729 ConcatOps.resize(ConcatOps.size() / 2);
10730 }
10731 return ConcatOps[0];
10732 }
10733
10734 return SDValue();
10735}
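
The loop above halves the operand list each round, concatenating neighbouring pairs in place until a single full-width vector remains. A small sketch of the same reduction on plain std::vector values (illustrative only; the real lowering works on SDValues and scalable types):

// Illustrative sketch of the pairwise CONCAT_VECTORS reduction.
#include <cassert>
#include <vector>

using Vec = std::vector<int>;

static Vec concatPair(const Vec &A, const Vec &B) {
  Vec R(A);
  R.insert(R.end(), B.begin(), B.end());  // concatenate the two halves
  return R;
}

int main() {
  // Four operands are combined in log2(4) = 2 rounds: 4 -> 2 -> 1.
  std::vector<Vec> Ops = {{0, 1}, {2, 3}, {4, 5}, {6, 7}};
  while (Ops.size() > 1) {
    for (size_t I = 0, E = Ops.size(); I != E; I += 2)
      Ops[I / 2] = concatPair(Ops[I], Ops[I + 1]);  // pack into the lower half
    Ops.resize(Ops.size() / 2);
  }
  assert(Ops[0] == Vec({0, 1, 2, 3, 4, 5, 6, 7}));
  return 0;
}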
10736
10737SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10738 SelectionDAG &DAG) const {
10739  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
10740
10741 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
10742 return LowerFixedLengthInsertVectorElt(Op, DAG);
10743
10744 // Check for non-constant or out of range lane.
10745 EVT VT = Op.getOperand(0).getValueType();
10746
10747 if (VT.getScalarType() == MVT::i1) {
10748 EVT VectorVT = getPromotedVTForPredicate(VT);
10749 SDLoc DL(Op);
10750 SDValue ExtendedVector =
10751 DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, VectorVT);
10752 SDValue ExtendedValue =
10753 DAG.getAnyExtOrTrunc(Op.getOperand(1), DL,
10754 VectorVT.getScalarType().getSizeInBits() < 32
10755 ? MVT::i32
10756 : VectorVT.getScalarType());
10757 ExtendedVector =
10758 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VectorVT, ExtendedVector,
10759 ExtendedValue, Op.getOperand(2));
10760 return DAG.getAnyExtOrTrunc(ExtendedVector, DL, VT);
10761 }
10762
10763 ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10764 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
10765 return SDValue();
10766
10767 // Insertion/extraction are legal for V128 types.
10768 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
10769 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
10770 VT == MVT::v8f16 || VT == MVT::v8bf16)
10771 return Op;
10772
10773 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
10774 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
10775 VT != MVT::v4bf16)
10776 return SDValue();
10777
10778 // For V64 types, we perform the insertion by expanding the value
10779 // to a V128 type and performing the insertion on that.
10780 SDLoc DL(Op);
10781 SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
10782 EVT WideTy = WideVec.getValueType();
10783
10784 SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
10785 Op.getOperand(1), Op.getOperand(2));
10786 // Re-narrow the resultant vector.
10787 return NarrowVector(Node, DAG);
10788}
10789
10790SDValue
10791AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
10792 SelectionDAG &DAG) const {
10793  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
10794 EVT VT = Op.getOperand(0).getValueType();
10795
10796 if (VT.getScalarType() == MVT::i1) {
10797 // We can't directly extract from an SVE predicate; extend it first.
10798 // (This isn't the only possible lowering, but it's straightforward.)
10799 EVT VectorVT = getPromotedVTForPredicate(VT);
10800 SDLoc DL(Op);
10801 SDValue Extend =
10802 DAG.getNode(ISD::ANY_EXTEND, DL, VectorVT, Op.getOperand(0));
10803 MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
10804 SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractTy,
10805 Extend, Op.getOperand(1));
10806 return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
10807 }
10808
10809 if (useSVEForFixedLengthVectorVT(VT))
10810 return LowerFixedLengthExtractVectorElt(Op, DAG);
10811
10812 // Check for non-constant or out of range lane.
10813 ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
10814 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
10815 return SDValue();
10816
10817 // Insertion/extraction are legal for V128 types.
10818 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
10819 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
10820 VT == MVT::v8f16 || VT == MVT::v8bf16)
10821 return Op;
10822
10823 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
10824 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
10825 VT != MVT::v4bf16)
10826 return SDValue();
10827
10828 // For V64 types, we perform the extraction by expanding the value
10829 // to a V128 type and performing the extraction on that.
10830 SDLoc DL(Op);
10831 SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
10832 EVT WideTy = WideVec.getValueType();
10833
10834 EVT ExtrTy = WideTy.getVectorElementType();
10835 if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
10836 ExtrTy = MVT::i32;
10837
10838 // For extractions, we just return the result directly.
10839 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
10840 Op.getOperand(1));
10841}
10842
10843SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
10844 SelectionDAG &DAG) const {
10845  assert(Op.getValueType().isFixedLengthVector() &&
10846         "Only cases that extract a fixed length vector are supported!");
10847
10848 EVT InVT = Op.getOperand(0).getValueType();
10849 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
10850 unsigned Size = Op.getValueSizeInBits();
10851
10852 // If we don't have legal types yet, do nothing
10853 if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT))
10854 return SDValue();
10855
10856 if (InVT.isScalableVector()) {
10857 // This will be matched by custom code during ISelDAGToDAG.
10858 if (Idx == 0 && isPackedVectorType(InVT, DAG))
10859 return Op;
10860
10861 return SDValue();
10862 }
10863
10864 // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
10865 if (Idx == 0 && InVT.getSizeInBits() <= 128)
10866 return Op;
10867
10868 // If this is extracting the upper 64-bits of a 128-bit vector, we match
10869 // that directly.
10870 if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
10871 InVT.getSizeInBits() == 128)
10872 return Op;
10873
10874 if (useSVEForFixedLengthVectorVT(InVT)) {
10875 SDLoc DL(Op);
10876
10877 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
10878 SDValue NewInVec =
10879 convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
10880
10881 SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ContainerVT, NewInVec,
10882 NewInVec, DAG.getConstant(Idx, DL, MVT::i64));
10883 return convertFromScalableVector(DAG, Op.getValueType(), Splice);
10884 }
10885
10886 return SDValue();
10887}
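// Worked example (illustrative sketch): an extract_subvector of v2i32 from a
// v4i32 source at index 2 has Size == 64, Idx * 32 == 64 and a 128-bit
// source, so the node is returned unchanged for the existing upper-half
// patterns to match.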
10888
10889SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
10890 SelectionDAG &DAG) const {
10891 assert(Op.getValueType().isScalableVector() &&
10892 "Only expect to lower inserts into scalable vectors!");
10893
10894 EVT InVT = Op.getOperand(1).getValueType();
10895 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
10896
10897 if (InVT.isScalableVector()) {
10898 SDLoc DL(Op);
10899 EVT VT = Op.getValueType();
10900
10901 if (!isTypeLegal(VT) || !VT.isInteger())
10902 return SDValue();
10903
10904 SDValue Vec0 = Op.getOperand(0);
10905 SDValue Vec1 = Op.getOperand(1);
10906
10907 // Ensure the subvector is half the size of the main vector.
10908 if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
10909 return SDValue();
10910
10911 // Extend elements of smaller vector...
10912 EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
10913 SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
10914
10915 if (Idx == 0) {
10916 SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
10917 return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0);
10918 } else if (Idx == InVT.getVectorMinNumElements()) {
10919 SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
10920 return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec);
10921 }
10922
10923 return SDValue();
10924 }
10925
10926 // This will be matched by custom code during ISelDAGToDAG.
10927 if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
10928 return Op;
10929
10930 return SDValue();
10931}
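// Worked example (illustrative sketch): inserting an nxv2i32 subvector into
// an nxv4i32 vector at index 0: the subvector is ANY_EXTENDed to nxv2i64,
// the other half of the destination is produced with UUNPKHI, and UZP1
// re-packs both halves into the nxv4i32 result.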
10932
10933SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
10934 EVT VT = Op.getValueType();
10935
10936 if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
10937 return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
10938
10939 assert(VT.isScalableVector() && "Expected a scalable vector.");
10940
10941 bool Signed = Op.getOpcode() == ISD::SDIV;
10942 unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
10943
10944 if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
10945 return LowerToPredicatedOp(Op, DAG, PredOpcode);
10946
10947 // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
10948 // operations, and truncate the result.
10949 EVT WidenedVT;
10950 if (VT == MVT::nxv16i8)
10951 WidenedVT = MVT::nxv8i16;
10952 else if (VT == MVT::nxv8i16)
10953 WidenedVT = MVT::nxv4i32;
10954 else
10955 llvm_unreachable("Unexpected Custom DIV operation")::llvm::llvm_unreachable_internal("Unexpected Custom DIV operation"
, "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 10955)
;
10956
10957 SDLoc dl(Op);
10958 unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
10959 unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
10960 SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
10961 SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
10962 SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
10963 SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
10964 SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
10965 SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
10966 return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
10967}
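// Worked example (illustrative sketch): an nxv16i8 sdiv unpacks both
// operands with SUNPKLO/SUNPKHI into nxv8i16 halves, divides each half
// (which recursively widens again to nxv4i32), and re-packs the two results
// with UZP1 back into an nxv16i8.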
10968
10969bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
10970 // Currently no fixed length shuffles that require SVE are legal.
10971 if (useSVEForFixedLengthVectorVT(VT))
10972 return false;
10973
10974 if (VT.getVectorNumElements() == 4 &&
10975 (VT.is128BitVector() || VT.is64BitVector())) {
10976 unsigned PFIndexes[4];
10977 for (unsigned i = 0; i != 4; ++i) {
10978 if (M[i] < 0)
10979 PFIndexes[i] = 8;
10980 else
10981 PFIndexes[i] = M[i];
10982 }
10983
10984 // Compute the index in the perfect shuffle table.
10985 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10986 PFIndexes[2] * 9 + PFIndexes[3];
10987 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10988 unsigned Cost = (PFEntry >> 30);
10989
10990 if (Cost <= 4)
10991 return true;
10992 }
10993
10994 bool DummyBool;
10995 int DummyInt;
10996 unsigned DummyUnsigned;
10997
10998 return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
10999 isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
11000 isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
11001 // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
11002 isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
11003 isZIPMask(M, VT, DummyUnsigned) ||
11004 isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
11005 isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
11006 isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
11007 isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
11008 isConcatMask(M, VT, VT.getSizeInBits() == 128));
11009}
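// Worked example (illustrative sketch): for the 4-element mask <4, undef, 6, 7>
// the base-9 digits are {4, 8, 6, 7}, so PFTableIndex = 4*729 + 8*81 + 6*9 + 7
// = 3625; the mask is considered legal when the cost stored in the top bits of
// that table entry is at most 4.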
11010
11011/// getVShiftImm - Check if this is a valid build_vector for the immediate
11012/// operand of a vector shift operation, where all the elements of the
11013/// build_vector must have the same constant integer value.
11014static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
11015 // Ignore bit_converts.
11016 while (Op.getOpcode() == ISD::BITCAST)
11017 Op = Op.getOperand(0);
11018 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
11019 APInt SplatBits, SplatUndef;
11020 unsigned SplatBitSize;
11021 bool HasAnyUndefs;
11022 if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
11023 HasAnyUndefs, ElementBits) ||
11024 SplatBitSize > ElementBits)
11025 return false;
11026 Cnt = SplatBits.getSExtValue();
11027 return true;
11028}
11029
11030/// isVShiftLImm - Check if this is a valid build_vector for the immediate
11031/// operand of a vector shift left operation. That value must be in the range:
11032/// 0 <= Value < ElementBits for a left shift; or
11033/// 0 <= Value <= ElementBits for a long left shift.
11034static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
11035 assert(VT.isVector() && "vector shift count is not a vector type");
11036 int64_t ElementBits = VT.getScalarSizeInBits();
11037 if (!getVShiftImm(Op, ElementBits, Cnt))
11038 return false;
11039 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
11040}
11041
11042/// isVShiftRImm - Check if this is a valid build_vector for the immediate
11043/// operand of a vector shift right operation. The value must be in the range:
11044/// 1 <= Value <= ElementBits for a right shift; or
11045static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
11046 assert(VT.isVector() && "vector shift count is not a vector type");
11047 int64_t ElementBits = VT.getScalarSizeInBits();
11048 if (!getVShiftImm(Op, ElementBits, Cnt))
11049 return false;
11050 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
11051}
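// Worked example (illustrative sketch): for v4i16 (ElementBits == 16) a
// splatted shift amount is a valid left-shift immediate in 0..15 (0..16 for
// a long shift) and a valid right-shift immediate in 1..16 (1..8 for a
// narrowing shift).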
11052
11053SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
11054 SelectionDAG &DAG) const {
11055 EVT VT = Op.getValueType();
11056
11057 if (VT.getScalarType() == MVT::i1) {
11058 // Lower i1 truncate to `(x & 1) != 0`.
11059 SDLoc dl(Op);
11060 EVT OpVT = Op.getOperand(0).getValueType();
11061 SDValue Zero = DAG.getConstant(0, dl, OpVT);
11062 SDValue One = DAG.getConstant(1, dl, OpVT);
11063 SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One);
11064 return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE);
11065 }
11066
11067 if (!VT.isVector() || VT.isScalableVector())
11068 return SDValue();
11069
11070 if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
11071 return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
11072
11073 return SDValue();
11074}
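// Worked example (illustrative sketch): truncating nxv4i32 to nxv4i1 is
// emitted as setcc((and x, splat(1)), splat(0), setne), i.e. testing the low
// bit of each element.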
11075
11076SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
11077 SelectionDAG &DAG) const {
11078 EVT VT = Op.getValueType();
11079 SDLoc DL(Op);
11080 int64_t Cnt;
11081
11082 if (!Op.getOperand(1).getValueType().isVector())
11083 return Op;
11084 unsigned EltSize = VT.getScalarSizeInBits();
11085
11086 switch (Op.getOpcode()) {
11087 default:
11088 llvm_unreachable("unexpected shift opcode")::llvm::llvm_unreachable_internal("unexpected shift opcode", "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 11088)
;
11089
11090 case ISD::SHL:
11091 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
11092 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
11093
11094 if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
11095 return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
11096 DAG.getConstant(Cnt, DL, MVT::i32));
11097 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
11098 DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
11099 MVT::i32),
11100 Op.getOperand(0), Op.getOperand(1));
11101 case ISD::SRA:
11102 case ISD::SRL:
11103 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
11104 unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
11105 : AArch64ISD::SRL_PRED;
11106 return LowerToPredicatedOp(Op, DAG, Opc);
11107 }
11108
11109 // Right shift immediate
11110 if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
11111 unsigned Opc =
11112 (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
11113 return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
11114 DAG.getConstant(Cnt, DL, MVT::i32));
11115 }
11116
11117 // Right shift register. Note, there is not a shift right register
11118 // instruction, but the shift left register instruction takes a signed
11119 // value, where negative numbers specify a right shift.
11120 unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
11121 : Intrinsic::aarch64_neon_ushl;
11122 // negate the shift amount
11123 SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
11124 Op.getOperand(1));
11125 SDValue NegShiftLeft =
11126 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
11127 DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
11128 NegShift);
11129 return NegShiftLeft;
11130 }
11131
11132 return SDValue();
11133}
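// Worked example (illustrative sketch): a NEON srl of v4i32 by a
// non-immediate amount y is emitted as the ushl intrinsic applied to
// (sub 0, y), since the register form only shifts left and interprets
// negative amounts as right shifts.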
11134
11135static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
11136 AArch64CC::CondCode CC, bool NoNans, EVT VT,
11137 const SDLoc &dl, SelectionDAG &DAG) {
11138 EVT SrcVT = LHS.getValueType();
11139 assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
11140 "function only supposed to emit natural comparisons");
11141
11142 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
11143 APInt CnstBits(VT.getSizeInBits(), 0);
11144 APInt UndefBits(VT.getSizeInBits(), 0);
11145 bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
11146 bool IsZero = IsCnst && (CnstBits == 0);
11147
11148 if (SrcVT.getVectorElementType().isFloatingPoint()) {
11149 switch (CC) {
11150 default:
11151 return SDValue();
11152 case AArch64CC::NE: {
11153 SDValue Fcmeq;
11154 if (IsZero)
11155 Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
11156 else
11157 Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
11158 return DAG.getNOT(dl, Fcmeq, VT);
11159 }
11160 case AArch64CC::EQ:
11161 if (IsZero)
11162 return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
11163 return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
11164 case AArch64CC::GE:
11165 if (IsZero)
11166 return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
11167 return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
11168 case AArch64CC::GT:
11169 if (IsZero)
11170 return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
11171 return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
11172 case AArch64CC::LS:
11173 if (IsZero)
11174 return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
11175 return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
11176 case AArch64CC::LT:
11177 if (!NoNans)
11178 return SDValue();
11179 // If we ignore NaNs then we can use the MI implementation.
11180 LLVM_FALLTHROUGH;
11181 case AArch64CC::MI:
11182 if (IsZero)
11183 return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
11184 return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
11185 }
11186 }
11187
11188 switch (CC) {
11189 default:
11190 return SDValue();
11191 case AArch64CC::NE: {
11192 SDValue Cmeq;
11193 if (IsZero)
11194 Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
11195 else
11196 Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
11197 return DAG.getNOT(dl, Cmeq, VT);
11198 }
11199 case AArch64CC::EQ:
11200 if (IsZero)
11201 return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
11202 return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
11203 case AArch64CC::GE:
11204 if (IsZero)
11205 return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
11206 return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
11207 case AArch64CC::GT:
11208 if (IsZero)
11209 return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
11210 return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
11211 case AArch64CC::LE:
11212 if (IsZero)
11213 return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
11214 return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
11215 case AArch64CC::LS:
11216 return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
11217 case AArch64CC::LO:
11218 return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
11219 case AArch64CC::LT:
11220 if (IsZero)
11221 return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
11222 return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
11223 case AArch64CC::HI:
11224 return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
11225 case AArch64CC::HS:
11226 return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
11227 }
11228}
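// Worked example (illustrative sketch): an integer LT against a non-zero RHS
// is emitted as CMGT with the operands swapped, NE is emitted as NOT(CMEQ),
// and comparisons against an all-zero splat use the dedicated forms such as
// CMEQz and CMLTz.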
11229
11230SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
11231 SelectionDAG &DAG) const {
11232 if (Op.getValueType().isScalableVector())
11233 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
11234
11235 if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
11236 return LowerFixedLengthVectorSetccToSVE(Op, DAG);
11237
11238 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11239 SDValue LHS = Op.getOperand(0);
11240 SDValue RHS = Op.getOperand(1);
11241 EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
11242 SDLoc dl(Op);
11243
11244 if (LHS.getValueType().getVectorElementType().isInteger()) {
11245 assert(LHS.getValueType() == RHS.getValueType());
11246 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
11247 SDValue Cmp =
11248 EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
11249 return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
11250 }
11251
11252 const bool FullFP16 =
11253 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
11254
11255 // Make v4f16 (only) fcmp operations utilise vector instructions
11256 // v8f16 support will be a little more complicated.
11257 if (!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) {
11258 if (LHS.getValueType().getVectorNumElements() == 4) {
11259 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
11260 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
11261 SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
11262 DAG.ReplaceAllUsesWith(Op, NewSetcc);
11263 CmpVT = MVT::v4i32;
11264 } else
11265 return SDValue();
11266 }
11267
11268 assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||(static_cast <bool> ((!FullFP16 && LHS.getValueType
().getVectorElementType() != MVT::f16) || LHS.getValueType().
getVectorElementType() != MVT::f128) ? void (0) : __assert_fail
("(!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) || LHS.getValueType().getVectorElementType() != MVT::f128"
, "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 11269, __extension__ __PRETTY_FUNCTION__))
11269 LHS.getValueType().getVectorElementType() != MVT::f128)(static_cast <bool> ((!FullFP16 && LHS.getValueType
().getVectorElementType() != MVT::f16) || LHS.getValueType().
getVectorElementType() != MVT::f128) ? void (0) : __assert_fail
("(!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) || LHS.getValueType().getVectorElementType() != MVT::f128"
, "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 11269, __extension__ __PRETTY_FUNCTION__))
;
11270
11271 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
11272 // clean. Some of them require two branches to implement.
11273 AArch64CC::CondCode CC1, CC2;
11274 bool ShouldInvert;
11275 changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
11276
11277 bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
11278 SDValue Cmp =
11279 EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
11280 if (!Cmp.getNode())
11281 return SDValue();
11282
11283 if (CC2 != AArch64CC::AL) {
11284 SDValue Cmp2 =
11285 EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
11286 if (!Cmp2.getNode())
11287 return SDValue();
11288
11289 Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
11290 }
11291
11292 Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
11293
11294 if (ShouldInvert)
11295 Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
11296
11297 return Cmp;
11298}
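// Worked example (illustrative sketch): without FullFP16, a v4f16 compare is
// widened by FP_EXTENDing both operands to v4f32 and comparing at v4i32,
// then sign-extending or truncating the result back. When the FP condition
// maps to two AArch64 condition codes (CC2 != AL), two vector compares are
// emitted and ORed together before the optional inversion.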
11299
11300static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
11301 SelectionDAG &DAG) {
11302 SDValue VecOp = ScalarOp.getOperand(0);
11303 auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
11304 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
11305 DAG.getConstant(0, DL, MVT::i64));
11306}
11307
11308SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
11309 SelectionDAG &DAG) const {
11310 SDValue Src = Op.getOperand(0);
11311
11312 // Try to lower fixed length reductions to SVE.
11313 EVT SrcVT = Src.getValueType();
11314 bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND ||
11315 Op.getOpcode() == ISD::VECREDUCE_OR ||
11316 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11317 Op.getOpcode() == ISD::VECREDUCE_FADD ||
11318 (Op.getOpcode() != ISD::VECREDUCE_ADD &&
11319 SrcVT.getVectorElementType() == MVT::i64);
11320 if (SrcVT.isScalableVector() ||
11321 useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
11322
11323 if (SrcVT.getVectorElementType() == MVT::i1)
11324 return LowerPredReductionToSVE(Op, DAG);
11325
11326 switch (Op.getOpcode()) {
11327 case ISD::VECREDUCE_ADD:
11328 return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
11329 case ISD::VECREDUCE_AND:
11330 return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
11331 case ISD::VECREDUCE_OR:
11332 return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
11333 case ISD::VECREDUCE_SMAX:
11334 return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
11335 case ISD::VECREDUCE_SMIN:
11336 return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
11337 case ISD::VECREDUCE_UMAX:
11338 return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
11339 case ISD::VECREDUCE_UMIN:
11340 return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
11341 case ISD::VECREDUCE_XOR:
11342 return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
11343 case ISD::VECREDUCE_FADD:
11344 return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
11345 case ISD::VECREDUCE_FMAX:
11346 return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
11347 case ISD::VECREDUCE_FMIN:
11348 return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
11349 default:
11350 llvm_unreachable("Unhandled fixed length reduction")::llvm::llvm_unreachable_internal("Unhandled fixed length reduction"
, "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 11350)
;
11351 }
11352 }
11353
11354 // Lower NEON reductions.
11355 SDLoc dl(Op);
11356 switch (Op.getOpcode()) {
11357 case ISD::VECREDUCE_ADD:
11358 return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
11359 case ISD::VECREDUCE_SMAX:
11360 return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
11361 case ISD::VECREDUCE_SMIN:
11362 return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
11363 case ISD::VECREDUCE_UMAX:
11364 return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
11365 case ISD::VECREDUCE_UMIN:
11366 return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
11367 case ISD::VECREDUCE_FMAX: {
11368 return DAG.getNode(
11369 ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
11370 DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
11371 Src);
11372 }
11373 case ISD::VECREDUCE_FMIN: {
11374 return DAG.getNode(
11375 ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
11376 DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
11377 Src);
11378 }
11379 default:
11380 llvm_unreachable("Unhandled reduction")::llvm::llvm_unreachable_internal("Unhandled reduction", "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 11380)
;
11381 }
11382}
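// Worked example (illustrative sketch): vecreduce_add on v4i32 is lowered to
// AArch64ISD::UADDV on the whole vector followed by an extract of lane 0
// (getReductionSDNode); the FMAX/FMIN reductions go through the
// fmaxnmv/fminnmv intrinsics instead.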
11383
11384SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
11385 SelectionDAG &DAG) const {
11386 auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
11387 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
11388 return SDValue();
11389
11390 // LSE has an atomic load-add instruction, but not a load-sub.
11391 SDLoc dl(Op);
11392 MVT VT = Op.getSimpleValueType();
11393 SDValue RHS = Op.getOperand(2);
11394 AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
11395 RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
11396 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
11397 Op.getOperand(0), Op.getOperand(1), RHS,
11398 AN->getMemOperand());
11399}
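// Worked example (illustrative sketch): atomicrmw sub with operand x is
// rewritten as an atomic load-add of (0 - x), which the LSE load-add
// instruction can then handle.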
11400
11401SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
11402 SelectionDAG &DAG) const {
11403 auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
11404 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
11405 return SDValue();
11406
11407 // LSE has an atomic load-clear instruction, but not a load-and.
11408 SDLoc dl(Op);
11409 MVT VT = Op.getSimpleValueType();
11410 SDValue RHS = Op.getOperand(2);
11411 AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
11412 RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
11413 return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
11414 Op.getOperand(0), Op.getOperand(1), RHS,
11415 AN->getMemOperand());
11416}
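// Worked example (illustrative sketch): atomicrmw and with mask m is
// rewritten as an atomic load-clear of (m xor -1), i.e. the bits to clear
// are the complement of the mask.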
11417
11418SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
11419 SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
11420 SDLoc dl(Op);
11421 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11422 SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
11423
11424 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
11425 const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
11426 if (Subtarget->hasCustomCallingConv())
11427 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
11428
11429 Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
11430 DAG.getConstant(4, dl, MVT::i64));
11431 Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
11432 Chain =
11433 DAG.getNode(AArch64ISD::CALL, dl, DAG.getVTList(MVT::Other, MVT::Glue),
11434 Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
11435 DAG.getRegisterMask(Mask), Chain.getValue(1));
11436 // To match the actual intent better, we should read the output from X15 here
11437 // again (instead of potentially spilling it to the stack), but rereading Size
11438 // from X15 here doesn't work at -O0, since it thinks that X15 is undefined
11439 // here.
11440
11441 Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
11442 DAG.getConstant(4, dl, MVT::i64));
11443 return Chain;
11444}
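// Worked example (illustrative sketch): the allocation size is shifted right
// by 4 (so it is expressed in 16-byte units), passed to __chkstk in X15, and
// shifted left by 4 again afterwards to recover the byte count.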
11445
11446SDValue
11447AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
11448 SelectionDAG &DAG) const {
11449 assert(Subtarget->isTargetWindows() &&(static_cast <bool> (Subtarget->isTargetWindows() &&
"Only Windows alloca probing supported") ? void (0) : __assert_fail
("Subtarget->isTargetWindows() && \"Only Windows alloca probing supported\""
, "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 11450, __extension__ __PRETTY_FUNCTION__))
11450 "Only Windows alloca probing supported")(static_cast <bool> (Subtarget->isTargetWindows() &&
"Only Windows alloca probing supported") ? void (0) : __assert_fail
("Subtarget->isTargetWindows() && \"Only Windows alloca probing supported\""
, "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 11450, __extension__ __PRETTY_FUNCTION__))
;
11451 SDLoc dl(Op);
11452 // Get the inputs.
11453 SDNode *Node = Op.getNode();
11454 SDValue Chain = Op.getOperand(0);
11455 SDValue Size = Op.getOperand(1);
11456 MaybeAlign Align =
11457 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
11458 EVT VT = Node->getValueType(0);
11459
11460 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
11461 "no-stack-arg-probe")) {
11462 SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
11463 Chain = SP.getValue(1);
11464 SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
11465 if (Align)
11466 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
11467 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
11468 Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
11469 SDValue Ops[2] = {SP, Chain};
11470 return DAG.getMergeValues(Ops, dl);
11471 }
11472
11473 Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
11474
11475 Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
11476
11477 SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
11478 Chain = SP.getValue(1);
11479 SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
11480 if (Align)
11481 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
11482 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
11483 Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
11484
11485 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
11486 DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
11487
11488 SDValue Ops[2] = {SP, Chain};
11489 return DAG.getMergeValues(Ops, dl);
11490}
11491
11492SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
11493 SelectionDAG &DAG) const {
11494 EVT VT = Op.getValueType();
11495 assert(VT != MVT::i64 && "Expected illegal VSCALE node");
11496
11497 SDLoc DL(Op);
11498 APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue();
11499 return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sextOrSelf(64)),
11500 DL, VT);
11501}
11502
11503/// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
11504template <unsigned NumVecs>
11505static bool
11506setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
11507 AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
11508 Info.opc = ISD::INTRINSIC_VOID;
11509 // Retrieve EC from first vector argument.
11510 const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
11511 ElementCount EC = VT.getVectorElementCount();
11512#ifndef NDEBUG
11513 // Check the assumption that all input vectors are the same type.
11514 for (unsigned I = 0; I < NumVecs; ++I)
11515 assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
11516 "Invalid type.");
11517#endif
11518 // memVT is `NumVecs * VT`.
11519 Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
11520 EC * NumVecs);
11521 Info.ptrVal = CI.getArgOperand(CI.arg_size() - 1);
11522 Info.offset = 0;
11523 Info.align.reset();
11524 Info.flags = MachineMemOperand::MOStore;
11525 return true;
11526}
11527
11528/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
11529/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
11530/// specified in the intrinsic calls.
11531bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
11532 const CallInst &I,
11533 MachineFunction &MF,
11534 unsigned Intrinsic) const {
11535 auto &DL = I.getModule()->getDataLayout();
11536 switch (Intrinsic) {
11537 case Intrinsic::aarch64_sve_st2:
11538 return setInfoSVEStN<2>(*this, DL, Info, I);
11539 case Intrinsic::aarch64_sve_st3:
11540 return setInfoSVEStN<3>(*this, DL, Info, I);
11541 case Intrinsic::aarch64_sve_st4:
11542 return setInfoSVEStN<4>(*this, DL, Info, I);
11543 case Intrinsic::aarch64_neon_ld2:
11544 case Intrinsic::aarch64_neon_ld3:
11545 case Intrinsic::aarch64_neon_ld4:
11546 case Intrinsic::aarch64_neon_ld1x2:
11547 case Intrinsic::aarch64_neon_ld1x3:
11548 case Intrinsic::aarch64_neon_ld1x4:
11549 case Intrinsic::aarch64_neon_ld2lane:
11550 case Intrinsic::aarch64_neon_ld3lane:
11551 case Intrinsic::aarch64_neon_ld4lane:
11552 case Intrinsic::aarch64_neon_ld2r:
11553 case Intrinsic::aarch64_neon_ld3r:
11554 case Intrinsic::aarch64_neon_ld4r: {
11555 Info.opc = ISD::INTRINSIC_W_CHAIN;
11556 // Conservatively set memVT to the entire set of vectors loaded.
11557 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
11558 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
11559 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
11560 Info.offset = 0;
11561 Info.align.reset();
11562 // volatile loads with NEON intrinsics not supported
11563 Info.flags = MachineMemOperand::MOLoad;
11564 return true;
11565 }
11566 case Intrinsic::aarch64_neon_st2:
11567 case Intrinsic::aarch64_neon_st3:
11568 case Intrinsic::aarch64_neon_st4:
11569 case Intrinsic::aarch64_neon_st1x2:
11570 case Intrinsic::aarch64_neon_st1x3:
11571 case Intrinsic::aarch64_neon_st1x4:
11572 case Intrinsic::aarch64_neon_st2lane:
11573 case Intrinsic::aarch64_neon_st3lane:
11574 case Intrinsic::aarch64_neon_st4lane: {
11575 Info.opc = ISD::INTRINSIC_VOID;
11576 // Conservatively set memVT to the entire set of vectors stored.
11577 unsigned NumElts = 0;
11578 for (const Value *Arg : I.args()) {
11579 Type *ArgTy = Arg->getType();
11580 if (!ArgTy->isVectorTy())
11581 break;
11582 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
11583 }
11584 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
11585 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
11586 Info.offset = 0;
11587 Info.align.reset();
11588 // volatile stores with NEON intrinsics not supported
11589 Info.flags = MachineMemOperand::MOStore;
11590 return true;
11591 }
11592 case Intrinsic::aarch64_ldaxr:
11593 case Intrinsic::aarch64_ldxr: {
11594 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
11595 Info.opc = ISD::INTRINSIC_W_CHAIN;
11596 Info.memVT = MVT::getVT(PtrTy->getElementType());
11597 Info.ptrVal = I.getArgOperand(0);
11598 Info.offset = 0;
11599 Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11600 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
11601 return true;
11602 }
11603 case Intrinsic::aarch64_stlxr:
11604 case Intrinsic::aarch64_stxr: {
11605 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
11606 Info.opc = ISD::INTRINSIC_W_CHAIN;
11607 Info.memVT = MVT::getVT(PtrTy->getElementType());
11608 Info.ptrVal = I.getArgOperand(1);
11609 Info.offset = 0;
11610 Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11611 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
11612 return true;
11613 }
11614 case Intrinsic::aarch64_ldaxp:
11615 case Intrinsic::aarch64_ldxp:
11616 Info.opc = ISD::INTRINSIC_W_CHAIN;
11617 Info.memVT = MVT::i128;
11618 Info.ptrVal = I.getArgOperand(0);
11619 Info.offset = 0;
11620 Info.align = Align(16);
11621 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
11622 return true;
11623 case Intrinsic::aarch64_stlxp:
11624 case Intrinsic::aarch64_stxp:
11625 Info.opc = ISD::INTRINSIC_W_CHAIN;
11626 Info.memVT = MVT::i128;
11627 Info.ptrVal = I.getArgOperand(2);
11628 Info.offset = 0;
11629 Info.align = Align(16);
11630 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
11631 return true;
11632 case Intrinsic::aarch64_sve_ldnt1: {
11633 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
11634 Info.opc = ISD::INTRINSIC_W_CHAIN;
11635 Info.memVT = MVT::getVT(I.getType());
11636 Info.ptrVal = I.getArgOperand(1);
11637 Info.offset = 0;
11638 Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11639 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
11640 return true;
11641 }
11642 case Intrinsic::aarch64_sve_stnt1: {
11643 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
11644 Info.opc = ISD::INTRINSIC_W_CHAIN;
11645 Info.memVT = MVT::getVT(I.getOperand(0)->getType());
11646 Info.ptrVal = I.getArgOperand(2);
11647 Info.offset = 0;
11648 Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11649 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
11650 return true;
11651 }
11652 default:
11653 break;
11654 }
11655
11656 return false;
11657}
11658
11659bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
11660 ISD::LoadExtType ExtTy,
11661 EVT NewVT) const {
11662 // TODO: This may be worth removing. Check regression tests for diffs.
11663 if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
11664 return false;
11665
11666 // If we're reducing the load width in order to avoid having to use an extra
11667 // instruction to do extension then it's probably a good idea.
11668 if (ExtTy != ISD::NON_EXTLOAD)
11669 return true;
11670 // Don't reduce load width if it would prevent us from combining a shift into
11671 // the offset.
11672 MemSDNode *Mem = dyn_cast<MemSDNode>(Load);
11673 assert(Mem);
11674 const SDValue &Base = Mem->getBasePtr();
11675 if (Base.getOpcode() == ISD::ADD &&
11676 Base.getOperand(1).getOpcode() == ISD::SHL &&
11677 Base.getOperand(1).hasOneUse() &&
11678 Base.getOperand(1).getOperand(1).getOpcode() == ISD::Constant) {
11679 // The shift can be combined if it matches the size of the value being
11680 // loaded (and so reducing the width would make it not match).
11681 uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
11682 uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
11683 if (ShiftAmount == Log2_32(LoadBytes))
11684 return false;
11685 }
11686 // We have no reason to disallow reducing the load width, so allow it.
11687 return true;
11688}
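// Worked example (illustrative sketch): for a 32-bit load whose address is
// (add base, (shl index, 2)), the shift amount matches Log2_32(4) and can
// fold into the scaled addressing mode (e.g. ldr w0, [x1, x2, lsl #2]);
// narrowing the load would break that fold, so the width reduction is refused.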
11689
11690 // Truncation from a 64-bit GPR to a 32-bit GPR is free.
11691bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
11692 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
11693 return false;
11694 uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize();
11695 uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize();
11696 return NumBits1 > NumBits2;
11697}
11698bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
11699 if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
11700 return false;
11701 uint64_t NumBits1 = VT1.getFixedSizeInBits();
11702 uint64_t NumBits2 = VT2.getFixedSizeInBits();
11703 return NumBits1 > NumBits2;
11704}
11705
11706 /// Check if it is profitable to hoist an instruction in then/else to if.
11707 /// Not profitable if I and its user can form an FMA instruction
11708 /// because we prefer FMSUB/FMADD.
11709bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
11710 if (I->getOpcode() != Instruction::FMul)
11711 return true;
11712
11713 if (!I->hasOneUse())
11714 return true;
11715
11716 Instruction *User = I->user_back();
11717
11718 if (User &&
11719 !(User->getOpcode() == Instruction::FSub ||
11720 User->getOpcode() == Instruction::FAdd))
11721 return true;
11722
11723 const TargetOptions &Options = getTargetMachine().Options;
11724 const Function *F = I->getFunction();
11725 const DataLayout &DL = F->getParent()->getDataLayout();
11726 Type *Ty = User->getOperand(0)->getType();
11727
11728 return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
11729 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
11730 (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11731 Options.UnsafeFPMath));
11732}
11733
11734// All 32-bit GPR operations implicitly zero the high-half of the corresponding
11735// 64-bit GPR.
11736bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
11737 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
11738 return false;
11739 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
11740 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
11741 return NumBits1 == 32 && NumBits2 == 64;
11742}
11743bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
11744 if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
11745 return false;
11746 unsigned NumBits1 = VT1.getSizeInBits();
11747 unsigned NumBits2 = VT2.getSizeInBits();
11748 return NumBits1 == 32 && NumBits2 == 64;
11749}
11750
11751bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
11752 EVT VT1 = Val.getValueType();
11753 if (isZExtFree(VT1, VT2)) {
11754 return true;
11755 }
11756
11757 if (Val.getOpcode() != ISD::LOAD)
11758 return false;
11759
11760 // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
11761 return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
11762 VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
11763 VT1.getSizeInBits() <= 32);
11764}
11765
11766bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
11767 if (isa<FPExtInst>(Ext))
11768 return false;
11769
11770 // Vector types are not free.
11771 if (Ext->getType()->isVectorTy())
11772 return false;
11773
11774 for (const Use &U : Ext->uses()) {
11775 // The extension is free if we can fold it with a left shift in an
11776 // addressing mode or an arithmetic operation: add, sub, and cmp.
11777
11778 // Is there a shift?
11779 const Instruction *Instr = cast<Instruction>(U.getUser());
11780
11781 // Is this a constant shift?
11782 switch (Instr->getOpcode()) {
11783 case Instruction::Shl:
11784 if (!isa<ConstantInt>(Instr->getOperand(1)))
11785 return false;
11786 break;
11787 case Instruction::GetElementPtr: {
11788 gep_type_iterator GTI = gep_type_begin(Instr);
11789 auto &DL = Ext->getModule()->getDataLayout();
11790 std::advance(GTI, U.getOperandNo()-1);
11791 Type *IdxTy = GTI.getIndexedType();
11792 // This extension will end up with a shift because of the scaling factor.
11793 // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
11794 // Get the shift amount based on the scaling factor:
11795 // log2(sizeof(IdxTy)) - log2(8).
11796 uint64_t ShiftAmt =
11797 countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3;
11798 // Is the constant foldable in the shift of the addressing mode?
11799 // I.e., shift amount is between 1 and 4 inclusive.
11800 if (ShiftAmt == 0 || ShiftAmt > 4)
11801 return false;
11802 break;
11803 }
11804 case Instruction::Trunc:
11805 // Check if this is a noop.
11806 // trunc(sext ty1 to ty2) to ty1.
11807 if (Instr->getType() == Ext->getOperand(0)->getType())
11808 continue;
11809 LLVM_FALLTHROUGH;
11810 default:
11811 return false;
11812 }
11813
11814 // At this point we can use the bfm family, so this extension is free
11815 // for that use.
11816 }
11817 return true;
11818}
11819
11820/// Check if both Op1 and Op2 are shufflevector extracts of either the lower
11821/// or upper half of the vector elements.
11822static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
11823 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
11824 auto *FullTy = FullV->getType();
11825 auto *HalfTy = HalfV->getType();
11826 return FullTy->getPrimitiveSizeInBits().getFixedSize() ==
11827 2 * HalfTy->getPrimitiveSizeInBits().getFixedSize();
11828 };
11829
11830 auto extractHalf = [](Value *FullV, Value *HalfV) {
11831 auto *FullVT = cast<FixedVectorType>(FullV->getType());
11832 auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
11833 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
11834 };
11835
11836 ArrayRef<int> M1, M2;
11837 Value *S1Op1, *S2Op1;
11838 if (!match(Op1, m_Shuffle(m_Value(S1Op1), m_Undef(), m_Mask(M1))) ||
11839 !match(Op2, m_Shuffle(m_Value(S2Op1), m_Undef(), m_Mask(M2))))
11840 return false;
11841
11842 // Check that each shuffle result is half as wide as its input vector and
11843 // that it extracts half of the elements of that input.
11844 if (!areTypesHalfed(S1Op1, Op1) || !areTypesHalfed(S2Op1, Op2) ||
11845 !extractHalf(S1Op1, Op1) || !extractHalf(S2Op1, Op2))
11846 return false;
11847
11848 // Check the mask extracts either the lower or upper half of vector
11849 // elements.
11850 int M1Start = -1;
11851 int M2Start = -1;
11852 int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
11853 if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
11854 !ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
11855 M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
11856 return false;
11857
11858 return true;
11859}
11860
11861/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
11862/// of the vector elements.
11863static bool areExtractExts(Value *Ext1, Value *Ext2) {
11864 auto areExtDoubled = [](Instruction *Ext) {
11865 return Ext->getType()->getScalarSizeInBits() ==
11866 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
11867 };
11868
11869 if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
11870 !match(Ext2, m_ZExtOrSExt(m_Value())) ||
11871 !areExtDoubled(cast<Instruction>(Ext1)) ||
11872 !areExtDoubled(cast<Instruction>(Ext2)))
11873 return false;
11874
11875 return true;
11876}
11877
11878/// Check if Op could be used with vmull_high_p64 intrinsic.
11879static bool isOperandOfVmullHighP64(Value *Op) {
11880 Value *VectorOperand = nullptr;
11881 ConstantInt *ElementIndex = nullptr;
11882 return match(Op, m_ExtractElt(m_Value(VectorOperand),
11883 m_ConstantInt(ElementIndex))) &&
11884 ElementIndex->getValue() == 1 &&
11885 isa<FixedVectorType>(VectorOperand->getType()) &&
11886 cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
11887}
11888
11889/// Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
11890static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
11891 return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2);
11892}
11893
11894/// Check if sinking \p I's operands to I's basic block is profitable, because
11895/// the operands can be folded into a target instruction, e.g.
11896 /// shufflevector extracts and/or sext/zext can be folded into (u,s)subl(2).
11897bool AArch64TargetLowering::shouldSinkOperands(
11898 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
11899 if (!I->getType()->isVectorTy())
11900 return false;
11901
11902 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
11903 switch (II->getIntrinsicID()) {
11904 case Intrinsic::aarch64_neon_umull:
11905 if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
11906 return false;
11907 Ops.push_back(&II->getOperandUse(0));
11908 Ops.push_back(&II->getOperandUse(1));
11909 return true;
11910
11911 case Intrinsic::aarch64_neon_pmull64:
11912 if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
11913 II->getArgOperand(1)))
11914 return false;
11915 Ops.push_back(&II->getArgOperandUse(0));
11916 Ops.push_back(&II->getArgOperandUse(1));
11917 return true;
11918
11919 default:
11920 return false;
11921 }
11922 }
11923
11924 switch (I->getOpcode()) {
11925 case Instruction::Sub:
11926 case Instruction::Add: {
11927 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
11928 return false;
11929
11930 // If the exts' operands extract either the lower or upper elements, we
11931 // can sink them too.
11932 auto Ext1 = cast<Instruction>(I->getOperand(0));
11933 auto Ext2 = cast<Instruction>(I->getOperand(1));
11934 if (areExtractShuffleVectors(Ext1->getOperand(0), Ext2->getOperand(0))) {
11935 Ops.push_back(&Ext1->getOperandUse(0));
11936 Ops.push_back(&Ext2->getOperandUse(0));
11937 }
11938
11939 Ops.push_back(&I->getOperandUse(0));
11940 Ops.push_back(&I->getOperandUse(1));
11941
11942 return true;
11943 }
11944 case Instruction::Mul: {
11945 bool IsProfitable = false;
11946 for (auto &Op : I->operands()) {
11947 // Make sure we are not already sinking this operand
11948 if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
11949 continue;
11950
11951 ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
11952 if (!Shuffle || !Shuffle->isZeroEltSplat())
11953 continue;
11954
11955 Value *ShuffleOperand = Shuffle->getOperand(0);
11956 InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
11957 if (!Insert)
11958 continue;
11959
11960 Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
11961 if (!OperandInstr)
11962 continue;
11963
11964 ConstantInt *ElementConstant =
11965 dyn_cast<ConstantInt>(Insert->getOperand(2));
11966 // Check that the insertelement is inserting into element 0
11967 if (!ElementConstant || ElementConstant->getZExtValue() != 0)
11968 continue;
11969
11970 unsigned Opcode = OperandInstr->getOpcode();
11971 if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt)
11972 continue;
11973
11974 Ops.push_back(&Shuffle->getOperandUse(0));
11975 Ops.push_back(&Op);
11976 IsProfitable = true;
11977 }
11978
11979 return IsProfitable;
11980 }
11981 default:
11982 return false;
11983 }
11984 return false;
11985}
11986
11987bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
11988 Align &RequiredAligment) const {
11989 if (!LoadedType.isSimple() ||
11990 (!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
11991 return false;
11992 // Cyclone supports unaligned accesses.
11993 RequiredAligment = Align(1);
11994 unsigned NumBits = LoadedType.getSizeInBits();
11995 return NumBits == 32 || NumBits == 64;
11996}
11997
11998/// A helper function for determining the number of interleaved accesses we
11999/// will generate when lowering accesses of the given type.
12000unsigned
12001AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
12002 const DataLayout &DL) const {
12003 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
12004}
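// Worked example (illustrative sketch): a <16 x i32> vector is 512 bits, so
// (512 + 127) / 128 = 4 interleaved accesses will be generated.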
12005
12006MachineMemOperand::Flags
12007AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
12008 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
12009 I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
12010 return MOStridedAccess;
12011 return MachineMemOperand::MONone;
12012}
12013
12014bool AArch64TargetLowering::isLegalInterleavedAccessType(
12015 VectorType *VecTy, const DataLayout &DL) const {
12016
12017 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
12018 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
12019
12020 // Ensure the number of vector elements is greater than 1.
12021 if (cast<FixedVectorType>(VecTy)->getNumElements() < 2)
12022 return false;
12023
12024 // Ensure the element type is legal.
12025 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
12026 return false;
12027
12028 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
12029 // 128 will be split into multiple interleaved accesses.
12030 return VecSize == 64 || VecSize % 128 == 0;
12031}
12032
12033/// Lower an interleaved load into a ldN intrinsic.
12034///
12035/// E.g. Lower an interleaved load (Factor = 2):
12036/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
12037/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
12038/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
12039///
12040/// Into:
12041/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
12042/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
12043/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
12044bool AArch64TargetLowering::lowerInterleavedLoad(
12045 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
12046 ArrayRef<unsigned> Indices, unsigned Factor) const {
12047 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&(static_cast <bool> (Factor >= 2 && Factor <=
getMaxSupportedInterleaveFactor() && "Invalid interleave factor"
) ? void (0) : __assert_fail ("Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && \"Invalid interleave factor\""
, "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 12048, __extension__ __PRETTY_FUNCTION__))
12048 "Invalid interleave factor")(static_cast <bool> (Factor >= 2 && Factor <=
getMaxSupportedInterleaveFactor() && "Invalid interleave factor"
) ? void (0) : __assert_fail ("Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && \"Invalid interleave factor\""
, "/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 12048, __extension__ __PRETTY_FUNCTION__))
;
12049 assert(!Shuffles.empty() && "Empty shufflevector input");
12050 assert(Shuffles.size() == Indices.size() &&
12051 "Unmatched number of shufflevectors and indices");
12052
12053 const DataLayout &DL = LI->getModule()->getDataLayout();
12054
12055 VectorType *VTy = Shuffles[0]->getType();
12056
12057 // Skip if we do not have NEON, and skip illegal vector types. We can
12058 // "legalize" wide vector types into multiple interleaved accesses as long as
12059 // the vector type sizes are divisible by 128 bits.
12060 if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VTy, DL))
12061 return false;
12062
12063 unsigned NumLoads = getNumInterleavedAccesses(VTy, DL);
12064
12065 auto *FVTy = cast<FixedVectorType>(VTy);
12066
12067 // A pointer vector cannot be the return type of the ldN intrinsics. Need to
12068 // load integer vectors first and then convert to pointer vectors.
12069 Type *EltTy = FVTy->getElementType();
12070 if (EltTy->isPointerTy())
12071 FVTy =
12072 FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
12073
12074 IRBuilder<> Builder(LI);
12075
12076 // The base address of the load.
12077 Value *BaseAddr = LI->getPointerOperand();
12078
12079 if (NumLoads > 1) {
12080 // If we're going to generate more than one load, reset the sub-vector type
12081 // to something legal.
12082 FVTy = FixedVectorType::get(FVTy->getElementType(),
12083 FVTy->getNumElements() / NumLoads);
12084
12085 // We will compute the pointer operand of each load from the original base
12086 // address using GEPs. Cast the base address to a pointer to the scalar
12087 // element type.
12088 BaseAddr = Builder.CreateBitCast(
12089 BaseAddr,
12090 FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
12091 }
12092
12093 Type *PtrTy = FVTy->getPointerTo(LI->getPointerAddressSpace());
12094 Type *Tys[2] = {FVTy, PtrTy};
12095 static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
12096 Intrinsic::aarch64_neon_ld3,
12097 Intrinsic::aarch64_neon_ld4};
12098 Function *LdNFunc =
12099 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
12100
12101 // Holds sub-vectors extracted from the load intrinsic return values. The
12102 // sub-vectors are associated with the shufflevector instructions they will
12103 // replace.
12104 DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
12105
12106 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
12107
12108 // If we're generating more than one load, compute the base address of
12109 // subsequent loads as an offset from the previous.
12110 if (LoadCount > 0)
12111 BaseAddr = Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
12112 FVTy->getNumElements() * Factor);
12113
12114 CallInst *LdN = Builder.CreateCall(
12115 LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
12116
12117 // Extract and store the sub-vectors returned by the load intrinsic.
12118 for (unsigned i = 0; i < Shuffles.size(); i++) {
12119 ShuffleVectorInst *SVI = Shuffles[i];
12120 unsigned Index = Indices[i];
12121
12122 Value *SubVec = Builder.CreateExtractValue(LdN, Index);
12123
12124 // Convert the integer vector to a pointer vector if the element type is a pointer.
12125 if (EltTy->isPointerTy())
12126 SubVec = Builder.CreateIntToPtr(
12127 SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
12128 FVTy->getNumElements()));
12129 SubVecs[SVI].push_back(SubVec);
12130 }
12131 }
12132
12133 // Replace uses of the shufflevector instructions with the sub-vectors
12134 // returned by the load intrinsic. If a shufflevector instruction is
12135 // associated with more than one sub-vector, those sub-vectors will be
12136 // concatenated into a single wide vector.
12137 for (ShuffleVectorInst *SVI : Shuffles) {
12138 auto &SubVec = SubVecs[SVI];
12139 auto *WideVec =
12140 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
12141 SVI->replaceAllUsesWith(WideVec);
12142 }
12143
12144 return true;
12145}
12146
12147/// Lower an interleaved store into a stN intrinsic.
12148///
12149/// E.g. Lower an interleaved store (Factor = 3):
12150/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
12151/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
12152/// store <12 x i32> %i.vec, <12 x i32>* %ptr
12153///
12154/// Into:
12155/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> %v1, <0, 1, 2, 3>
12156/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> %v1, <4, 5, 6, 7>
12157/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> %v1, <8, 9, 10, 11>
12158/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
12159///
12160/// Note that the new shufflevectors will be removed and we'll only generate one
12161/// st3 instruction in CodeGen.
12162///
12163/// Example for a more general valid mask (Factor 3). Lower:
12164/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
12165/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
12166/// store <12 x i32> %i.vec, <12 x i32>* %ptr
12167///
12168/// Into:
12169/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> %v1, <4, 5, 6, 7>
12170/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> %v1, <32, 33, 34, 35>
12171/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> %v1, <16, 17, 18, 19>
12172/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
12173bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
12174 ShuffleVectorInst *SVI,
12175 unsigned Factor) const {
12176 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
12177 "Invalid interleave factor");
12178
12179 auto *VecTy = cast<FixedVectorType>(SVI->getType());
12180 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
12181
12182 unsigned LaneLen = VecTy->getNumElements() / Factor;
12183 Type *EltTy = VecTy->getElementType();
12184 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
12185
12186 const DataLayout &DL = SI->getModule()->getDataLayout();
12187
12188 // Skip if we do not have NEON, and skip illegal vector types. We can
12189 // "legalize" wide vector types into multiple interleaved accesses as long as
12190 // the vector type's size in bits is divisible by 128.
12191 if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
12192 return false;
12193
12194 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
12195
12196 Value *Op0 = SVI->getOperand(0);
12197 Value *Op1 = SVI->getOperand(1);
12198 IRBuilder<> Builder(SI);
12199
12200 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
12201 // vectors to integer vectors.
12202 if (EltTy->isPointerTy()) {
12203 Type *IntTy = DL.getIntPtrType(EltTy);
12204 unsigned NumOpElts =
12205 cast<FixedVectorType>(Op0->getType())->getNumElements();
12206
12207 // Convert to the corresponding integer vector.
12208 auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);
12209 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
12210 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
12211
12212 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
12213 }
12214
12215 // The base address of the store.
12216 Value *BaseAddr = SI->getPointerOperand();
12217
12218 if (NumStores > 1) {
12219 // If we're going to generate more than one store, reset the lane length
12220 // and sub-vector type to something legal.
12221 LaneLen /= NumStores;
12222 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
12223
12224 // We will compute the pointer operand of each store from the original base
12225 // address using GEPs. Cast the base address to a pointer to the scalar
12226 // element type.
12227 BaseAddr = Builder.CreateBitCast(
12228 BaseAddr,
12229 SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
12230 }
12231
12232 auto Mask = SVI->getShuffleMask();
12233
12234 Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
12235 Type *Tys[2] = {SubVecTy, PtrTy};
12236 static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
12237 Intrinsic::aarch64_neon_st3,
12238 Intrinsic::aarch64_neon_st4};
12239 Function *StNFunc =
12240 Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
12241
12242 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
12243
12244 SmallVector<Value *, 5> Ops;
12245
12246 // Split the shufflevector operands into sub vectors for the new stN call.
12247 for (unsigned i = 0; i < Factor; i++) {
12248 unsigned IdxI = StoreCount * LaneLen * Factor + i;
12249 if (Mask[IdxI] >= 0) {
12250 Ops.push_back(Builder.CreateShuffleVector(
12251 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
12252 } else {
12253 unsigned StartMask = 0;
12254 for (unsigned j = 1; j < LaneLen; j++) {
12255 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
12256 if (Mask[IdxJ * Factor + IdxI] >= 0) {
12257 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
12258 break;
12259 }
12260 }
12261 // Note: Filling undef gaps with random elements is ok, since
12262 // those elements were being written anyway (with undefs).
12263 // In the case of all undefs we default to using elements from 0.
12264 // Note: StartMask cannot be negative, it's checked in
12265 // isReInterleaveMask
12266 Ops.push_back(Builder.CreateShuffleVector(
12267 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
12268 }
12269 }
12270
12271 // If we're generating more than one store, compute the base address of
12272 // subsequent stores as an offset from the previous.
12273 if (StoreCount > 0)
12274 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
12275 BaseAddr, LaneLen * Factor);
12276
12277 Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
12278 Builder.CreateCall(StNFunc, Ops);
12279 }
12280 return true;
12281}
12282
12283// Lower an SVE structured load intrinsic returning a tuple type to target
12284// specific intrinsic taking the same input but returning a multi-result value
12285// of the split tuple type.
12286//
12287// E.g. Lowering an LD3:
12288//
12289// call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32(
12290// <vscale x 4 x i1> %pred,
12291// <vscale x 4 x i32>* %addr)
12292//
12293// Output DAG:
12294//
12295// t0: ch = EntryToken
12296// t2: nxv4i1,ch = CopyFromReg t0, Register:nxv4i1 %0
12297// t4: i64,ch = CopyFromReg t0, Register:i64 %1
12298// t5: nxv4i32,nxv4i32,nxv4i32,ch = AArch64ISD::SVE_LD3 t0, t2, t4
12299// t6: nxv12i32 = concat_vectors t5, t5:1, t5:2
12300//
12301// This is called pre-legalization to avoid widening/splitting issues with
12302// non-power-of-2 tuple types used for LD3, such as nxv12i32.
12303SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
12304 ArrayRef<SDValue> LoadOps,
12305 EVT VT, SelectionDAG &DAG,
12306 const SDLoc &DL) const {
12307 assert(VT.isScalableVector() && "Can only lower scalable vectors");
12308
12309 unsigned N, Opcode;
12310 static std::map<unsigned, std::pair<unsigned, unsigned>> IntrinsicMap = {
12311 {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}},
12312 {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}},
12313 {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
12314
12315 std::tie(N, Opcode) = IntrinsicMap[Intrinsic];
12316 assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
12317 "invalid tuple vector type!");
12318
12319 EVT SplitVT =
12320 EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
12321 VT.getVectorElementCount().divideCoefficientBy(N));
12322 assert(isTypeLegal(SplitVT));
12323
12324 SmallVector<EVT, 5> VTs(N, SplitVT);
12325 VTs.push_back(MVT::Other); // Chain
12326 SDVTList NodeTys = DAG.getVTList(VTs);
12327
12328 SDValue PseudoLoad = DAG.getNode(Opcode, DL, NodeTys, LoadOps);
12329 SmallVector<SDValue, 4> PseudoLoadOps;
12330 for (unsigned I = 0; I < N; ++I)
12331 PseudoLoadOps.push_back(SDValue(PseudoLoad.getNode(), I));
12332 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, PseudoLoadOps);
12333}
12334
12335EVT AArch64TargetLowering::getOptimalMemOpType(
12336 const MemOp &Op, const AttributeList &FuncAttributes) const {
12337 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
12338 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
12339 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
12340 // Only use AdvSIMD to implement memsets of 32 bytes and above. It would have
12341 // taken one instruction to materialize the v2i64 zero and one store (with a
12342 // restrictive addressing mode). Just do i64 stores.
12343 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
12344 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
12345 if (Op.isAligned(AlignCheck))
12346 return true;
12347 bool Fast;
12348 return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
12349 MachineMemOperand::MONone, &Fast) &&
12350 Fast;
12351 };
12352
12353 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
12354 AlignmentIsAcceptable(MVT::v16i8, Align(16)))
12355 return MVT::v16i8;
12356 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
12357 return MVT::f128;
12358 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
12359 return MVT::i64;
12360 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
12361 return MVT::i32;
12362 return MVT::Other;
12363}
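// As a rough illustration of the selection above (assuming NEON is available and
// the function is not marked noimplicitfloat): a 64-byte memset with 16-byte
// alignment picks MVT::v16i8, while a 16-byte memset counts as a small memset and
// typically falls through to MVT::i64 stores.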
12364
12365LLT AArch64TargetLowering::getOptimalMemOpLLT(
12366 const MemOp &Op, const AttributeList &FuncAttributes) const {
12367 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
12368 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
12369 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
12370 // Only use AdvSIMD to implement memsets of 32 bytes and above. It would have
12371 // taken one instruction to materialize the v2i64 zero and one store (with a
12372 // restrictive addressing mode). Just do i64 stores.
12373 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
12374 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
12375 if (Op.isAligned(AlignCheck))
12376 return true;
12377 bool Fast;
12378 return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
12379 MachineMemOperand::MONone, &Fast) &&
12380 Fast;
12381 };
12382
12383 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
12384 AlignmentIsAcceptable(MVT::v2i64, Align(16)))
12385 return LLT::fixed_vector(2, 64);
12386 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
12387 return LLT::scalar(128);
12388 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
12389 return LLT::scalar(64);
12390 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
12391 return LLT::scalar(32);
12392 return LLT();
12393}
12394
12395// 12-bit optionally shifted immediates are legal for adds.
12396bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
12397 if (Immed == std::numeric_limits<int64_t>::min()) {
12398 LLVM_DEBUG(dbgs() << "Illegal add imm " << Immed
12399 << ": avoid UB for INT64_MIN\n");
12400 return false;
12401 }
12402 // Same encoding for add/sub, just flip the sign.
12403 Immed = std::abs(Immed);
12404 bool IsLegal = ((Immed >> 12) == 0 ||
12405 ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
12406 LLVM_DEBUG(dbgs() << "Is " << Immed
12407 << " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
12408 return IsLegal;
12409}
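// Concretely, the check above accepts a 12-bit unsigned immediate, optionally
// shifted left by 12 bits: 4095 (0xfff) passes the first clause, 0xfff000 passes
// the shifted clause, and a value such as 0x1234 fails both and is rejected.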
12410
12411// Return false to prevent folding
12412// (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
12413// if the folding leads to worse code.
12414bool AArch64TargetLowering::isMulAddWithConstProfitable(
12415 const SDValue &AddNode, const SDValue &ConstNode) const {
12416 // Let the DAGCombiner decide for vector types and large types.
12417 const EVT VT = AddNode.getValueType();
12418 if (VT.isVector() || VT.getScalarSizeInBits() > 64)
12419 return true;
12420
12421 // It is worse if c1 is legal add immediate, while c1*c2 is not
12422 // and has to be composed by at least two instructions.
12423 const ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
12424 const ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
12425 const int64_t C1 = C1Node->getSExtValue();
12426 const APInt C1C2 = C1Node->getAPIntValue() * C2Node->getAPIntValue();
12427 if (!isLegalAddImmediate(C1) || isLegalAddImmediate(C1C2.getSExtValue()))
12428 return true;
12429 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
12430 AArch64_IMM::expandMOVImm(C1C2.getZExtValue(), VT.getSizeInBits(), Insn);
12431 if (Insn.size() > 1)
12432 return false;
12433
12434 // Default to true and let the DAGCombiner decide.
12435 return true;
12436}
12437
12438// Integer comparisons are implemented with ADDS/SUBS, so the range of valid
12439// immediates is the same as for an add or a sub.
12440bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
12441 return isLegalAddImmediate(Immed);
12442}
12443
12444/// isLegalAddressingMode - Return true if the addressing mode represented
12445/// by AM is legal for this target, for a load/store of the specified type.
12446bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
12447 const AddrMode &AM, Type *Ty,
12448 unsigned AS, Instruction *I) const {
12449 // AArch64 has five basic addressing modes:
12450 // reg
12451 // reg + 9-bit signed offset
12452 // reg + SIZE_IN_BYTES * 12-bit unsigned offset
12453 // reg1 + reg2
12454 // reg + SIZE_IN_BYTES * reg
12455
12456 // No global is ever allowed as a base.
12457 if (AM.BaseGV)
12458 return false;
12459
12460 // No reg+reg+imm addressing.
12461 if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
12462 return false;
12463
12464 // FIXME: Update this method to support scalable addressing modes.
12465 if (isa<ScalableVectorType>(Ty)) {
12466 uint64_t VecElemNumBytes =
12467 DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
12468 return AM.HasBaseReg && !AM.BaseOffs &&
12469 (AM.Scale == 0 || (uint64_t)AM.Scale == VecElemNumBytes);
12470 }
12471
12472 // check reg + imm case:
12473 // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
12474 uint64_t NumBytes = 0;
12475 if (Ty->isSized()) {
12476 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
12477 NumBytes = NumBits / 8;
12478 if (!isPowerOf2_64(NumBits))
12479 NumBytes = 0;
12480 }
12481
12482 if (!AM.Scale) {
12483 int64_t Offset = AM.BaseOffs;
12484
12485 // 9-bit signed offset
12486 if (isInt<9>(Offset))
12487 return true;
12488
12489 // 12-bit unsigned offset
12490 unsigned shift = Log2_64(NumBytes);
12491 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
12492 // Must be a multiple of NumBytes (NumBytes is a power of 2)
12493 (Offset >> shift) << shift == Offset)
12494 return true;
12495 return false;
12496 }
12497
12498 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
12499
12500 return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
12501}
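// A small worked example for the reg + imm case (assuming an i64 access, so
// NumBytes == 8): offset 255 is accepted by the signed 9-bit check, offset 32760
// (8 * 4095) is accepted as a scaled unsigned 12-bit offset, while offset 260 is
// neither in the 9-bit range nor a multiple of 8 and is rejected when AM.Scale is 0.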
12502
12503bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
12504 // Consider splitting large offset of struct or array.
12505 return true;
12506}
12507
12508InstructionCost AArch64TargetLowering::getScalingFactorCost(
12509 const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const {
12510 // Scaling factors are not free at all.
12511 // Operands | Rt Latency
12512 // -------------------------------------------
12513 // Rt, [Xn, Xm] | 4
12514 // -------------------------------------------
12515 // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
12516 // Rt, [Xn, Wm, <extend> #imm] |
12517 if (isLegalAddressingMode(DL, AM, Ty, AS))
12518 // Scale represents reg2 * scale, so charge a cost of 1 if
12519 // it is not equal to 0 or 1.
12520 return AM.Scale != 0 && AM.Scale != 1;
12521 return -1;
12522}
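// For example, with a legal mode such as [Xn, Xm, lsl #3] for an i64 access
// (AM.Scale == 8) the function returns a cost of 1, a plain [Xn, #imm] mode
// (AM.Scale == 0) costs 0, and an illegal addressing mode returns -1.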
12523
12524bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
12525 const MachineFunction &MF, EVT VT) const {
12526 VT = VT.getScalarType();
12527
12528 if (!VT.isSimple())
12529 return false;
12530
12531 switch (VT.getSimpleVT().SimpleTy) {
12532 case MVT::f16:
12533 return Subtarget->hasFullFP16();
12534 case MVT::f32:
12535 case MVT::f64:
12536 return true;
12537 default:
12538 break;
12539 }
12540
12541 return false;
12542}
12543
12544bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
12545 Type *Ty) const {
12546 switch (Ty->getScalarType()->getTypeID()) {
12547 case Type::FloatTyID:
12548 case Type::DoubleTyID:
12549 return true;
12550 default:
12551 return false;
12552 }
12553}
12554
12555bool AArch64TargetLowering::generateFMAsInMachineCombiner(
12556 EVT VT, CodeGenOpt::Level OptLevel) const {
12557 return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector();
12558}
12559
12560const MCPhysReg *
12561AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
12562 // LR is a callee-save register, but we must treat it as clobbered by any call
12563 // site. Hence we include LR in the scratch registers, which are in turn added
12564 // as implicit-defs for stackmaps and patchpoints.
12565 static const MCPhysReg ScratchRegs[] = {
12566 AArch64::X16, AArch64::X17, AArch64::LR, 0
12567 };
12568 return ScratchRegs;
12569}
12570
12571bool
12572AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
12573 CombineLevel Level) const {
12574 N = N->getOperand(0).getNode();
12575 EVT VT = N->getValueType(0);
12576 // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
12577 // it with shift to let it be lowered to UBFX.
12578 if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
12579 isa<ConstantSDNode>(N->getOperand(1))) {
12580 uint64_t TruncMask = N->getConstantOperandVal(1);
12581 if (isMask_64(TruncMask) &&
12582 N->getOperand(0).getOpcode() == ISD::SRL &&
12583 isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
12584 return false;
12585 }
12586 return true;
12587}
12588
12589bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
12590 Type *Ty) const {
12591 assert(Ty->isIntegerTy());
12592
12593 unsigned BitSize = Ty->getPrimitiveSizeInBits();
12594 if (BitSize == 0)
12595 return false;
12596
12597 int64_t Val = Imm.getSExtValue();
12598 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
12599 return true;
12600
12601 if ((int64_t)Val < 0)
12602 Val = ~Val;
12603 if (BitSize == 32)
12604 Val &= (1LL << 32) - 1;
12605
12606 unsigned LZ = countLeadingZeros((uint64_t)Val);
12607 unsigned Shift = (63 - LZ) / 16;
12608 // MOVZ is free so return true for one or fewer MOVK.
12609 return Shift < 3;
12610}
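// An illustrative trace of the cost check above: Imm = 0x123456789abc has 19
// leading zeros, so Shift = (63 - 19) / 16 = 2 and the constant is materialized
// (MOVZ plus at most two MOVKs), whereas Imm = 0x1234567812345678 gives Shift = 3
// and the existing constant load is kept.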
12611
12612bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
12613 unsigned Index) const {
12614 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
12615 return false;
12616
12617 return (Index == 0 || Index == ResVT.getVectorNumElements());
12618}
12619
12620/// Turn vector tests of the signbit in the form of:
12621/// xor (sra X, elt_size(X)-1), -1
12622/// into:
12623/// cmge X, X, #0
12624static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
12625 const AArch64Subtarget *Subtarget) {
12626 EVT VT = N->getValueType(0);
12627 if (!Subtarget->hasNEON() || !VT.isVector())
12628 return SDValue();
12629
12630 // There must be a shift right algebraic before the xor, and the xor must be a
12631 // 'not' operation.
12632 SDValue Shift = N->getOperand(0);
12633 SDValue Ones = N->getOperand(1);
12634 if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() ||
12635 !ISD::isBuildVectorAllOnes(Ones.getNode()))
12636 return SDValue();
12637
12638 // The shift should be smearing the sign bit across each vector element.
12639 auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
12640 EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
12641 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
12642 return SDValue();
12643
12644 return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
12645}
12646
12647// Given a vecreduce_add node, detect the below pattern and convert it to the
12648// node sequence with UABDL, [S|U]ABD and UADDLP.
12649//
12650// i32 vecreduce_add(
12651// v16i32 abs(
12652// v16i32 sub(
12653// v16i32 [sign|zero]_extend(v16i8 a), v16i32 [sign|zero]_extend(v16i8 b))))
12654// =================>
12655// i32 vecreduce_add(
12656// v4i32 UADDLP(
12657// v8i16 add(
12658// v8i16 zext(
12659// v8i8 [S|U]ABD low8:v16i8 a, low8:v16i8 b
12660// v8i16 zext(
12661// v8i8 [S|U]ABD high8:v16i8 a, high8:v16i8 b
12662static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
12663 SelectionDAG &DAG) {
12664 // Assumed i32 vecreduce_add
12665 if (N->getValueType(0) != MVT::i32)
12666 return SDValue();
12667
12668 SDValue VecReduceOp0 = N->getOperand(0);
12669 unsigned Opcode = VecReduceOp0.getOpcode();
12670 // Assumed v16i32 abs
12671 if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
12672 return SDValue();
12673
12674 SDValue ABS = VecReduceOp0;
12675 // Assumed v16i32 sub
12676 if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
12677 ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
12678 return SDValue();
12679
12680 SDValue SUB = ABS->getOperand(0);
12681 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
12682 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
12683 // Assumed v16i32 type
12684 if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
12685 SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
12686 return SDValue();
12687
12688 // Assumed zext or sext
12689 bool IsZExt = false;
12690 if (Opcode0 == ISD::ZERO_EXTEND && Opcode1 == ISD::ZERO_EXTEND) {
12691 IsZExt = true;
12692 } else if (Opcode0 == ISD::SIGN_EXTEND && Opcode1 == ISD::SIGN_EXTEND) {
12693 IsZExt = false;
12694 } else
12695 return SDValue();
12696
12697 SDValue EXT0 = SUB->getOperand(0);
12698 SDValue EXT1 = SUB->getOperand(1);
12699 // Assumed zext's operand has v16i8 type
12700 if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 ||
12701 EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
12702 return SDValue();
12703
12704 // Pattern is detected. Let's convert it to a sequence of nodes.
12705 SDLoc DL(N);
12706
12707 // First, create the node pattern of UABD/SABD.
12708 SDValue UABDHigh8Op0 =
12709 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
12710 DAG.getConstant(8, DL, MVT::i64));
12711 SDValue UABDHigh8Op1 =
12712 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
12713 DAG.getConstant(8, DL, MVT::i64));
12714 SDValue UABDHigh8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
12715 UABDHigh8Op0, UABDHigh8Op1);
12716 SDValue UABDL = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDHigh8);
12717
12718 // Second, create the node pattern of UABAL.
12719 SDValue UABDLo8Op0 =
12720 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
12721 DAG.getConstant(0, DL, MVT::i64));
12722 SDValue UABDLo8Op1 =
12723 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
12724 DAG.getConstant(0, DL, MVT::i64));
12725 SDValue UABDLo8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
12726 UABDLo8Op0, UABDLo8Op1);
12727 SDValue ZExtUABD = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDLo8);
12728 SDValue UABAL = DAG.getNode(ISD::ADD, DL, MVT::v8i16, UABDL, ZExtUABD);
12729
12730 // Third, create the node of UADDLP.
12731 SDValue UADDLP = DAG.getNode(AArch64ISD::UADDLP, DL, MVT::v4i32, UABAL);
12732
12733 // Fourth, create the node of VECREDUCE_ADD.
12734 return DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, UADDLP);
12735}
12736
12737// Turn a v8i8/v16i8 extended vecreduce into a udot/sdot and vecreduce
12738// vecreduce.add(ext(A)) to vecreduce.add(DOT(zero, A, one))
12739// vecreduce.add(mul(ext(A), ext(B))) to vecreduce.add(DOT(zero, A, B))
12740static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
12741 const AArch64Subtarget *ST) {
12742 if (!ST->hasDotProd())
12743 return performVecReduceAddCombineWithUADDLP(N, DAG);
12744
12745 SDValue Op0 = N->getOperand(0);
12746 if (N->getValueType(0) != MVT::i32 ||
12747 Op0.getValueType().getVectorElementType() != MVT::i32)
12748 return SDValue();
12749
12750 unsigned ExtOpcode = Op0.getOpcode();
12751 SDValue A = Op0;
12752 SDValue B;
12753 if (ExtOpcode == ISD::MUL) {
12754 A = Op0.getOperand(0);
12755 B = Op0.getOperand(1);
12756 if (A.getOpcode() != B.getOpcode() ||
12757 A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
12758 return SDValue();
12759 ExtOpcode = A.getOpcode();
12760 }
12761 if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND)
12762 return SDValue();
12763
12764 EVT Op0VT = A.getOperand(0).getValueType();
12765 if (Op0VT != MVT::v8i8 && Op0VT != MVT::v16i8)
12766 return SDValue();
12767
12768 SDLoc DL(Op0);
12769 // For non-mla reductions B can be set to 1. For MLA we take the operand of
12770 // the extend B.
12771 if (!B)
12772 B = DAG.getConstant(1, DL, Op0VT);
12773 else
12774 B = B.getOperand(0);
12775
12776 SDValue Zeros =
12777 DAG.getConstant(0, DL, Op0VT == MVT::v8i8 ? MVT::v2i32 : MVT::v4i32);
12778 auto DotOpcode =
12779 (ExtOpcode == ISD::ZERO_EXTEND) ? AArch64ISD::UDOT : AArch64ISD::SDOT;
12780 SDValue Dot = DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros,
12781 A.getOperand(0), B);
12782 return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
12783}
12784
12785static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
12786 TargetLowering::DAGCombinerInfo &DCI,
12787 const AArch64Subtarget *Subtarget) {
12788 if (DCI.isBeforeLegalizeOps())
12789 return SDValue();
12790
12791 return foldVectorXorShiftIntoCmp(N, DAG, Subtarget);
12792}
12793
12794SDValue
12795AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
12796 SelectionDAG &DAG,
12797 SmallVectorImpl<SDNode *> &Created) const {
12798 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12799 if (isIntDivCheap(N->getValueType(0), Attr))
1. Assuming the condition is false
2. Taking false branch
12800 return SDValue(N,0); // Lower SDIV as SDIV
12801
12802 // fold (sdiv X, pow2)
12803 EVT VT = N->getValueType(0);
12804 if ((VT != MVT::i32 && VT != MVT::i64) ||
3. Taking false branch
12805 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
12806 return SDValue();
12807
12808 SDLoc DL(N);
12809 SDValue N0 = N->getOperand(0);
12810 unsigned Lg2 = Divisor.countTrailingZeros();
4. Calling 'APInt::countTrailingZeros'
20. Returning from 'APInt::countTrailingZeros'
21. 'Lg2' initialized to 64
12811 SDValue Zero = DAG.getConstant(0, DL, VT);
12812 SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
22. The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'unsigned long long'
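For context on step 21 above: APInt::countTrailingZeros() returns the bit width of
the value when the value is zero, so the analyzer models 'Lg2' as 64 here, and the
expression (1ULL << Lg2) on line 12812 then shifts a 64-bit type by 64, which is
undefined behaviour.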
12813
12814 // Add (N0 < 0) ? Pow2 - 1 : 0;
12815 SDValue CCVal;
12816 SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
12817 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
12818 SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
12819
12820 Created.push_back(Cmp.getNode());
12821 Created.push_back(Add.getNode());
12822 Created.push_back(CSel.getNode());
12823
12824 // Divide by pow2.
12825 SDValue SRA =
12826 DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
12827
12828 // If we're dividing by a positive value, we're done. Otherwise, we must
12829 // negate the result.
12830 if (Divisor.isNonNegative())
12831 return SRA;
12832
12833 Created.push_back(SRA.getNode());
12834 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
12835}
12836
12837static bool IsSVECntIntrinsic(SDValue S) {
12838 switch(getIntrinsicID(S.getNode())) {
12839 default:
12840 break;
12841 case Intrinsic::aarch64_sve_cntb:
12842 case Intrinsic::aarch64_sve_cnth:
12843 case Intrinsic::aarch64_sve_cntw:
12844 case Intrinsic::aarch64_sve_cntd:
12845 return true;
12846 }
12847 return false;
12848}
12849
12850/// Calculates what the pre-extend type is, based on the extension
12851/// operation node provided by \p Extend.
12852///
12853/// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the
12854/// pre-extend type is pulled directly from the operand, while other extend
12855/// operations need a bit more inspection to get this information.
12856///
12857/// \param Extend The SDNode from the DAG that represents the extend operation
12858/// \param DAG The SelectionDAG hosting the \p Extend node
12859///
12860/// \returns The type representing the \p Extend source type, or \p MVT::Other
12861/// if no valid type can be determined
12862static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) {
12863 switch (Extend.getOpcode()) {
12864 case ISD::SIGN_EXTEND:
12865 case ISD::ZERO_EXTEND:
12866 return Extend.getOperand(0).getValueType();
12867 case ISD::AssertSext:
12868 case ISD::AssertZext:
12869 case ISD::SIGN_EXTEND_INREG: {
12870 VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1));
12871 if (!TypeNode)
12872 return MVT::Other;
12873 return TypeNode->getVT();
12874 }
12875 case ISD::AND: {
12876 ConstantSDNode *Constant =
12877 dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode());
12878 if (!Constant)
12879 return MVT::Other;
12880
12881 uint32_t Mask = Constant->getZExtValue();
12882
12883 if (Mask == UCHAR_MAX)
12884 return MVT::i8;
12885 else if (Mask == USHRT_MAX)
12886 return MVT::i16;
12887 else if (Mask == UINT_MAX)
12888 return MVT::i32;
12889
12890 return MVT::Other;
12891 }
12892 default:
12893 return MVT::Other;
12894 }
12895
12896 llvm_unreachable("Code path unhandled in calculatePreExtendType!");
12897}
12898
12899/// Combines a dup(sext/zext) node pattern into sext/zext(dup)
12900/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
12901static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
12902 SelectionDAG &DAG) {
12903
12904 ShuffleVectorSDNode *ShuffleNode =
12905 dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode());
12906 if (!ShuffleNode)
12907 return SDValue();
12908
12909 // Ensure the shuffle is a splat of lane 0 (an all-zero mask) before continuing
12910 if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0)
12911 return SDValue();
12912
12913 SDValue InsertVectorElt = VectorShuffle.getOperand(0);
12914
12915 if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
12916 return SDValue();
12917
12918 SDValue InsertLane = InsertVectorElt.getOperand(2);
12919 ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode());
12920 // Ensures the insert is inserting into lane 0
12921 if (!Constant || Constant->getZExtValue() != 0)
12922 return SDValue();
12923
12924 SDValue Extend = InsertVectorElt.getOperand(1);
12925 unsigned ExtendOpcode = Extend.getOpcode();
12926
12927 bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
12928 ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
12929 ExtendOpcode == ISD::AssertSext;
12930 if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
12931 ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
12932 return SDValue();
12933
12934 EVT TargetType = VectorShuffle.getValueType();
12935 EVT PreExtendType = calculatePreExtendType(Extend, DAG);
12936
12937 if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 &&
12938 TargetType != MVT::v2i64) ||
12939 (PreExtendType == MVT::Other))
12940 return SDValue();
12941
12942 // Restrict valid pre-extend data type
12943 if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
12944 PreExtendType != MVT::i32)
12945 return SDValue();
12946
12947 EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);
12948
12949 if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount())
12950 return SDValue();
12951
12952 if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2)
12953 return SDValue();
12954
12955 SDLoc DL(VectorShuffle);
12956
12957 SDValue InsertVectorNode = DAG.getNode(
12958 InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
12959 DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
12960 DAG.getConstant(0, DL, MVT::i64));
12961
12962 std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());
12963
12964 SDValue VectorShuffleNode =
12965 DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
12966 DAG.getUNDEF(PreExtendVT), ShuffleMask);
12967
12968 SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
12969 DL, TargetType, VectorShuffleNode);
12970
12971 return ExtendNode;
12972}
12973
12974/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
12975/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
12976static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
12977 // If the value type isn't a vector, none of the operands are going to be dups
12978 if (!Mul->getValueType(0).isVector())
12979 return SDValue();
12980
12981 SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG);
12982 SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG);
12983
12984 // If neither operand has been changed, don't make any further changes
12985 if (!Op0 && !Op1)
12986 return SDValue();
12987
12988 SDLoc DL(Mul);
12989 return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0),
12990 Op0 ? Op0 : Mul->getOperand(0),
12991 Op1 ? Op1 : Mul->getOperand(1));
12992}
12993
12994static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
12995 TargetLowering::DAGCombinerInfo &DCI,
12996 const AArch64Subtarget *Subtarget) {
12997
12998 if (SDValue Ext = performMulVectorExtendCombine(N, DAG))
12999 return Ext;
13000
13001 if (DCI.isBeforeLegalizeOps())
13002 return SDValue();
13003
13004 // The below optimizations require a constant RHS.
13005 if (!isa<ConstantSDNode>(N->getOperand(1)))
13006 return SDValue();
13007
13008 SDValue N0 = N->getOperand(0);
13009 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
13010 const APInt &ConstValue = C->getAPIntValue();
13011
13012 // Allow the scaling to be folded into the `cnt` instruction by preventing
13013 // the scaling from being obscured here. This makes it easier to pattern match.
13014 if (IsSVECntIntrinsic(N0) ||
13015 (N0->getOpcode() == ISD::TRUNCATE &&
13016 (IsSVECntIntrinsic(N0->getOperand(0)))))
13017 if (ConstValue.sge(1) && ConstValue.sle(16))
13018 return SDValue();
13019
13020 // Multiplication of a power of two plus/minus one can be done more
13021 // cheaply as shift+add/sub. For now, this is true unilaterally. If
13022 // future CPUs have a cheaper MADD instruction, this may need to be
13023 // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
13024 // 64-bit is 5 cycles, so this is always a win.
13025 // More aggressively, some multiplications N0 * C can be lowered to
13026 // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
13027 // e.g. 6=3*2=(2+1)*2.
13028 // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
13029 // which equals to (1+2)*16-(1+2).
13030
13031 // TrailingZeroes is used to test if the mul can be lowered to
13032 // shift+add+shift.
13033 unsigned TrailingZeroes = ConstValue.countTrailingZeros();
13034 if (TrailingZeroes) {
13035 // Conservatively do not lower to shift+add+shift if the mul might be
13036 // folded into smul or umul.
13037 if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
13038 isZeroExtended(N0.getNode(), DAG)))
13039 return SDValue();
13040 // Conservatively do not lower to shift+add+shift if the mul might be
13041 // folded into madd or msub.
13042 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
13043 N->use_begin()->getOpcode() == ISD::SUB))
13044 return SDValue();
13045 }
13046 // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
13047 // and shift+add+shift.
13048 APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
13049
13050 unsigned ShiftAmt, AddSubOpc;
13051 // Is the shifted value the LHS operand of the add/sub?
13052 bool ShiftValUseIsN0 = true;
13053 // Do we need to negate the result?
13054 bool NegateResult = false;
13055
13056 if (ConstValue.isNonNegative()) {
13057 // (mul x, 2^N + 1) => (add (shl x, N), x)
13058 // (mul x, 2^N - 1) => (sub (shl x, N), x)
13059 // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
13060 APInt SCVMinus1 = ShiftedConstValue - 1;
13061 APInt CVPlus1 = ConstValue + 1;
13062 if (SCVMinus1.isPowerOf2()) {
13063 ShiftAmt = SCVMinus1.logBase2();
13064 AddSubOpc = ISD::ADD;
13065 } else if (CVPlus1.isPowerOf2()) {
13066 ShiftAmt = CVPlus1.logBase2();
13067 AddSubOpc = ISD::SUB;
13068 } else
13069 return SDValue();
13070 } else {
13071 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
13072 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
13073 APInt CVNegPlus1 = -ConstValue + 1;
13074 APInt CVNegMinus1 = -ConstValue - 1;
13075 if (CVNegPlus1.isPowerOf2()) {
13076 ShiftAmt = CVNegPlus1.logBase2();
13077 AddSubOpc = ISD::SUB;
13078 ShiftValUseIsN0 = false;
13079 } else if (CVNegMinus1.isPowerOf2()) {
13080 ShiftAmt = CVNegMinus1.logBase2();
13081 AddSubOpc = ISD::ADD;
13082 NegateResult = true;
13083 } else
13084 return SDValue();
13085 }
13086
13087 SDLoc DL(N);
13088 EVT VT = N->getValueType(0);
13089 SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
13090 DAG.getConstant(ShiftAmt, DL, MVT::i64));
13091
13092 SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0;
13093 SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal;
13094 SDValue Res = DAG.getNode(AddSubOpc, DL, VT, AddSubN0, AddSubN1);
13095 assert(!(NegateResult && TrailingZeroes) &&
13096 "NegateResult and TrailingZeroes cannot both be true for now.");
13097 // Negate the result.
13098 if (NegateResult)
13099 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
13100 // Shift the result.
13101 if (TrailingZeroes)
13102 return DAG.getNode(ISD::SHL, DL, VT, Res,
13103 DAG.getConstant(TrailingZeroes, DL, MVT::i64));
13104 return Res;
13105}
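// Illustrative traces of the combine above: for a constant of 7, CVPlus1 == 8 is a
// power of two, so the result is (sub (shl x, 3), x); for a constant of 6, one
// trailing zero is peeled off first (ShiftedConstValue == 3), giving
// (shl (add (shl x, 1), x), 1).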
13106
13107static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
13108 SelectionDAG &DAG) {
13109 // Take advantage of vector comparisons producing 0 or -1 in each lane to
13110 // optimize away operation when it's from a constant.
13111 //
13112 // The general transformation is:
13113 // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
13114 // AND(VECTOR_CMP(x,y), constant2)
13115 // constant2 = UNARYOP(constant)
13116
13117 // Early exit if this isn't a vector operation, the operand of the
13118 // unary operation isn't a bitwise AND, or if the sizes of the operations
13119 // aren't the same.
13120 EVT VT = N->getValueType(0);
13121 if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
13122 N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
13123 VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
13124 return SDValue();
13125
13126 // Now check that the other operand of the AND is a constant. We could
13127 // make the transformation for non-constant splats as well, but it's unclear
13128 // that would be a benefit as it would not eliminate any operations, just
13129 // perform one more step in scalar code before moving to the vector unit.
13130 if (BuildVectorSDNode *BV =
13131 dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
13132 // Bail out if the vector isn't a constant.
13133 if (!BV->isConstant())
13134 return SDValue();
13135
13136 // Everything checks out. Build up the new and improved node.
13137 SDLoc DL(N);
13138 EVT IntVT = BV->getValueType(0);
13139 // Create a new constant of the appropriate type for the transformed
13140 // DAG.
13141 SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
13142 // The AND node needs bitcasts to/from an integer vector type around it.
13143 SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
13144 SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
13145 N->getOperand(0)->getOperand(0), MaskConst);
13146 SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
13147 return Res;
13148 }
13149
13150 return SDValue();
13151}
13152
13153static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
13154 const AArch64Subtarget *Subtarget) {
13155 // First try to optimize away the conversion when it's conditionally from
13156 // a constant. Vectors only.
13157 if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
13158 return Res;
13159
13160 EVT VT = N->getValueType(0);
13161 if (VT != MVT::f32 && VT != MVT::f64)
13162 return SDValue();
13163
13164 // Only optimize when the source and destination types have the same width.
13165 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
13166 return SDValue();
13167
13168 // If the result of an integer load is only used by an integer-to-float
13169 // conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
13170 // This eliminates an "integer-to-vector-move" UOP and improves throughput.
13171 SDValue N0 = N->getOperand(0);
13172 if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13173 // Do not change the width of a volatile load.
13174 !cast<LoadSDNode>(N0)->isVolatile()) {
13175 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13176 SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
13177 LN0->getPointerInfo(), LN0->getAlignment(),
13178 LN0->getMemOperand()->getFlags());
13179
13180 // Make sure successors of the original load stay after it by updating them
13181 // to use the new Chain.
13182 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
13183
13184 unsigned Opcode =
13185 (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
13186 return DAG.getNode(Opcode, SDLoc(N), VT, Load);
13187 }
13188
13189 return SDValue();
13190}
13191
13192/// Fold a floating-point multiply by power of two into floating-point to
13193/// fixed-point conversion.
13194static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
13195 TargetLowering::DAGCombinerInfo &DCI,
13196 const AArch64Subtarget *Subtarget) {
13197 if (!Subtarget->hasNEON())
13198 return SDValue();
13199
13200 if (!N->getValueType(0).isSimple())
13201 return SDValue();
13202
13203 SDValue Op = N->getOperand(0);
13204 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
13205 Op.getOpcode() != ISD::FMUL)
13206 return SDValue();
13207
13208 SDValue ConstVec = Op->getOperand(1);
13209 if (!isa<BuildVectorSDNode>(ConstVec))
13210 return SDValue();
13211
13212 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
13213 uint32_t FloatBits = FloatTy.getSizeInBits();
13214 if (FloatBits != 32 && FloatBits != 64)
13215 return SDValue();
13216
13217 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
13218 uint32_t IntBits = IntTy.getSizeInBits();
13219 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
13220 return SDValue();
13221
13222 // Avoid conversions where iN is larger than the float (e.g., float -> i64).
13223 if (IntBits > FloatBits)
13224 return SDValue();
13225
13226 BitVector UndefElements;
13227 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
13228 int32_t Bits = IntBits == 64 ? 64 : 32;
13229 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
13230 if (C == -1 || C == 0 || C > Bits)
13231 return SDValue();
13232
13233 MVT ResTy;
13234 unsigned NumLanes = Op.getValueType().getVectorNumElements();
13235 switch (NumLanes) {
13236 default:
13237 return SDValue();
13238 case 2:
13239 ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
13240 break;
13241 case 4:
13242 ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
13243 break;
13244 }
13245
13246 if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
13247 return SDValue();
13248
13249 assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
13250 "Illegal vector type after legalization");
13251
13252 SDLoc DL(N);
13253 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
13254 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
13255 : Intrinsic::aarch64_neon_vcvtfp2fxu;
13256 SDValue FixConv =
13257 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
13258 DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
13259 Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
13260 // We can handle smaller integers by generating an extra trunc.
13261 if (IntBits < FloatBits)
13262 FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
13263
13264 return FixConv;
13265}
13266
13267/// Fold a floating-point divide by power of two into fixed-point to
13268/// floating-point conversion.
13269static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
13270 TargetLowering::DAGCombinerInfo &DCI,
13271 const AArch64Subtarget *Subtarget) {
13272 if (!Subtarget->hasNEON())
13273 return SDValue();
13274
13275 SDValue Op = N->getOperand(0);
13276 unsigned Opc = Op->getOpcode();
13277 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
13278 !Op.getOperand(0).getValueType().isSimple() ||
13279 (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
13280 return SDValue();
13281
13282 SDValue ConstVec = N->getOperand(1);
13283 if (!isa<BuildVectorSDNode>(ConstVec))
13284 return SDValue();
13285
13286 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
13287 int32_t IntBits = IntTy.getSizeInBits();
13288 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
13289 return SDValue();
13290
13291 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
13292 int32_t FloatBits = FloatTy.getSizeInBits();
13293 if (FloatBits != 32 && FloatBits != 64)
13294 return SDValue();
13295
13296 // Avoid conversions where iN is larger than the float (e.g., i64 -> float).
13297 if (IntBits > FloatBits)
13298 return SDValue();
13299
13300 BitVector UndefElements;
13301 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
13302 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
13303 if (C == -1 || C == 0 || C > FloatBits)
13304 return SDValue();
13305
13306 MVT ResTy;
13307 unsigned NumLanes = Op.getValueType().getVectorNumElements();
13308 switch (NumLanes) {
13309 default:
13310 return SDValue();
13311 case 2:
13312 ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
13313 break;
13314 case 4:
13315 ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
13316 break;
13317 }
13318
13319 if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
13320 return SDValue();
13321
13322 SDLoc DL(N);
13323 SDValue ConvInput = Op.getOperand(0);
13324 bool IsSigned = Opc == ISD::SINT_TO_FP;
13325 if (IntBits < FloatBits)
13326 ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
13327 ResTy, ConvInput);
13328
13329 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
13330 : Intrinsic::aarch64_neon_vcvtfxu2fp;
13331 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
13332 DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
13333 DAG.getConstant(C, DL, MVT::i32));
13334}
13335
13336/// An EXTR instruction is made up of two shifts, ORed together. This helper
13337/// searches for and classifies those shifts.
13338static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
13339 bool &FromHi) {
13340 if (N.getOpcode() == ISD::SHL)
13341 FromHi = false;
13342 else if (N.getOpcode() == ISD::SRL)
13343 FromHi = true;
13344 else
13345 return false;
13346
13347 if (!isa<ConstantSDNode>(N.getOperand(1)))
13348 return false;
13349
13350 ShiftAmount = N->getConstantOperandVal(1);
13351 Src = N->getOperand(0);
13352 return true;
13353}
13354
13355/// EXTR instruction extracts a contiguous chunk of bits from two existing
13356/// registers viewed as a high/low pair. This function looks for the pattern:
13357/// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
13358/// with an EXTR. Can't quite be done in TableGen because the two immediates
13359/// aren't independent.
13360static SDValue tryCombineToEXTR(SDNode *N,
13361 TargetLowering::DAGCombinerInfo &DCI) {
13362 SelectionDAG &DAG = DCI.DAG;
13363 SDLoc DL(N);
13364 EVT VT = N->getValueType(0);
13365
13366 assert(N->getOpcode() == ISD::OR && "Unexpected root");
13367
13368 if (VT != MVT::i32 && VT != MVT::i64)
13369 return SDValue();
13370
13371 SDValue LHS;
13372 uint32_t ShiftLHS = 0;
13373 bool LHSFromHi = false;
13374 if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
13375 return SDValue();
13376
13377 SDValue RHS;
13378 uint32_t ShiftRHS = 0;
13379 bool RHSFromHi = false;
13380 if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
13381 return SDValue();
13382
13383 // If they're both trying to come from the high part of the register, they're
13384 // not really an EXTR.
13385 if (LHSFromHi == RHSFromHi)
13386 return SDValue();
13387
13388 if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
13389 return SDValue();
13390
13391 if (LHSFromHi) {
13392 std::swap(LHS, RHS);
13393 std::swap(ShiftLHS, ShiftRHS);
13394 }
13395
13396 return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
13397 DAG.getConstant(ShiftRHS, DL, MVT::i64));
13398}
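
For reference, the scalar computation that tryCombineToEXTR recognizes can be written directly. This standalone sketch is not part of the original file (extrPattern is an illustrative name):

#include <cstdint>

// (LHS << ShiftLHS) | (RHS >> ShiftRHS) with ShiftLHS + ShiftRHS == 64 is what
// EXTR Xd, Xn(=LHS), Xm(=RHS), #ShiftRHS computes in a single instruction.
// Both shift amounts are assumed to be in (0, 64) so neither shift is undefined.
static uint64_t extrPattern(uint64_t LHS, uint64_t RHS, unsigned ShiftLHS,
                            unsigned ShiftRHS) {
  return (LHS << ShiftLHS) | (RHS >> ShiftRHS);
}
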
13399
13400static SDValue tryCombineToBSL(SDNode *N,
13401 TargetLowering::DAGCombinerInfo &DCI) {
13402 EVT VT = N->getValueType(0);
13403 SelectionDAG &DAG = DCI.DAG;
13404 SDLoc DL(N);
13405
13406 if (!VT.isVector())
13407 return SDValue();
13408
13409 // The combining code currently only works for NEON vectors. In particular,
13410 // it does not work for SVE when dealing with vectors wider than 128 bits.
13411 if (!VT.is64BitVector() && !VT.is128BitVector())
13412 return SDValue();
13413
13414 SDValue N0 = N->getOperand(0);
13415 if (N0.getOpcode() != ISD::AND)
13416 return SDValue();
13417
13418 SDValue N1 = N->getOperand(1);
13419 if (N1.getOpcode() != ISD::AND)
13420 return SDValue();
13421
13422 // InstCombine does (not (neg a)) => (add a -1).
13423 // Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
13424 // Loop over all combinations of AND operands.
13425 for (int i = 1; i >= 0; --i) {
13426 for (int j = 1; j >= 0; --j) {
13427 SDValue O0 = N0->getOperand(i);
13428 SDValue O1 = N1->getOperand(j);
13429 SDValue Sub, Add, SubSibling, AddSibling;
13430
13431 // Find a SUB and an ADD operand, one from each AND.
13432 if (O0.getOpcode() == ISD::SUB && O1.getOpcode() == ISD::ADD) {
13433 Sub = O0;
13434 Add = O1;
13435 SubSibling = N0->getOperand(1 - i);
13436 AddSibling = N1->getOperand(1 - j);
13437 } else if (O0.getOpcode() == ISD::ADD && O1.getOpcode() == ISD::SUB) {
13438 Add = O0;
13439 Sub = O1;
13440 AddSibling = N0->getOperand(1 - i);
13441 SubSibling = N1->getOperand(1 - j);
13442 } else
13443 continue;
13444
13445 if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
13446 continue;
13447
13448 // The all-ones constant is always the right-hand operand of the Add.
13449 if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
13450 continue;
13451
13452 if (Sub.getOperand(1) != Add.getOperand(0))
13453 continue;
13454
13455 return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
13456 }
13457 }
13458
13459 // (or (and a b) (and (not a) c)) => (bsl a b c)
13460 // We only have to look for constant vectors here since the general, variable
13461 // case can be handled in TableGen.
13462 unsigned Bits = VT.getScalarSizeInBits();
13463 uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
13464 for (int i = 1; i >= 0; --i)
13465 for (int j = 1; j >= 0; --j) {
13466 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
13467 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
13468 if (!BVN0 || !BVN1)
13469 continue;
13470
13471 bool FoundMatch = true;
13472 for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
13473 ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
13474 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
13475 if (!CN0 || !CN1 ||
13476 CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
13477 FoundMatch = false;
13478 break;
13479 }
13480 }
13481
13482 if (FoundMatch)
13483 return DAG.getNode(AArch64ISD::BSP, DL, VT, SDValue(BVN0, 0),
13484 N0->getOperand(1 - i), N1->getOperand(1 - j));
13485 }
13486
13487 return SDValue();
13488}
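
The second pattern handled above, (or (and a b) (and (not a) c)) => (bsl a b c), is the ordinary bitwise-select identity. A standalone scalar sketch, not part of the original file (bitwiseSelect is an illustrative name):

#include <cstdint>

// Each result bit comes from B where the corresponding bit of A is set and
// from C where it is clear; BSL/BSP compute this per vector lane.
static uint64_t bitwiseSelect(uint64_t A, uint64_t B, uint64_t C) {
  return (A & B) | (~A & C);
}
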
13489
13490static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13491 const AArch64Subtarget *Subtarget) {
13492 // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
13493 SelectionDAG &DAG = DCI.DAG;
13494 EVT VT = N->getValueType(0);
13495
13496 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
13497 return SDValue();
13498
13499 if (SDValue Res = tryCombineToEXTR(N, DCI))
13500 return Res;
13501
13502 if (SDValue Res = tryCombineToBSL(N, DCI))
13503 return Res;
13504
13505 return SDValue();
13506}
13507
13508static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT) {
13509 if (!MemVT.getVectorElementType().isSimple())
13510 return false;
13511
13512 uint64_t MaskForTy = 0ull;
13513 switch (MemVT.getVectorElementType().getSimpleVT().SimpleTy) {
13514 case MVT::i8:
13515 MaskForTy = 0xffull;
13516 break;
13517 case MVT::i16:
13518 MaskForTy = 0xffffull;
13519 break;
13520 case MVT::i32:
13521 MaskForTy = 0xffffffffull;
13522 break;
13523 default:
13524 return false;
13525 break;
13526 }
13527
13528 if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
13529 if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
13530 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
13531
13532 return false;
13533}
13534
13535static SDValue performSVEAndCombine(SDNode *N,
13536 TargetLowering::DAGCombinerInfo &DCI) {
13537 if (DCI.isBeforeLegalizeOps())
13538 return SDValue();
13539
13540 SelectionDAG &DAG = DCI.DAG;
13541 SDValue Src = N->getOperand(0);
13542 unsigned Opc = Src->getOpcode();
13543
13544 // Zero/any extend of an unsigned unpack
13545 if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
13546 SDValue UnpkOp = Src->getOperand(0);
13547 SDValue Dup = N->getOperand(1);
13548
13549 if (Dup.getOpcode() != AArch64ISD::DUP)
13550 return SDValue();
13551
13552 SDLoc DL(N);
13553 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
13554 if (!C)
13555 return SDValue();
13556
13557 uint64_t ExtVal = C->getZExtValue();
13558
13559 // If the mask is fully covered by the unpack, we don't need to push
13560 // a new AND onto the operand
13561 EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
13562 if ((ExtVal == 0xFF && EltTy == MVT::i8) ||
13563 (ExtVal == 0xFFFF && EltTy == MVT::i16) ||
13564 (ExtVal == 0xFFFFFFFF && EltTy == MVT::i32))
13565 return Src;
13566
13567 // Truncate to prevent a DUP with an over-wide constant
13568 APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
13569
13570 // Otherwise, make sure we propagate the AND to the operand
13571 // of the unpack
13572 Dup = DAG.getNode(AArch64ISD::DUP, DL,
13573 UnpkOp->getValueType(0),
13574 DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
13575
13576 SDValue And = DAG.getNode(ISD::AND, DL,
13577 UnpkOp->getValueType(0), UnpkOp, Dup);
13578
13579 return DAG.getNode(Opc, DL, N->getValueType(0), And);
13580 }
13581
13582 if (!EnableCombineMGatherIntrinsics)
13583 return SDValue();
13584
13585 SDValue Mask = N->getOperand(1);
13586
13587 if (!Src.hasOneUse())
13588 return SDValue();
13589
13590 EVT MemVT;
13591
13592 // SVE load instructions perform an implicit zero-extend, which makes them
13593 // perfect candidates for combining.
13594 switch (Opc) {
13595 case AArch64ISD::LD1_MERGE_ZERO:
13596 case AArch64ISD::LDNF1_MERGE_ZERO:
13597 case AArch64ISD::LDFF1_MERGE_ZERO:
13598 MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
13599 break;
13600 case AArch64ISD::GLD1_MERGE_ZERO:
13601 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
13602 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
13603 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
13604 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
13605 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
13606 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
13607 case AArch64ISD::GLDFF1_MERGE_ZERO:
13608 case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
13609 case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
13610 case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
13611 case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
13612 case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
13613 case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
13614 case AArch64ISD::GLDNT1_MERGE_ZERO:
13615 MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
13616 break;
13617 default:
13618 return SDValue();
13619 }
13620
13621 if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT))
13622 return Src;
13623
13624 return SDValue();
13625}
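
The load-based half of the combine above relies on the implicit zero-extension performed by the SVE loads: masking a value whose upper bits are already zero changes nothing. A standalone scalar sketch of that fact, not part of the original file (maskAfterZeroExtend is an illustrative name):

#include <cstdint>

static uint64_t maskAfterZeroExtend(uint8_t Loaded) {
  uint64_t ZExt = Loaded;  // zero-extension clears the upper 56 bits
  return ZExt & 0xffull;   // identical to ZExt for every possible input
}
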
13626
13627static SDValue performANDCombine(SDNode *N,
13628 TargetLowering::DAGCombinerInfo &DCI) {
13629 SelectionDAG &DAG = DCI.DAG;
13630 SDValue LHS = N->getOperand(0);
13631 EVT VT = N->getValueType(0);
13632 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13633 return SDValue();
13634
13635 if (VT.isScalableVector())
13636 return performSVEAndCombine(N, DCI);
13637
13638 // The combining code below works only for NEON vectors. In particular, it
13639 // does not work for SVE when dealing with vectors wider than 128 bits.
13640 if (!(VT.is64BitVector() || VT.is128BitVector()))
13641 return SDValue();
13642
13643 BuildVectorSDNode *BVN =
13644 dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
13645 if (!BVN)
13646 return SDValue();
13647
13648 // AND does not accept an immediate, so check if we can use a BIC immediate
13649 // instruction instead. We do this here instead of using a (and x, (mvni imm))
13650 // pattern in isel, because some immediates may be lowered to the preferred
13651 // (and x, (movi imm)) form, even though an mvni representation also exists.
13652 APInt DefBits(VT.getSizeInBits(), 0);
13653 APInt UndefBits(VT.getSizeInBits(), 0);
13654 if (resolveBuildVector(BVN, DefBits, UndefBits)) {
13655 SDValue NewOp;
13656
13657 DefBits = ~DefBits;
13658 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
13659 DefBits, &LHS)) ||
13660 (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
13661 DefBits, &LHS)))
13662 return NewOp;
13663
13664 UndefBits = ~UndefBits;
13665 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
13666 UndefBits, &LHS)) ||
13667 (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
13668 UndefBits, &LHS)))
13669 return NewOp;
13670 }
13671
13672 return SDValue();
13673}
13674
13675static SDValue performSRLCombine(SDNode *N,
13676 TargetLowering::DAGCombinerInfo &DCI) {
13677 SelectionDAG &DAG = DCI.DAG;
13678 EVT VT = N->getValueType(0);
13679 if (VT != MVT::i32 && VT != MVT::i64)
13680 return SDValue();
13681
13682 // Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the
13683 // high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32)
13684 // to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero.
13685 SDValue N0 = N->getOperand(0);
13686 if (N0.getOpcode() == ISD::BSWAP) {
13687 SDLoc DL(N);
13688 SDValue N1 = N->getOperand(1);
13689 SDValue N00 = N0.getOperand(0);
13690 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
13691 uint64_t ShiftAmt = C->getZExtValue();
13692 if (VT == MVT::i32 && ShiftAmt == 16 &&
13693 DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16)))
13694 return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
13695 if (VT == MVT::i64 && ShiftAmt == 32 &&
13696 DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32)))
13697 return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
13698 }
13699 }
13700 return SDValue();
13701}
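
The i32 case of the canonicalization above holds because bswap moves the known-zero high half of x into the low half, so a logical shift right by 16 and a rotate right by 16 agree. A standalone sketch, not part of the original file (the helper names are illustrative; __builtin_bswap32 assumes a GCC/Clang-style compiler):

#include <cstdint>

static uint32_t rotr32(uint32_t V, unsigned N) {
  return (V >> N) | (V << (32 - N)); // assumes 0 < N < 32
}

// X is passed as uint16_t so the high 16 bits of the i32 value are zero.
static bool srlMatchesRotr(uint16_t X) {
  uint32_t Swapped = __builtin_bswap32(X);
  return (Swapped >> 16) == rotr32(Swapped, 16); // true for all X
}
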
13702
13703// Attempt to form urhadd(OpA, OpB) from
13704// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
13705// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
13706// The original form of the first expression is
13707// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the
13708// (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)).
13709// Before this function is called the srl will have been lowered to
13710// AArch64ISD::VLSHR.
13711// This pass can also recognize signed variants of the patterns that use sign
13712// extension instead of zero extension and form a srhadd(OpA, OpB) or a
13713// shadd(OpA, OpB) from them.
13714static SDValue
13715performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13716 SelectionDAG &DAG) {
13717 EVT VT = N->getValueType(0);
13718
13719 // Since we are looking for a right shift by a constant value of 1 and we are
13720 // operating on types at least 16 bits in length (sign/zero extended OpA and
13721 // OpB, which are at least 8 bits), it follows that the truncate will always
13722 // discard the shifted-in bit and therefore the right shift will be logical
13723 // regardless of the signedness of OpA and OpB.
13724 SDValue Shift = N->getOperand(0);
13725 if (Shift.getOpcode() != AArch64ISD::VLSHR)
13726 return SDValue();
13727
13728 // Is the right shift using an immediate value of 1?
13729 uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
13730 if (ShiftAmount != 1)
13731 return SDValue();
13732
13733 SDValue ExtendOpA, ExtendOpB;
13734 SDValue ShiftOp0 = Shift.getOperand(0);
13735 unsigned ShiftOp0Opc = ShiftOp0.getOpcode();
13736 if (ShiftOp0Opc == ISD::SUB) {
13737
13738 SDValue Xor = ShiftOp0.getOperand(1);
13739 if (Xor.getOpcode() != ISD::XOR)
13740 return SDValue();
13741
13742 // Is the XOR using a constant amount of all ones in the right hand side?
13743 uint64_t C;
13744 if (!isAllConstantBuildVector(Xor.getOperand(1), C))
13745 return SDValue();
13746
13747 unsigned ElemSizeInBits = VT.getScalarSizeInBits();
13748 APInt CAsAPInt(ElemSizeInBits, C);
13749 if (CAsAPInt != APInt::getAllOnes(ElemSizeInBits))
13750 return SDValue();
13751
13752 ExtendOpA = Xor.getOperand(0);
13753 ExtendOpB = ShiftOp0.getOperand(0);
13754 } else if (ShiftOp0Opc == ISD::ADD) {
13755 ExtendOpA = ShiftOp0.getOperand(0);
13756 ExtendOpB = ShiftOp0.getOperand(1);
13757 } else
13758 return SDValue();
13759
13760 unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
13761 unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
13762 if (!(ExtendOpAOpc == ExtendOpBOpc &&
13763 (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
13764 return SDValue();
13765
13766 // Is the result of the right shift being truncated to the same value type as
13767 // the original operands, OpA and OpB?
13768 SDValue OpA = ExtendOpA.getOperand(0);
13769 SDValue OpB = ExtendOpB.getOperand(0);
13770 EVT OpAVT = OpA.getValueType();
13771 assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
13772 if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
13773 return SDValue();
13774
13775 SDLoc DL(N);
13776 bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
13777 bool IsRHADD = ShiftOp0Opc == ISD::SUB;
13778 unsigned HADDOpc = IsSignExtend
13779 ? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
13780 : (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD);
13781 SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB);
13782
13783 return ResultHADD;
13784}
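
Because the extended operands are at most half the width of the intermediate type, the add (or the rewritten sub/xor form) never overflows, which is what lets the truncate of the shifted result be re-expressed as a halving add. A standalone scalar sketch of the i8 semantics being formed, not part of the original file (the helper names are illustrative):

#include <cstdint>

static uint8_t urhadd8(uint8_t A, uint8_t B) {
  uint16_t Sum = uint16_t(A) + uint16_t(B) + 1; // rounding halving add
  return uint8_t(Sum >> 1);
}

static uint8_t uhadd8(uint8_t A, uint8_t B) {
  uint16_t Sum = uint16_t(A) + uint16_t(B);     // truncating halving add
  return uint8_t(Sum >> 1);
}
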
13785
13786static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
13787 switch (Opcode) {
13788 case ISD::FADD:
13789 return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
13790 case ISD::ADD:
13791 return VT == MVT::i64;
13792 default:
13793 return false;
13794 }
13795}
13796
13797static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
13798 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13799 ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
13800
13801 EVT VT = N->getValueType(0);
13802 const bool FullFP16 =
13803 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
13804
13805 // Rewrite for pairwise fadd pattern
13806 // (f32 (extract_vector_elt
13807 // (fadd (vXf32 Other)
13808 // (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0))
13809 // ->
13810 // (f32 (fadd (extract_vector_elt (vXf32 Other) 0)
13811 // (extract_vector_elt (vXf32 Other) 1))
13812 if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
13813 hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) {
13814 SDLoc DL(N0);
13815 SDValue N00 = N0->getOperand(0);
13816 SDValue N01 = N0->getOperand(1);
13817
13818 ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01);
13819 SDValue Other = N00;
13820
13821 // And handle the commutative case.
13822 if (!Shuffle) {
13823 Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
13824 Other = N01;
13825 }
13826
13827 if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
13828 Other == Shuffle->getOperand(0)) {
13829 return DAG.getNode(N0->getOpcode(), DL, VT,
13830 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
13831 DAG.getConstant(0, DL, MVT::i64)),
13832 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
13833 DAG.getConstant(1, DL, MVT::i64)));
13834 }
13835 }
13836
13837 return SDValue();
13838}
13839
13840static SDValue performConcatVectorsCombine(SDNode *N,
13841 TargetLowering::DAGCombinerInfo &DCI,
13842 SelectionDAG &DAG) {
13843 SDLoc dl(N);
13844 EVT VT = N->getValueType(0);
13845 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13846 unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
13847
13848 // Optimize concat_vectors of truncated vectors, where the intermediate
13849 // type is illegal, to avoid said illegality, e.g.,
13850 // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
13851 // (v2i16 (truncate (v2i64)))))
13852 // ->
13853 // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
13854 // (v4i32 (bitcast (v2i64))),
13855 // <0, 2, 4, 6>)))
13856 // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
13857 // on both input and result type, so we might generate worse code.
13858 // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
13859 if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
13860 N1Opc == ISD::TRUNCATE) {
13861 SDValue N00 = N0->getOperand(0);
13862 SDValue N10 = N1->getOperand(0);
13863 EVT N00VT = N00.getValueType();
13864
13865 if (N00VT == N10.getValueType() &&
13866 (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
13867 N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
13868 MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
13869 SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
13870 for (size_t i = 0; i < Mask.size(); ++i)
13871 Mask[i] = i * 2;
13872 return DAG.getNode(ISD::TRUNCATE, dl, VT,
13873 DAG.getVectorShuffle(
13874 MidVT, dl,
13875 DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
13876 DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
13877 }
13878 }
13879
13880 // Wait 'til after everything is legalized to try this. That way we have
13881 // legal vector types and such.
13882 if (DCI.isBeforeLegalizeOps())
13883 return SDValue();
13884
13885 // Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted
13886 // subvectors from the same original vectors. Combine these into a single
13887 // [us]rhadd or [us]hadd that operates on the two original vectors. Example:
13888 // (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>),
13889 // extract_subvector (v16i8 OpB,
13890 // <0>))),
13891 // (v8i8 (urhadd (extract_subvector (v16i8 OpA, <8>),
13892 // extract_subvector (v16i8 OpB,
13893 // <8>)))))
13894 // ->
13895 // (v16i8(urhadd(v16i8 OpA, v16i8 OpB)))
13896 if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
13897 (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD ||
13898 N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) {
13899 SDValue N00 = N0->getOperand(0);
13900 SDValue N01 = N0->getOperand(1);
13901 SDValue N10 = N1->getOperand(0);
13902 SDValue N11 = N1->getOperand(1);
13903
13904 EVT N00VT = N00.getValueType();
13905 EVT N10VT = N10.getValueType();
13906
13907 if (N00->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13908 N01->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13909 N10->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13910 N11->getOpcode() == ISD::EXTRACT_SUBVECTOR && N00VT == N10VT) {
13911 SDValue N00Source = N00->getOperand(0);
13912 SDValue N01Source = N01->getOperand(0);
13913 SDValue N10Source = N10->getOperand(0);
13914 SDValue N11Source = N11->getOperand(0);
13915
13916 if (N00Source == N10Source && N01Source == N11Source &&
13917 N00Source.getValueType() == VT && N01Source.getValueType() == VT) {
13918 assert(N0.getValueType() == N1.getValueType());
13919
13920 uint64_t N00Index = N00.getConstantOperandVal(1);
13921 uint64_t N01Index = N01.getConstantOperandVal(1);
13922 uint64_t N10Index = N10.getConstantOperandVal(1);
13923 uint64_t N11Index = N11.getConstantOperandVal(1);
13924
13925 if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
13926 N10Index == N00VT.getVectorNumElements())
13927 return DAG.getNode(N0Opc, dl, VT, N00Source, N01Source);
13928 }
13929 }
13930 }
13931
13932 // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
13933 // splat. The indexed instructions are going to be expecting a DUPLANE64, so
13934 // canonicalise to that.
13935 if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) {
13936 assert(VT.getScalarSizeInBits() == 64);
13937 return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
13938 DAG.getConstant(0, dl, MVT::i64));
13939 }
13940
13941 // Canonicalise concat_vectors so that the right-hand vector has as few
13942 // bit-casts as possible before its real operation. The primary matching
13943 // destination for these operations will be the narrowing "2" instructions,
13944 // which depend on the operation being performed on this right-hand vector.
13945 // For example,
13946 // (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
13947 // becomes
13948 // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
13949
13950 if (N->getNumOperands() != 2 || N1Opc != ISD::BITCAST)
13951 return SDValue();
13952 SDValue RHS = N1->getOperand(0);
13953 MVT RHSTy = RHS.getValueType().getSimpleVT();
13954 // If the RHS is not a vector, this is not the pattern we're looking for.
13955 if (!RHSTy.isVector())
13956 return SDValue();
13957
13958 LLVM_DEBUG(
13959 dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
13960
13961 MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
13962 RHSTy.getVectorNumElements() * 2);
13963 return DAG.getNode(ISD::BITCAST, dl, VT,
13964 DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
13965 DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
13966 RHS));
13967}
13968
13969static SDValue
13970performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13971 SelectionDAG &DAG) {
13972 SDValue Vec = N->getOperand(0);
13973 SDValue SubVec = N->getOperand(1);
13974 uint64_t IdxVal = N->getConstantOperandVal(2);
13975 EVT VecVT = Vec.getValueType();
13976 EVT SubVT = SubVec.getValueType();
13977
13978 // Only do this for legal fixed vector types.
13979 if (!VecVT.isFixedLengthVector() ||
13980 !DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
13981 !DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
13982 return SDValue();
13983
13984 // Ignore widening patterns.
13985 if (IdxVal == 0 && Vec.isUndef())
13986 return SDValue();
13987
13988 // Subvector must be half the width and an "aligned" insertion.
13989 unsigned NumSubElts = SubVT.getVectorNumElements();
13990 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
13991 (IdxVal != 0 && IdxVal != NumSubElts))
13992 return SDValue();
13993
13994 // Fold insert_subvector -> concat_vectors
13995 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
13996 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
13997 SDLoc DL(N);
13998 SDValue Lo, Hi;
13999 if (IdxVal == 0) {
14000 Lo = SubVec;
14001 Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
14002 DAG.getVectorIdxConstant(NumSubElts, DL));
14003 } else {
14004 Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
14005 DAG.getVectorIdxConstant(0, DL));
14006 Hi = SubVec;
14007 }
14008 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
14009}
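
The fold above only reorders data, as the low-half case shows on a small fixed-size example. A standalone sketch, not part of the original file (insertLowHalf is an illustrative name):

#include <array>
#include <cstdint>

// insert_subvector(Vec, Sub, 0) == concat_vectors(Sub, extract_subvector(Vec, 2))
static std::array<uint32_t, 4> insertLowHalf(std::array<uint32_t, 4> Vec,
                                             std::array<uint32_t, 2> Sub) {
  return {Sub[0], Sub[1], Vec[2], Vec[3]};
}
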
14010
14011static SDValue tryCombineFixedPointConvert(SDNode *N,
14012 TargetLowering::DAGCombinerInfo &DCI,
14013 SelectionDAG &DAG) {
14014 // Wait until after everything is legalized to try this. That way we have
14015 // legal vector types and such.
14016 if (DCI.isBeforeLegalizeOps())
14017 return SDValue();
14018 // Transform a scalar conversion of a value from a lane extract into a
14019 // lane extract of a vector conversion. E.g., from foo1 to foo2:
14020 // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
14021 // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
14022 //
14023 // The second form interacts better with instruction selection and the
14024 // register allocator to avoid cross-class register copies that aren't
14025 // coalescable due to a lane reference.
14026
14027 // Check the operand and see if it originates from a lane extract.
14028 SDValue Op1 = N->getOperand(1);
14029 if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14030 // Yep, no additional predication needed. Perform the transform.
14031 SDValue IID = N->getOperand(0);
14032 SDValue Shift = N->getOperand(2);
14033 SDValue Vec = Op1.getOperand(0);
14034 SDValue Lane = Op1.getOperand(1);
14035 EVT ResTy = N->getValueType(0);
14036 EVT VecResTy;
14037 SDLoc DL(N);
14038
14039 // The vector width should be 128 bits by the time we get here, even
14040 // if it started as 64 bits (the extract_vector handling will have
14041 // done so).
14042 assert(Vec.getValueSizeInBits() == 128 &&
14043 "unexpected vector size on extract_vector_elt!");
14044 if (Vec.getValueType() == MVT::v4i32)
14045 VecResTy = MVT::v4f32;
14046 else if (Vec.getValueType() == MVT::v2i64)
14047 VecResTy = MVT::v2f64;
14048 else
14049 llvm_unreachable("unexpected vector type!");
14050
14051 SDValue Convert =
14052 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
14053 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
14054 }
14055 return SDValue();
14056}
14057
14058// AArch64 high-vector "long" operations are formed by performing the non-high
14059// version on an extract_subvector of each operand which gets the high half:
14060//
14061// (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
14062//
14063// However, there are cases which don't have an extract_high explicitly, but
14064// have another operation that can be made compatible with one for free. For
14065// example:
14066//
14067// (dupv64 scalar) --> (extract_high (dup128 scalar))
14068//
14069// This routine does the actual conversion of such DUPs, once outer routines
14070// have determined that everything else is in order.
14071// It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
14072// similarly here.
14073static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
14074 switch (N.getOpcode()) {
14075 case AArch64ISD::DUP:
14076 case AArch64ISD::DUPLANE8:
14077 case AArch64ISD::DUPLANE16:
14078 case AArch64ISD::DUPLANE32:
14079 case AArch64ISD::DUPLANE64:
14080 case AArch64ISD::MOVI:
14081 case AArch64ISD::MOVIshift:
14082 case AArch64ISD::MOVIedit:
14083 case AArch64ISD::MOVImsl:
14084 case AArch64ISD::MVNIshift:
14085 case AArch64ISD::MVNImsl:
14086 break;
14087 default:
14088 // FMOV could be supported, but isn't very useful, as it would only occur
14089 // if you passed a bitcast floating-point immediate to an eligible long
14090 // integer op (addl, smull, ...).
14091 return SDValue();
14092 }
14093
14094 MVT NarrowTy = N.getSimpleValueType();
14095 if (!NarrowTy.is64BitVector())
14096 return SDValue();
14097
14098 MVT ElementTy = NarrowTy.getVectorElementType();
14099 unsigned NumElems = NarrowTy.getVectorNumElements();
14100 MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
14101
14102 SDLoc dl(N);
14103 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy,
14104 DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()),
14105 DAG.getConstant(NumElems, dl, MVT::i64));
14106}
14107
14108static bool isEssentiallyExtractHighSubvector(SDValue N) {
14109 if (N.getOpcode() == ISD::BITCAST)
14110 N = N.getOperand(0);
14111 if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14112 return false;
14113 if (N.getOperand(0).getValueType().isScalableVector())
14114 return false;
14115 return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
14116 N.getOperand(0).getValueType().getVectorNumElements() / 2;
14117}
14118
14119/// Helper structure to keep track of ISD::SET_CC operands.
14120struct GenericSetCCInfo {
14121 const SDValue *Opnd0;
14122 const SDValue *Opnd1;
14123 ISD::CondCode CC;
14124};
14125
14126/// Helper structure to keep track of a SET_CC lowered into AArch64 code.
14127struct AArch64SetCCInfo {
14128 const SDValue *Cmp;
14129 AArch64CC::CondCode CC;
14130};
14131
14132/// Helper structure to keep track of SetCC information.
14133union SetCCInfo {
14134 GenericSetCCInfo Generic;
14135 AArch64SetCCInfo AArch64;
14136};
14137
14138/// Helper structure to be able to read SetCC information. If set to
14139/// true, IsAArch64 field, Info is a AArch64SetCCInfo, otherwise Info is a
14140/// GenericSetCCInfo.
14141struct SetCCInfoAndKind {
14142 SetCCInfo Info;
14143 bool IsAArch64;
14144};
14145
14146/// Check whether or not \p Op is a SET_CC operation, either a generic or
14147/// an
14148/// AArch64 lowered one.
14149/// \p SetCCInfo is filled accordingly.
14150 /// \post SetCCInfo is meaningful only when this function returns true.
14151/// \return True when Op is a kind of SET_CC operation.
14152static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
14153 // If this is a setcc, this is straightforward.
14154 if (Op.getOpcode() == ISD::SETCC) {
14155 SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
14156 SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
14157 SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
14158 SetCCInfo.IsAArch64 = false;
14159 return true;
14160 }
14161 // Otherwise, check if this is a matching csel instruction.
14162 // In other words:
14163 // - csel 1, 0, cc
14164 // - csel 0, 1, !cc
14165 if (Op.getOpcode() != AArch64ISD::CSEL)
14166 return false;
14167 // Set the information about the operands.
14168 // TODO: we want the operands of the Cmp not the csel
14169 SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
14170 SetCCInfo.IsAArch64 = true;
14171 SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
14172 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
14173
14174 // Check that the operands matches the constraints:
14175 // (1) Both operands must be constants.
14176 // (2) One must be 1 and the other must be 0.
14177 ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
14178 ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
14179
14180 // Check (1).
14181 if (!TValue || !FValue)
14182 return false;
14183
14184 // Check (2).
14185 if (!TValue->isOne()) {
14186 // Update the comparison when we are interested in !cc.
14187 std::swap(TValue, FValue);
14188 SetCCInfo.Info.AArch64.CC =
14189 AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
14190 }
14191 return TValue->isOne() && FValue->isZero();
14192}
14193
14194// Returns true if Op is setcc or zext of setcc.
14195static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
14196 if (isSetCC(Op, Info))
14197 return true;
14198 return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
14199 isSetCC(Op->getOperand(0), Info));
14200}
14201
14202// The folding we want to perform is:
14203// (add x, [zext] (setcc cc ...) )
14204// -->
14205// (csel x, (add x, 1), !cc ...)
14206//
14207// The latter will get matched to a CSINC instruction.
14208static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
14209 assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
14210 SDValue LHS = Op->getOperand(0);
14211 SDValue RHS = Op->getOperand(1);
14212 SetCCInfoAndKind InfoAndKind;
14213
14214 // If both operands are a SET_CC, then we don't want to perform this
14215 // folding and create another csel as this results in more instructions
14216 // (and higher register usage).
14217 if (isSetCCOrZExtSetCC(LHS, InfoAndKind) &&
14218 isSetCCOrZExtSetCC(RHS, InfoAndKind))
14219 return SDValue();
14220
14221 // If neither operand is a SET_CC, give up.
14222 if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
14223 std::swap(LHS, RHS);
14224 if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
14225 return SDValue();
14226 }
14227
14228 // FIXME: This could be generalized to work for FP comparisons.
14229 EVT CmpVT = InfoAndKind.IsAArch64
14230 ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
14231 : InfoAndKind.Info.Generic.Opnd0->getValueType();
14232 if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
14233 return SDValue();
14234
14235 SDValue CCVal;
14236 SDValue Cmp;
14237 SDLoc dl(Op);
14238 if (InfoAndKind.IsAArch64) {
14239 CCVal = DAG.getConstant(
14240 AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
14241 MVT::i32);
14242 Cmp = *InfoAndKind.Info.AArch64.Cmp;
14243 } else
14244 Cmp = getAArch64Cmp(
14245 *InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1,
14246 ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
14247 dl);
14248
14249 EVT VT = Op->getValueType(0);
14250 LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
14251 return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
14252}
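
The fold above rests on a simple scalar identity: adding a boolean to x is a select between x and x + 1, which the CSINC instruction implements directly. A standalone sketch, not part of the original file (addBool is an illustrative name):

#include <cstdint>

static uint64_t addBool(uint64_t X, bool CC) {
  return CC ? X + 1 : X; // equals X + (CC ? 1 : 0)
}
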
14253
14254// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
14255static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
14256 EVT VT = N->getValueType(0);
14257 // Only scalar integer and vector types.
14258 if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
14259 return SDValue();
14260
14261 SDValue LHS = N->getOperand(0);
14262 SDValue RHS = N->getOperand(1);
14263 if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14264 RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
14265 return SDValue();
14266
14267 auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
14268 auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
14269 if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
14270 return SDValue();
14271
14272 SDValue Op1 = LHS->getOperand(0);
14273 SDValue Op2 = RHS->getOperand(0);
14274 EVT OpVT1 = Op1.getValueType();
14275 EVT OpVT2 = Op2.getValueType();
14276 if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
14277 Op2.getOpcode() != AArch64ISD::UADDV ||
14278 OpVT1.getVectorElementType() != VT)
14279 return SDValue();
14280
14281 SDValue Val1 = Op1.getOperand(0);
14282 SDValue Val2 = Op2.getOperand(0);
14283 EVT ValVT = Val1->getValueType(0);
14284 SDLoc DL(N);
14285 SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
14286 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
14287 DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
14288 DAG.getConstant(0, DL, MVT::i64));
14289}
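
The rewrite above is justified by associativity of the (modular) lane additions: adding two across-vector sums equals summing the lane-wise addition of the two vectors. A standalone sketch for four i32 lanes, not part of the original file (sumOfSums is an illustrative name):

#include <array>
#include <cstdint>

static uint32_t sumOfSums(const std::array<uint32_t, 4> &A,
                          const std::array<uint32_t, 4> &B) {
  uint32_t Acc = 0;
  for (unsigned I = 0; I < 4; ++I)
    Acc += A[I] + B[I]; // == uaddv(A) + uaddv(B) with wrapping arithmetic
  return Acc;
}
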
14290
14291// ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
14292static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
14293 EVT VT = N->getValueType(0);
14294 if (N->getOpcode() != ISD::ADD)
14295 return SDValue();
14296
14297 SDValue Dot = N->getOperand(0);
14298 SDValue A = N->getOperand(1);
14299 // Handle commutativity.
14300 auto isZeroDot = [](SDValue Dot) {
14301 return (Dot.getOpcode() == AArch64ISD::UDOT ||
14302 Dot.getOpcode() == AArch64ISD::SDOT) &&
14303 isZerosVector(Dot.getOperand(0).getNode());
14304 };
14305 if (!isZeroDot(Dot))
14306 std::swap(Dot, A);
14307 if (!isZeroDot(Dot))
14308 return SDValue();
14309
14310 return DAG.getNode(Dot.getOpcode(), SDLoc(N), VT, A, Dot.getOperand(1),
14311 Dot.getOperand(2));
14312}
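
The dot-product instructions accumulate into their first operand, so a dot product that starts from zero followed by an ADD of A is the same as a dot product that starts from A. A standalone single-lane sketch, not part of the original file (udotLane is an illustrative name):

#include <array>
#include <cstdint>

static uint32_t udotLane(uint32_t Acc, const std::array<uint8_t, 4> &X,
                         const std::array<uint8_t, 4> &Y) {
  for (unsigned I = 0; I < 4; ++I)
    Acc += uint32_t(X[I]) * uint32_t(Y[I]);
  return Acc; // udotLane(A, X, Y) == A + udotLane(0, X, Y)
}
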
14313
14314// The basic add/sub long vector instructions have variants with "2" on the end
14315// which act on the high-half of their inputs. They are normally matched by
14316// patterns like:
14317//
14318// (add (zeroext (extract_high LHS)),
14319// (zeroext (extract_high RHS)))
14320// -> uaddl2 vD, vN, vM
14321//
14322// However, if one of the extracts is something like a duplicate, this
14323// instruction can still be used profitably. This function puts the DAG into a
14324// more appropriate form for those patterns to trigger.
14325static SDValue performAddSubLongCombine(SDNode *N,
14326 TargetLowering::DAGCombinerInfo &DCI,
14327 SelectionDAG &DAG) {
14328 if (DCI.isBeforeLegalizeOps())
14329 return SDValue();
14330
14331 MVT VT = N->getSimpleValueType(0);
14332 if (!VT.is128BitVector()) {
14333 if (N->getOpcode() == ISD::ADD)
14334 return performSetccAddFolding(N, DAG);
14335 return SDValue();
14336 }
14337
14338 // Make sure both branches are extended in the same way.
14339 SDValue LHS = N->getOperand(0);
14340 SDValue RHS = N->getOperand(1);
14341 if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
14342 LHS.getOpcode() != ISD::SIGN_EXTEND) ||
14343 LHS.getOpcode() != RHS.getOpcode())
14344 return SDValue();
14345
14346 unsigned ExtType = LHS.getOpcode();
14347
14348 // It's not worth doing if at least one of the inputs isn't already an
14349 // extract, but we don't know which it'll be so we have to try both.
14350 if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
14351 RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
14352 if (!RHS.getNode())
14353 return SDValue();
14354
14355 RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
14356 } else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
14357 LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
14358 if (!LHS.getNode())
14359 return SDValue();
14360
14361 LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
14362 }
14363
14364 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
14365}
14366
14367static SDValue performAddSubCombine(SDNode *N,
14368 TargetLowering::DAGCombinerInfo &DCI,
14369 SelectionDAG &DAG) {
14370 // Try to change sum of two reductions.
14371 if (SDValue Val = performUADDVCombine(N, DAG))
14372 return Val;
14373 if (SDValue Val = performAddDotCombine(N, DAG))
14374 return Val;
14375
14376 return performAddSubLongCombine(N, DCI, DAG);
14377}
14378
14379// Massage DAGs which we can use the high-half "long" operations on into
14380// something isel will recognize better. E.g.
14381//
14382// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
14383// (aarch64_neon_umull (extract_high (v2i64 vec)))
14384// (extract_high (v2i64 (dup128 scalar)))))
14385//
14386static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
14387 TargetLowering::DAGCombinerInfo &DCI,
14388 SelectionDAG &DAG) {
14389 if (DCI.isBeforeLegalizeOps())
14390 return SDValue();
14391
14392 SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
14393 SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
14394 assert(LHS.getValueType().is64BitVector() &&
14395 RHS.getValueType().is64BitVector() &&
14396 "unexpected shape for long operation");
14397
14398 // Either node could be a DUP, but it's not worth doing both of them (you
14399 // might as well use the non-high version), so look for a corresponding extract
14400 // operation on the other "wing".
14401 if (isEssentiallyExtractHighSubvector(LHS)) {
14402 RHS = tryExtendDUPToExtractHigh(RHS, DAG);
14403 if (!RHS.getNode())
14404 return SDValue();
14405 } else if (isEssentiallyExtractHighSubvector(RHS)) {
14406 LHS = tryExtendDUPToExtractHigh(LHS, DAG);
14407 if (!LHS.getNode())
14408 return SDValue();
14409 }
14410
14411 if (IID == Intrinsic::not_intrinsic)
14412 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
14413
14414 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
14415 N->getOperand(0), LHS, RHS);
14416}
14417
14418static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
14419 MVT ElemTy = N->getSimpleValueType(0).getScalarType();
14420 unsigned ElemBits = ElemTy.getSizeInBits();
14421
14422 int64_t ShiftAmount;
14423 if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
14424 APInt SplatValue, SplatUndef;
14425 unsigned SplatBitSize;
14426 bool HasAnyUndefs;
14427 if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
14428 HasAnyUndefs, ElemBits) ||
14429 SplatBitSize != ElemBits)
14430 return SDValue();
14431
14432 ShiftAmount = SplatValue.getSExtValue();
14433 } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
14434 ShiftAmount = CVN->getSExtValue();
14435 } else
14436 return SDValue();
14437
14438 unsigned Opcode;
14439 bool IsRightShift;
14440 switch (IID) {
14441 default:
14442 llvm_unreachable("Unknown shift intrinsic");
14443 case Intrinsic::aarch64_neon_sqshl:
14444 Opcode = AArch64ISD::SQSHL_I;
14445 IsRightShift = false;
14446 break;
14447 case Intrinsic::aarch64_neon_uqshl:
14448 Opcode = AArch64ISD::UQSHL_I;
14449 IsRightShift = false;
14450 break;
14451 case Intrinsic::aarch64_neon_srshl:
14452 Opcode = AArch64ISD::SRSHR_I;
14453 IsRightShift = true;
14454 break;
14455 case Intrinsic::aarch64_neon_urshl:
14456 Opcode = AArch64ISD::URSHR_I;
14457 IsRightShift = true;
14458 break;
14459 case Intrinsic::aarch64_neon_sqshlu:
14460 Opcode = AArch64ISD::SQSHLU_I;
14461 IsRightShift = false;
14462 break;
14463 case Intrinsic::aarch64_neon_sshl:
14464 case Intrinsic::aarch64_neon_ushl:
14465 // For positive shift amounts we can use SHL, as ushl/sshl perform a regular
14466 // left shift in that case. Below, we only replace the current node with VSHL
14467 // if this condition is met.
14468 Opcode = AArch64ISD::VSHL;
14469 IsRightShift = false;
14470 break;
14471 }
14472
14473 if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
14474 SDLoc dl(N);
14475 return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
14476 DAG.getConstant(-ShiftAmount, dl, MVT::i32));
14477 } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
14478 SDLoc dl(N);
14479 return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
14480 DAG.getConstant(ShiftAmount, dl, MVT::i32));
14481 }
14482
14483 return SDValue();
14484}
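
For context, the register-shift intrinsics are assumed to behave element-wise as follows: a non-negative amount shifts left and a negative amount shifts right by its magnitude, which is why a constant amount can be re-selected as an immediate-form shift above. A standalone scalar sketch, not part of the original file (ushlByConstant is an illustrative name):

#include <cstdint>

static uint64_t ushlByConstant(uint64_t X, int64_t Amount) {
  // Assumes -64 < Amount < 64 so neither shift below is undefined.
  return Amount >= 0 ? X << Amount : X >> -Amount;
}
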
14485
14486// The CRC32[BH] instructions ignore the high bits of their data operand. Since
14487// the intrinsics must be legal and take an i32, this means there's almost
14488// certainly going to be a zext in the DAG which we can eliminate.
14489static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
14490 SDValue AndN = N->getOperand(2);
14491 if (AndN.getOpcode() != ISD::AND)
14492 return SDValue();
14493
14494 ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
14495 if (!CMask || CMask->getZExtValue() != Mask)
14496 return SDValue();
14497
14498 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32,
14499 N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
14500}
14501
14502static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
14503 SelectionDAG &DAG) {
14504 SDLoc dl(N);
14505 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
14506 DAG.getNode(Opc, dl,
14507 N->getOperand(1).getSimpleValueType(),
14508 N->getOperand(1)),
14509 DAG.getConstant(0, dl, MVT::i64));
14510}
14511
14512static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
14513 SDLoc DL(N);
14514 SDValue Op1 = N->getOperand(1);
14515 SDValue Op2 = N->getOperand(2);
14516 EVT ScalarTy = Op2.getValueType();
14517 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
14518 ScalarTy = MVT::i32;
14519
14520 // Lower index_vector(base, step) to mul(step_vector(1), splat(step)) + splat(base).
14521 SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0));
14522 SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
14523 SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
14524 SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
14525 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
14526}
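
The lowering above produces the arithmetic sequence base, base + step, base + 2*step, ... by scaling a step vector and adding a splat of the base. A standalone sketch for four i32 lanes, not part of the original file (indexVector is an illustrative name):

#include <array>
#include <cstdint>

static std::array<int32_t, 4> indexVector(int32_t Base, int32_t Step) {
  std::array<int32_t, 4> Out{};
  for (int32_t I = 0; I < 4; ++I)
    Out[I] = Base + I * Step; // lane I of add(mul(step_vector, splat(Step)), splat(Base))
  return Out;
}
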
14527
14528static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) {
14529 SDLoc dl(N);
14530 SDValue Scalar = N->getOperand(3);
14531 EVT ScalarTy = Scalar.getValueType();
14532
14533 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
14534 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
14535
14536 SDValue Passthru = N->getOperand(1);
14537 SDValue Pred = N->getOperand(2);
14538 return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
14539 Pred, Scalar, Passthru);
14540}
14541
14542static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
14543 SDLoc dl(N);
14544 LLVMContext &Ctx = *DAG.getContext();
14545 EVT VT = N->getValueType(0);
14546
14547 assert(VT.isScalableVector() && "Expected a scalable vector.");
14548
14549 // Current lowering only supports the SVE-ACLE types.
14550 if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
14551 return SDValue();
14552
14553 unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8;
14554 unsigned ByteSize = VT.getSizeInBits().getKnownMinSize() / 8;
14555 EVT ByteVT =
14556 EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize));
14557
14558 // Convert everything to the domain of EXT (i.e. bytes).
14559 SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
14560 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
14561 SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
14562 DAG.getConstant(ElemSize, dl, MVT::i32));
14563
14564 SDValue EXT = DAG.getNode(AArch64ISD::EXT, dl, ByteVT, Op0, Op1, Op2);
14565 return DAG.getNode(ISD::BITCAST, dl, VT, EXT);
14566}
14567
14568static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
14569 TargetLowering::DAGCombinerInfo &DCI,
14570 SelectionDAG &DAG) {
14571 if (DCI.isBeforeLegalize())
14572 return SDValue();
14573
14574 SDValue Comparator = N->getOperand(3);
14575 if (Comparator.getOpcode() == AArch64ISD::DUP ||
14576 Comparator.getOpcode() == ISD::SPLAT_VECTOR) {
14577 unsigned IID = getIntrinsicID(N);
14578 EVT VT = N->getValueType(0);
14579 EVT CmpVT = N->getOperand(2).getValueType();
14580 SDValue Pred = N->getOperand(1);
14581 SDValue Imm;
14582 SDLoc DL(N);
14583
14584 switch (IID) {
14585 default:
14586 llvm_unreachable("Called with wrong intrinsic!");
14587 break;
14588
14589 // Signed comparisons
14590 case Intrinsic::aarch64_sve_cmpeq_wide:
14591 case Intrinsic::aarch64_sve_cmpne_wide:
14592 case Intrinsic::aarch64_sve_cmpge_wide:
14593 case Intrinsic::aarch64_sve_cmpgt_wide:
14594 case Intrinsic::aarch64_sve_cmplt_wide:
14595 case Intrinsic::aarch64_sve_cmple_wide: {
14596 if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
14597 int64_t ImmVal = CN->getSExtValue();
14598 if (ImmVal >= -16 && ImmVal <= 15)
14599 Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
14600 else
14601 return SDValue();
14602 }
14603 break;
14604 }
14605 // Unsigned comparisons
14606 case Intrinsic::aarch64_sve_cmphs_wide:
14607 case Intrinsic::aarch64_sve_cmphi_wide:
14608 case Intrinsic::aarch64_sve_cmplo_wide:
14609 case Intrinsic::aarch64_sve_cmpls_wide: {
14610 if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
14611 uint64_t ImmVal = CN->getZExtValue();
14612 if (ImmVal <= 127)
14613 Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
14614 else
14615 return SDValue();
14616 }
14617 break;
14618 }
14619 }
14620
14621 if (!Imm)
14622 return SDValue();
14623
14624 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
14625 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred,
14626 N->getOperand(2), Splat, DAG.getCondCode(CC));
14627 }
14628
14629 return SDValue();
14630}
14631
14632static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
14633 AArch64CC::CondCode Cond) {
14634 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14635
14636 SDLoc DL(Op);
14637 assert(Op.getValueType().isScalableVector() &&
14638 TLI.isTypeLegal(Op.getValueType()) &&
14639 "Expected legal scalable vector type!");
14640
14641 // Ensure target specific opcodes are using legal type.
14642 EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
14643 SDValue TVal = DAG.getConstant(1, DL, OutVT);
14644 SDValue FVal = DAG.getConstant(0, DL, OutVT);
14645
14646 // Set condition code (CC) flags.
14647 SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
14648
14649 // Convert CC to integer based on requested condition.
14650 // NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
14651 SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
14652 SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
14653 return DAG.getZExtOrTrunc(Res, DL, VT);
14654}
14655
14656static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
14657 SelectionDAG &DAG) {
14658 SDLoc DL(N);
14659
14660 SDValue Pred = N->getOperand(1);
14661 SDValue VecToReduce = N->getOperand(2);
14662
14663 // NOTE: The integer reduction's result type is not always linked to the
14664 // operand's element type so we construct it from the intrinsic's result type.
14665 EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
14666 SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
14667
14668 // SVE reductions set the whole vector register with the first element
14669 // containing the reduction result, which we'll now extract.
14670 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14671 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
14672 Zero);
14673}
14674
14675static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
14676 SelectionDAG &DAG) {
14677 SDLoc DL(N);
14678
14679 SDValue Pred = N->getOperand(1);
14680 SDValue VecToReduce = N->getOperand(2);
14681
14682 EVT ReduceVT = VecToReduce.getValueType();
14683 SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
14684
14685 // SVE reductions set the whole vector register with the first element
14686 // containing the reduction result, which we'll now extract.
14687 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14688 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
14689 Zero);
14690}
14691
14692static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
14693 SelectionDAG &DAG) {
14694 SDLoc DL(N);
14695
14696 SDValue Pred = N->getOperand(1);
14697 SDValue InitVal = N->getOperand(2);
14698 SDValue VecToReduce = N->getOperand(3);
14699 EVT ReduceVT = VecToReduce.getValueType();
14700
14701 // Ordered reductions use the first lane of the result vector as the
14702 // reduction's initial value.
14703 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14704 InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT,
14705 DAG.getUNDEF(ReduceVT), InitVal, Zero);
14706
14707 SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce);
14708
14709 // SVE reductions set the whole vector register with the first element
14710 // containing the reduction result, which we'll now extract.
14711 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
14712 Zero);
14713}
14714
14715static bool isAllActivePredicate(SDValue N) {
14716 unsigned NumElts = N.getValueType().getVectorMinNumElements();
14717
14718 // Look through cast.
14719 while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
14720 N = N.getOperand(0);
14721 // When reinterpreting from a type with fewer elements the "new" elements
14722 // are not active, so bail if they're likely to be used.
14723 if (N.getValueType().getVectorMinNumElements() < NumElts)
14724 return false;
14725 }
14726
14727 // "ptrue p.<ty>, all" can be considered all active when <ty> is the same size
14728 // or smaller than the implicit element type represented by N.
14729 // NOTE: A larger element count implies a smaller element type.
14730 if (N.getOpcode() == AArch64ISD::PTRUE &&
14731 N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
14732 return N.getValueType().getVectorMinNumElements() >= NumElts;
14733
14734 return false;
14735}
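// For example (illustrative): a predicate built as "ptrue p0.b, all" (PTRUE of
// nxv16i1 with the `all` pattern) counts as all-active for any user expecting
// nxv16i1 or fewer lanes, whereas an nxv4i1 PTRUE does not satisfy an nxv16i1
// user because, viewed as nxv16i1, only every fourth lane would be active.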
14736
14737// If a merged operation has no inactive lanes we can relax it to a predicated
14738// or unpredicated operation, which potentially allows better isel (perhaps
14739// using immediate forms) or relaxing register reuse requirements.
14740static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
14741 SelectionDAG &DAG, bool UnpredOp = false,
14742 bool SwapOperands = false) {
14743 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
14744 assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
14745 SDValue Pg = N->getOperand(1);
14746 SDValue Op1 = N->getOperand(SwapOperands ? 3 : 2);
14747 SDValue Op2 = N->getOperand(SwapOperands ? 2 : 3);
14748
14749 // ISD way to specify an all active predicate.
14750 if (isAllActivePredicate(Pg)) {
14751 if (UnpredOp)
14752 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op1, Op2);
14753
14754 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg, Op1, Op2);
14755 }
14756
14757 // FUTURE: SplatVector(true)
14758 return SDValue();
14759}
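// Sketch of the intent (operand names are assumptions): when the governing
// predicate is all active,
//   sve.add(pg, a, b)  ->  ISD::ADD a, b                  (UnpredOp == true)
//   sve.mul(pg, a, b)  ->  AArch64ISD::MUL_PRED pg, a, b
// and SwapOperands (used for sve.subr below) exchanges the two data operands
// so the reversed form maps onto the ordinary SUB node.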
14760
14761static SDValue performIntrinsicCombine(SDNode *N,
14762 TargetLowering::DAGCombinerInfo &DCI,
14763 const AArch64Subtarget *Subtarget) {
14764 SelectionDAG &DAG = DCI.DAG;
14765 unsigned IID = getIntrinsicID(N);
14766 switch (IID) {
14767 default:
14768 break;
14769 case Intrinsic::aarch64_neon_vcvtfxs2fp:
14770 case Intrinsic::aarch64_neon_vcvtfxu2fp:
14771 return tryCombineFixedPointConvert(N, DCI, DAG);
14772 case Intrinsic::aarch64_neon_saddv:
14773 return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
14774 case Intrinsic::aarch64_neon_uaddv:
14775 return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
14776 case Intrinsic::aarch64_neon_sminv:
14777 return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
14778 case Intrinsic::aarch64_neon_uminv:
14779 return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
14780 case Intrinsic::aarch64_neon_smaxv:
14781 return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
14782 case Intrinsic::aarch64_neon_umaxv:
14783 return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
14784 case Intrinsic::aarch64_neon_fmax:
14785 return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
14786 N->getOperand(1), N->getOperand(2));
14787 case Intrinsic::aarch64_neon_fmin:
14788 return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
14789 N->getOperand(1), N->getOperand(2));
14790 case Intrinsic::aarch64_neon_fmaxnm:
14791 return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
14792 N->getOperand(1), N->getOperand(2));
14793 case Intrinsic::aarch64_neon_fminnm:
14794 return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
14795 N->getOperand(1), N->getOperand(2));
14796 case Intrinsic::aarch64_neon_smull:
14797 case Intrinsic::aarch64_neon_umull:
14798 case Intrinsic::aarch64_neon_pmull:
14799 case Intrinsic::aarch64_neon_sqdmull:
14800 return tryCombineLongOpWithDup(IID, N, DCI, DAG);
14801 case Intrinsic::aarch64_neon_sqshl:
14802 case Intrinsic::aarch64_neon_uqshl:
14803 case Intrinsic::aarch64_neon_sqshlu:
14804 case Intrinsic::aarch64_neon_srshl:
14805 case Intrinsic::aarch64_neon_urshl:
14806 case Intrinsic::aarch64_neon_sshl:
14807 case Intrinsic::aarch64_neon_ushl:
14808 return tryCombineShiftImm(IID, N, DAG);
14809 case Intrinsic::aarch64_crc32b:
14810 case Intrinsic::aarch64_crc32cb:
14811 return tryCombineCRC32(0xff, N, DAG);
14812 case Intrinsic::aarch64_crc32h:
14813 case Intrinsic::aarch64_crc32ch:
14814 return tryCombineCRC32(0xffff, N, DAG);
14815 case Intrinsic::aarch64_sve_saddv:
14816 // There is no i64 version of SADDV because the sign is irrelevant.
14817 if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
14818 return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
14819 else
14820 return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
14821 case Intrinsic::aarch64_sve_uaddv:
14822 return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
14823 case Intrinsic::aarch64_sve_smaxv:
14824 return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
14825 case Intrinsic::aarch64_sve_umaxv:
14826 return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
14827 case Intrinsic::aarch64_sve_sminv:
14828 return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
14829 case Intrinsic::aarch64_sve_uminv:
14830 return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
14831 case Intrinsic::aarch64_sve_orv:
14832 return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
14833 case Intrinsic::aarch64_sve_eorv:
14834 return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
14835 case Intrinsic::aarch64_sve_andv:
14836 return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
14837 case Intrinsic::aarch64_sve_index:
14838 return LowerSVEIntrinsicIndex(N, DAG);
14839 case Intrinsic::aarch64_sve_dup:
14840 return LowerSVEIntrinsicDUP(N, DAG);
14841 case Intrinsic::aarch64_sve_dup_x:
14842 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
14843 N->getOperand(1));
14844 case Intrinsic::aarch64_sve_ext:
14845 return LowerSVEIntrinsicEXT(N, DAG);
14846 case Intrinsic::aarch64_sve_mul:
14847 return convertMergedOpToPredOp(N, AArch64ISD::MUL_PRED, DAG);
14848 case Intrinsic::aarch64_sve_smulh:
14849 return convertMergedOpToPredOp(N, AArch64ISD::MULHS_PRED, DAG);
14850 case Intrinsic::aarch64_sve_umulh:
14851 return convertMergedOpToPredOp(N, AArch64ISD::MULHU_PRED, DAG);
14852 case Intrinsic::aarch64_sve_smin:
14853 return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
14854 case Intrinsic::aarch64_sve_umin:
14855 return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
14856 case Intrinsic::aarch64_sve_smax:
14857 return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
14858 case Intrinsic::aarch64_sve_umax:
14859 return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
14860 case Intrinsic::aarch64_sve_lsl:
14861 return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
14862 case Intrinsic::aarch64_sve_lsr:
14863 return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
14864 case Intrinsic::aarch64_sve_asr:
14865 return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
14866 case Intrinsic::aarch64_sve_fadd:
14867 return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
14868 case Intrinsic::aarch64_sve_fsub:
14869 return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
14870 case Intrinsic::aarch64_sve_fmul:
14871 return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
14872 case Intrinsic::aarch64_sve_add:
14873 return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
14874 case Intrinsic::aarch64_sve_sub:
14875 return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
14876 case Intrinsic::aarch64_sve_subr:
14877 return convertMergedOpToPredOp(N, ISD::SUB, DAG, true, true);
14878 case Intrinsic::aarch64_sve_and:
14879 return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
14880 case Intrinsic::aarch64_sve_bic:
14881 return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
14882 case Intrinsic::aarch64_sve_eor:
14883 return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
14884 case Intrinsic::aarch64_sve_orr:
14885 return convertMergedOpToPredOp(N, ISD::OR, DAG, true);
14886 case Intrinsic::aarch64_sve_sqadd:
14887 return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true);
14888 case Intrinsic::aarch64_sve_sqsub:
14889 return convertMergedOpToPredOp(N, ISD::SSUBSAT, DAG, true);
14890 case Intrinsic::aarch64_sve_uqadd:
14891 return convertMergedOpToPredOp(N, ISD::UADDSAT, DAG, true);
14892 case Intrinsic::aarch64_sve_uqsub:
14893 return convertMergedOpToPredOp(N, ISD::USUBSAT, DAG, true);
14894 case Intrinsic::aarch64_sve_sqadd_x:
14895 return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
14896 N->getOperand(1), N->getOperand(2));
14897 case Intrinsic::aarch64_sve_sqsub_x:
14898 return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
14899 N->getOperand(1), N->getOperand(2));
14900 case Intrinsic::aarch64_sve_uqadd_x:
14901 return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
14902 N->getOperand(1), N->getOperand(2));
14903 case Intrinsic::aarch64_sve_uqsub_x:
14904 return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
14905 N->getOperand(1), N->getOperand(2));
14906 case Intrinsic::aarch64_sve_cmphs:
14907 if (!N->getOperand(2).getValueType().isFloatingPoint())
14908 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14909 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14910 N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
14911 break;
14912 case Intrinsic::aarch64_sve_cmphi:
14913 if (!N->getOperand(2).getValueType().isFloatingPoint())
14914 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14915 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14916 N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
14917 break;
14918 case Intrinsic::aarch64_sve_fcmpge:
14919 case Intrinsic::aarch64_sve_cmpge:
14920 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14921 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14922 N->getOperand(3), DAG.getCondCode(ISD::SETGE));
14923 break;
14924 case Intrinsic::aarch64_sve_fcmpgt:
14925 case Intrinsic::aarch64_sve_cmpgt:
14926 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14927 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14928 N->getOperand(3), DAG.getCondCode(ISD::SETGT));
14929 break;
14930 case Intrinsic::aarch64_sve_fcmpeq:
14931 case Intrinsic::aarch64_sve_cmpeq:
14932 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14933 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14934 N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
14935 break;
14936 case Intrinsic::aarch64_sve_fcmpne:
14937 case Intrinsic::aarch64_sve_cmpne:
14938 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14939 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14940 N->getOperand(3), DAG.getCondCode(ISD::SETNE));
14941 break;
14942 case Intrinsic::aarch64_sve_fcmpuo:
14943 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14944 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14945 N->getOperand(3), DAG.getCondCode(ISD::SETUO));
14946 break;
14947 case Intrinsic::aarch64_sve_fadda:
14948 return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
14949 case Intrinsic::aarch64_sve_faddv:
14950 return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);
14951 case Intrinsic::aarch64_sve_fmaxnmv:
14952 return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);
14953 case Intrinsic::aarch64_sve_fmaxv:
14954 return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);
14955 case Intrinsic::aarch64_sve_fminnmv:
14956 return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
14957 case Intrinsic::aarch64_sve_fminv:
14958 return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG);
14959 case Intrinsic::aarch64_sve_sel:
14960 return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
14961 N->getOperand(1), N->getOperand(2), N->getOperand(3));
14962 case Intrinsic::aarch64_sve_cmpeq_wide:
14963 return tryConvertSVEWideCompare(N, ISD::SETEQ, DCI, DAG);
14964 case Intrinsic::aarch64_sve_cmpne_wide:
14965 return tryConvertSVEWideCompare(N, ISD::SETNE, DCI, DAG);
14966 case Intrinsic::aarch64_sve_cmpge_wide:
14967 return tryConvertSVEWideCompare(N, ISD::SETGE, DCI, DAG);
14968 case Intrinsic::aarch64_sve_cmpgt_wide:
14969 return tryConvertSVEWideCompare(N, ISD::SETGT, DCI, DAG);
14970 case Intrinsic::aarch64_sve_cmplt_wide:
14971 return tryConvertSVEWideCompare(N, ISD::SETLT, DCI, DAG);
14972 case Intrinsic::aarch64_sve_cmple_wide:
14973 return tryConvertSVEWideCompare(N, ISD::SETLE, DCI, DAG);
14974 case Intrinsic::aarch64_sve_cmphs_wide:
14975 return tryConvertSVEWideCompare(N, ISD::SETUGE, DCI, DAG);
14976 case Intrinsic::aarch64_sve_cmphi_wide:
14977 return tryConvertSVEWideCompare(N, ISD::SETUGT, DCI, DAG);
14978 case Intrinsic::aarch64_sve_cmplo_wide:
14979 return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
14980 case Intrinsic::aarch64_sve_cmpls_wide:
14981 return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
14982 case Intrinsic::aarch64_sve_ptest_any:
14983 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
14984 AArch64CC::ANY_ACTIVE);
14985 case Intrinsic::aarch64_sve_ptest_first:
14986 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
14987 AArch64CC::FIRST_ACTIVE);
14988 case Intrinsic::aarch64_sve_ptest_last:
14989 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
14990 AArch64CC::LAST_ACTIVE);
14991 }
14992 return SDValue();
14993}
14994
14995static SDValue performExtendCombine(SDNode *N,
14996 TargetLowering::DAGCombinerInfo &DCI,
14997 SelectionDAG &DAG) {
14998 // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
14999 // we can convert that DUP into another extract_high (of a bigger DUP), which
15000 // helps the backend to decide that an sabdl2 would be useful, saving a real
15001 // extract_high operation.
15002 if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
15003 (N->getOperand(0).getOpcode() == ISD::ABDU ||
15004 N->getOperand(0).getOpcode() == ISD::ABDS)) {
15005 SDNode *ABDNode = N->getOperand(0).getNode();
15006 SDValue NewABD =
15007 tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG);
15008 if (!NewABD.getNode())
15009 return SDValue();
15010
15011 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
15012 }
15013 return SDValue();
15014}
15015
15016static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
15017 SDValue SplatVal, unsigned NumVecElts) {
15018 assert(!St.isTruncatingStore() && "cannot split truncating vector store");
15019 unsigned OrigAlignment = St.getAlignment();
15020 unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
15021
15022 // Create scalar stores. This is at least as good as the code sequence for a
15023 // split unaligned store which is a dup.s, ext.b, and two stores.
15024 // Most of the time the three stores should be replaced by store pair
15025 // instructions (stp).
15026 SDLoc DL(&St);
15027 SDValue BasePtr = St.getBasePtr();
15028 uint64_t BaseOffset = 0;
15029
15030 const MachinePointerInfo &PtrInfo = St.getPointerInfo();
15031 SDValue NewST1 =
15032 DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
15033 OrigAlignment, St.getMemOperand()->getFlags());
15034
15035 // As this is in ISel, we will not merge this add, which may degrade results.
15036 if (BasePtr->getOpcode() == ISD::ADD &&
15037 isa<ConstantSDNode>(BasePtr->getOperand(1))) {
15038 BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
15039 BasePtr = BasePtr->getOperand(0);
15040 }
15041
15042 unsigned Offset = EltOffset;
15043 while (--NumVecElts) {
15044 unsigned Alignment = MinAlign(OrigAlignment, Offset);
15045 SDValue OffsetPtr =
15046 DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
15047 DAG.getConstant(BaseOffset + Offset, DL, MVT::i64));
15048 NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
15049 PtrInfo.getWithOffset(Offset), Alignment,
15050 St.getMemOperand()->getFlags());
15051 Offset += EltOffset;
15052 }
15053 return NewST1;
15054}
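// Sketch of the output (assuming NumVecElts == 4 with 4-byte elements): four
// chained scalar stores of SplatVal at offsets 0, 4, 8 and 12 from the base,
// which the load/store optimizer can later fuse into two stp instructions.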
15055
15056// Returns an SVE type that ContentTy can be trivially sign or zero extended
15057// into.
15058static MVT getSVEContainerType(EVT ContentTy) {
15059 assert(ContentTy.isSimple() && "No SVE containers for extended types");
15060
15061 switch (ContentTy.getSimpleVT().SimpleTy) {
15062 default:
15063 llvm_unreachable("No known SVE container for this MVT type");
15064 case MVT::nxv2i8:
15065 case MVT::nxv2i16:
15066 case MVT::nxv2i32:
15067 case MVT::nxv2i64:
15068 case MVT::nxv2f32:
15069 case MVT::nxv2f64:
15070 return MVT::nxv2i64;
15071 case MVT::nxv4i8:
15072 case MVT::nxv4i16:
15073 case MVT::nxv4i32:
15074 case MVT::nxv4f32:
15075 return MVT::nxv4i32;
15076 case MVT::nxv8i8:
15077 case MVT::nxv8i16:
15078 case MVT::nxv8f16:
15079 case MVT::nxv8bf16:
15080 return MVT::nxv8i16;
15081 case MVT::nxv16i8:
15082 return MVT::nxv16i8;
15083 }
15084}
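// For example, nxv2i8, nxv2i16 and nxv2i32 all map to the nxv2i64 container,
// so callers such as performLD1Combine below can load in the wide container
// type and truncate the result back to the original element width.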
15085
15086static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
15087 SDLoc DL(N);
15088 EVT VT = N->getValueType(0);
15089
15090 if (VT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
15091 return SDValue();
15092
15093 EVT ContainerVT = VT;
15094 if (ContainerVT.isInteger())
15095 ContainerVT = getSVEContainerType(ContainerVT);
15096
15097 SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other);
15098 SDValue Ops[] = { N->getOperand(0), // Chain
15099 N->getOperand(2), // Pg
15100 N->getOperand(3), // Base
15101 DAG.getValueType(VT) };
15102
15103 SDValue Load = DAG.getNode(Opc, DL, VTs, Ops);
15104 SDValue LoadChain = SDValue(Load.getNode(), 1);
15105
15106 if (ContainerVT.isInteger() && (VT != ContainerVT))
15107 Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0));
15108
15109 return DAG.getMergeValues({ Load, LoadChain }, DL);
15110}
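// Sketch (assumed types): an ld1-style SVE load intrinsic producing nxv2i32 is
// rebuilt here as a load with result type nxv2i64 (its container), with
// DAG.getValueType recording the narrower nxv2i32 memory type, and an
// ISD::TRUNCATE restores the original result type.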
15111
15112static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
15113 SDLoc DL(N);
15114 EVT VT = N->getValueType(0);
15115 EVT PtrTy = N->getOperand(3).getValueType();
15116
15117 if (VT == MVT::nxv8bf16 &&
15118 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
15119 return SDValue();
15120
15121 EVT LoadVT = VT;
15122 if (VT.isFloatingPoint())
15123 LoadVT = VT.changeTypeToInteger();
15124
15125 auto *MINode = cast<MemIntrinsicSDNode>(N);
15126 SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
15127 SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
15128 MINode->getOperand(3), DAG.getUNDEF(PtrTy),
15129 MINode->getOperand(2), PassThru,
15130 MINode->getMemoryVT(), MINode->getMemOperand(),
15131 ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
15132
15133 if (VT.isFloatingPoint()) {
15134 SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
15135 return DAG.getMergeValues(Ops, DL);
15136 }
15137
15138 return L;
15139}
15140
15141template <unsigned Opcode>
15142static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
15143 static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
15144 Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
15145 "Unsupported opcode.");
15146 SDLoc DL(N);
15147 EVT VT = N->getValueType(0);
15148 if (VT == MVT::nxv8bf16 &&
15149 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
15150 return SDValue();
15151
15152 EVT LoadVT = VT;
15153 if (VT.isFloatingPoint())
15154 LoadVT = VT.changeTypeToInteger();
15155
15156 SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
15157 SDValue Load = DAG.getNode(Opcode, DL, {LoadVT, MVT::Other}, Ops);
15158 SDValue LoadChain = SDValue(Load.getNode(), 1);
15159
15160 if (VT.isFloatingPoint())
15161 Load = DAG.getNode(ISD::BITCAST, DL, VT, Load.getValue(0));
15162
15163 return DAG.getMergeValues({Load, LoadChain}, DL);
15164}
15165
15166static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) {
15167 SDLoc DL(N);
15168 SDValue Data = N->getOperand(2);
15169 EVT DataVT = Data.getValueType();
15170 EVT HwSrcVt = getSVEContainerType(DataVT);
15171 SDValue InputVT = DAG.getValueType(DataVT);
15172
15173 if (DataVT == MVT::nxv8bf16 &&
15174 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
15175 return SDValue();
15176
15177 if (DataVT.isFloatingPoint())
15178 InputVT = DAG.getValueType(HwSrcVt);
15179
15180 SDValue SrcNew;
15181 if (Data.getValueType().isFloatingPoint())
15182 SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Data);
15183 else
15184 SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data);
15185
15186 SDValue Ops[] = { N->getOperand(0), // Chain
15187 SrcNew,
15188 N->getOperand(4), // Base
15189 N->getOperand(3), // Pg
15190 InputVT
15191 };
15192
15193 return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
15194}
15195
15196static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
15197 SDLoc DL(N);
15198
15199 SDValue Data = N->getOperand(2);
15200 EVT DataVT = Data.getValueType();
15201 EVT PtrTy = N->getOperand(4).getValueType();
15202
15203 if (DataVT == MVT::nxv8bf16 &&
15204 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
15205 return SDValue();
15206
15207 if (DataVT.isFloatingPoint())
15208 Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
15209
15210 auto *MINode = cast<MemIntrinsicSDNode>(N);
15211 return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
15212 DAG.getUNDEF(PtrTy), MINode->getOperand(3),
15213 MINode->getMemoryVT(), MINode->getMemOperand(),
15214 ISD::UNINDEXED, false, false);
15215}
15216
15217/// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
15218/// load store optimizer pass will merge them to store pair stores. This should
15219/// be better than a movi to create the vector zero followed by a vector store
15220 /// if the zero constant is not re-used, since one instruction and one register
15221/// live range will be removed.
15222///
15223/// For example, the final generated code should be:
15224///
15225/// stp xzr, xzr, [x0]
15226///
15227/// instead of:
15228///
15229/// movi v0.2d, #0
15230/// str q0, [x0]
15231///
15232static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
15233 SDValue StVal = St.getValue();
15234 EVT VT = StVal.getValueType();
15235
15236 // Avoid scalarizing zero splat stores for scalable vectors.
15237 if (VT.isScalableVector())
15238 return SDValue();
15239
15240 // It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or
15241 // 2, 3 or 4 i32 elements.
15242 int NumVecElts = VT.getVectorNumElements();
15243 if (!(((NumVecElts == 2 || NumVecElts == 3) &&
15244 VT.getVectorElementType().getSizeInBits() == 64) ||
15245 ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
15246 VT.getVectorElementType().getSizeInBits() == 32)))
15247 return SDValue();
15248
15249 if (StVal.getOpcode() != ISD::BUILD_VECTOR)
15250 return SDValue();
15251
15252 // If the zero constant has more than one use then the vector store could be
15253 // better since the constant mov will be amortized and stp q instructions
15254 // should be able to be formed.
15255 if (!StVal.hasOneUse())
15256 return SDValue();
15257
15258 // If the store is truncating then it's going down to i16 or smaller, which
15259 // means it can be implemented in a single store anyway.
15260 if (St.isTruncatingStore())
15261 return SDValue();
15262
15263 // If the immediate offset of the address operand is too large for the stp
15264 // instruction, then bail out.
15265 if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
15266 int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
15267 if (Offset < -512 || Offset > 504)
15268 return SDValue();
15269 }
15270
15271 for (int I = 0; I < NumVecElts; ++I) {
15272 SDValue EltVal = StVal.getOperand(I);
15273 if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal))
15274 return SDValue();
15275 }
15276
15277 // Use a CopyFromReg WZR/XZR here to prevent
15278 // DAGCombiner::MergeConsecutiveStores from undoing this transformation.
15279 SDLoc DL(&St);
15280 unsigned ZeroReg;
15281 EVT ZeroVT;
15282 if (VT.getVectorElementType().getSizeInBits() == 32) {
15283 ZeroReg = AArch64::WZR;
15284 ZeroVT = MVT::i32;
15285 } else {
15286 ZeroReg = AArch64::XZR;
15287 ZeroVT = MVT::i64;
15288 }
15289 SDValue SplatVal =
15290 DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT);
15291 return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
15292}
15293
15294/// Replace a splat of a scalar to a vector store by scalar stores of the scalar
15295/// value. The load store optimizer pass will merge them to store pair stores.
15296/// This has better performance than a splat of the scalar followed by a split
15297/// vector store. Even if the stores are not merged it is four stores vs a dup,
15298/// followed by an ext.b and two stores.
15299static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
15300 SDValue StVal = St.getValue();
15301 EVT VT = StVal.getValueType();
15302
15303 // Don't replace floating point stores, they possibly won't be transformed to
15304 // stp because of the store pair suppress pass.
15305 if (VT.isFloatingPoint())
15306 return SDValue();
15307
15308 // We can express a splat as store pair(s) for 2 or 4 elements.
15309 unsigned NumVecElts = VT.getVectorNumElements();
15310 if (NumVecElts != 4 && NumVecElts != 2)
15311 return SDValue();
15312
15313 // If the store is truncating then it's going down to i16 or smaller, which
15314 // means it can be implemented in a single store anyway.
15315 if (St.isTruncatingStore())
15316 return SDValue();
15317
15318 // Check that this is a splat.
15319 // Make sure that each of the relevant vector element locations are inserted
15320 // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
15321 std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
15322 SDValue SplatVal;
15323 for (unsigned I = 0; I < NumVecElts; ++I) {
15324 // Check for insert vector elements.
15325 if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
15326 return SDValue();
15327
15328 // Check that same value is inserted at each vector element.
15329 if (I == 0)
15330 SplatVal = StVal.getOperand(1);
15331 else if (StVal.getOperand(1) != SplatVal)
15332 return SDValue();
15333
15334 // Check insert element index.
15335 ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
15336 if (!CIndex)
15337 return SDValue();
15338 uint64_t IndexVal = CIndex->getZExtValue();
15339 if (IndexVal >= NumVecElts)
15340 return SDValue();
15341 IndexNotInserted.reset(IndexVal);
15342
15343 StVal = StVal.getOperand(0);
15344 }
15345 // Check that all vector element locations were inserted to.
15346 if (IndexNotInserted.any())
15347 return SDValue();
15348
15349 return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
15350}
15351
15352static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
15353 SelectionDAG &DAG,
15354 const AArch64Subtarget *Subtarget) {
15355
15356 StoreSDNode *S = cast<StoreSDNode>(N);
15357 if (S->isVolatile() || S->isIndexed())
15358 return SDValue();
15359
15360 SDValue StVal = S->getValue();
15361 EVT VT = StVal.getValueType();
15362
15363 if (!VT.isFixedLengthVector())
15364 return SDValue();
15365
15366 // If we get a splat of zeros, convert this vector store to a store of
15367 // scalars. They will be merged into store pairs of xzr thereby removing one
15368 // instruction and one register.
15369 if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S))
15370 return ReplacedZeroSplat;
15371
15372 // FIXME: The logic for deciding if an unaligned store should be split should
15373 // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
15374 // a call to that function here.
15375
15376 if (!Subtarget->isMisaligned128StoreSlow())
15377 return SDValue();
15378
15379 // Don't split at -Oz.
15380 if (DAG.getMachineFunction().getFunction().hasMinSize())
15381 return SDValue();
15382
15383 // Don't split v2i64 vectors. Memcpy lowering produces those and splitting
15384 // those up regresses performance on micro-benchmarks and olden/bh.
15385 if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
15386 return SDValue();
15387
15388 // Split unaligned 16B stores. They are terrible for performance.
15389 // Don't split stores with alignment of 1 or 2. Code that uses clang vector
15390 // extensions can use this to mark that it does not want splitting to happen
15391 // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
15392 // eliminating alignment hazards is only 1 in 8 for alignment of 2.
15393 if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
15394 S->getAlignment() <= 2)
15395 return SDValue();
15396
15397 // If we get a splat of a scalar convert this vector store to a store of
15398 // scalars. They will be merged into store pairs thereby removing two
15399 // instructions.
15400 if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S))
15401 return ReplacedSplat;
15402
15403 SDLoc DL(S);
15404
15405 // Split VT into two.
15406 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
15407 unsigned NumElts = HalfVT.getVectorNumElements();
15408 SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
15409 DAG.getConstant(0, DL, MVT::i64));
15410 SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
15411 DAG.getConstant(NumElts, DL, MVT::i64));
15412 SDValue BasePtr = S->getBasePtr();
15413 SDValue NewST1 =
15414 DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
15415 S->getAlignment(), S->getMemOperand()->getFlags());
15416 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
15417 DAG.getConstant(8, DL, MVT::i64));
15418 return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
15419 S->getPointerInfo(), S->getAlignment(),
15420 S->getMemOperand()->getFlags());
15421}
15422
15423static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG) {
15424 assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexepected Opcode!");
15425
15426 // splice(pg, op1, undef) -> op1
15427 if (N->getOperand(2).isUndef())
15428 return N->getOperand(1);
15429
15430 return SDValue();
15431}
15432
15433static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
15434 SDLoc DL(N);
15435 SDValue Op0 = N->getOperand(0);
15436 SDValue Op1 = N->getOperand(1);
15437 EVT ResVT = N->getValueType(0);
15438
15439 // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
15440 if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
15441 if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
15442 SDValue X = Op0.getOperand(0).getOperand(0);
15443 return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
15444 }
15445 }
15446
15447 // uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
15448 if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
15449 if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
15450 SDValue Z = Op1.getOperand(0).getOperand(1);
15451 return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
15452 }
15453 }
15454
15455 return SDValue();
15456}
15457
15458static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG) {
15459 unsigned Opc = N->getOpcode();
15460
15461 assert(((Opc >= AArch64ISD::GLD1_MERGE_ZERO && // unsigned gather loads
15462 Opc <= AArch64ISD::GLD1_IMM_MERGE_ZERO) ||
15463 (Opc >= AArch64ISD::GLD1S_MERGE_ZERO && // signed gather loads
15464 Opc <= AArch64ISD::GLD1S_IMM_MERGE_ZERO)) &&
15465 "Invalid opcode.");
15466
15467 const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO ||
15468 Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
15469 const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO ||
15470 Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
15471 const bool Extended = Opc == AArch64ISD::GLD1_SXTW_MERGE_ZERO ||
15472 Opc == AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO ||
15473 Opc == AArch64ISD::GLD1_UXTW_MERGE_ZERO ||
15474 Opc == AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO;
15475
15476 SDLoc DL(N);
15477 SDValue Chain = N->getOperand(0);
15478 SDValue Pg = N->getOperand(1);
15479 SDValue Base = N->getOperand(2);
15480 SDValue Offset = N->getOperand(3);
15481 SDValue Ty = N->getOperand(4);
15482
15483 EVT ResVT = N->getValueType(0);
15484
15485 const auto OffsetOpc = Offset.getOpcode();
15486 const bool OffsetIsZExt =
15487 OffsetOpc == AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU;
15488 const bool OffsetIsSExt =
15489 OffsetOpc == AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU;
15490
15491 // Fold sign/zero extensions of vector offsets into GLD1 nodes where possible.
15492 if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
15493 SDValue ExtPg = Offset.getOperand(0);
15494 VTSDNode *ExtFrom = cast<VTSDNode>(Offset.getOperand(2).getNode());
15495 EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType();
15496
15497 // If the predicate for the sign- or zero-extended offset is the
15498 // same as the predicate used for this load and the sign-/zero-extension
15499 // was from a 32-bit type...
15500 if (ExtPg == Pg && ExtFromEVT == MVT::i32) {
15501 SDValue UnextendedOffset = Offset.getOperand(1);
15502
15503 unsigned NewOpc = getGatherVecOpcode(Scaled, OffsetIsSExt, true);
15504 if (Signed)
15505 NewOpc = getSignExtendedGatherOpcode(NewOpc);
15506
15507 return DAG.getNode(NewOpc, DL, {ResVT, MVT::Other},
15508 {Chain, Pg, Base, UnextendedOffset, Ty});
15509 }
15510 }
15511
15512 return SDValue();
15513}
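// Illustrative case: an unscaled unsigned gather whose offset vector comes
// from a sign-extend-from-i32 governed by the same predicate is rebuilt with
// the unextended offsets and the 32-bit (SXTW) gather opcode chosen by
// getGatherVecOpcode; getSignExtendedGatherOpcode is applied on top when the
// gather itself is a sign-extending (GLD1S) form.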
15514
15515/// Optimize a vector shift instruction and its operand if shifted out
15516/// bits are not used.
15517static SDValue performVectorShiftCombine(SDNode *N,
15518 const AArch64TargetLowering &TLI,
15519 TargetLowering::DAGCombinerInfo &DCI) {
15520 assert(N->getOpcode() == AArch64ISD::VASHR ||
15521 N->getOpcode() == AArch64ISD::VLSHR);
15522
15523 SDValue Op = N->getOperand(0);
15524 unsigned OpScalarSize = Op.getScalarValueSizeInBits();
15525
15526 unsigned ShiftImm = N->getConstantOperandVal(1);
15527 assert(OpScalarSize > ShiftImm && "Invalid shift imm");
15528
15529 APInt ShiftedOutBits = APInt::getLowBitsSet(OpScalarSize, ShiftImm);
15530 APInt DemandedMask = ~ShiftedOutBits;
15531
15532 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15533 return SDValue(N, 0);
15534
15535 return SDValue();
15536}
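// Worked example: for VLSHR(x, 8) on 16-bit lanes the low 8 bits of x are
// shifted out, so DemandedMask is 0xff00 per lane and SimplifyDemandedBits may
// strip computations that only affect those discarded low bits.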
15537
15538/// Target-specific DAG combine function for post-increment LD1 (lane) and
15539/// post-increment LD1R.
15540static SDValue performPostLD1Combine(SDNode *N,
15541 TargetLowering::DAGCombinerInfo &DCI,
15542 bool IsLaneOp) {
15543 if (DCI.isBeforeLegalizeOps())
15544 return SDValue();
15545
15546 SelectionDAG &DAG = DCI.DAG;
15547 EVT VT = N->getValueType(0);
15548
15549 if (VT.isScalableVector())
15550 return SDValue();
15551
15552 unsigned LoadIdx = IsLaneOp ? 1 : 0;
15553 SDNode *LD = N->getOperand(LoadIdx).getNode();
15554 // If it is not a LOAD, we cannot do such a combine.
15555 if (LD->getOpcode() != ISD::LOAD)
15556 return SDValue();
15557
15558 // The vector lane must be a constant in the LD1LANE opcode.
15559 SDValue Lane;
15560 if (IsLaneOp) {
15561 Lane = N->getOperand(2);
15562 auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
15563 if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
15564 return SDValue();
15565 }
15566
15567 LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
15568 EVT MemVT = LoadSDN->getMemoryVT();
15569 // Check if memory operand is the same type as the vector element.
15570 if (MemVT != VT.getVectorElementType())
15571 return SDValue();
15572
15573 // Check if there are other uses. If so, do not combine as it will introduce
15574 // an extra load.
15575 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
15576 ++UI) {
15577 if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
15578 continue;
15579 if (*UI != N)
15580 return SDValue();
15581 }
15582
15583 SDValue Addr = LD->getOperand(1);
15584 SDValue Vector = N->getOperand(0);
15585 // Search for a use of the address operand that is an increment.
15586 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
15587 Addr.getNode()->use_end(); UI != UE; ++UI) {
15588 SDNode *User = *UI;
15589 if (User->getOpcode() != ISD::ADD
15590 || UI.getUse().getResNo() != Addr.getResNo())
15591 continue;
15592
15593 // If the increment is a constant, it must match the memory ref size.
15594 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
15595 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
15596 uint32_t IncVal = CInc->getZExtValue();
15597 unsigned NumBytes = VT.getScalarSizeInBits() / 8;
15598 if (IncVal != NumBytes)
15599 continue;
15600 Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
15601 }
15602
15603 // To avoid cycle construction make sure that neither the load nor the add
15604 // are predecessors to each other or the Vector.
15605 SmallPtrSet<const SDNode *, 32> Visited;
15606 SmallVector<const SDNode *, 16> Worklist;
15607 Visited.insert(Addr.getNode());
15608 Worklist.push_back(User);
15609 Worklist.push_back(LD);
15610 Worklist.push_back(Vector.getNode());
15611 if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
15612 SDNode::hasPredecessorHelper(User, Visited, Worklist))
15613 continue;
15614
15615 SmallVector<SDValue, 8> Ops;
15616 Ops.push_back(LD->getOperand(0)); // Chain
15617 if (IsLaneOp) {
15618 Ops.push_back(Vector); // The vector to be inserted
15619 Ops.push_back(Lane); // The lane to be inserted in the vector
15620 }
15621 Ops.push_back(Addr);
15622 Ops.push_back(Inc);
15623
15624 EVT Tys[3] = { VT, MVT::i64, MVT::Other };
15625 SDVTList SDTys = DAG.getVTList(Tys);
15626 unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
15627 SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
15628 MemVT,
15629 LoadSDN->getMemOperand());
15630
15631 // Update the uses.
15632 SDValue NewResults[] = {
15633 SDValue(LD, 0), // The result of load
15634 SDValue(UpdN.getNode(), 2) // Chain
15635 };
15636 DCI.CombineTo(LD, NewResults);
15637 DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
15638 DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
15639
15640 break;
15641 }
15642 return SDValue();
15643}
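// Sketch of the targeted pattern (assumed shapes): a scalar load feeding a
// lane insert or a DUP, whose address is separately incremented by the element
// size; these are folded into LD1LANEpost / LD1DUPpost, i.e. a post-indexed
// form such as "ld1 { v0.s }[1], [x0], #4", with the incremented address
// returned as the write-back result.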
15644
15645/// Simplify ``Addr`` given that the top byte of it is ignored by HW during
15646/// address translation.
15647static bool performTBISimplification(SDValue Addr,
15648 TargetLowering::DAGCombinerInfo &DCI,
15649 SelectionDAG &DAG) {
15650 APInt DemandedMask = APInt::getLowBitsSet(64, 56);
15651 KnownBits Known;
15652 TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
15653 !DCI.isBeforeLegalizeOps());
15654 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15655 if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) {
15656 DCI.CommitTargetLoweringOpt(TLO);
15657 return true;
15658 }
15659 return false;
15660}
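// Example of the effect: under TBI the hardware ignores address bits 63:56, so
// only bits 55:0 are demanded here and an explicit AND that merely clears the
// pointer's top byte (e.g. stripping a tag) can be folded away from the
// address computation.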
15661
15662static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
15663 assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) &&
15664 "Expected STORE dag node in input!");
15665
15666 if (auto Store = dyn_cast<StoreSDNode>(N)) {
15667 if (!Store->isTruncatingStore() || Store->isIndexed())
15668 return SDValue();
15669 SDValue Ext = Store->getValue();
15670 auto ExtOpCode = Ext.getOpcode();
15671 if (ExtOpCode != ISD::ZERO_EXTEND && ExtOpCode != ISD::SIGN_EXTEND &&
15672 ExtOpCode != ISD::ANY_EXTEND)
15673 return SDValue();
15674 SDValue Orig = Ext->getOperand(0);
15675 if (Store->getMemoryVT() != Orig.getValueType())
15676 return SDValue();
15677 return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
15678 Store->getBasePtr(), Store->getMemOperand());
15679 }
15680
15681 return SDValue();
15682}
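// Illustrative fold: a truncating store to i8 of (zext i8 %v to i32) becomes a
// plain i8 store of %v, because the truncation discards exactly the bits the
// extension introduced.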
15683
15684static SDValue performSTORECombine(SDNode *N,
15685 TargetLowering::DAGCombinerInfo &DCI,
15686 SelectionDAG &DAG,
15687 const AArch64Subtarget *Subtarget) {
15688 if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
15689 return Split;
15690
15691 if (Subtarget->supportsAddressTopByteIgnored() &&
15692 performTBISimplification(N->getOperand(2), DCI, DAG))
15693 return SDValue(N, 0);
15694
15695 if (SDValue Store = foldTruncStoreOfExt(DAG, N))
15696 return Store;
15697
15698 return SDValue();
15699}
15700
15701/// Target-specific DAG combine function for NEON load/store intrinsics
15702/// to merge base address updates.
15703static SDValue performNEONPostLDSTCombine(SDNode *N,
15704 TargetLowering::DAGCombinerInfo &DCI,
15705 SelectionDAG &DAG) {
15706 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
15707 return SDValue();
15708
15709 unsigned AddrOpIdx = N->getNumOperands() - 1;
15710 SDValue Addr = N->getOperand(AddrOpIdx);
15711
15712 // Search for a use of the address operand that is an increment.
15713 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
15714 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
15715 SDNode *User = *UI;
15716 if (User->getOpcode() != ISD::ADD ||
15717 UI.getUse().getResNo() != Addr.getResNo())
15718 continue;
15719
15720 // Check that the add is independent of the load/store. Otherwise, folding
15721 // it would create a cycle.
15722 SmallPtrSet<const SDNode *, 32> Visited;
15723 SmallVector<const SDNode *, 16> Worklist;
15724 Visited.insert(Addr.getNode());
15725 Worklist.push_back(N);
15726 Worklist.push_back(User);
15727 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
15728 SDNode::hasPredecessorHelper(User, Visited, Worklist))
15729 continue;
15730
15731 // Find the new opcode for the updating load/store.
15732 bool IsStore = false;
15733 bool IsLaneOp = false;
15734 bool IsDupOp = false;
15735 unsigned NewOpc = 0;
15736 unsigned NumVecs = 0;
15737 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
15738 switch (IntNo) {
15739 default: llvm_unreachable("unexpected intrinsic for Neon base update");
15740 case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
15741 NumVecs = 2; break;
15742 case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
15743 NumVecs = 3; break;
15744 case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
15745 NumVecs = 4; break;
15746 case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
15747 NumVecs = 2; IsStore = true; break;
15748 case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
15749 NumVecs = 3; IsStore = true; break;
15750 case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
15751 NumVecs = 4; IsStore = true; break;
15752 case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
15753 NumVecs = 2; break;
15754 case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
15755 NumVecs = 3; break;
15756 case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
15757 NumVecs = 4; break;
15758 case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
15759 NumVecs = 2; IsStore = true; break;
15760 case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
15761 NumVecs = 3; IsStore = true; break;
15762 case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
15763 NumVecs = 4; IsStore = true; break;
15764 case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
15765 NumVecs = 2; IsDupOp = true; break;
15766 case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
15767 NumVecs = 3; IsDupOp = true; break;
15768 case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
15769 NumVecs = 4; IsDupOp = true; break;
15770 case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
15771 NumVecs = 2; IsLaneOp = true; break;
15772 case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
15773 NumVecs = 3; IsLaneOp = true; break;
15774 case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
15775 NumVecs = 4; IsLaneOp = true; break;
15776 case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
15777 NumVecs = 2; IsStore = true; IsLaneOp = true; break;
15778 case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
15779 NumVecs = 3; IsStore = true; IsLaneOp = true; break;
15780 case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
15781 NumVecs = 4; IsStore = true; IsLaneOp = true; break;
15782 }
15783
15784 EVT VecTy;
15785 if (IsStore)
15786 VecTy = N->getOperand(2).getValueType();
15787 else
15788 VecTy = N->getValueType(0);
15789
15790 // If the increment is a constant, it must match the memory ref size.
15791 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
15792 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
15793 uint32_t IncVal = CInc->getZExtValue();
15794 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
15795 if (IsLaneOp || IsDupOp)
15796 NumBytes /= VecTy.getVectorNumElements();
15797 if (IncVal != NumBytes)
15798 continue;
15799 Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
15800 }
15801 SmallVector<SDValue, 8> Ops;
15802 Ops.push_back(N->getOperand(0)); // Incoming chain
15803 // Load lane and store have vector list as input.
15804 if (IsLaneOp || IsStore)
15805 for (unsigned i = 2; i < AddrOpIdx; ++i)
15806 Ops.push_back(N->getOperand(i));
15807 Ops.push_back(Addr); // Base register
15808 Ops.push_back(Inc);
15809
15810 // Return Types.
15811 EVT Tys[6];
15812 unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
15813 unsigned n;
15814 for (n = 0; n < NumResultVecs; ++n)
15815 Tys[n] = VecTy;
15816 Tys[n++] = MVT::i64; // Type of write back register
15817 Tys[n] = MVT::Other; // Type of the chain
15818 SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
15819
15820 MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
15821 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
15822 MemInt->getMemoryVT(),
15823 MemInt->getMemOperand());
15824
15825 // Update the uses.
15826 std::vector<SDValue> NewResults;
15827 for (unsigned i = 0; i < NumResultVecs; ++i) {
15828 NewResults.push_back(SDValue(UpdN.getNode(), i));
15829 }
15830 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
15831 DCI.CombineTo(N, NewResults);
15832 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
15833
15834 break;
15835 }
15836 return SDValue();
15837}
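// Example: an @llvm.aarch64.neon.ld2.v4i32 whose base pointer is also
// incremented by 32 bytes (2 vectors of 16 bytes) is rewritten as an
// AArch64ISD::LD2post node, which produces the two vectors, the written-back
// address (i64) and the chain in one operation.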
15838
15839// Checks to see if the value is the prescribed width and returns information
15840// about its extension mode.
15841static
15842bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
15843 ExtType = ISD::NON_EXTLOAD;
15844 switch(V.getNode()->getOpcode()) {
15845 default:
15846 return false;
15847 case ISD::LOAD: {
15848 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
15849 if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
15850 || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
15851 ExtType = LoadNode->getExtensionType();
15852 return true;
15853 }
15854 return false;
15855 }
15856 case ISD::AssertSext: {
15857 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
15858 if ((TypeNode->getVT() == MVT::i8 && width == 8)
15859 || (TypeNode->getVT() == MVT::i16 && width == 16)) {
15860 ExtType = ISD::SEXTLOAD;
15861 return true;
15862 }
15863 return false;
15864 }
15865 case ISD::AssertZext: {
15866 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
15867 if ((TypeNode->getVT() == MVT::i8 && width == 8)
15868 || (TypeNode->getVT() == MVT::i16 && width == 16)) {
15869 ExtType = ISD::ZEXTLOAD;
15870 return true;
15871 }
15872 return false;
15873 }
15874 case ISD::Constant:
15875 case ISD::TargetConstant: {
15876 return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
15877 1LL << (width - 1);
15878 }
15879 }
15880
15881 return true;
15882}
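// Example for width == 8: a constant qualifies when |value| < 128, an i8
// extending load reports its own extension kind, and AssertSext/AssertZext
// from i8 are treated as sign-/zero-extended 8-bit values respectively.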
15883
15884// This function does a whole lot of voodoo to determine if the tests are
15885// equivalent without and with a mask. Essentially what happens is that given a
15886// DAG resembling:
15887//
15888// +-------------+ +-------------+ +-------------+ +-------------+
15889// | Input | | AddConstant | | CompConstant| | CC |
15890// +-------------+ +-------------+ +-------------+ +-------------+
15891// | | | |
15892// V V | +----------+
15893// +-------------+ +----+ | |
15894// | ADD | |0xff| | |
15895// +-------------+ +----+ | |
15896// | | | |
15897// V V | |
15898// +-------------+ | |
15899// | AND | | |
15900// +-------------+ | |
15901// | | |
15902// +-----+ | |
15903// | | |
15904// V V V
15905// +-------------+
15906// | CMP |
15907// +-------------+
15908//
15909// The AND node may be safely removed for some combinations of inputs. In
15910// particular we need to take into account the extension type of the Input,
15911// the exact values of AddConstant, CompConstant, and CC, along with the nominal
15912// width of the input (this can work for any width inputs, the above graph is
15913 // specific to 8 bits).
15914//
15915// The specific equations were worked out by generating output tables for each
15916 // AArch64CC value in terms of the AddConstant (w1) and CompConstant (w2). The
15917// problem was simplified by working with 4 bit inputs, which means we only
15918// needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
15919// extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
15920// patterns present in both extensions (0,7). For every distinct set of
15921// AddConstant and CompConstants bit patterns we can consider the masked and
15922// unmasked versions to be equivalent if the result of this function is true for
15923// all 16 distinct bit patterns for the current extension type of Input (w0).
15924//
15925// sub w8, w0, w1
15926// and w10, w8, #0x0f
15927// cmp w8, w2
15928// cset w9, AArch64CC
15929// cmp w10, w2
15930// cset w11, AArch64CC
15931// cmp w9, w11
15932// cset w0, eq
15933// ret
15934//
15935// Since the above function shows when the outputs are equivalent it defines
15936// when it is safe to remove the AND. Unfortunately it only runs on AArch64 and
15937// would be expensive to run during compiles. The equations below were written
15938// in a test harness that confirmed they gave equivalent outputs to the above
15939// function for all inputs, so they can be used to determine if the removal is
15940// legal instead.
15941//
15942// isEquivalentMaskless() is the code for testing if the AND can be removed,
15943// factored out of the DAG recognition as the DAG can take several forms.
15944
15945static bool isEquivalentMaskless(unsigned CC, unsigned width,
15946 ISD::LoadExtType ExtType, int AddConstant,
15947 int CompConstant) {
15948 // By being careful about our equations and only writing them in terms of
15949 // symbolic values and well known constants (0, 1, -1, MaxUInt) we can
15950 // make them generally applicable to all bit widths.
15951 int MaxUInt = (1 << width);
15952
15953 // For the purposes of these comparisons sign extending the type is
15954 // equivalent to zero extending the add and displacing it by half the integer
15955 // width. Provided we are careful and make sure our equations are valid over
15956 // the whole range we can just adjust the input and avoid writing equations
15957 // for sign extended inputs.
15958 if (ExtType == ISD::SEXTLOAD)
15959 AddConstant -= (1 << (width-1));
15960
15961 switch(CC) {
15962 case AArch64CC::LE:
15963 case AArch64CC::GT:
15964 if ((AddConstant == 0) ||
15965 (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
15966 (AddConstant >= 0 && CompConstant < 0) ||
15967 (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
15968 return true;
15969 break;
15970 case AArch64CC::LT:
15971 case AArch64CC::GE:
15972 if ((AddConstant == 0) ||
15973 (AddConstant >= 0 && CompConstant <= 0) ||
15974 (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
15975 return true;
15976 break;
15977 case AArch64CC::HI:
15978 case AArch64CC::LS:
15979 if ((AddConstant >= 0 && CompConstant < 0) ||
15980 (AddConstant <= 0 && CompConstant >= -1 &&
15981 CompConstant < AddConstant + MaxUInt))
15982 return true;
15983 break;
15984 case AArch64CC::PL:
15985 case AArch64CC::MI:
15986 if ((AddConstant == 0) ||
15987 (AddConstant > 0 && CompConstant <= 0) ||
15988 (AddConstant < 0 && CompConstant <= AddConstant))
15989 return true;
15990 break;
15991 case AArch64CC::LO:
15992 case AArch64CC::HS:
15993 if ((AddConstant >= 0 && CompConstant <= 0) ||
15994 (AddConstant <= 0 && CompConstant >= 0 &&
15995 CompConstant <= AddConstant + MaxUInt))
15996 return true;
15997 break;
15998 case AArch64CC::EQ:
15999 case AArch64CC::NE:
16000 if ((AddConstant > 0 && CompConstant < 0) ||
16001 (AddConstant < 0 && CompConstant >= 0 &&
16002 CompConstant < AddConstant + MaxUInt) ||
16003 (AddConstant >= 0 && CompConstant >= 0 &&
16004 CompConstant >= AddConstant) ||
16005 (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
16006 return true;
16007 break;
16008 case AArch64CC::VS:
16009 case AArch64CC::VC:
16010 case AArch64CC::AL:
16011 case AArch64CC::NV:
16012 return true;
16013 case AArch64CC::Invalid:
16014 break;
16015 }
16016
16017 return false;
16018}
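// A brute-force standalone sketch (hypothetical, not LLVM code) of the test
// harness idea described above, specialised to 4-bit inputs and to a single
// condition code (unsigned lower, AArch64CC::LO). It checks whether dropping
// the 0xf mask can change the comparison outcome for a given AddConstant and
// CompConstant; the real function above encodes the same question symbolically
// for every condition code. Requires <cstdint> if extracted standalone.
static bool masklessIsEquivalentForLO(int32_t AddConstant, int32_t CompConstant,
                                      bool SignExtended) {
  for (int I = 0; I < 16; ++I) {
    // All 16 four-bit patterns of the input, under the chosen extension.
    int32_t W0 = SignExtended ? (I >= 8 ? I - 16 : I) : I;
    uint32_t Full = (uint32_t)(W0 + AddConstant);    // add of the input and AddConstant
    uint32_t Masked = Full & 0xf;                    // and with #0x0f
    bool FullLO = Full < (uint32_t)CompConstant;     // unsigned-lower on the full value
    bool MaskedLO = Masked < (uint32_t)CompConstant; // unsigned-lower on the masked value
    if (FullLO != MaskedLO)
      return false; // The AND is observable; it cannot be removed for LO.
  }
  return true;
}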
16019
16020static
16021SDValue performCONDCombine(SDNode *N,
16022 TargetLowering::DAGCombinerInfo &DCI,
16023 SelectionDAG &DAG, unsigned CCIndex,
16024 unsigned CmpIndex) {
16025 unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
16026 SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
16027 unsigned CondOpcode = SubsNode->getOpcode();
16028
16029 if (CondOpcode != AArch64ISD::SUBS)
16030 return SDValue();
16031
16032 // There is a SUBS feeding this condition. Is it fed by a mask we can
16033 // use?
16034
16035 SDNode *AndNode = SubsNode->getOperand(0).getNode();
16036 unsigned MaskBits = 0;
16037
16038 if (AndNode->getOpcode() != ISD::AND)
16039 return SDValue();
16040
16041 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
16042 uint32_t CNV = CN->getZExtValue();
16043 if (CNV == 255)
16044 MaskBits = 8;
16045 else if (CNV == 65535)
16046 MaskBits = 16;
16047 }
16048
16049 if (!MaskBits)
16050 return SDValue();
16051
16052 SDValue AddValue = AndNode->getOperand(0);
16053
16054 if (AddValue.getOpcode() != ISD::ADD)
16055 return SDValue();
16056
16057 // The basic dag structure is correct, grab the inputs and validate them.
16058
16059 SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
16060 SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
16061 SDValue SubsInputValue = SubsNode->getOperand(1);
16062
16063 // The mask is present and the provenance of all the values is a smaller type,
16064 // let's see if the mask is superfluous.
16065
16066 if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
16067 !isa<ConstantSDNode>(SubsInputValue.getNode()))
16068 return SDValue();
16069
16070 ISD::LoadExtType ExtType;
16071
16072 if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
16073 !checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
16074 !checkValueWidth(AddInputValue1, MaskBits, ExtType) )
16075 return SDValue();
16076
16077 if(!isEquivalentMaskless(CC, MaskBits, ExtType,
16078 cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
16079 cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
16080 return SDValue();
16081
16082 // The AND is not necessary, remove it.
16083
16084 SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
16085 SubsNode->getValueType(1));
16086 SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
16087
16088 SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
16089 DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
16090
16091 return SDValue(N, 0);
16092}
16093
16094// Optimize compare with zero and branch.
16095static SDValue performBRCONDCombine(SDNode *N,
16096 TargetLowering::DAGCombinerInfo &DCI,
16097 SelectionDAG &DAG) {
16098 MachineFunction &MF = DAG.getMachineFunction();
16099 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
16100 // will not be produced, as they are conditional branch instructions that do
16101 // not set flags.
16102 if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
16103 return SDValue();
16104
16105 if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
16106 N = NV.getNode();
16107 SDValue Chain = N->getOperand(0);
16108 SDValue Dest = N->getOperand(1);
16109 SDValue CCVal = N->getOperand(2);
16110 SDValue Cmp = N->getOperand(3);
16111
16112 assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
16113 unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
16114 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
16115 return SDValue();
16116
16117 unsigned CmpOpc = Cmp.getOpcode();
16118 if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
16119 return SDValue();
16120
16121 // Only attempt folding if there is only one use of the flag and no use of the
16122 // value.
16123 if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
16124 return SDValue();
16125
16126 SDValue LHS = Cmp.getOperand(0);
16127 SDValue RHS = Cmp.getOperand(1);
16128
16129 assert(LHS.getValueType() == RHS.getValueType() &&
16130 "Expected the value type to be the same for both operands!");
16131 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
16132 return SDValue();
16133
16134 if (isNullConstant(LHS))
16135 std::swap(LHS, RHS);
16136
16137 if (!isNullConstant(RHS))
16138 return SDValue();
16139
16140 if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
16141 LHS.getOpcode() == ISD::SRL)
16142 return SDValue();
16143
16144 // Fold the compare into the branch instruction.
16145 SDValue BR;
16146 if (CC == AArch64CC::EQ)
16147 BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
16148 else
16149 BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
16150
16151 // Do not add new nodes to DAG combiner worklist.
16152 DCI.CombineTo(N, BR, false);
16153
16154 return SDValue();
16155}
16156
16157// Optimize CSEL instructions
16158static SDValue performCSELCombine(SDNode *N,
16159 TargetLowering::DAGCombinerInfo &DCI,
16160 SelectionDAG &DAG) {
16161 // CSEL x, x, cc -> x
16162 if (N->getOperand(0) == N->getOperand(1))
16163 return N->getOperand(0);
16164
16165 return performCONDCombine(N, DCI, DAG, 2, 3);
16166}
16167
16168static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
16169 assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
16170 SDValue LHS = N->getOperand(0);
16171 SDValue RHS = N->getOperand(1);
16172 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
16173
16174 // setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
16175 if (Cond == ISD::SETNE && isOneConstant(RHS) &&
16176 LHS->getOpcode() == AArch64ISD::CSEL &&
16177 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
16178 LHS->hasOneUse()) {
16179 SDLoc DL(N);
16180
16181 // Invert CSEL's condition.
16182 auto *OpCC = cast<ConstantSDNode>(LHS.getOperand(2));
16183 auto OldCond = static_cast<AArch64CC::CondCode>(OpCC->getZExtValue());
16184 auto NewCond = getInvertedCondCode(OldCond);
16185
16186 // csel 0, 1, !cond, X
16187 SDValue CSEL =
16188 DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0),
16189 LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32),
16190 LHS.getOperand(3));
16191 return DAG.getZExtOrTrunc(CSEL, DL, N->getValueType(0));
16192 }
16193
16194 return SDValue();
16195}
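// A tiny standalone sketch (hypothetical, plain C++) of the scalar identity
// behind the combine above: a csel producing 0/1 compared against 1 with NE
// is the same csel with its condition inverted.
static unsigned cselZeroOne(bool Cond) { return Cond ? 0u : 1u; } // csel 0, 1, cond
static bool setccCselIdentityHolds(bool Cond) {
  unsigned SetCC = (cselZeroOne(Cond) != 1u) ? 1u : 0u; // setcc (csel 0,1,cond), 1, ne
  unsigned Folded = cselZeroOne(!Cond);                 // csel 0, 1, !cond
  return SetCC == Folded;                               // holds for both values of Cond
}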
16196
16197static SDValue performSetccMergeZeroCombine(SDNode *N, SelectionDAG &DAG) {
16198 assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
16199 "Unexpected opcode!");
16200
16201 SDValue Pred = N->getOperand(0);
16202 SDValue LHS = N->getOperand(1);
16203 SDValue RHS = N->getOperand(2);
16204 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
16205
16206 // setcc_merge_zero pred (sign_extend (setcc_merge_zero ... pred ...)), 0, ne
16207 // => inner setcc_merge_zero
16208 if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) &&
16209 LHS->getOpcode() == ISD::SIGN_EXTEND &&
16210 LHS->getOperand(0)->getValueType(0) == N->getValueType(0) &&
16211 LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
16212 LHS->getOperand(0)->getOperand(0) == Pred)
16213 return LHS->getOperand(0);
16214
16215 return SDValue();
16216}
16217
16218// Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
16219// as well as whether the test should be inverted. This code is required to
16220// catch these cases (as opposed to standard dag combines) because
16221// AArch64ISD::TBZ is matched during legalization.
16222static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
16223 SelectionDAG &DAG) {
16224
16225 if (!Op->hasOneUse())
16226 return Op;
16227
16228 // We don't handle undef/constant-fold cases below, as they should have
16229 // already been taken care of (e.g. and of 0, test of undefined shifted bits,
16230 // etc.)
16231
16232 // (tbz (trunc x), b) -> (tbz x, b)
16233 // This case is just here to enable more of the below cases to be caught.
16234 if (Op->getOpcode() == ISD::TRUNCATE &&
16235 Bit < Op->getValueType(0).getSizeInBits()) {
16236 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16237 }
16238
16239 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
16240 if (Op->getOpcode() == ISD::ANY_EXTEND &&
16241 Bit < Op->getOperand(0).getValueSizeInBits()) {
16242 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16243 }
16244
16245 if (Op->getNumOperands() != 2)
16246 return Op;
16247
16248 auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
16249 if (!C)
16250 return Op;
16251
16252 switch (Op->getOpcode()) {
16253 default:
16254 return Op;
16255
16256 // (tbz (and x, m), b) -> (tbz x, b)
16257 case ISD::AND:
16258 if ((C->getZExtValue() >> Bit) & 1)
16259 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16260 return Op;
16261
16262 // (tbz (shl x, c), b) -> (tbz x, b-c)
16263 case ISD::SHL:
16264 if (C->getZExtValue() <= Bit &&
16265 (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
16266 Bit = Bit - C->getZExtValue();
16267 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16268 }
16269 return Op;
16270
16271 // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
16272 case ISD::SRA:
16273 Bit = Bit + C->getZExtValue();
16274 if (Bit >= Op->getValueType(0).getSizeInBits())
16275 Bit = Op->getValueType(0).getSizeInBits() - 1;
16276 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16277
16278 // (tbz (srl x, c), b) -> (tbz x, b+c)
16279 case ISD::SRL:
16280 if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
16281 Bit = Bit + C->getZExtValue();
16282 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16283 }
16284 return Op;
16285
16286 // (tbz (xor x, -1), b) -> (tbnz x, b)
16287 case ISD::XOR:
16288 if ((C->getZExtValue() >> Bit) & 1)
16289 Invert = !Invert;
16290 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16291 }
16292}
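// A standalone sketch (hypothetical, plain C++, assuming 64-bit values) of the
// bit-index bookkeeping above: testing bit B of (x << C) is testing bit B - C
// of x, and testing bit B of a logical right shift (x >> C) is testing bit
// B + C of x; for an arithmetic shift the index saturates at the sign bit.
// Requires <cstdint> and <cassert> if extracted standalone.
static bool testBit(uint64_t V, unsigned B) { return (V >> B) & 1; }
static void tbzIndexExamples() {
  uint64_t X = 0x00F0; // bits 4..7 set
  assert(testBit(X << 4, 8) == testBit(X, 4)); // (tbz (shl x, 4), 8) -> (tbz x, 4)
  assert(testBit(X >> 4, 2) == testBit(X, 6)); // (tbz (srl x, 4), 2) -> (tbz x, 6)
}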
16293
16294// Optimize test single bit zero/non-zero and branch.
16295static SDValue performTBZCombine(SDNode *N,
16296 TargetLowering::DAGCombinerInfo &DCI,
16297 SelectionDAG &DAG) {
16298 unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
16299 bool Invert = false;
16300 SDValue TestSrc = N->getOperand(1);
16301 SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
16302
16303 if (TestSrc == NewTestSrc)
16304 return SDValue();
16305
16306 unsigned NewOpc = N->getOpcode();
16307 if (Invert) {
16308 if (NewOpc == AArch64ISD::TBZ)
16309 NewOpc = AArch64ISD::TBNZ;
16310 else {
16311 assert(NewOpc == AArch64ISD::TBNZ);
16312 NewOpc = AArch64ISD::TBZ;
16313 }
16314 }
16315
16316 SDLoc DL(N);
16317 return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
16318 DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
16319}
16320
16321// vselect (v1i1 setcc) ->
16322// vselect (v1iXX setcc) (XX is the size of the compared operand type)
16323// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
16324// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
16325// such VSELECT.
16326static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
16327 SDValue N0 = N->getOperand(0);
16328 EVT CCVT = N0.getValueType();
16329
16330 // Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
16331 // into (OR (ASR lhs, N-1), 1), which requires less instructions for the
16332 // supported types.
16333 SDValue SetCC = N->getOperand(0);
16334 if (SetCC.getOpcode() == ISD::SETCC &&
16335 SetCC.getOperand(2) == DAG.getCondCode(ISD::SETGT)) {
16336 SDValue CmpLHS = SetCC.getOperand(0);
16337 EVT VT = CmpLHS.getValueType();
16338 SDNode *CmpRHS = SetCC.getOperand(1).getNode();
16339 SDNode *SplatLHS = N->getOperand(1).getNode();
16340 SDNode *SplatRHS = N->getOperand(2).getNode();
16341 APInt SplatLHSVal;
16342 if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
16343 VT.isSimple() &&
16344 is_contained(
16345 makeArrayRef({MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
16346 MVT::v2i32, MVT::v4i32, MVT::v2i64}),
16347 VT.getSimpleVT().SimpleTy) &&
16348 ISD::isConstantSplatVector(SplatLHS, SplatLHSVal) &&
16349 SplatLHSVal.isOne() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
16350 ISD::isConstantSplatVectorAllOnes(SplatRHS)) {
16351 unsigned NumElts = VT.getVectorNumElements();
16352 SmallVector<SDValue, 8> Ops(
16353 NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
16354 VT.getScalarType()));
16355 SDValue Val = DAG.getBuildVector(VT, SDLoc(N), Ops);
16356
16357 auto Shift = DAG.getNode(ISD::SRA, SDLoc(N), VT, CmpLHS, Val);
16358 auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
16359 return Or;
16360 }
16361 }
16362
16363 if (N0.getOpcode() != ISD::SETCC ||
16364 CCVT.getVectorElementCount() != ElementCount::getFixed(1) ||
16365 CCVT.getVectorElementType() != MVT::i1)
16366 return SDValue();
16367
16368 EVT ResVT = N->getValueType(0);
16369 EVT CmpVT = N0.getOperand(0).getValueType();
16370 // Only combine when the result type is of the same size as the compared
16371 // operands.
16372 if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
16373 return SDValue();
16374
16375 SDValue IfTrue = N->getOperand(1);
16376 SDValue IfFalse = N->getOperand(2);
16377 SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
16378 N0.getOperand(0), N0.getOperand(1),
16379 cast<CondCodeSDNode>(N0.getOperand(2))->get());
16380 return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
16381 IfTrue, IfFalse);
16382}
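// A per-lane standalone sketch (hypothetical, plain C++, assuming int32_t
// lanes and an arithmetic right shift for signed values) of the sign-select
// identity used above: select(x > -1, 1, -1) == ((x >> (N-1)) | 1).
static bool signSelectIdentityHolds(int32_t X) {
  int32_t Selected = (X > -1) ? 1 : -1; // VSELECT (setgt x, -1), 1, -1
  int32_t Rewritten = (X >> 31) | 1;    // (OR (ASR x, 31), 1)
  return Selected == Rewritten;         // true for every 32-bit X
}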
16383
16384/// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
16385/// the compare-mask instructions rather than going via NZCV, even if LHS and
16386/// RHS are really scalar. This replaces any scalar setcc in the above pattern
16387/// with a vector one followed by a DUP shuffle on the result.
16388static SDValue performSelectCombine(SDNode *N,
16389 TargetLowering::DAGCombinerInfo &DCI) {
16390 SelectionDAG &DAG = DCI.DAG;
16391 SDValue N0 = N->getOperand(0);
16392 EVT ResVT = N->getValueType(0);
16393
16394 if (N0.getOpcode() != ISD::SETCC)
16395 return SDValue();
16396
16397 if (ResVT.isScalableVector())
16398 return SDValue();
16399
16400 // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
16401 // scalar SetCCResultType. We also don't expect vectors, because we assume
16402 // that selects fed by vector SETCCs are canonicalized to VSELECT.
16403 assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
16404 "Scalar-SETCC feeding SELECT has unexpected result type!");
16405
16406 // If NumMaskElts == 0, the comparison is larger than the select result. The
16407 // largest real NEON comparison is 64-bits per lane, which means the result is
16408 // at most 32-bits and an illegal vector. Just bail out for now.
16409 EVT SrcVT = N0.getOperand(0).getValueType();
16410
16411 // Don't try to do this optimization when the setcc itself has i1 operands.
16412 // There are no legal vectors of i1, so this would be pointless.
16413 if (SrcVT == MVT::i1)
16414 return SDValue();
16415
16416 int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
16417 if (!ResVT.isVector() || NumMaskElts == 0)
16418 return SDValue();
16419
16420 SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
16421 EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
16422
16423 // Also bail out if the vector CCVT isn't the same size as ResVT.
16424 // This can happen if the SETCC operand size doesn't divide the ResVT size
16425 // (e.g., f64 vs v3f32).
16426 if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
16427 return SDValue();
16428
16429 // Make sure we didn't create illegal types, if we're not supposed to.
16430 assert(DCI.isBeforeLegalize() ||
16431 DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
16432
16433 // First perform a vector comparison, where lane 0 is the one we're interested
16434 // in.
16435 SDLoc DL(N0);
16436 SDValue LHS =
16437 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
16438 SDValue RHS =
16439 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
16440 SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
16441
16442 // Now duplicate the comparison mask we want across all other lanes.
16443 SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
16444 SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
16445 Mask = DAG.getNode(ISD::BITCAST, DL,
16446 ResVT.changeVectorElementTypeToInteger(), Mask);
16447
16448 return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
16449}
16450
16451/// Get rid of unnecessary NVCASTs (that don't change the type).
16452static SDValue performNVCASTCombine(SDNode *N) {
16453 if (N->getValueType(0) == N->getOperand(0).getValueType())
16454 return N->getOperand(0);
16455
16456 return SDValue();
16457}
16458
16459// If all users of the globaladdr are of the form (globaladdr + constant), find
16460// the smallest constant, fold it into the globaladdr's offset and rewrite the
16461// globaladdr as (globaladdr + constant) - constant.
16462static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
16463 const AArch64Subtarget *Subtarget,
16464 const TargetMachine &TM) {
16465 auto *GN = cast<GlobalAddressSDNode>(N);
16466 if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
16467 AArch64II::MO_NO_FLAG)
16468 return SDValue();
16469
16470 uint64_t MinOffset = -1ull;
16471 for (SDNode *N : GN->uses()) {
16472 if (N->getOpcode() != ISD::ADD)
16473 return SDValue();
16474 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
16475 if (!C)
16476 C = dyn_cast<ConstantSDNode>(N->getOperand(1));
16477 if (!C)
16478 return SDValue();
16479 MinOffset = std::min(MinOffset, C->getZExtValue());
16480 }
16481 uint64_t Offset = MinOffset + GN->getOffset();
16482
16483 // Require that the new offset is larger than the existing one. Otherwise, we
16484 // can end up oscillating between two possible DAGs, for example,
16485 // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
16486 if (Offset <= uint64_t(GN->getOffset()))
16487 return SDValue();
16488
16489 // Check whether folding this offset is legal. It must not go out of bounds of
16490 // the referenced object to avoid violating the code model, and must be
16491 // smaller than 2^21 because this is the largest offset expressible in all
16492 // object formats.
16493 //
16494 // This check also prevents us from folding negative offsets, which will end
16495 // up being treated in the same way as large positive ones. They could also
16496 // cause code model violations, and aren't really common enough to matter.
16497 if (Offset >= (1 << 21))
16498 return SDValue();
16499
16500 const GlobalValue *GV = GN->getGlobal();
16501 Type *T = GV->getValueType();
16502 if (!T->isSized() ||
16503 Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
16504 return SDValue();
16505
16506 SDLoc DL(GN);
16507 SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
16508 return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
16509 DAG.getConstant(MinOffset, DL, MVT::i64));
16510}
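// A standalone sketch (hypothetical helper, not LLVM code) of the guards used
// above when folding the smallest per-use addend into the global's offset:
// the new offset must strictly grow (to avoid oscillating between two DAGs),
// must stay below 2^21, and must not step outside the referenced object.
// Requires <cstdint> if extracted standalone.
static bool canFoldGlobalOffset(uint64_t ExistingOffset, uint64_t MinUseAddend,
                                uint64_t ObjectSize) {
  uint64_t NewOffset = ExistingOffset + MinUseAddend;
  return NewOffset > ExistingOffset && NewOffset < (1u << 21) &&
         NewOffset <= ObjectSize;
}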
16511
16512// Turns the vector of indices into a vector of byte offsets by scaling Offset
16513// by (BitWidth / 8).
16514static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
16515 SDLoc DL, unsigned BitWidth) {
16516 assert(Offset.getValueType().isScalableVector() &&
16517 "This method is only for scalable vectors of offsets");
16518
16519 SDValue Shift = DAG.getConstant(Log2_32(BitWidth / 8), DL, MVT::i64);
16520 SDValue SplatShift = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Shift);
16521
16522 return DAG.getNode(ISD::SHL, DL, MVT::nxv2i64, Offset, SplatShift);
16523}
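// A scalar standalone sketch (hypothetical, per element, assuming a
// power-of-two BitWidth that is a multiple of 8) of the scaling above: an
// element index becomes a byte offset by shifting left by log2(BitWidth / 8),
// which is what the splatted SHL does for every lane at once.
// Requires <cstdint> if extracted standalone.
static uint64_t indexToByteOffset(uint64_t Index, unsigned BitWidth) {
  unsigned ShiftAmt = 0;
  for (unsigned Bytes = BitWidth / 8; Bytes > 1; Bytes >>= 1)
    ++ShiftAmt;               // ShiftAmt == log2(BitWidth / 8)
  return Index << ShiftAmt;   // e.g. index 3 with 32-bit elements -> 12 bytes
}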
16524
16525/// Check if the value of \p OffsetInBytes can be used as an immediate for
16526/// the gather load/prefetch and scatter store instructions with vector base and
16527/// immediate offset addressing mode:
16528///
16529/// [<Zn>.[S|D]{, #<imm>}]
16530///
16531/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
16532inline static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes,
16533 unsigned ScalarSizeInBytes) {
16534 // The immediate is not a multiple of the scalar size.
16535 if (OffsetInBytes % ScalarSizeInBytes)
16536 return false;
16537
16538 // The immediate is out of range.
16539 if (OffsetInBytes / ScalarSizeInBytes > 31)
16540 return false;
16541
16542 return true;
16543}
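// A quick standalone illustration (calling the helper defined just above) of
// the rule for 32-bit elements (ScalarSizeInBytes == 4): the valid immediates
// are exactly the multiples of 4 from 0 to 124 (i.e. 4 * k for k = 0..31).
// Requires <cassert> if extracted standalone.
static void sveVecImmAddrModeExamples() {
  assert(isValidImmForSVEVecImmAddrMode(0u, 4u));    // 4 * 0
  assert(isValidImmForSVEVecImmAddrMode(124u, 4u));  // 4 * 31
  assert(!isValidImmForSVEVecImmAddrMode(6u, 4u));   // not a multiple of 4
  assert(!isValidImmForSVEVecImmAddrMode(128u, 4u)); // 4 * 32, out of range
}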
16544
16545/// Check if the value of \p Offset represents a valid immediate for the SVE
16546/// gather load/prefetch and scatter store instructions with vector base and
16547/// immediate offset addressing mode:
16548///
16549/// [<Zn>.[S|D]{, #<imm>}]
16550///
16551/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
16552static bool isValidImmForSVEVecImmAddrMode(SDValue Offset,
16553 unsigned ScalarSizeInBytes) {
16554 ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
16555 return OffsetConst && isValidImmForSVEVecImmAddrMode(
16556 OffsetConst->getZExtValue(), ScalarSizeInBytes);
16557}
16558
16559static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
16560 unsigned Opcode,
16561 bool OnlyPackedOffsets = true) {
16562 const SDValue Src = N->getOperand(2);
16563 const EVT SrcVT = Src->getValueType(0);
16564 assert(SrcVT.isScalableVector() &&
16565 "Scatter stores are only possible for SVE vectors");
16566
16567 SDLoc DL(N);
16568 MVT SrcElVT = SrcVT.getVectorElementType().getSimpleVT();
16569
16570 // Make sure that source data will fit into an SVE register
16571 if (SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
16572 return SDValue();
16573
16574 // For FPs, ACLE only supports _packed_ single and double precision types.
16575 if (SrcElVT.isFloatingPoint())
16576 if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
16577 return SDValue();
16578
16579 // Depending on the addressing mode, this is either a pointer or a vector of
16580 // pointers (that fits into one register)
16581 SDValue Base = N->getOperand(4);
16582 // Depending on the addressing mode, this is either a single offset or a
16583 // vector of offsets (that fits into one register)
16584 SDValue Offset = N->getOperand(5);
16585
16586 // For "scalar + vector of indices", just scale the indices. This only
16587 // applies to non-temporal scatters because there's no instruction that takes
16588 // indices.
16589 if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
16590 Offset =
16591 getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits());
16592 Opcode = AArch64ISD::SSTNT1_PRED;
16593 }
16594
16595 // In the case of non-temporal scatter stores there's only one SVE instruction
16596 // per data-size: "scalar + vector", i.e.
16597 // * stnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
16598 // Since we do have intrinsics that allow the arguments to be in a different
16599 // order, we may need to swap them to match the spec.
16600 if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector())
16601 std::swap(Base, Offset);
16602
16603 // SST1_IMM requires that the offset is an immediate that is:
16604 // * a multiple of #SizeInBytes,
16605 // * in the range [0, 31 x #SizeInBytes],
16606 // where #SizeInBytes is the size in bytes of the stored items. For
16607 // immediates outside that range and non-immediate scalar offsets use SST1 or
16608 // SST1_UXTW instead.
16609 if (Opcode == AArch64ISD::SST1_IMM_PRED) {
16610 if (!isValidImmForSVEVecImmAddrMode(Offset,
16611 SrcVT.getScalarSizeInBits() / 8)) {
16612 if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
16613 Opcode = AArch64ISD::SST1_UXTW_PRED;
16614 else
16615 Opcode = AArch64ISD::SST1_PRED;
16616
16617 std::swap(Base, Offset);
16618 }
16619 }
16620
16621 auto &TLI = DAG.getTargetLoweringInfo();
16622 if (!TLI.isTypeLegal(Base.getValueType()))
16623 return SDValue();
16624
16625 // Some scatter store variants allow unpacked offsets, but only as nxv2i32
16626 // vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
16627 // nxv2i64. Legalize accordingly.
16628 if (!OnlyPackedOffsets &&
16629 Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
16630 Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
16631
16632 if (!TLI.isTypeLegal(Offset.getValueType()))
16633 return SDValue();
16634
16635 // Source value type that is representable in hardware
16636 EVT HwSrcVt = getSVEContainerType(SrcVT);
16637
16638 // Keep the original type of the input data to store - this is needed to be
16639 // able to select the correct instruction, e.g. ST1B, ST1H, ST1W and ST1D. For
16640 // FP values we want the integer equivalent, so just use HwSrcVt.
16641 SDValue InputVT = DAG.getValueType(SrcVT);
16642 if (SrcVT.isFloatingPoint())
16643 InputVT = DAG.getValueType(HwSrcVt);
16644
16645 SDVTList VTs = DAG.getVTList(MVT::Other);
16646 SDValue SrcNew;
16647
16648 if (Src.getValueType().isFloatingPoint())
16649 SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Src);
16650 else
16651 SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Src);
16652
16653 SDValue Ops[] = {N->getOperand(0), // Chain
16654 SrcNew,
16655 N->getOperand(3), // Pg
16656 Base,
16657 Offset,
16658 InputVT};
16659
16660 return DAG.getNode(Opcode, DL, VTs, Ops);
16661}
16662
16663static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
16664 unsigned Opcode,
16665 bool OnlyPackedOffsets = true) {
16666 const EVT RetVT = N->getValueType(0);
16667 assert(RetVT.isScalableVector() &&
16668 "Gather loads are only possible for SVE vectors");
16669
16670 SDLoc DL(N);
16671
16672 // Make sure that the loaded data will fit into an SVE register
16673 if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
16674 return SDValue();
16675
16676 // Depending on the addressing mode, this is either a pointer or a vector of
16677 // pointers (that fits into one register)
16678 SDValue Base = N->getOperand(3);
16679 // Depending on the addressing mode, this is either a single offset or a
16680 // vector of offsets (that fits into one register)
16681 SDValue Offset = N->getOperand(4);
16682
16683 // For "scalar + vector of indices", just scale the indices. This only
16684 // applies to non-temporal gathers because there's no instruction that takes
16685 // indices.
16686 if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
16687 Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
16688 RetVT.getScalarSizeInBits());
16689 Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
16690 }
16691
16692 // In the case of non-temporal gather loads there's only one SVE instruction
16693 // per data-size: "scalar + vector", i.e.
16694 // * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
16695 // Since we do have intrinsics that allow the arguments to be in a different
16696 // order, we may need to swap them to match the spec.
16697 if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO &&
16698 Offset.getValueType().isVector())
16699 std::swap(Base, Offset);
16700
16701 // GLD{FF}1_IMM requires that the offset is an immediate that is:
16702 // * a multiple of #SizeInBytes,
16703 // * in the range [0, 31 x #SizeInBytes],
16704 // where #SizeInBytes is the size in bytes of the loaded items. For
16705 // immediates outside that range and non-immediate scalar offsets use
16706 // GLD1_MERGE_ZERO or GLD1_UXTW_MERGE_ZERO instead.
16707 if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
16708 Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) {
16709 if (!isValidImmForSVEVecImmAddrMode(Offset,
16710 RetVT.getScalarSizeInBits() / 8)) {
16711 if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
16712 Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
16713 ? AArch64ISD::GLD1_UXTW_MERGE_ZERO
16714 : AArch64ISD::GLDFF1_UXTW_MERGE_ZERO;
16715 else
16716 Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
16717 ? AArch64ISD::GLD1_MERGE_ZERO
16718 : AArch64ISD::GLDFF1_MERGE_ZERO;
16719
16720 std::swap(Base, Offset);
16721 }
16722 }
16723
16724 auto &TLI = DAG.getTargetLoweringInfo();
16725 if (!TLI.isTypeLegal(Base.getValueType()))
16726 return SDValue();
16727
16728 // Some gather load variants allow unpacked offsets, but only as nxv2i32
16729 // vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
16730 // nxv2i64. Legalize accordingly.
16731 if (!OnlyPackedOffsets &&
16732 Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
16733 Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
16734
16735 // Return value type that is representable in hardware
16736 EVT HwRetVt = getSVEContainerType(RetVT);
16737
16738 // Keep the original output value type around - this is needed to be able to
16739 // select the correct instruction, e.g. LD1B, LD1H, LD1W and LD1D. For FP
16740 // values we want the integer equivalent, so just use HwRetVT.
16741 SDValue OutVT = DAG.getValueType(RetVT);
16742 if (RetVT.isFloatingPoint())
16743 OutVT = DAG.getValueType(HwRetVt);
16744
16745 SDVTList VTs = DAG.getVTList(HwRetVt, MVT::Other);
16746 SDValue Ops[] = {N->getOperand(0), // Chain
16747 N->getOperand(2), // Pg
16748 Base, Offset, OutVT};
16749
16750 SDValue Load = DAG.getNode(Opcode, DL, VTs, Ops);
16751 SDValue LoadChain = SDValue(Load.getNode(), 1);
16752
16753 if (RetVT.isInteger() && (RetVT != HwRetVt))
16754 Load = DAG.getNode(ISD::TRUNCATE, DL, RetVT, Load.getValue(0));
16755
16756 // If the original return value was FP, bitcast accordingly. Doing it here
16757 // means that we can avoid adding TableGen patterns for FPs.
16758 if (RetVT.isFloatingPoint())
16759 Load = DAG.getNode(ISD::BITCAST, DL, RetVT, Load.getValue(0));
16760
16761 return DAG.getMergeValues({Load, LoadChain}, DL);
16762}
16763
16764static SDValue
16765performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16766 SelectionDAG &DAG) {
16767 SDLoc DL(N);
16768 SDValue Src = N->getOperand(0);
16769 unsigned Opc = Src->getOpcode();
16770
16771 // Sign extend of an unsigned unpack -> signed unpack
16772 if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
16773
16774 unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
16775 : AArch64ISD::SUNPKLO;
16776
16777 // Push the sign extend to the operand of the unpack
16778 // This is necessary where, for example, the operand of the unpack
16779 // is another unpack:
16780 // 4i32 sign_extend_inreg (4i32 uunpklo(8i16 uunpklo (16i8 opnd)), from 4i8)
16781 // ->
16782 // 4i32 sunpklo (8i16 sign_extend_inreg(8i16 uunpklo (16i8 opnd), from 8i8)
16783 // ->
16784 // 4i32 sunpklo(8i16 sunpklo(16i8 opnd))
16785 SDValue ExtOp = Src->getOperand(0);
16786 auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
16787 EVT EltTy = VT.getVectorElementType();
16788 (void)EltTy;
16789
16790 assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
16791 "Sign extending from an invalid type");
16792
16793 EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());
16794
16795 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ExtOp.getValueType(),
16796 ExtOp, DAG.getValueType(ExtVT));
16797
16798 return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
16799 }
16800
16801 if (DCI.isBeforeLegalizeOps())
16802 return SDValue();
16803
16804 if (!EnableCombineMGatherIntrinsics)
16805 return SDValue();
16806
16807 // SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
16808 // for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.
16809 unsigned NewOpc;
16810 unsigned MemVTOpNum = 4;
16811 switch (Opc) {
16812 case AArch64ISD::LD1_MERGE_ZERO:
16813 NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
16814 MemVTOpNum = 3;
16815 break;
16816 case AArch64ISD::LDNF1_MERGE_ZERO:
16817 NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO;
16818 MemVTOpNum = 3;
16819 break;
16820 case AArch64ISD::LDFF1_MERGE_ZERO:
16821 NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO;
16822 MemVTOpNum = 3;
16823 break;
16824 case AArch64ISD::GLD1_MERGE_ZERO:
16825 NewOpc = AArch64ISD::GLD1S_MERGE_ZERO;
16826 break;
16827 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
16828 NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
16829 break;
16830 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
16831 NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
16832 break;
16833 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
16834 NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
16835 break;
16836 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
16837 NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
16838 break;
16839 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
16840 NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
16841 break;
16842 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
16843 NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO;
16844 break;
16845 case AArch64ISD::GLDFF1_MERGE_ZERO:
16846 NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO;
16847 break;
16848 case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
16849 NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO;
16850 break;
16851 case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
16852 NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO;
16853 break;
16854 case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
16855 NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO;
16856 break;
16857 case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
16858 NewOpc = AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO;
16859 break;
16860 case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
16861 NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO;
16862 break;
16863 case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
16864 NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO;
16865 break;
16866 case AArch64ISD::GLDNT1_MERGE_ZERO:
16867 NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO;
16868 break;
16869 default:
16870 return SDValue();
16871 }
16872
16873 EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16874 EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
16875
16876 if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
16877 return SDValue();
16878
16879 EVT DstVT = N->getValueType(0);
16880 SDVTList VTs = DAG.getVTList(DstVT, MVT::Other);
16881
16882 SmallVector<SDValue, 5> Ops;
16883 for (unsigned I = 0; I < Src->getNumOperands(); ++I)
16884 Ops.push_back(Src->getOperand(I));
16885
16886 SDValue ExtLoad = DAG.getNode(NewOpc, SDLoc(N), VTs, Ops);
16887 DCI.CombineTo(N, ExtLoad);
16888 DCI.CombineTo(Src.getNode(), ExtLoad, ExtLoad.getValue(1));
16889
16890 // Return N so it doesn't get rechecked
16891 return SDValue(N, 0);
16892}
16893
16894/// Legalize the gather prefetch (scalar + vector addressing mode) when the
16895/// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
16896/// != nxv2i32) do not need legalization.
16897static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG) {
16898 const unsigned OffsetPos = 4;
16899 SDValue Offset = N->getOperand(OffsetPos);
16900
16901 // Not an unpacked vector, bail out.
16902 if (Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
16903 return SDValue();
16904
16905 // Extend the unpacked offset vector to 64-bit lanes.
16906 SDLoc DL(N);
16907 Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset);
16908 SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
16909 // Replace the offset operand with the 64-bit one.
16910 Ops[OffsetPos] = Offset;
16911
16912 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
16913}
16914
16915/// Combines a node carrying the intrinsic
16916/// `aarch64_sve_prf<T>_gather_scalar_offset` into a node that uses
16917/// `aarch64_sve_prfb_gather_uxtw_index` when the scalar offset passed to
16918/// `aarch64_sve_prf<T>_gather_scalar_offset` is not a valid immediate for the
16919/// sve gather prefetch instruction with vector plus immediate addressing mode.
16920static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG,
16921 unsigned ScalarSizeInBytes) {
16922 const unsigned ImmPos = 4, OffsetPos = 3;
16923 // No need to combine the node if the immediate is valid...
16924 if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
16925 return SDValue();
16926
16927 // ...otherwise swap the offset base with the offset...
16928 SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
16929 std::swap(Ops[ImmPos], Ops[OffsetPos]);
16930 // ...and remap the intrinsic `aarch64_sve_prf<T>_gather_scalar_offset` to
16931 // `aarch64_sve_prfb_gather_uxtw_index`.
16932 SDLoc DL(N);
16933 Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL,
16934 MVT::i64);
16935
16936 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
16937}
16938
16939// Return true if the vector operation can guarantee only the first lane of its
16940// result contains data, with all bits in other lanes set to zero.
16941static bool isLanes1toNKnownZero(SDValue Op) {
16942 switch (Op.getOpcode()) {
16943 default:
16944 return false;
16945 case AArch64ISD::ANDV_PRED:
16946 case AArch64ISD::EORV_PRED:
16947 case AArch64ISD::FADDA_PRED:
16948 case AArch64ISD::FADDV_PRED:
16949 case AArch64ISD::FMAXNMV_PRED:
16950 case AArch64ISD::FMAXV_PRED:
16951 case AArch64ISD::FMINNMV_PRED:
16952 case AArch64ISD::FMINV_PRED:
16953 case AArch64ISD::ORV_PRED:
16954 case AArch64ISD::SADDV_PRED:
16955 case AArch64ISD::SMAXV_PRED:
16956 case AArch64ISD::SMINV_PRED:
16957 case AArch64ISD::UADDV_PRED:
16958 case AArch64ISD::UMAXV_PRED:
16959 case AArch64ISD::UMINV_PRED:
16960 return true;
16961 }
16962}
16963
16964static SDValue removeRedundantInsertVectorElt(SDNode *N) {
16965 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
16966 SDValue InsertVec = N->getOperand(0);
16967 SDValue InsertElt = N->getOperand(1);
16968 SDValue InsertIdx = N->getOperand(2);
16969
16970 // We only care about inserts into the first element...
16971 if (!isNullConstant(InsertIdx))
16972 return SDValue();
16973 // ...of a zero'd vector...
16974 if (!ISD::isConstantSplatVectorAllZeros(InsertVec.getNode()))
16975 return SDValue();
16976 // ...where the inserted data was previously extracted...
16977 if (InsertElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16978 return SDValue();
16979
16980 SDValue ExtractVec = InsertElt.getOperand(0);
16981 SDValue ExtractIdx = InsertElt.getOperand(1);
16982
16983 // ...from the first element of a vector.
16984 if (!isNullConstant(ExtractIdx))
16985 return SDValue();
16986
16987 // If we get here we are effectively trying to zero lanes 1-N of a vector.
16988
16989 // Ensure there's no type conversion going on.
16990 if (N->getValueType(0) != ExtractVec.getValueType())
16991 return SDValue();
16992
16993 if (!isLanes1toNKnownZero(ExtractVec))
16994 return SDValue();
16995
16996 // The explicit zeroing is redundant.
16997 return ExtractVec;
16998}
16999
17000static SDValue
17001performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
17002 if (SDValue Res = removeRedundantInsertVectorElt(N))
17003 return Res;
17004
17005 return performPostLD1Combine(N, DCI, true);
17006}
17007
17008SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
17009 EVT Ty = N->getValueType(0);
17010 if (Ty.isInteger())
17011 return SDValue();
17012
17013 EVT IntTy = Ty.changeVectorElementTypeToInteger();
17014 EVT ExtIntTy = getPackedSVEVectorVT(IntTy.getVectorElementCount());
17015 if (ExtIntTy.getVectorElementType().getScalarSizeInBits() <
17016 IntTy.getVectorElementType().getScalarSizeInBits())
17017 return SDValue();
17018
17019 SDLoc DL(N);
17020 SDValue LHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(0)),
17021 DL, ExtIntTy);
17022 SDValue RHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(1)),
17023 DL, ExtIntTy);
17024 SDValue Idx = N->getOperand(2);
17025 SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ExtIntTy, LHS, RHS, Idx);
17026 SDValue Trunc = DAG.getAnyExtOrTrunc(Splice, DL, IntTy);
17027 return DAG.getBitcast(Ty, Trunc);
17028}
17029
17030SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
17031 DAGCombinerInfo &DCI) const {
17032 SelectionDAG &DAG = DCI.DAG;
17033 switch (N->getOpcode()) {
17034 default:
17035 LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
17036 break;
17037 case ISD::ADD:
17038 case ISD::SUB:
17039 return performAddSubCombine(N, DCI, DAG);
17040 case ISD::XOR:
17041 return performXorCombine(N, DAG, DCI, Subtarget);
17042 case ISD::MUL:
17043 return performMulCombine(N, DAG, DCI, Subtarget);
17044 case ISD::SINT_TO_FP:
17045 case ISD::UINT_TO_FP:
17046 return performIntToFpCombine(N, DAG, Subtarget);
17047 case ISD::FP_TO_SINT:
17048 case ISD::FP_TO_UINT:
17049 return performFpToIntCombine(N, DAG, DCI, Subtarget);
17050 case ISD::FDIV:
17051 return performFDivCombine(N, DAG, DCI, Subtarget);
17052 case ISD::OR:
17053 return performORCombine(N, DCI, Subtarget);
17054 case ISD::AND:
17055 return performANDCombine(N, DCI);
17056 case ISD::SRL:
17057 return performSRLCombine(N, DCI);
17058 case ISD::INTRINSIC_WO_CHAIN:
17059 return performIntrinsicCombine(N, DCI, Subtarget);
17060 case ISD::ANY_EXTEND:
17061 case ISD::ZERO_EXTEND:
17062 case ISD::SIGN_EXTEND:
17063 return performExtendCombine(N, DCI, DAG);
17064 case ISD::SIGN_EXTEND_INREG:
17065 return performSignExtendInRegCombine(N, DCI, DAG);
17066 case ISD::TRUNCATE:
17067 return performVectorTruncateCombine(N, DCI, DAG);
17068 case ISD::CONCAT_VECTORS:
17069 return performConcatVectorsCombine(N, DCI, DAG);
17070 case ISD::INSERT_SUBVECTOR:
17071 return performInsertSubvectorCombine(N, DCI, DAG);
17072 case ISD::SELECT:
17073 return performSelectCombine(N, DCI);
17074 case ISD::VSELECT:
17075 return performVSelectCombine(N, DCI.DAG);
17076 case ISD::SETCC:
17077 return performSETCCCombine(N, DAG);
17078 case ISD::LOAD:
17079 if (performTBISimplification(N->getOperand(1), DCI, DAG))
17080 return SDValue(N, 0);
17081 break;
17082 case ISD::STORE:
17083 return performSTORECombine(N, DCI, DAG, Subtarget);
17084 case ISD::VECTOR_SPLICE:
17085 return performSVESpliceCombine(N, DAG);
17086 case AArch64ISD::BRCOND:
17087 return performBRCONDCombine(N, DCI, DAG);
17088 case AArch64ISD::TBNZ:
17089 case AArch64ISD::TBZ:
17090 return performTBZCombine(N, DCI, DAG);
17091 case AArch64ISD::CSEL:
17092 return performCSELCombine(N, DCI, DAG);
17093 case AArch64ISD::DUP:
17094 return performPostLD1Combine(N, DCI, false);
17095 case AArch64ISD::NVCAST:
17096 return performNVCASTCombine(N);
17097 case AArch64ISD::SPLICE:
17098 return performSpliceCombine(N, DAG);
17099 case AArch64ISD::UZP1:
17100 return performUzpCombine(N, DAG);
17101 case AArch64ISD::SETCC_MERGE_ZERO:
17102 return performSetccMergeZeroCombine(N, DAG);
17103 case AArch64ISD::GLD1_MERGE_ZERO:
17104 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
17105 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
17106 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
17107 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
17108 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
17109 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
17110 case AArch64ISD::GLD1S_MERGE_ZERO:
17111 case AArch64ISD::GLD1S_SCALED_MERGE_ZERO:
17112 case AArch64ISD::GLD1S_UXTW_MERGE_ZERO:
17113 case AArch64ISD::GLD1S_SXTW_MERGE_ZERO:
17114 case AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO:
17115 case AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO:
17116 case AArch64ISD::GLD1S_IMM_MERGE_ZERO:
17117 return performGLD1Combine(N, DAG);
17118 case AArch64ISD::VASHR:
17119 case AArch64ISD::VLSHR:
17120 return performVectorShiftCombine(N, *this, DCI);
17121 case ISD::INSERT_VECTOR_ELT:
17122 return performInsertVectorEltCombine(N, DCI);
17123 case ISD::EXTRACT_VECTOR_ELT:
17124 return performExtractVectorEltCombine(N, DAG);
17125 case ISD::VECREDUCE_ADD:
17126 return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
17127 case ISD::INTRINSIC_VOID:
17128 case ISD::INTRINSIC_W_CHAIN:
17129 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
17130 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
17131 return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/);
17132 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
17133 return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/);
17134 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
17135 return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/);
17136 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
17137 return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/);
17138 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
17139 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
17140 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
17141 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
17142 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
17143 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
17144 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
17145 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
17146 return legalizeSVEGatherPrefetchOffsVec(N, DAG);
17147 case Intrinsic::aarch64_neon_ld2:
17148 case Intrinsic::aarch64_neon_ld3:
17149 case Intrinsic::aarch64_neon_ld4:
17150 case Intrinsic::aarch64_neon_ld1x2:
17151 case Intrinsic::aarch64_neon_ld1x3:
17152 case Intrinsic::aarch64_neon_ld1x4:
17153 case Intrinsic::aarch64_neon_ld2lane:
17154 case Intrinsic::aarch64_neon_ld3lane:
17155 case Intrinsic::aarch64_neon_ld4lane:
17156 case Intrinsic::aarch64_neon_ld2r:
17157 case Intrinsic::aarch64_neon_ld3r:
17158 case Intrinsic::aarch64_neon_ld4r:
17159 case Intrinsic::aarch64_neon_st2:
17160 case Intrinsic::aarch64_neon_st3:
17161 case Intrinsic::aarch64_neon_st4:
17162 case Intrinsic::aarch64_neon_st1x2:
17163 case Intrinsic::aarch64_neon_st1x3:
17164 case Intrinsic::aarch64_neon_st1x4:
17165 case Intrinsic::aarch64_neon_st2lane:
17166 case Intrinsic::aarch64_neon_st3lane:
17167 case Intrinsic::aarch64_neon_st4lane:
17168 return performNEONPostLDSTCombine(N, DCI, DAG);
17169 case Intrinsic::aarch64_sve_ldnt1:
17170 return performLDNT1Combine(N, DAG);
17171 case Intrinsic::aarch64_sve_ld1rq:
17172 return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(N, DAG);
17173 case Intrinsic::aarch64_sve_ld1ro:
17174 return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(N, DAG);
17175 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
17176 return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
17177 case Intrinsic::aarch64_sve_ldnt1_gather:
17178 return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
17179 case Intrinsic::aarch64_sve_ldnt1_gather_index:
17180 return performGatherLoadCombine(N, DAG,
17181 AArch64ISD::GLDNT1_INDEX_MERGE_ZERO);
17182 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
17183 return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
17184 case Intrinsic::aarch64_sve_ld1:
17185 return performLD1Combine(N, DAG, AArch64ISD::LD1_MERGE_ZERO);
17186 case Intrinsic::aarch64_sve_ldnf1:
17187 return performLD1Combine(N, DAG, AArch64ISD::LDNF1_MERGE_ZERO);
17188 case Intrinsic::aarch64_sve_ldff1:
17189 return performLD1Combine(N, DAG, AArch64ISD::LDFF1_MERGE_ZERO);
17190 case Intrinsic::aarch64_sve_st1:
17191 return performST1Combine(N, DAG);
17192 case Intrinsic::aarch64_sve_stnt1:
17193 return performSTNT1Combine(N, DAG);
17194 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
17195 return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
17196 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
17197 return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
17198 case Intrinsic::aarch64_sve_stnt1_scatter:
17199 return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
17200 case Intrinsic::aarch64_sve_stnt1_scatter_index:
17201 return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX_PRED);
17202 case Intrinsic::aarch64_sve_ld1_gather:
17203 return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO);
17204 case Intrinsic::aarch64_sve_ld1_gather_index:
17205 return performGatherLoadCombine(N, DAG,
17206 AArch64ISD::GLD1_SCALED_MERGE_ZERO);
17207 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
17208 return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW_MERGE_ZERO,
17209 /*OnlyPackedOffsets=*/false);
17210 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
17211 return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW_MERGE_ZERO,
17212 /*OnlyPackedOffsets=*/false);
17213 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
17214 return performGatherLoadCombine(N, DAG,
17215 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO,
17216 /*OnlyPackedOffsets=*/false);
17217 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
17218 return performGatherLoadCombine(N, DAG,
17219 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO,
17220 /*OnlyPackedOffsets=*/false);
17221 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
17222 return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM_MERGE_ZERO);
17223 case Intrinsic::aarch64_sve_ldff1_gather:
17224 return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_MERGE_ZERO);
17225 case Intrinsic::aarch64_sve_ldff1_gather_index:
17226 return performGatherLoadCombine(N, DAG,
17227 AArch64ISD::GLDFF1_SCALED_MERGE_ZERO);
17228 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
17229 return performGatherLoadCombine(N, DAG,
17230 AArch64ISD::GLDFF1_SXTW_MERGE_ZERO,
17231 /*OnlyPackedOffsets=*/false);
17232 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
17233 return performGatherLoadCombine(N, DAG,
17234 AArch64ISD::GLDFF1_UXTW_MERGE_ZERO,
17235 /*OnlyPackedOffsets=*/false);
17236 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
17237 return performGatherLoadCombine(N, DAG,
17238 AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO,
17239 /*OnlyPackedOffsets=*/false);
17240 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
17241 return performGatherLoadCombine(N, DAG,
17242 AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO,
17243 /*OnlyPackedOffsets=*/false);
17244 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
17245 return performGatherLoadCombine(N, DAG,
17246 AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
17247 case Intrinsic::aarch64_sve_st1_scatter:
17248 return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED);
17249 case Intrinsic::aarch64_sve_st1_scatter_index:
17250 return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SCALED_PRED);
17251 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
17252 return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW_PRED,
17253 /*OnlyPackedOffsets=*/false);
17254 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
17255 return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW_PRED,
17256 /*OnlyPackedOffsets=*/false);
17257 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
17258 return performScatterStoreCombine(N, DAG,
17259 AArch64ISD::SST1_SXTW_SCALED_PRED,
17260 /*OnlyPackedOffsets=*/false);
17261 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
17262 return performScatterStoreCombine(N, DAG,
17263 AArch64ISD::SST1_UXTW_SCALED_PRED,
17264 /*OnlyPackedOffsets=*/false);
17265 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
17266 return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED);
17267 case Intrinsic::aarch64_sve_tuple_get: {
17268 SDLoc DL(N);
17269 SDValue Chain = N->getOperand(0);
17270 SDValue Src1 = N->getOperand(2);
17271 SDValue Idx = N->getOperand(3);
17272
17273 uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
17274 EVT ResVT = N->getValueType(0);
17275 uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
17276 SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
17277 SDValue Val =
17278 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
17279 return DAG.getMergeValues({Val, Chain}, DL);
17280 }
17281 case Intrinsic::aarch64_sve_tuple_set: {
17282 SDLoc DL(N);
17283 SDValue Chain = N->getOperand(0);
17284 SDValue Tuple = N->getOperand(2);
17285 SDValue Idx = N->getOperand(3);
17286 SDValue Vec = N->getOperand(4);
17287
17288 EVT TupleVT = Tuple.getValueType();
17289 uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
17290
17291 uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
17292 uint64_t NumLanes =
17293 Vec.getValueType().getVectorElementCount().getKnownMinValue();
17294
17295 if ((TupleLanes % NumLanes) != 0)
17296 report_fatal_error("invalid tuple vector!");
17297
17298 uint64_t NumVecs = TupleLanes / NumLanes;
17299
17300 SmallVector<SDValue, 4> Opnds;
17301 for (unsigned I = 0; I < NumVecs; ++I) {
17302 if (I == IdxConst)
17303 Opnds.push_back(Vec);
17304 else {
17305 SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
17306 Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
17307 Vec.getValueType(), Tuple, ExtIdx));
17308 }
17309 }
17310 SDValue Concat =
17311 DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
17312 return DAG.getMergeValues({Concat, Chain}, DL);
17313 }
17314 case Intrinsic::aarch64_sve_tuple_create2:
17315 case Intrinsic::aarch64_sve_tuple_create3:
17316 case Intrinsic::aarch64_sve_tuple_create4: {
17317 SDLoc DL(N);
17318 SDValue Chain = N->getOperand(0);
17319
17320 SmallVector<SDValue, 4> Opnds;
17321 for (unsigned I = 2; I < N->getNumOperands(); ++I)
17322 Opnds.push_back(N->getOperand(I));
17323
17324 EVT VT = Opnds[0].getValueType();
17325 EVT EltVT = VT.getVectorElementType();
17326 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
17327 VT.getVectorElementCount() *
17328 (N->getNumOperands() - 2));
17329 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds);
17330 return DAG.getMergeValues({Concat, Chain}, DL);
17331 }
17332 case Intrinsic::aarch64_sve_ld2:
17333 case Intrinsic::aarch64_sve_ld3:
17334 case Intrinsic::aarch64_sve_ld4: {
17335 SDLoc DL(N);
17336 SDValue Chain = N->getOperand(0);
17337 SDValue Mask = N->getOperand(2);
17338 SDValue BasePtr = N->getOperand(3);
17339 SDValue LoadOps[] = {Chain, Mask, BasePtr};
17340 unsigned IntrinsicID =
17341 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
17342 SDValue Result =
17343 LowerSVEStructLoad(IntrinsicID, LoadOps, N->getValueType(0), DAG, DL);
17344 return DAG.getMergeValues({Result, Chain}, DL);
17345 }
17346 case Intrinsic::aarch64_rndr:
17347 case Intrinsic::aarch64_rndrrs: {
17348 unsigned IntrinsicID =
17349 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
17350 auto Register =
17351 (IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
17352 : AArch64SysReg::RNDRRS);
17353 SDLoc DL(N);
17354 SDValue A = DAG.getNode(
17355 AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other),
17356 N->getOperand(0), DAG.getConstant(Register, DL, MVT::i64));
17357 SDValue B = DAG.getNode(
17358 AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
17359 DAG.getConstant(0, DL, MVT::i32),
17360 DAG.getConstant(AArch64CC::NE, DL, MVT::i32), A.getValue(1));
17361 return DAG.getMergeValues(
17362 {A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
17363 }
17364 default:
17365 break;
17366 }
17367 break;
17368 case ISD::GlobalAddress:
17369 return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
17370 }
17371 return SDValue();
17372}
17373
17374// Check if the return value is used as only a return value, as otherwise
17375// we can't perform a tail-call. In particular, we need to check for
17376// target ISD nodes that are returns and any other "odd" constructs
17377// that the generic analysis code won't necessarily catch.
17378bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
17379 SDValue &Chain) const {
17380 if (N->getNumValues() != 1)
17381 return false;
17382 if (!N->hasNUsesOfValue(1, 0))
17383 return false;
17384
17385 SDValue TCChain = Chain;
17386 SDNode *Copy = *N->use_begin();
17387 if (Copy->getOpcode() == ISD::CopyToReg) {
17388 // If the copy has a glue operand, we conservatively assume it isn't safe to
17389 // perform a tail call.
17390 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
17391 MVT::Glue)
17392 return false;
17393 TCChain = Copy->getOperand(0);
17394 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
17395 return false;
17396
17397 bool HasRet = false;
17398 for (SDNode *Node : Copy->uses()) {
17399 if (Node->getOpcode() != AArch64ISD::RET_FLAG)
17400 return false;
17401 HasRet = true;
17402 }
17403
17404 if (!HasRet)
17405 return false;
17406
17407 Chain = TCChain;
17408 return true;
17409}
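// Editor's note: illustrative example, not part of the analyzed source.  The
// check above accepts only a DAG shape of the form
//     N --(value 0)--> CopyToReg (no glue operand) --> AArch64ISD::RET_FLAG
// (or N --> FP_EXTEND whose users are all RET_FLAG).  A node whose value also
// feeds, say, a store has more than one use of value 0 and is rejected, so the
// call cannot be turned into a tail call.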
17410
17411// Return whether an instruction can potentially be optimized to a tail
17412// call. This will cause the optimizers to attempt to move, or duplicate,
17413// return instructions to help enable tail call optimizations for this
17414// instruction.
17415bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
17416 return CI->isTailCall();
17417}
17418
17419bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
17420 SDValue &Offset,
17421 ISD::MemIndexedMode &AM,
17422 bool &IsInc,
17423 SelectionDAG &DAG) const {
17424 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
17425 return false;
17426
17427 Base = Op->getOperand(0);
17428 // All of the indexed addressing mode instructions take a signed
17429 // 9 bit immediate offset.
17430 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
17431 int64_t RHSC = RHS->getSExtValue();
17432 if (Op->getOpcode() == ISD::SUB)
17433 RHSC = -(uint64_t)RHSC;
17434 if (!isInt<9>(RHSC))
17435 return false;
17436 IsInc = (Op->getOpcode() == ISD::ADD);
17437 Offset = Op->getOperand(1);
17438 return true;
17439 }
17440 return false;
17441}
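// Editor's note: illustrative sketch, not part of the analyzed source.  The
// isInt<9>(RHSC) test above accepts exactly the signed 9-bit immediates used
// by the pre/post-indexed forms, i.e. offsets in [-256, 255].  The same
// predicate on a plain integer (helper name is hypothetical):
static bool isValidIndexedOffsetSketch(int64_t Offset) {
  return Offset >= -256 && Offset <= 255; // equivalent to isInt<9>(Offset)
}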
17442
17443bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
17444 SDValue &Offset,
17445 ISD::MemIndexedMode &AM,
17446 SelectionDAG &DAG) const {
17447 EVT VT;
17448 SDValue Ptr;
17449 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
17450 VT = LD->getMemoryVT();
17451 Ptr = LD->getBasePtr();
17452 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
17453 VT = ST->getMemoryVT();
17454 Ptr = ST->getBasePtr();
17455 } else
17456 return false;
17457
17458 bool IsInc;
17459 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
17460 return false;
17461 AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
17462 return true;
17463}
17464
17465bool AArch64TargetLowering::getPostIndexedAddressParts(
17466 SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
17467 ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
17468 EVT VT;
17469 SDValue Ptr;
17470 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
17471 VT = LD->getMemoryVT();
17472 Ptr = LD->getBasePtr();
17473 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
17474 VT = ST->getMemoryVT();
17475 Ptr = ST->getBasePtr();
17476 } else
17477 return false;
17478
17479 bool IsInc;
17480 if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
17481 return false;
17482 // Post-indexing updates the base, so it's not a valid transform
17483 // if that's not the same as the load's pointer.
17484 if (Ptr != Base)
17485 return false;
17486 AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
17487 return true;
17488}
17489
17490void AArch64TargetLowering::ReplaceBITCASTResults(
17491 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
17492 SDLoc DL(N);
17493 SDValue Op = N->getOperand(0);
17494 EVT VT = N->getValueType(0);
17495 EVT SrcVT = Op.getValueType();
17496
17497 if (VT.isScalableVector() && !isTypeLegal(VT) && isTypeLegal(SrcVT)) {
17498    assert(!VT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17499           "Expected fp->int bitcast!");
17500 SDValue CastResult = getSVESafeBitCast(getSVEContainerType(VT), Op, DAG);
17501 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, CastResult));
17502 return;
17503 }
17504
17505 if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
17506 return;
17507
17508 Op = SDValue(
17509 DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
17510 DAG.getUNDEF(MVT::i32), Op,
17511 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
17512 0);
17513 Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
17514 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
17515}
17516
17517static void ReplaceReductionResults(SDNode *N,
17518 SmallVectorImpl<SDValue> &Results,
17519 SelectionDAG &DAG, unsigned InterOp,
17520 unsigned AcrossOp) {
17521 EVT LoVT, HiVT;
17522 SDValue Lo, Hi;
17523 SDLoc dl(N);
17524 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
17525 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
17526 SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
17527 SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
17528 Results.push_back(SplitVal);
17529}
17530
17531static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
17532 SDLoc DL(N);
17533 SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N);
17534 SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
17535 DAG.getNode(ISD::SRL, DL, MVT::i128, N,
17536 DAG.getConstant(64, DL, MVT::i64)));
17537 return std::make_pair(Lo, Hi);
17538}
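// Editor's note: illustrative sketch, not part of the analyzed source.  The
// same lo/hi decomposition performed above, restated on a plain integer and
// assuming a host compiler with __int128 support (helper name is
// hypothetical; <cstdint> and <utility> assumed):
static std::pair<uint64_t, uint64_t> splitInt128Sketch(unsigned __int128 V) {
  uint64_t Lo = static_cast<uint64_t>(V);       // mirrors the ISD::TRUNCATE
  uint64_t Hi = static_cast<uint64_t>(V >> 64); // mirrors ISD::SRL by 64 + TRUNCATE
  return {Lo, Hi};
}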
17539
17540void AArch64TargetLowering::ReplaceExtractSubVectorResults(
17541 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
17542 SDValue In = N->getOperand(0);
17543 EVT InVT = In.getValueType();
17544
17545 // Common code will handle these just fine.
17546 if (!InVT.isScalableVector() || !InVT.isInteger())
17547 return;
17548
17549 SDLoc DL(N);
17550 EVT VT = N->getValueType(0);
17551
17552 // The following checks bail if this is not a halving operation.
17553
17554 ElementCount ResEC = VT.getVectorElementCount();
17555
17556 if (InVT.getVectorElementCount() != (ResEC * 2))
17557 return;
17558
17559 auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
17560 if (!CIndex)
17561 return;
17562
17563 unsigned Index = CIndex->getZExtValue();
17564 if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
17565 return;
17566
17567 unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
17568 EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());
17569
17570 SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
17571 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Half));
17572}
17573
17574// Create an even/odd pair of X registers holding integer value V.
17575static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
17576 SDLoc dl(V.getNode());
17577 SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
17578 SDValue VHi = DAG.getAnyExtOrTrunc(
17579 DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
17580 dl, MVT::i64);
17581 if (DAG.getDataLayout().isBigEndian())
17582 std::swap (VLo, VHi);
17583 SDValue RegClass =
17584 DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
17585 SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
17586 SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
17587 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
17588 return SDValue(
17589 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
17590}
17591
17592static void ReplaceCMP_SWAP_128Results(SDNode *N,
17593 SmallVectorImpl<SDValue> &Results,
17594 SelectionDAG &DAG,
17595 const AArch64Subtarget *Subtarget) {
17596  assert(N->getValueType(0) == MVT::i128 &&
17597         "AtomicCmpSwap on types less than 128 should be legal");
17598
17599 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
17600 if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
17601 // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
17602 // so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
17603 SDValue Ops[] = {
17604 createGPRPairNode(DAG, N->getOperand(2)), // Compare value
17605 createGPRPairNode(DAG, N->getOperand(3)), // Store value
17606 N->getOperand(1), // Ptr
17607 N->getOperand(0), // Chain in
17608 };
17609
17610 unsigned Opcode;
17611 switch (MemOp->getMergedOrdering()) {
17612 case AtomicOrdering::Monotonic:
17613 Opcode = AArch64::CASPX;
17614 break;
17615 case AtomicOrdering::Acquire:
17616 Opcode = AArch64::CASPAX;
17617 break;
17618 case AtomicOrdering::Release:
17619 Opcode = AArch64::CASPLX;
17620 break;
17621 case AtomicOrdering::AcquireRelease:
17622 case AtomicOrdering::SequentiallyConsistent:
17623 Opcode = AArch64::CASPALX;
17624 break;
17625 default:
17626      llvm_unreachable("Unexpected ordering!");
17627 }
17628
17629 MachineSDNode *CmpSwap = DAG.getMachineNode(
17630 Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
17631 DAG.setNodeMemRefs(CmpSwap, {MemOp});
17632
17633 unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
17634 if (DAG.getDataLayout().isBigEndian())
17635 std::swap(SubReg1, SubReg2);
17636 SDValue Lo = DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
17637 SDValue(CmpSwap, 0));
17638 SDValue Hi = DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
17639 SDValue(CmpSwap, 0));
17640 Results.push_back(
17641 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
17642 Results.push_back(SDValue(CmpSwap, 1)); // Chain out
17643 return;
17644 }
17645
17646 unsigned Opcode;
17647 switch (MemOp->getMergedOrdering()) {
17648 case AtomicOrdering::Monotonic:
17649 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
17650 break;
17651 case AtomicOrdering::Acquire:
17652 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
17653 break;
17654 case AtomicOrdering::Release:
17655 Opcode = AArch64::CMP_SWAP_128_RELEASE;
17656 break;
17657 case AtomicOrdering::AcquireRelease:
17658 case AtomicOrdering::SequentiallyConsistent:
17659 Opcode = AArch64::CMP_SWAP_128;
17660 break;
17661 default:
17662    llvm_unreachable("Unexpected ordering!");
17663 }
17664
17665 auto Desired = splitInt128(N->getOperand(2), DAG);
17666 auto New = splitInt128(N->getOperand(3), DAG);
17667 SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
17668 New.first, New.second, N->getOperand(0)};
17669 SDNode *CmpSwap = DAG.getMachineNode(
17670 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other),
17671 Ops);
17672 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
17673
17674 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
17675 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
17676 Results.push_back(SDValue(CmpSwap, 3));
17677}
17678
17679void AArch64TargetLowering::ReplaceNodeResults(
17680 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
17681 switch (N->getOpcode()) {
17682 default:
17683    llvm_unreachable("Don't know how to custom expand this");
17684 case ISD::BITCAST:
17685 ReplaceBITCASTResults(N, Results, DAG);
17686 return;
17687 case ISD::VECREDUCE_ADD:
17688 case ISD::VECREDUCE_SMAX:
17689 case ISD::VECREDUCE_SMIN:
17690 case ISD::VECREDUCE_UMAX:
17691 case ISD::VECREDUCE_UMIN:
17692 Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
17693 return;
17694
17695 case ISD::CTPOP:
17696 if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
17697 Results.push_back(Result);
17698 return;
17699 case AArch64ISD::SADDV:
17700 ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
17701 return;
17702 case AArch64ISD::UADDV:
17703 ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
17704 return;
17705 case AArch64ISD::SMINV:
17706 ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
17707 return;
17708 case AArch64ISD::UMINV:
17709 ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
17710 return;
17711 case AArch64ISD::SMAXV:
17712 ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
17713 return;
17714 case AArch64ISD::UMAXV:
17715 ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
17716 return;
17717 case ISD::FP_TO_UINT:
17718 case ISD::FP_TO_SINT:
17719 case ISD::STRICT_FP_TO_SINT:
17720 case ISD::STRICT_FP_TO_UINT:
17721    assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
17722 // Let normal code take care of it by not adding anything to Results.
17723 return;
17724 case ISD::ATOMIC_CMP_SWAP:
17725 ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
17726 return;
17727 case ISD::ATOMIC_LOAD:
17728 case ISD::LOAD: {
17729    assert(SDValue(N, 0).getValueType() == MVT::i128 &&
17730           "unexpected load's value type");
17731 MemSDNode *LoadNode = cast<MemSDNode>(N);
17732 if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) ||
17733 LoadNode->getMemoryVT() != MVT::i128) {
17734      // Loads that are neither volatile nor atomic are optimized later in
17735      // AArch64's load/store optimizer.
17736 return;
17737 }
17738
17739 SDValue Result = DAG.getMemIntrinsicNode(
17740 AArch64ISD::LDP, SDLoc(N),
17741 DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
17742 {LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(),
17743 LoadNode->getMemOperand());
17744
17745 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
17746 Result.getValue(0), Result.getValue(1));
17747 Results.append({Pair, Result.getValue(2) /* Chain */});
17748 return;
17749 }
17750 case ISD::EXTRACT_SUBVECTOR:
17751 ReplaceExtractSubVectorResults(N, Results, DAG);
17752 return;
17753 case ISD::INSERT_SUBVECTOR:
17754 // Custom lowering has been requested for INSERT_SUBVECTOR -- but delegate
17755 // to common code for result type legalisation
17756 return;
17757 case ISD::INTRINSIC_WO_CHAIN: {
17758 EVT VT = N->getValueType(0);
17759    assert((VT == MVT::i8 || VT == MVT::i16) &&
17760           "custom lowering for unexpected type");
17761
17762 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(0));
17763 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
17764 switch (IntID) {
17765 default:
17766 return;
17767 case Intrinsic::aarch64_sve_clasta_n: {
17768 SDLoc DL(N);
17769 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
17770 auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
17771 N->getOperand(1), Op2, N->getOperand(3));
17772 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17773 return;
17774 }
17775 case Intrinsic::aarch64_sve_clastb_n: {
17776 SDLoc DL(N);
17777 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
17778 auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
17779 N->getOperand(1), Op2, N->getOperand(3));
17780 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17781 return;
17782 }
17783 case Intrinsic::aarch64_sve_lasta: {
17784 SDLoc DL(N);
17785 auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
17786 N->getOperand(1), N->getOperand(2));
17787 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17788 return;
17789 }
17790 case Intrinsic::aarch64_sve_lastb: {
17791 SDLoc DL(N);
17792 auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
17793 N->getOperand(1), N->getOperand(2));
17794 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17795 return;
17796 }
17797 }
17798 }
17799 }
17800}
17801
17802bool AArch64TargetLowering::useLoadStackGuardNode() const {
17803 if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
17804 return TargetLowering::useLoadStackGuardNode();
17805 return true;
17806}
17807
17808unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
17809 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
17810 // reciprocal if there are three or more FDIVs.
17811 return 3;
17812}
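// Editor's note: illustrative example, not part of the analyzed source.  With
// the threshold of 3 above, a sequence such as a/d, b/d, c/d (under fast-math
// flags that permit the reciprocal transform) can be rewritten as
//   double r = 1.0 / d;                      // one division
//   double x = a * r, y = b * r, z = c * r;  // three multiplies
// whereas only two divisions by the same divisor are left untouched.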
17813
17814TargetLoweringBase::LegalizeTypeAction
17815AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {
17816 // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
17817 // v4i16, v2i32 instead of to promote.
17818 if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
17819 VT == MVT::v1f32)
17820 return TypeWidenVector;
17821
17822 return TargetLoweringBase::getPreferredVectorAction(VT);
17823}
17824
17825// In v8.4a, ldp and stp instructions are guaranteed to be single-copy atomic
17826// provided the address is 16-byte aligned.
17827bool AArch64TargetLowering::isOpSuitableForLDPSTP(const Instruction *I) const {
17828 if (!Subtarget->hasLSE2())
17829 return false;
17830
17831 if (auto LI = dyn_cast<LoadInst>(I))
17832 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
17833 LI->getAlignment() >= 16;
17834
17835 if (auto SI = dyn_cast<StoreInst>(I))
17836 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
17837 SI->getAlignment() >= 16;
17838
17839 return false;
17840}
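// Editor's note: illustrative sketch, not part of the analyzed source.  The
// size/alignment test above, restated on plain values once the LSE2 feature
// check has passed (helper name is hypothetical):
static bool isLDPSTPSuitableSketch(unsigned SizeInBits, unsigned AlignInBytes) {
  // 128-bit accesses that are 16-byte aligned may use LDP/STP directly.
  return SizeInBits == 128 && AlignInBytes >= 16;
}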
17841
17842bool AArch64TargetLowering::shouldInsertFencesForAtomic(
17843 const Instruction *I) const {
17844 return isOpSuitableForLDPSTP(I);
17845}
17846
17847// Loads and stores less than 128 bits are already atomic; ones above that
17848// are doomed anyway, so defer to the default libcall and blame the OS when
17849// things go wrong.
17850bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
17851 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
17852 if (Size != 128)
17853 return false;
17854
17855 return !isOpSuitableForLDPSTP(SI);
17856}
17857
17858// Loads and stores less than 128 bits are already atomic; ones above that
17859// are doomed anyway, so defer to the default libcall and blame the OS when
17860// things go wrong.
17861TargetLowering::AtomicExpansionKind
17862AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
17863 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
17864
17865 if (Size != 128 || isOpSuitableForLDPSTP(LI))
17866 return AtomicExpansionKind::None;
17867
17868 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
17869 // implement atomicrmw without spilling. If the target address is also on the
17870 // stack and close enough to the spill slot, this can lead to a situation
17871 // where the monitor always gets cleared and the atomic operation can never
17872 // succeed. So at -O0 lower this operation to a CAS loop.
17873 if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
17874 return AtomicExpansionKind::CmpXChg;
17875
17876 return AtomicExpansionKind::LLSC;
17877}
17878
17879// For the real atomic operations, we have ldxr/stxr up to 128 bits.
17880TargetLowering::AtomicExpansionKind
17881AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
17882 if (AI->isFloatingPointOperation())
17883 return AtomicExpansionKind::CmpXChg;
17884
17885 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
17886 if (Size > 128) return AtomicExpansionKind::None;
17887
17888 // Nand is not supported in LSE.
17889 // Leave 128 bits to LLSC or CmpXChg.
17890 if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
17891 if (Subtarget->hasLSE())
17892 return AtomicExpansionKind::None;
17893 if (Subtarget->outlineAtomics()) {
17894 // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
17895 // Don't outline them unless
17896 // (1) high level <atomic> support approved:
17897 // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
17898 // (2) low level libgcc and compiler-rt support implemented by:
17899 // min/max outline atomics helpers
17900 if (AI->getOperation() != AtomicRMWInst::Min &&
17901 AI->getOperation() != AtomicRMWInst::Max &&
17902 AI->getOperation() != AtomicRMWInst::UMin &&
17903 AI->getOperation() != AtomicRMWInst::UMax) {
17904 return AtomicExpansionKind::None;
17905 }
17906 }
17907 }
17908
17909 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
17910 // implement atomicrmw without spilling. If the target address is also on the
17911 // stack and close enough to the spill slot, this can lead to a situation
17912 // where the monitor always gets cleared and the atomic operation can never
17913 // succeed. So at -O0 lower this operation to a CAS loop.
17914 if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
17915 return AtomicExpansionKind::CmpXChg;
17916
17917 return AtomicExpansionKind::LLSC;
17918}
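// Editor's note: illustrative examples, not part of the analyzed source.
// Walking the decision above: a floating-point atomicrmw always becomes a
// cmpxchg loop; an i32 `atomicrmw add` on an LSE subtarget returns None (it
// can be selected directly); an i32 `atomicrmw nand` has no LSE encoding and
// is expanded to an LL/SC loop at -O1 and above, or to a cmpxchg loop at -O0.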
17919
17920TargetLowering::AtomicExpansionKind
17921AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
17922 AtomicCmpXchgInst *AI) const {
17923 // If subtarget has LSE, leave cmpxchg intact for codegen.
17924 if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
17925 return AtomicExpansionKind::None;
17926 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
17927 // implement cmpxchg without spilling. If the address being exchanged is also
17928 // on the stack and close enough to the spill slot, this can lead to a
17929 // situation where the monitor always gets cleared and the atomic operation
17930 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
17931 if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
17932 return AtomicExpansionKind::None;
17933
17934 // 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
17935 // it.
17936 unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
17937 if (Size > 64)
17938 return AtomicExpansionKind::None;
17939
17940 return AtomicExpansionKind::LLSC;
17941}
17942
17943Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
17944 Type *ValueTy, Value *Addr,
17945 AtomicOrdering Ord) const {
17946 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
17947 bool IsAcquire = isAcquireOrStronger(Ord);
17948
17949 // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
17950 // intrinsic must return {i64, i64} and we have to recombine them into a
17951 // single i128 here.
17952 if (ValueTy->getPrimitiveSizeInBits() == 128) {
17953 Intrinsic::ID Int =
17954 IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
17955 Function *Ldxr = Intrinsic::getDeclaration(M, Int);
17956
17957 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
17958 Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
17959
17960 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
17961 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
17962 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
17963 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
17964 return Builder.CreateOr(
17965 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
17966 }
17967
17968 Type *Tys[] = { Addr->getType() };
17969 Intrinsic::ID Int =
17970 IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
17971 Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
17972
17973 const DataLayout &DL = M->getDataLayout();
17974 IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
17975 Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);
17976
17977 return Builder.CreateBitCast(Trunc, ValueTy);
17978}
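// Editor's note: illustrative sketch, not part of the analyzed source.  The
// 128-bit path above recombines the {lo, hi} pair returned by LDXP/LDAXP; on
// plain integers the same recombination is (assuming __int128 support, helper
// name hypothetical):
static unsigned __int128 recombineLoHiSketch(uint64_t Lo, uint64_t Hi) {
  return static_cast<unsigned __int128>(Lo) |
         (static_cast<unsigned __int128>(Hi) << 64); // mirrors the zext/shl/or
}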
17979
17980void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
17981 IRBuilderBase &Builder) const {
17982 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
17983 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
17984}
17985
17986Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
17987 Value *Val, Value *Addr,
17988 AtomicOrdering Ord) const {
17989 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
17990 bool IsRelease = isReleaseOrStronger(Ord);
17991
17992 // Since the intrinsics must have legal type, the i128 intrinsics take two
17993 // parameters: "i64, i64". We must marshal Val into the appropriate form
17994 // before the call.
17995 if (Val->getType()->getPrimitiveSizeInBits() == 128) {
17996 Intrinsic::ID Int =
17997 IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
17998 Function *Stxr = Intrinsic::getDeclaration(M, Int);
17999 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18000
18001 Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
18002 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
18003 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
18004 return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
18005 }
18006
18007 Intrinsic::ID Int =
18008 IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
18009 Type *Tys[] = { Addr->getType() };
18010 Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
18011
18012 const DataLayout &DL = M->getDataLayout();
18013 IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
18014 Val = Builder.CreateBitCast(Val, IntValTy);
18015
18016 return Builder.CreateCall(Stxr,
18017 {Builder.CreateZExtOrBitCast(
18018 Val, Stxr->getFunctionType()->getParamType(0)),
18019 Addr});
18020}
18021
18022bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
18023 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
18024 const DataLayout &DL) const {
18025 if (!Ty->isArrayTy()) {
18026 const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
18027 return TySize.isScalable() && TySize.getKnownMinSize() > 128;
18028 }
18029
18030 // All non aggregate members of the type must have the same type
18031 SmallVector<EVT> ValueVTs;
18032 ComputeValueVTs(*this, DL, Ty, ValueVTs);
18033 return is_splat(ValueVTs);
18034}
18035
18036bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
18037 EVT) const {
18038 return false;
18039}
18040
18041static Value *UseTlsOffset(IRBuilderBase &IRB, unsigned Offset) {
18042 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
18043 Function *ThreadPointerFunc =
18044 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
18045 return IRB.CreatePointerCast(
18046 IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc),
18047 Offset),
18048 IRB.getInt8PtrTy()->getPointerTo(0));
18049}
18050
18051Value *AArch64TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
18052 // Android provides a fixed TLS slot for the stack cookie. See the definition
18053 // of TLS_SLOT_STACK_GUARD in
18054 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
18055 if (Subtarget->isTargetAndroid())
18056 return UseTlsOffset(IRB, 0x28);
18057
18058 // Fuchsia is similar.
18059 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
18060 if (Subtarget->isTargetFuchsia())
18061 return UseTlsOffset(IRB, -0x10);
18062
18063 return TargetLowering::getIRStackGuard(IRB);
18064}
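// Editor's note: illustrative example, not part of the analyzed source.  On
// Android the helpers above effectively emit IR equivalent to:
//   %tp   = call i8* @llvm.thread_pointer()
//   %slot = getelementptr i8, i8* %tp, i32 40   ; TLS_SLOT_STACK_GUARD = 0x28
//   %gp   = bitcast i8* %slot to i8**
// i.e. the stack cookie is read 0x28 bytes past the thread pointer.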
18065
18066void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
18067 // MSVC CRT provides functionalities for stack protection.
18068 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
18069 // MSVC CRT has a global variable holding security cookie.
18070 M.getOrInsertGlobal("__security_cookie",
18071 Type::getInt8PtrTy(M.getContext()));
18072
18073 // MSVC CRT has a function to validate security cookie.
18074 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
18075 "__security_check_cookie", Type::getVoidTy(M.getContext()),
18076 Type::getInt8PtrTy(M.getContext()));
18077 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
18078 F->setCallingConv(CallingConv::Win64);
18079 F->addParamAttr(0, Attribute::AttrKind::InReg);
18080 }
18081 return;
18082 }
18083 TargetLowering::insertSSPDeclarations(M);
18084}
18085
18086Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
18087 // MSVC CRT has a global variable holding security cookie.
18088 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
18089 return M.getGlobalVariable("__security_cookie");
18090 return TargetLowering::getSDagStackGuard(M);
18091}
18092
18093Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
18094 // MSVC CRT has a function to validate security cookie.
18095 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
18096 return M.getFunction("__security_check_cookie");
18097 return TargetLowering::getSSPStackGuardCheck(M);
18098}
18099
18100Value *
18101AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
18102 // Android provides a fixed TLS slot for the SafeStack pointer. See the
18103 // definition of TLS_SLOT_SAFESTACK in
18104 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
18105 if (Subtarget->isTargetAndroid())
18106 return UseTlsOffset(IRB, 0x48);
18107
18108 // Fuchsia is similar.
18109 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
18110 if (Subtarget->isTargetFuchsia())
18111 return UseTlsOffset(IRB, -0x8);
18112
18113 return TargetLowering::getSafeStackPointerLocation(IRB);
18114}
18115
18116bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
18117 const Instruction &AndI) const {
18118 // Only sink 'and' mask to cmp use block if it is masking a single bit, since
18119// this is likely to fold the and/cmp/br into a single tbz instruction. It
18120 // may be beneficial to sink in other cases, but we would have to check that
18121 // the cmp would not get folded into the br to form a cbz for these to be
18122 // beneficial.
18123 ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
18124 if (!Mask)
18125 return false;
18126 return Mask->getValue().isPowerOf2();
18127}
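// Editor's note: illustrative sketch, not part of the analyzed source.  Only
// single-bit masks pass the power-of-two check above, e.g. (x & 0x40) == 0
// can become a TBZ on bit 6, while (x & 0x41) == 0 cannot.  The same test on
// a raw integer (helper name is hypothetical):
static bool isSingleBitMaskSketch(uint64_t Mask) {
  return Mask != 0 && (Mask & (Mask - 1)) == 0; // matches APInt::isPowerOf2()
}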
18128
18129bool AArch64TargetLowering::
18130 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
18131 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
18132 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
18133 SelectionDAG &DAG) const {
18134 // Does baseline recommend not to perform the fold by default?
18135 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
18136 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
18137 return false;
18138 // Else, if this is a vector shift, prefer 'shl'.
18139 return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL;
18140}
18141
18142bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG,
18143 SDNode *N) const {
18144 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
18145 !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
18146 return false;
18147 return true;
18148}
18149
18150void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
18151  // Update IsSplitCSR in AArch64FunctionInfo.
18152 AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
18153 AFI->setIsSplitCSR(true);
18154}
18155
18156void AArch64TargetLowering::insertCopiesSplitCSR(
18157 MachineBasicBlock *Entry,
18158 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
18159 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
18160 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
18161 if (!IStart)
18162 return;
18163
18164 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
18165 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
18166 MachineBasicBlock::iterator MBBI = Entry->begin();
18167 for (const MCPhysReg *I = IStart; *I; ++I) {
18168 const TargetRegisterClass *RC = nullptr;
18169 if (AArch64::GPR64RegClass.contains(*I))
18170 RC = &AArch64::GPR64RegClass;
18171 else if (AArch64::FPR64RegClass.contains(*I))
18172 RC = &AArch64::FPR64RegClass;
18173 else
18174      llvm_unreachable("Unexpected register class in CSRsViaCopy!");
18175
18176 Register NewVR = MRI->createVirtualRegister(RC);
18177 // Create copy from CSR to a virtual register.
18178 // FIXME: this currently does not emit CFI pseudo-instructions, it works
18179 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
18180 // nounwind. If we want to generalize this later, we may need to emit
18181 // CFI pseudo-instructions.
18182    assert(Entry->getParent()->getFunction().hasFnAttribute(
18183               Attribute::NoUnwind) &&
18184           "Function should be nounwind in insertCopiesSplitCSR!");
18185 Entry->addLiveIn(*I);
18186 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
18187 .addReg(*I);
18188
18189 // Insert the copy-back instructions right before the terminator.
18190 for (auto *Exit : Exits)
18191 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
18192 TII->get(TargetOpcode::COPY), *I)
18193 .addReg(NewVR);
18194 }
18195}
18196
18197bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
18198 // Integer division on AArch64 is expensive. However, when aggressively
18199 // optimizing for code size, we prefer to use a div instruction, as it is
18200 // usually smaller than the alternative sequence.
18201 // The exception to this is vector division. Since AArch64 doesn't have vector
18202 // integer division, leaving the division as-is is a loss even in terms of
18203 // size, because it will have to be scalarized, while the alternative code
18204 // sequence can be performed in vector form.
18205 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
18206 return OptSize && !VT.isVector();
18207}
18208
18209bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
18210 // We want inc-of-add for scalars and sub-of-not for vectors.
18211 return VT.isScalarInteger();
18212}
18213
18214bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
18215 return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
18216}
18217
18218unsigned
18219AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
18220 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
18221 return getPointerTy(DL).getSizeInBits();
18222
18223 return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
18224}
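// Editor's note: illustrative check, not part of the analyzed source.  For the
// generic AAPCS64 va_list (__stack, __gr_top, __vr_top, __gr_offs, __vr_offs)
// the formula above gives 3 * 64 + 2 * 32 = 256 bits, i.e. a 32-byte struct,
// while Darwin and Windows use a single pointer-sized va_list.
static_assert(3 * 64 + 2 * 32 == 256, "AAPCS64 va_list is 32 bytes (256 bits)");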
18225
18226void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
18227 MF.getFrameInfo().computeMaxCallFrameSize(MF);
18228 TargetLoweringBase::finalizeLowering(MF);
18229}
18230
18231// Unlike X86, we let frame lowering assign offsets to all catch objects.
18232bool AArch64TargetLowering::needsFixedCatchObjects() const {
18233 return false;
18234}
18235
18236bool AArch64TargetLowering::shouldLocalize(
18237 const MachineInstr &MI, const TargetTransformInfo *TTI) const {
18238 switch (MI.getOpcode()) {
18239 case TargetOpcode::G_GLOBAL_VALUE: {
18240 // On Darwin, TLS global vars get selected into function calls, which
18241    // we don't want localized, as they can get moved into the middle of
18242 // another call sequence.
18243 const GlobalValue &GV = *MI.getOperand(1).getGlobal();
18244 if (GV.isThreadLocal() && Subtarget->isTargetMachO())
18245 return false;
18246 break;
18247 }
18248 // If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as being
18249 // localizable.
18250 case AArch64::ADRP:
18251 case AArch64::G_ADD_LOW:
18252 return true;
18253 default:
18254 break;
18255 }
18256 return TargetLoweringBase::shouldLocalize(MI, TTI);
18257}
18258
18259bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
18260 if (isa<ScalableVectorType>(Inst.getType()))
18261 return true;
18262
18263 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
18264 if (isa<ScalableVectorType>(Inst.getOperand(i)->getType()))
18265 return true;
18266
18267 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
18268 if (isa<ScalableVectorType>(AI->getAllocatedType()))
18269 return true;
18270 }
18271
18272 return false;
18273}
18274
18275// Return the largest legal scalable vector type that matches VT's element type.
18276static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT) {
18277  assert(VT.isFixedLengthVector() &&
18278         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
18279         "Expected legal fixed length vector!");
18280 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
18281 default:
18282    llvm_unreachable("unexpected element type for SVE container");
18283 case MVT::i8:
18284 return EVT(MVT::nxv16i8);
18285 case MVT::i16:
18286 return EVT(MVT::nxv8i16);
18287 case MVT::i32:
18288 return EVT(MVT::nxv4i32);
18289 case MVT::i64:
18290 return EVT(MVT::nxv2i64);
18291 case MVT::f16:
18292 return EVT(MVT::nxv8f16);
18293 case MVT::f32:
18294 return EVT(MVT::nxv4f32);
18295 case MVT::f64:
18296 return EVT(MVT::nxv2f64);
18297 }
18298}
18299
18300// Return a PTRUE with active lanes corresponding to the extent of VT.
18301static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
18302 EVT VT) {
18303  assert(VT.isFixedLengthVector() &&
18304         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
18305         "Expected legal fixed length vector!");
18306
18307 unsigned PgPattern =
18308 getSVEPredPatternFromNumElements(VT.getVectorNumElements());
18309  assert(PgPattern && "Unexpected element count for SVE predicate");
18310
18311 // For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use
18312 // AArch64SVEPredPattern::all, which can enable the use of unpredicated
18313 // variants of instructions when available.
18314 const auto &Subtarget =
18315 static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
18316 unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
18317 unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
18318 if (MaxSVESize && MinSVESize == MaxSVESize &&
18319 MaxSVESize == VT.getSizeInBits())
18320 PgPattern = AArch64SVEPredPattern::all;
18321
18322 MVT MaskVT;
18323 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
18324 default:
18325    llvm_unreachable("unexpected element type for SVE predicate");
18326 case MVT::i8:
18327 MaskVT = MVT::nxv16i1;
18328 break;
18329 case MVT::i16:
18330 case MVT::f16:
18331 MaskVT = MVT::nxv8i1;
18332 break;
18333 case MVT::i32:
18334 case MVT::f32:
18335 MaskVT = MVT::nxv4i1;
18336 break;
18337 case MVT::i64:
18338 case MVT::f64:
18339 MaskVT = MVT::nxv2i1;
18340 break;
18341 }
18342
18343 return getPTrue(DAG, DL, MaskVT, PgPattern);
18344}
18345
18346static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
18347 EVT VT) {
18348 assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
18349 "Expected legal scalable vector!");
18350 auto PredTy = VT.changeVectorElementType(MVT::i1);
18351 return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
18352}
18353
18354static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT) {
18355 if (VT.isFixedLengthVector())
18356 return getPredicateForFixedLengthVector(DAG, DL, VT);
18357
18358 return getPredicateForScalableVector(DAG, DL, VT);
18359}
18360
18361// Grow V to consume an entire SVE register.
18362static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
18363 assert(VT.isScalableVector() &&
18364 "Expected to convert into a scalable vector!");
18365 assert(V.getValueType().isFixedLengthVector() &&
18366 "Expected a fixed length vector operand!");
18367 SDLoc DL(V);
18368 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
18369 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
18370}
18371
18372// Shrink V so it's just big enough to maintain a VT's worth of data.
18373static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
18374 assert(VT.isFixedLengthVector() &&
18375 "Expected to convert into a fixed length vector!");
18376 assert(V.getValueType().isScalableVector() &&
18377 "Expected a scalable vector operand!");
18378 SDLoc DL(V);
18379 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
18380 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
18381}
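// A minimal sketch of how the two helpers above are intended to compose; the
// value type v4i32 below is an illustrative assumption, not taken from the
// surrounding code. A fixed length value is widened into its scalable
// container, operated on, then narrowed back:
//
//   SDValue lowerViaSVE(SelectionDAG &DAG, SDValue FixedV /* v4i32 */) {
//     EVT VT = FixedV.getValueType();
//     EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); // nxv4i32
//     SDValue Scalable = convertToScalableVector(DAG, ContainerVT, FixedV);
//     // ... predicated SVE nodes operate on Scalable here ...
//     return convertFromScalableVector(DAG, VT, Scalable);
//   }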
18382
18383// Convert all fixed length vector loads larger than NEON to masked_loads.
18384SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
18385 SDValue Op, SelectionDAG &DAG) const {
18386 auto Load = cast<LoadSDNode>(Op);
18387
18388 SDLoc DL(Op);
18389 EVT VT = Op.getValueType();
18390 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18391
18392 auto NewLoad = DAG.getMaskedLoad(
18393 ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
18394 getPredicateForFixedLengthVector(DAG, DL, VT), DAG.getUNDEF(ContainerVT),
18395 Load->getMemoryVT(), Load->getMemOperand(), Load->getAddressingMode(),
18396 Load->getExtensionType());
18397
18398 auto Result = convertFromScalableVector(DAG, VT, NewLoad);
18399 SDValue MergedValues[2] = {Result, Load->getChain()};
18400 return DAG.getMergeValues(MergedValues, DL);
18401}
18402
18403static SDValue convertFixedMaskToScalableVector(SDValue Mask,
18404 SelectionDAG &DAG) {
18405 SDLoc DL(Mask);
18406 EVT InVT = Mask.getValueType();
18407 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18408
18409 auto Op1 = convertToScalableVector(DAG, ContainerVT, Mask);
18410 auto Op2 = DAG.getConstant(0, DL, ContainerVT);
18411 auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
18412
18413 EVT CmpVT = Pg.getValueType();
18414 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
18415 {Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)});
18416}
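// For illustration, assuming a v8i16 mask as input: the fixed mask is first
// placed into an nxv8i16 container, then compared against zero with
// SETCC_MERGE_ZERO/SETNE under the nxv8i1 predicate built from the fixed
// vector's extent, yielding an nxv8i1 mask whose lanes are active exactly
// where the original mask elements were non-zero.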
18417
18418// Convert all fixed length vector loads larger than NEON to masked_loads.
18419SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
18420 SDValue Op, SelectionDAG &DAG) const {
18421 auto Load = cast<MaskedLoadSDNode>(Op);
18422
18423 SDLoc DL(Op);
18424 EVT VT = Op.getValueType();
18425 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18426
18427 SDValue Mask = convertFixedMaskToScalableVector(Load->getMask(), DAG);
18428
18429 SDValue PassThru;
18430 bool IsPassThruZeroOrUndef = false;
18431
18432 if (Load->getPassThru()->isUndef()) {
18433 PassThru = DAG.getUNDEF(ContainerVT);
18434 IsPassThruZeroOrUndef = true;
18435 } else {
18436 if (ContainerVT.isInteger())
18437 PassThru = DAG.getConstant(0, DL, ContainerVT);
18438 else
18439 PassThru = DAG.getConstantFP(0, DL, ContainerVT);
18440 if (isZerosVector(Load->getPassThru().getNode()))
18441 IsPassThruZeroOrUndef = true;
18442 }
18443
18444 auto NewLoad = DAG.getMaskedLoad(
18445 ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
18446 Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
18447 Load->getAddressingMode(), Load->getExtensionType());
18448
18449 if (!IsPassThruZeroOrUndef) {
18450 SDValue OldPassThru =
18451 convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
18452 NewLoad = DAG.getSelect(DL, ContainerVT, Mask, NewLoad, OldPassThru);
18453 }
18454
18455 auto Result = convertFromScalableVector(DAG, VT, NewLoad);
18456 SDValue MergedValues[2] = {Result, Load->getChain()};
18457 return DAG.getMergeValues(MergedValues, DL);
18458}
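// Worked example of the passthru handling above (types and values are
// illustrative): for a masked load of v4i32 with passthru <1, 2, 3, 4>, the
// load itself is emitted with a zero passthru, and the requested passthru is
// merged back in afterwards with a vector select on the same mask:
//   NewLoad = masked_load(..., passthru = zeroinitializer)
//   Result  = select(Mask, NewLoad, <1, 2, 3, 4> widened to nxv4i32)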
18459
18460// Convert all fixed length vector stores larger than NEON to masked_stores.
18461SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
18462 SDValue Op, SelectionDAG &DAG) const {
18463 auto Store = cast<StoreSDNode>(Op);
18464
18465 SDLoc DL(Op);
18466 EVT VT = Store->getValue().getValueType();
18467 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18468
18469 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
18470 return DAG.getMaskedStore(
18471 Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
18472 getPredicateForFixedLengthVector(DAG, DL, VT), Store->getMemoryVT(),
18473 Store->getMemOperand(), Store->getAddressingMode(),
18474 Store->isTruncatingStore());
18475}
18476
18477SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
18478 SDValue Op, SelectionDAG &DAG) const {
18479 auto Store = cast<MaskedStoreSDNode>(Op);
18480
18481 if (Store->isTruncatingStore())
18482 return SDValue();
18483
18484 SDLoc DL(Op);
18485 EVT VT = Store->getValue().getValueType();
18486 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18487
18488 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
18489 SDValue Mask = convertFixedMaskToScalableVector(Store->getMask(), DAG);
18490
18491 return DAG.getMaskedStore(
18492 Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
18493 Mask, Store->getMemoryVT(), Store->getMemOperand(),
18494 Store->getAddressingMode(), Store->isTruncatingStore());
18495}
18496
18497SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
18498 SDValue Op, SelectionDAG &DAG) const {
18499 SDLoc dl(Op);
18500 EVT VT = Op.getValueType();
18501 EVT EltVT = VT.getVectorElementType();
18502
18503 bool Signed = Op.getOpcode() == ISD::SDIV;
18504 unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
18505
18506 // Scalable vector i32/i64 DIV is supported.
18507 if (EltVT == MVT::i32 || EltVT == MVT::i64)
18508 return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
18509
18510 // Scalable vector i8/i16 DIV is not supported. Promote it to i32.
18511 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18512 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18513 EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
18514 EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);
18515
18516 // If this is not a full vector, extend, div, and truncate it.
18517 EVT WidenedVT = VT.widenIntegerVectorElementType(*DAG.getContext());
18518 if (DAG.getTargetLoweringInfo().isTypeLegal(WidenedVT)) {
18519 unsigned ExtendOpcode = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
18520 SDValue Op0 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(0));
18521 SDValue Op1 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(1));
18522 SDValue Div = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0, Op1);
18523 return DAG.getNode(ISD::TRUNCATE, dl, VT, Div);
18524 }
18525
18526 // Convert the operands to scalable vectors.
18527 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
18528 SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
18529
18530 // Extend the scalable operands.
18531 unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
18532 unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
18533 SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
18534 SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
18535 SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
18536 SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
18537
18538 // Convert back to fixed vectors so the DIV can be further lowered.
18539 Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
18540 Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
18541 Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
18542 Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
18543 SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
18544 Op0Lo, Op1Lo);
18545 SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
18546 Op0Hi, Op1Hi);
18547
18548 // Convert again to scalable vectors to truncate.
18549 ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
18550 ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
18551 SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
18552 ResultLo, ResultHi);
18553
18554 return convertFromScalableVector(DAG, VT, ScalableResult);
18555}
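// Data flow of the i8/i16 path above, assuming VT = v16i8 (an illustrative
// choice) and no legal widened fixed type:
//   v16i8 operands -> nxv16i8 containers
//   SUNPKLO/SUNPKHI (or UUNPK*)       -> two nxv8i16 halves per operand
//   convert to fixed v8i16, SDIV/UDIV -> two v8i16 results
//   back to nxv8i16, UZP1             -> one nxv16i8 quotient
//   convertFromScalableVector         -> v16i8 result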
18556
18557SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
18558 SDValue Op, SelectionDAG &DAG) const {
18559 EVT VT = Op.getValueType();
18560 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18561
18562 SDLoc DL(Op);
18563 SDValue Val = Op.getOperand(0);
18564 EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
18565 Val = convertToScalableVector(DAG, ContainerVT, Val);
18566
18567 bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND;
18568 unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
18569
18570 // Repeatedly unpack Val until the result is of the desired element type.
18571 switch (ContainerVT.getSimpleVT().SimpleTy) {
18572 default:
18573 llvm_unreachable("unimplemented container type");
18574 case MVT::nxv16i8:
18575 Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
18576 if (VT.getVectorElementType() == MVT::i16)
18577 break;
18578 LLVM_FALLTHROUGH;
18579 case MVT::nxv8i16:
18580 Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
18581 if (VT.getVectorElementType() == MVT::i32)
18582 break;
18583 LLVM_FALLTHROUGH;
18584 case MVT::nxv4i32:
18585 Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
18586 assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!");
18587 break;
18588 }
18589
18590 return convertFromScalableVector(DAG, VT, Val);
18591}
18592
18593SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
18594 SDValue Op, SelectionDAG &DAG) const {
18595 EVT VT = Op.getValueType();
18596 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18597
18598 SDLoc DL(Op);
18599 SDValue Val = Op.getOperand(0);
18600 EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
18601 Val = convertToScalableVector(DAG, ContainerVT, Val);
18602
18603 // Repeatedly truncate Val until the result is of the desired element type.
18604 switch (ContainerVT.getSimpleVT().SimpleTy) {
18605 default:
18606 llvm_unreachable("unimplemented container type");
18607 case MVT::nxv2i64:
18608 Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv4i32, Val);
18609 Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv4i32, Val, Val);
18610 if (VT.getVectorElementType() == MVT::i32)
18611 break;
18612 LLVM_FALLTHROUGH;
18613 case MVT::nxv4i32:
18614 Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv8i16, Val);
18615 Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv8i16, Val, Val);
18616 if (VT.getVectorElementType() == MVT::i16)
18617 break;
18618 LLVM_FALLTHROUGH;
18619 case MVT::nxv8i16:
18620 Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i8, Val);
18621 Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv16i8, Val, Val);
18622 assert(VT.getVectorElementType() == MVT::i8 && "Unexpected element type!");
18623 break;
18624 }
18625
18626 return convertFromScalableVector(DAG, VT, Val);
18627}
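// Illustration of the repeated narrowing above, assuming a truncate from
// 64-bit to 8-bit elements (the concrete types are assumptions): starting in
// nxv2i64, each step bitcasts to the next narrower integer container and uses
// UZP1(Val, Val) to keep the low half of every element:
//   nxv2i64 -> nxv4i32 -> nxv8i16 -> nxv16i8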
18628
18629SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
18630 SDValue Op, SelectionDAG &DAG) const {
18631 EVT VT = Op.getValueType();
18632 EVT InVT = Op.getOperand(0).getValueType();
18633 assert(InVT.isFixedLengthVector() && "Expected fixed length vector type!");
18634
18635 SDLoc DL(Op);
18636 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18637 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
18638
18639 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Op.getOperand(1));
18640}
18641
18642SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
18643 SDValue Op, SelectionDAG &DAG) const {
18644 EVT VT = Op.getValueType();
18645 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18646
18647 SDLoc DL(Op);
18648 EVT InVT = Op.getOperand(0).getValueType();
18649 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18650 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
18651
18652 auto ScalableRes = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Op0,
18653 Op.getOperand(1), Op.getOperand(2));
18654
18655 return convertFromScalableVector(DAG, VT, ScalableRes);
18656}
18657
18658// Convert vector operation 'Op' to an equivalent predicated operation whereby
18659// the original operation's type is used to construct a suitable predicate.
18660// NOTE: The results for inactive lanes are undefined.
18661SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
18662 SelectionDAG &DAG,
18663 unsigned NewOp,
18664 bool OverrideNEON) const {
18665 EVT VT = Op.getValueType();
18666 SDLoc DL(Op);
18667 auto Pg = getPredicateForVector(DAG, DL, VT);
18668
18669 if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {
18670 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18671
18672 // Create list of operands by converting existing ones to scalable types.
18673 SmallVector<SDValue, 4> Operands = {Pg};
18674 for (const SDValue &V : Op->op_values()) {
18675 if (isa<CondCodeSDNode>(V)) {
18676 Operands.push_back(V);
18677 continue;
18678 }
18679
18680 if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
18681 EVT VTArg = VTNode->getVT().getVectorElementType();
18682 EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg);
18683 Operands.push_back(DAG.getValueType(NewVTArg));
18684 continue;
18685 }
18686
18687 assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) &&
18688 "Only fixed length vectors are supported!");
18689 Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
18690 }
18691
18692 if (isMergePassthruOpcode(NewOp))
18693 Operands.push_back(DAG.getUNDEF(ContainerVT));
18694
18695 auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
18696 return convertFromScalableVector(DAG, VT, ScalableRes);
18697 }
18698
18699 assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
18700
18701 SmallVector<SDValue, 4> Operands = {Pg};
18702 for (const SDValue &V : Op->op_values()) {
18703 assert((!V.getValueType().isVector() ||
18704 V.getValueType().isScalableVector()) &&
18705 "Only scalable vectors are supported!");
18706 Operands.push_back(V);
18707 }
18708
18709 if (isMergePassthruOpcode(NewOp))
18710 Operands.push_back(DAG.getUNDEF(VT));
18711
18712 return DAG.getNode(NewOp, DL, VT, Operands);
18713}
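// Shape of the node produced above, shown for a hypothetical binary operation
// lowered to a *_PRED opcode (operand names are illustrative):
//   Scalable = NewOp(Pg, Op0', Op1' [, undef passthru for merge forms])
//   Result   = convertFromScalableVector(DAG, VT, Scalable)  // fixed case only
// For already-scalable types the original operands are reused directly and no
// conversion back is needed.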
18714
18715// If a fixed length vector operation has no side effects when applied to
18716// undefined elements, we can safely use scalable vectors to perform the same
18717// operation without needing to worry about predication.
18718SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
18719 SelectionDAG &DAG) const {
18720 EVT VT = Op.getValueType();
18721 assert(useSVEForFixedLengthVectorVT(VT) &&
18722 "Only expected to lower fixed length vector operation!");
18723 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18724
18725 // Create list of operands by converting existing ones to scalable types.
18726 SmallVector<SDValue, 4> Ops;
18727 for (const SDValue &V : Op->op_values()) {
18728 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
18729
18730 // Pass through non-vector operands.
18731 if (!V.getValueType().isVector()) {
18732 Ops.push_back(V);
18733 continue;
18734 }
18735
18736 // "cast" fixed length vector to a scalable vector.
18737 assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
18738 "Only fixed length vectors are supported!");
18739 Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
18740 }
18741
18742 auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
18743 return convertFromScalableVector(DAG, VT, ScalableRes);
18744}
18745
18746SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
18747 SelectionDAG &DAG) const {
18748 SDLoc DL(ScalarOp);
18749 SDValue AccOp = ScalarOp.getOperand(0);
18750 SDValue VecOp = ScalarOp.getOperand(1);
18751 EVT SrcVT = VecOp.getValueType();
18752 EVT ResVT = SrcVT.getVectorElementType();
18753
18754 EVT ContainerVT = SrcVT;
18755 if (SrcVT.isFixedLengthVector()) {
18756 ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
18757 VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
18758 }
18759
18760 SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
18761 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
18762
18763 // Convert operands to Scalable.
18764 AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT,
18765 DAG.getUNDEF(ContainerVT), AccOp, Zero);
18766
18767 // Perform reduction.
18768 SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
18769 Pg, AccOp, VecOp);
18770
18771 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
18772}
18773
18774SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
18775 SelectionDAG &DAG) const {
18776 SDLoc DL(ReduceOp);
18777 SDValue Op = ReduceOp.getOperand(0);
18778 EVT OpVT = Op.getValueType();
18779 EVT VT = ReduceOp.getValueType();
18780
18781 if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
18782 return SDValue();
18783
18784 SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
18785
18786 switch (ReduceOp.getOpcode()) {
18787 default:
18788 return SDValue();
18789 case ISD::VECREDUCE_OR:
18790 return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
18791 case ISD::VECREDUCE_AND: {
18792 Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
18793 return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
18794 }
18795 case ISD::VECREDUCE_XOR: {
18796 SDValue ID =
18797 DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
18798 SDValue Cntp =
18799 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
18800 return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
18801 }
18802 }
18803
18804 return SDValue();
18805}
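// Why the VECREDUCE_AND case above works, on an assumed 4-lane predicate:
// AND-reducing <1,1,0,1> should give 0. XOR with the all-active Pg flips every
// lane to <0,0,1,0>; the reduction is true only if no flipped lane remains
// set, which is exactly what the NONE_ACTIVE ptest checks.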
18806
18807SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
18808 SDValue ScalarOp,
18809 SelectionDAG &DAG) const {
18810 SDLoc DL(ScalarOp);
18811 SDValue VecOp = ScalarOp.getOperand(0);
18812 EVT SrcVT = VecOp.getValueType();
18813
18814 if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
18815 EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
18816 VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
18817 }
18818
18819 // UADDV always returns an i64 result.
18820 EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
18821 SrcVT.getVectorElementType();
18822 EVT RdxVT = SrcVT;
18823 if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
18824 RdxVT = getPackedSVEVectorVT(ResVT);
18825
18826 SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
18827 SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
18828 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
18829 Rdx, DAG.getConstant(0, DL, MVT::i64));
18830
18831 // The VEC_REDUCE nodes expect an element size result.
18832 if (ResVT != ScalarOp.getValueType())
18833 Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());
18834
18835 return Res;
18836}
18837
18838SDValue
18839AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
18840 SelectionDAG &DAG) const {
18841 EVT VT = Op.getValueType();
18842 SDLoc DL(Op);
18843
18844 EVT InVT = Op.getOperand(1).getValueType();
18845 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18846 SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
18847 SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
18848
18849 // Convert the mask to a predicate (NOTE: We don't need to worry about
18850 // inactive lanes since VSELECT is safe when given undefined elements).
18851 EVT MaskVT = Op.getOperand(0).getValueType();
18852 EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT);
18853 auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0));
18854 Mask = DAG.getNode(ISD::TRUNCATE, DL,
18855 MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
18856
18857 auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
18858 Mask, Op1, Op2);
18859
18860 return convertFromScalableVector(DAG, VT, ScalableRes);
18861}
18862
18863SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
18864 SDValue Op, SelectionDAG &DAG) const {
18865 SDLoc DL(Op);
18866 EVT InVT = Op.getOperand(0).getValueType();
18867 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18868
18869 assert(useSVEForFixedLengthVectorVT(InVT) &&
18870 "Only expected to lower fixed length vector operation!");
18871 assert(Op.getValueType() == InVT.changeTypeToInteger() &&
18872 "Expected integer result of the same bit length as the inputs!");
18873
18874 auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
18875 auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
18876 auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
18877
18878 EVT CmpVT = Pg.getValueType();
18879 auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
18880 {Pg, Op1, Op2, Op.getOperand(2)});
18881
18882 EVT PromoteVT = ContainerVT.changeTypeToInteger();
18883 auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
18884 return convertFromScalableVector(DAG, Op.getValueType(), Promote);
18885}
18886
18887SDValue
18888AArch64TargetLowering::LowerFixedLengthBitcastToSVE(SDValue Op,
18889 SelectionDAG &DAG) const {
18890 SDLoc DL(Op);
18891 auto SrcOp = Op.getOperand(0);
18892 EVT VT = Op.getValueType();
18893 EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
18894 EVT ContainerSrcVT =
18895 getContainerForFixedLengthVector(DAG, SrcOp.getValueType());
18896
18897 SrcOp = convertToScalableVector(DAG, ContainerSrcVT, SrcOp);
18898 Op = DAG.getNode(ISD::BITCAST, DL, ContainerDstVT, SrcOp);
18899 return convertFromScalableVector(DAG, VT, Op);
18900}
18901
18902SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
18903 SDValue Op, SelectionDAG &DAG) const {
18904 SDLoc DL(Op);
18905 unsigned NumOperands = Op->getNumOperands();
18906
18907 assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
18908 "Unexpected number of operands in CONCAT_VECTORS");
18909
18910 auto SrcOp1 = Op.getOperand(0);
18911 auto SrcOp2 = Op.getOperand(1);
18912 EVT VT = Op.getValueType();
18913 EVT SrcVT = SrcOp1.getValueType();
18914
18915 if (NumOperands > 2) {
18916 SmallVector<SDValue, 4> Ops;
18917 EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
18918 for (unsigned I = 0; I < NumOperands; I += 2)
18919 Ops.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, PairVT,
18920 Op->getOperand(I), Op->getOperand(I + 1)));
18921
18922 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
18923 }
18924
18925 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18926
18927 SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, SrcVT);
18928 SrcOp1 = convertToScalableVector(DAG, ContainerVT, SrcOp1);
18929 SrcOp2 = convertToScalableVector(DAG, ContainerVT, SrcOp2);
18930
18931 Op = DAG.getNode(AArch64ISD::SPLICE, DL, ContainerVT, Pg, SrcOp1, SrcOp2);
18932
18933 return convertFromScalableVector(DAG, VT, Op);
18934}
18935
18936SDValue
18937AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(SDValue Op,
18938 SelectionDAG &DAG) const {
18939 EVT VT = Op.getValueType();
18940 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18941
18942 SDLoc DL(Op);
18943 SDValue Val = Op.getOperand(0);
18944 SDValue Pg = getPredicateForVector(DAG, DL, VT);
18945 EVT SrcVT = Val.getValueType();
18946 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18947 EVT ExtendVT = ContainerVT.changeVectorElementType(
18948 SrcVT.getVectorElementType());
18949
18950 Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
18951 Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT.changeTypeToInteger(), Val);
18952
18953 Val = convertToScalableVector(DAG, ContainerVT.changeTypeToInteger(), Val);
18954 Val = getSVESafeBitCast(ExtendVT, Val, DAG);
18955 Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
18956 Pg, Val, DAG.getUNDEF(ContainerVT));
18957
18958 return convertFromScalableVector(DAG, VT, Val);
18959}
18960
18961SDValue
18962AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
18963 SelectionDAG &DAG) const {
18964 EVT VT = Op.getValueType();
18965 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18966
18967 SDLoc DL(Op);
18968 SDValue Val = Op.getOperand(0);
18969 EVT SrcVT = Val.getValueType();
18970 EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
18971 EVT RoundVT = ContainerSrcVT.changeVectorElementType(
18972 VT.getVectorElementType());
18973 SDValue Pg = getPredicateForVector(DAG, DL, RoundVT);
18974
18975 Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18976 Val = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, RoundVT, Pg, Val,
18977 Op.getOperand(1), DAG.getUNDEF(RoundVT));
18978 Val = getSVESafeBitCast(ContainerSrcVT.changeTypeToInteger(), Val, DAG);
18979 Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
18980
18981 Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
18982 return DAG.getNode(ISD::BITCAST, DL, VT, Val);
18983}
18984
18985SDValue
18986AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
18987 SelectionDAG &DAG) const {
18988 EVT VT = Op.getValueType();
18989 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18990
18991 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;
18992 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
18993 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
18994
18995 SDLoc DL(Op);
18996 SDValue Val = Op.getOperand(0);
18997 EVT SrcVT = Val.getValueType();
18998 EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
18999 EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
19000
19001 if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
19002 ContainerDstVT.getVectorElementType().getSizeInBits()) {
19003 SDValue Pg = getPredicateForVector(DAG, DL, VT);
19004
19005 Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
19006 VT.changeTypeToInteger(), Val);
19007
19008 Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
19009 Val = getSVESafeBitCast(ContainerDstVT.changeTypeToInteger(), Val, DAG);
19010 // Safe to use a larger than specified operand since we just unpacked the
19011 // data, hence the upper bits are zero.
19012 Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
19013 DAG.getUNDEF(ContainerDstVT));
19014 return convertFromScalableVector(DAG, VT, Val);
19015 } else {
19016 EVT CvtVT = ContainerSrcVT.changeVectorElementType(
19017 ContainerDstVT.getVectorElementType());
19018 SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
19019
19020 Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
19021 Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
19022 Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
19023 Val = convertFromScalableVector(DAG, SrcVT, Val);
19024
19025 Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
19026 return DAG.getNode(ISD::BITCAST, DL, VT, Val);
19027 }
19028}
19029
19030SDValue
19031AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
19032 SelectionDAG &DAG) const {
19033 EVT VT = Op.getValueType();
19034 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
19035
19036 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
19037 unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
19038 : AArch64ISD::FCVTZU_MERGE_PASSTHRU;
19039
19040 SDLoc DL(Op);
19041 SDValue Val = Op.getOperand(0);
19042 EVT SrcVT = Val.getValueType();
19043 EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
19044 EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
19045
19046 if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
19047 ContainerDstVT.getVectorElementType().getSizeInBits()) {
19048 EVT CvtVT = ContainerDstVT.changeVectorElementType(
19049 ContainerSrcVT.getVectorElementType());
19050 SDValue Pg = getPredicateForVector(DAG, DL, VT);
19051
19052 Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
19053 Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val);
19054
19055 Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
19056 Val = getSVESafeBitCast(CvtVT, Val, DAG);
19057 Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
19058 DAG.getUNDEF(ContainerDstVT));
19059 return convertFromScalableVector(DAG, VT, Val);
19060 } else {
19061 EVT CvtVT = ContainerSrcVT.changeTypeToInteger();
19062 SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
19063
19064 // Safe to use a larger than specified result since an fp_to_int where the
19065 // result doesn't fit into the destination is undefined.
19066 Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
19067 Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
19068 Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
19069
19070 return DAG.getNode(ISD::TRUNCATE, DL, VT, Val);
19071 }
19072}
19073
19074SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
19075 SDValue Op, SelectionDAG &DAG) const {
19076 EVT VT = Op.getValueType();
19077 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
19078
19079 auto *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
19080 auto ShuffleMask = SVN->getMask();
19081
19082 SDLoc DL(Op);
19083 SDValue Op1 = Op.getOperand(0);
19084 SDValue Op2 = Op.getOperand(1);
19085
19086 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
19087 Op1 = convertToScalableVector(DAG, ContainerVT, Op1);
19088 Op2 = convertToScalableVector(DAG, ContainerVT, Op2);
19089
19090 bool ReverseEXT = false;
19091 unsigned Imm;
19092 if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
19093 Imm == VT.getVectorNumElements() - 1) {
19094 if (ReverseEXT)
19095 std::swap(Op1, Op2);
19096
19097 EVT ScalarTy = VT.getVectorElementType();
19098 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
19099 ScalarTy = MVT::i32;
19100 SDValue Scalar = DAG.getNode(
19101 ISD::EXTRACT_VECTOR_ELT, DL, ScalarTy, Op1,
19102 DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64));
19103 Op = DAG.getNode(AArch64ISD::INSR, DL, ContainerVT, Op2, Scalar);
19104 return convertFromScalableVector(DAG, VT, Op);
19105 }
19106
19107 return SDValue();
19108}
19109
19110SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
19111 SelectionDAG &DAG) const {
19112 SDLoc DL(Op);
19113 EVT InVT = Op.getValueType();
19114 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19115 (void)TLI;
19116
19117 assert(VT.isScalableVector() && TLI.isTypeLegal(VT) &&
19118 InVT.isScalableVector() && TLI.isTypeLegal(InVT) &&
19119 "Only expect to cast between legal scalable vector types!");
19120 assert((VT.getVectorElementType() == MVT::i1) ==
19121 (InVT.getVectorElementType() == MVT::i1) &&
19122 "Cannot cast between data and predicate scalable vector types!");
19123
19124 if (InVT == VT)
19125 return Op;
19126
19127 if (VT.getVectorElementType() == MVT::i1)
19128 return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
19129
19130 EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
19131 EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
19132
19133 // Pack input if required.
19134 if (InVT != PackedInVT)
19135 Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
19136
19137 Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
19138
19139 // Unpack result if required.
19140 if (VT != PackedVT)
19141 Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
19142
19143 return Op;
19144}
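// Example of the pack/bitcast/unpack dance above, with assumed types: casting
// nxv2f32 (unpacked) to nxv2i64 first reinterprets nxv2f32 as the packed
// nxv4f32, then bitcasts that to nxv2i64, which is already packed, so no
// final unpack is needed; the reverse direction would end with an extra
// REINTERPRET_CAST back to the unpacked form.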
19145
19146bool AArch64TargetLowering::isAllActivePredicate(SDValue N) const {
19147 return ::isAllActivePredicate(N);
19148}
19149
19150EVT AArch64TargetLowering::getPromotedVTForPredicate(EVT VT) const {
19151 return ::getPromotedVTForPredicate(VT);
19152}
19153
19154bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
19155 SDValue Op, const APInt &OriginalDemandedBits,
19156 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
19157 unsigned Depth) const {
19158
19159 unsigned Opc = Op.getOpcode();
19160 switch (Opc) {
19161 case AArch64ISD::VSHL: {
19162 // Match (VSHL (VLSHR Val X) X)
19163 SDValue ShiftL = Op;
19164 SDValue ShiftR = Op->getOperand(0);
19165 if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
19166 return false;
19167
19168 if (!ShiftL.hasOneUse() || !ShiftR.hasOneUse())
19169 return false;
19170
19171 unsigned ShiftLBits = ShiftL->getConstantOperandVal(1);
19172 unsigned ShiftRBits = ShiftR->getConstantOperandVal(1);
19173
19174 // Other cases can be handled as well, but this is not
19175 // implemented.
19176 if (ShiftRBits != ShiftLBits)
19177 return false;
19178
19179 unsigned ScalarSize = Op.getScalarValueSizeInBits();
19180 assert(ScalarSize > ShiftLBits && "Invalid shift imm");
19181
19182 APInt ZeroBits = APInt::getLowBitsSet(ScalarSize, ShiftLBits);
19183 APInt UnusedBits = ~OriginalDemandedBits;
19184
19185 if ((ZeroBits & UnusedBits) != ZeroBits)
19186 return false;
19187
19188 // All bits that are zeroed by (VSHL (VLSHR Val X) X) are not
19189 // used - simplify to just Val.
19190 return TLO.CombineTo(Op, ShiftR->getOperand(0));
19191 }
19192 }
19193
19194 return TargetLowering::SimplifyDemandedBitsForTargetNode(
19195 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
19196}
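// Worked example of the VSHL/VLSHR combine above (illustrative constants):
// with 32-bit lanes and X = 8, (VSHL (VLSHR Val, 8), 8) merely clears the low
// 8 bits of each lane. If OriginalDemandedBits = 0xFFFFFF00, those cleared
// bits are never observed (ZeroBits = 0xFF is a subset of the unused bits),
// so the whole shift pair can be replaced by Val.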
19197
19198bool AArch64TargetLowering::isConstantUnsignedBitfieldExtactLegal(
19199 unsigned Opc, LLT Ty1, LLT Ty2) const {
19200 return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
19201}

/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/include/llvm/ADT/APInt.h

1//===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a class to represent arbitrary precision
11/// integral constant values and operations on them.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_ADT_APINT_H
16#define LLVM_ADT_APINT_H
17
18#include "llvm/Support/Compiler.h"
19#include "llvm/Support/MathExtras.h"
20#include <cassert>
21#include <climits>
22#include <cstring>
23#include <utility>
24
25namespace llvm {
26class FoldingSetNodeID;
27class StringRef;
28class hash_code;
29class raw_ostream;
30
31template <typename T> class SmallVectorImpl;
32template <typename T> class ArrayRef;
33template <typename T> class Optional;
34template <typename T> struct DenseMapInfo;
35
36class APInt;
37
38inline APInt operator-(APInt);
39
40//===----------------------------------------------------------------------===//
41// APInt Class
42//===----------------------------------------------------------------------===//
43
44/// Class for arbitrary precision integers.
45///
46/// APInt is a functional replacement for common case unsigned integer type like
47/// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width
48/// integer sizes and large integer value types such as 3-bits, 15-bits, or more
49/// than 64-bits of precision. APInt provides a variety of arithmetic operators
50/// and methods to manipulate integer values of any bit-width. It supports both
51/// the typical integer arithmetic and comparison operations as well as bitwise
52/// manipulation.
53///
54/// The class has several invariants worth noting:
55/// * All bit, byte, and word positions are zero-based.
56/// * Once the bit width is set, it doesn't change except by the Truncate,
57/// SignExtend, or ZeroExtend operations.
58/// * All binary operators must be on APInt instances of the same bit width.
59/// Attempting to use these operators on instances with different bit
60/// widths will yield an assertion.
61/// * The value is stored canonically as an unsigned value. For operations
62/// where it makes a difference, there are both signed and unsigned variants
63/// of the operation. For example, sdiv and udiv. However, because the bit
64/// widths must be the same, operations such as Mul and Add produce the same
65/// results regardless of whether the values are interpreted as signed or
66/// not.
67/// * In general, the class tries to follow the style of computation that LLVM
68/// uses in its IR. This simplifies its use for LLVM.
69/// * APInt supports zero-bit-width values, but operations that require bits
70/// are not defined on it (e.g. you cannot ask for the sign of a zero-bit
71/// integer). This means that operations like zero extension and logical
72/// shifts are defined, but sign extension and ashr is not. Zero bit values
73/// compare and hash equal to themselves, and countLeadingZeros returns 0.
74///
75class LLVM_NODISCARD APInt {
76public:
77 typedef uint64_t WordType;
78
79 /// This enum is used to hold the constants we needed for APInt.
80 enum : unsigned {
81 /// Byte size of a word.
82 APINT_WORD_SIZE = sizeof(WordType),
83 /// Bits in a word.
84 APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT
85 };
86
87 enum class Rounding {
88 DOWN,
89 TOWARD_ZERO,
90 UP,
91 };
92
93 static constexpr WordType WORDTYPE_MAX = ~WordType(0);
94
95 /// \name Constructors
96 /// @{
97
98 /// Create a new APInt of numBits width, initialized as val.
99 ///
100 /// If isSigned is true then val is treated as if it were a signed value
101 /// (i.e. as an int64_t) and the appropriate sign extension to the bit width
102 /// will be done. Otherwise, no sign extension occurs (high order bits beyond
103 /// the range of val are zero filled).
104 ///
105 /// \param numBits the bit width of the constructed APInt
106 /// \param val the initial value of the APInt
107 /// \param isSigned how to treat signedness of val
108 APInt(unsigned numBits, uint64_t val, bool isSigned = false)
109 : BitWidth(numBits) {
110 if (isSingleWord()) {
111 U.VAL = val;
112 clearUnusedBits();
113 } else {
114 initSlowCase(val, isSigned);
115 }
116 }
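[Editor's note, not part of the annotated source: a minimal usage sketch of the (numBits, val, isSigned) constructor shown above, assuming llvm/ADT/APInt.h is on the include path; the helper name constructorSketch is only for illustration.]

#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

void constructorSketch() {
  // With isSigned=true the 64-bit value -8 is sign extended into all 128
  // bits; with isSigned=false the bits above 64 stay zero filled.
  APInt SExt(128, (uint64_t)-8, /*isSigned=*/true);
  APInt ZExt(128, (uint64_t)-8, /*isSigned=*/false);
  assert(SExt.isNegative() && SExt.getSExtValue() == -8);
  assert(!ZExt.isNegative() && ZExt.getActiveBits() == 64);
}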
117
118 /// Construct an APInt of numBits width, initialized as bigVal[].
119 ///
120 /// Note that bigVal.size() can be smaller or larger than the corresponding
121 /// bit width but any extraneous bits will be dropped.
122 ///
123 /// \param numBits the bit width of the constructed APInt
124 /// \param bigVal a sequence of words to form the initial value of the APInt
125 APInt(unsigned numBits, ArrayRef<uint64_t> bigVal);
126
127 /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but
128 /// deprecated because this constructor is prone to ambiguity with the
129 /// APInt(unsigned, uint64_t, bool) constructor.
130 ///
131 /// If this overload is ever deleted, care should be taken to prevent calls
132 /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool)
133 /// constructor.
134 APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]);
135
136 /// Construct an APInt from a string representation.
137 ///
138 /// This constructor interprets the string \p str in the given radix. The
139 /// interpretation stops when the first character that is not suitable for the
140 /// radix is encountered, or the end of the string. Acceptable radix values
141 /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the
142 /// string to require more bits than numBits.
143 ///
144 /// \param numBits the bit width of the constructed APInt
145 /// \param str the string to be interpreted
146 /// \param radix the radix to use for the conversion
147 APInt(unsigned numBits, StringRef str, uint8_t radix);
148
149 /// Default constructor that creates an APInt with a 1-bit zero value.
150 explicit APInt() : BitWidth(1) { U.VAL = 0; }
151
152 /// Copy Constructor.
153 APInt(const APInt &that) : BitWidth(that.BitWidth) {
154 if (isSingleWord())
155 U.VAL = that.U.VAL;
156 else
157 initSlowCase(that);
158 }
159
160 /// Move Constructor.
161 APInt(APInt &&that) : BitWidth(that.BitWidth) {
162 memcpy(&U, &that.U, sizeof(U));
163 that.BitWidth = 0;
164 }
165
166 /// Destructor.
167 ~APInt() {
168 if (needsCleanup())
169 delete[] U.pVal;
170 }
171
172 /// @}
173 /// \name Value Generators
174 /// @{
175
176 /// Get the '0' value for the specified bit-width.
177 static APInt getZero(unsigned numBits) { return APInt(numBits, 0); }
178
179 /// NOTE: This is soft-deprecated. Please use `getZero()` instead.
180 static APInt getNullValue(unsigned numBits) { return getZero(numBits); }
181
182 /// Return an APInt zero bits wide.
183 static APInt getZeroWidth() { return getZero(0); }
184
185 /// Gets maximum unsigned value of APInt for specific bit width.
186 static APInt getMaxValue(unsigned numBits) { return getAllOnes(numBits); }
187
188 /// Gets maximum signed value of APInt for a specific bit width.
189 static APInt getSignedMaxValue(unsigned numBits) {
190 APInt API = getAllOnes(numBits);
191 API.clearBit(numBits - 1);
192 return API;
193 }
194
195 /// Gets minimum unsigned value of APInt for a specific bit width.
196 static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
197
198 /// Gets minimum signed value of APInt for a specific bit width.
199 static APInt getSignedMinValue(unsigned numBits) {
200 APInt API(numBits, 0);
201 API.setBit(numBits - 1);
202 return API;
203 }
204
205 /// Get the SignMask for a specific bit width.
206 ///
207 /// This is just a wrapper function of getSignedMinValue(), and it helps code
208 /// readability when we want to get a SignMask.
209 static APInt getSignMask(unsigned BitWidth) {
210 return getSignedMinValue(BitWidth);
211 }
212
213 /// Return an APInt of a specified width with all bits set.
214 static APInt getAllOnes(unsigned numBits) {
215 return APInt(numBits, WORDTYPE_MAX, true);
216 }
217
218 /// NOTE: This is soft-deprecated. Please use `getAllOnes()` instead.
219 static APInt getAllOnesValue(unsigned numBits) { return getAllOnes(numBits); }
220
221 /// Return an APInt with exactly one bit set in the result.
222 static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
223 APInt Res(numBits, 0);
224 Res.setBit(BitNo);
225 return Res;
226 }
227
228 /// Get a value with a block of bits set.
229 ///
230 /// Constructs an APInt value that has a contiguous range of bits set. The
231 /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
232 /// bits will be zero. For example, with parameters(32, 0, 16) you would get
233 /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than
234 /// \p hiBit.
235 ///
236 /// \param numBits the intended bit width of the result
237 /// \param loBit the index of the lowest bit set.
238 /// \param hiBit the index of the highest bit set.
239 ///
240 /// \returns An APInt value with the requested bits set.
241 static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
242 APInt Res(numBits, 0);
243 Res.setBits(loBit, hiBit);
244 return Res;
245 }
246
247 /// Wrap version of getBitsSet.
248 /// If \p hiBit is bigger than \p loBit, this is same with getBitsSet.
249 /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example,
250 /// with parameters (32, 28, 4), you would get 0xF000000F.
251 /// If \p hiBit is equal to \p loBit, you would get a result with all bits
252 /// set.
253 static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit,
254 unsigned hiBit) {
255 APInt Res(numBits, 0);
256 Res.setBitsWithWrap(loBit, hiBit);
257 return Res;
258 }
259
260 /// Constructs an APInt value that has a contiguous range of bits set. The
261 /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
262 /// bits will be zero. For example, with parameters(32, 12) you would get
263 /// 0xFFFFF000.
264 ///
265 /// \param numBits the intended bit width of the result
266 /// \param loBit the index of the lowest bit to set.
267 ///
268 /// \returns An APInt value with the requested bits set.
269 static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
270 APInt Res(numBits, 0);
271 Res.setBitsFrom(loBit);
272 return Res;
273 }
274
275 /// Constructs an APInt value that has the top hiBitsSet bits set.
276 ///
277 /// \param numBits the bitwidth of the result
278 /// \param hiBitsSet the number of high-order bits set in the result.
279 static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
280 APInt Res(numBits, 0);
281 Res.setHighBits(hiBitsSet);
282 return Res;
283 }
284
285 /// Constructs an APInt value that has the bottom loBitsSet bits set.
286 ///
287 /// \param numBits the bitwidth of the result
288 /// \param loBitsSet the number of low-order bits set in the result.
289 static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
290 APInt Res(numBits, 0);
291 Res.setLowBits(loBitsSet);
292 return Res;
293 }
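[Editor's note, not part of the annotated source: a small sketch of the bit-range generators above, using the example values from their doc comments; bitRangeSketch is an illustrative name only.]

#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

void bitRangeSketch() {
  assert(APInt::getBitsSet(32, 0, 16) == 0x0000FFFFu);        // [0,16) set
  assert(APInt::getBitsSetWithWrap(32, 28, 4) == 0xF000000Fu); // wraps past MSB
  assert(APInt::getHighBitsSet(32, 4) == 0xF0000000u);
  assert(APInt::getLowBitsSet(32, 4) == 0x0000000Fu);
}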
294
295 /// Return a value containing V broadcasted over NewLen bits.
296 static APInt getSplat(unsigned NewLen, const APInt &V);
297
298 /// @}
299 /// \name Value Tests
300 /// @{
301
302 /// Determine if this APInt just has one word to store value.
303 ///
304 /// \returns true if the number of bits <= 64, false otherwise.
305 bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; }
5.1
Field 'BitWidth' is <= APINT_BITS_PER_WORD
6
Returning the value 1, which participates in a condition later
306
307 /// Determine sign of this APInt.
308 ///
309 /// This tests the high bit of this APInt to determine if it is set.
310 ///
311 /// \returns true if this APInt is negative, false otherwise
312 bool isNegative() const { return (*this)[BitWidth - 1]; }
313
314 /// Determine if this APInt Value is non-negative (>= 0)
315 ///
316 /// This tests the high bit of the APInt to determine if it is unset.
317 bool isNonNegative() const { return !isNegative(); }
318
319 /// Determine if sign bit of this APInt is set.
320 ///
321 /// This tests the high bit of this APInt to determine if it is set.
322 ///
323 /// \returns true if this APInt has its sign bit set, false otherwise.
324 bool isSignBitSet() const { return (*this)[BitWidth - 1]; }
325
326 /// Determine if sign bit of this APInt is clear.
327 ///
328 /// This tests the high bit of this APInt to determine if it is clear.
329 ///
330 /// \returns true if this APInt has its sign bit clear, false otherwise.
331 bool isSignBitClear() const { return !isSignBitSet(); }
332
333 /// Determine if this APInt Value is positive.
334 ///
335 /// This tests if the value of this APInt is positive (> 0). Note
336 /// that 0 is not a positive value.
337 ///
338 /// \returns true if this APInt is positive.
339 bool isStrictlyPositive() const { return isNonNegative() && !isZero(); }
340
341 /// Determine if this APInt Value is non-positive (<= 0).
342 ///
343 /// \returns true if this APInt is non-positive.
344 bool isNonPositive() const { return !isStrictlyPositive(); }
345
346 /// Determine if all bits are set. This is true for zero-width values.
347 bool isAllOnes() const {
348 if (BitWidth == 0)
349 return true;
350 if (isSingleWord())
351 return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
352 return countTrailingOnesSlowCase() == BitWidth;
353 }
354
355 /// NOTE: This is soft-deprecated. Please use `isAllOnes()` instead.
356 bool isAllOnesValue() const { return isAllOnes(); }
357
358 /// Determine if this value is zero, i.e. all bits are clear.
359 bool isZero() const {
360 if (isSingleWord())
361 return U.VAL == 0;
362 return countLeadingZerosSlowCase() == BitWidth;
363 }
364
365 /// NOTE: This is soft-deprecated. Please use `isZero()` instead.
366 bool isNullValue() const { return isZero(); }
367
368 /// Determine if this is a value of 1.
369 ///
370 /// This checks to see if the value of this APInt is one.
371 bool isOne() const {
372 if (isSingleWord())
373 return U.VAL == 1;
374 return countLeadingZerosSlowCase() == BitWidth - 1;
375 }
376
377 /// NOTE: This is soft-deprecated. Please use `isOne()` instead.
378 bool isOneValue() const { return isOne(); }
379
380 /// Determine if this is the largest unsigned value.
381 ///
382 /// This checks to see if the value of this APInt is the maximum unsigned
383 /// value for the APInt's bit width.
384 bool isMaxValue() const { return isAllOnes(); }
385
386 /// Determine if this is the largest signed value.
387 ///
388 /// This checks to see if the value of this APInt is the maximum signed
389 /// value for the APInt's bit width.
390 bool isMaxSignedValue() const {
391 if (isSingleWord()) {
392 assert(BitWidth && "zero width values not allowed");
393 return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
394 }
395 return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
396 }
397
398 /// Determine if this is the smallest unsigned value.
399 ///
400 /// This checks to see if the value of this APInt is the minimum unsigned
401 /// value for the APInt's bit width.
402 bool isMinValue() const { return isZero(); }
403
404 /// Determine if this is the smallest signed value.
405 ///
406 /// This checks to see if the value of this APInt is the minimum signed
407 /// value for the APInt's bit width.
408 bool isMinSignedValue() const {
409 if (isSingleWord()) {
410 assert(BitWidth && "zero width values not allowed");
411 return U.VAL == (WordType(1) << (BitWidth - 1));
412 }
413 return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
414 }
415
416 /// Check if this APInt has an N-bits unsigned integer value.
417 bool isIntN(unsigned N) const { return getActiveBits() <= N; }
418
419 /// Check if this APInt has an N-bits signed integer value.
420 bool isSignedIntN(unsigned N) const { return getMinSignedBits() <= N; }
421
422 /// Check if this APInt's value is a power of two greater than zero.
423 ///
424 /// \returns true if the argument APInt value is a power of two > 0.
425 bool isPowerOf2() const {
426 if (isSingleWord()) {
427 assert(BitWidth && "zero width values not allowed");
428 return isPowerOf2_64(U.VAL);
429 }
430 return countPopulationSlowCase() == 1;
431 }
432
433 /// Check if the APInt's value is returned by getSignMask.
434 ///
435 /// \returns true if this is the value returned by getSignMask.
436 bool isSignMask() const { return isMinSignedValue(); }
437
438 /// Convert APInt to a boolean value.
439 ///
440 /// This converts the APInt to a boolean value as a test against zero.
441 bool getBoolValue() const { return !isZero(); }
442
443 /// If this value is smaller than the specified limit, return it, otherwise
444 /// return the limit value. This causes the value to saturate to the limit.
445 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) const {
446 return ugt(Limit) ? Limit : getZExtValue();
447 }
448
449 /// Check if the APInt consists of a repeated bit pattern.
450 ///
451 /// e.g. 0x01010101 satisfies isSplat(8).
452 /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit
453 /// width without remainder.
454 bool isSplat(unsigned SplatSizeInBits) const;
455
456 /// \returns true if this APInt value is a sequence of \param numBits ones
457 /// starting at the least significant bit with the remainder zero.
458 bool isMask(unsigned numBits) const {
459 assert(numBits != 0 && "numBits must be non-zero");
460 assert(numBits <= BitWidth && "numBits out of range");
461 if (isSingleWord())
462 return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits));
463 unsigned Ones = countTrailingOnesSlowCase();
464 return (numBits == Ones) &&
465 ((Ones + countLeadingZerosSlowCase()) == BitWidth);
466 }
467
468 /// \returns true if this APInt is a non-empty sequence of ones starting at
469 /// the least significant bit with the remainder zero.
470 /// Ex. isMask(0x0000FFFFU) == true.
471 bool isMask() const {
472 if (isSingleWord())
473 return isMask_64(U.VAL);
474 unsigned Ones = countTrailingOnesSlowCase();
475 return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
476 }
477
478 /// Return true if this APInt value contains a sequence of ones with
479 /// the remainder zero.
480 bool isShiftedMask() const {
481 if (isSingleWord())
482 return isShiftedMask_64(U.VAL);
483 unsigned Ones = countPopulationSlowCase();
484 unsigned LeadZ = countLeadingZerosSlowCase();
485 return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
486 }
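[Editor's note, not part of the annotated source: a short sketch of the mask predicates above; maskPredicateSketch is an illustrative name only.]

#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

void maskPredicateSketch() {
  assert(APInt(32, 0x0000FFFFu).isMask());        // contiguous ones from bit 0
  assert(APInt(32, 0x0000FFFFu).isMask(16));      // exactly 16 low ones
  assert(APInt(32, 0x00FFFF00u).isShiftedMask()); // contiguous ones anywhere
  assert(!APInt(32, 0x00FF00FFu).isShiftedMask());
}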
487
488 /// Compute an APInt containing numBits highbits from this APInt.
489 ///
490 /// Get an APInt with the same BitWidth as this APInt, just zero mask the low
491 /// bits and right shift to the least significant bit.
492 ///
493 /// \returns the high "numBits" bits of this APInt.
494 APInt getHiBits(unsigned numBits) const;
495
496 /// Compute an APInt containing numBits lowbits from this APInt.
497 ///
498 /// Get an APInt with the same BitWidth as this APInt, just zero mask the high
499 /// bits.
500 ///
501 /// \returns the low "numBits" bits of this APInt.
502 APInt getLoBits(unsigned numBits) const;
503
504 /// Determine if two APInts have the same value, after zero-extending
505 /// one of them (if needed!) to ensure that the bit-widths match.
506 static bool isSameValue(const APInt &I1, const APInt &I2) {
507 if (I1.getBitWidth() == I2.getBitWidth())
508 return I1 == I2;
509
510 if (I1.getBitWidth() > I2.getBitWidth())
511 return I1 == I2.zext(I1.getBitWidth());
512
513 return I1.zext(I2.getBitWidth()) == I2;
514 }
515
516 /// Overload to compute a hash_code for an APInt value.
517 friend hash_code hash_value(const APInt &Arg);
518
519 /// This function returns a pointer to the internal storage of the APInt.
520 /// This is useful for writing out the APInt in binary form without any
521 /// conversions.
522 const uint64_t *getRawData() const {
523 if (isSingleWord())
524 return &U.VAL;
525 return &U.pVal[0];
526 }
527
528 /// @}
529 /// \name Unary Operators
530 /// @{
531
532 /// Postfix increment operator. Increment *this by 1.
533 ///
534 /// \returns a new APInt value representing the original value of *this.
535 APInt operator++(int) {
536 APInt API(*this);
537 ++(*this);
538 return API;
539 }
540
541 /// Prefix increment operator.
542 ///
543 /// \returns *this incremented by one
544 APInt &operator++();
545
546 /// Postfix decrement operator. Decrement *this by 1.
547 ///
548 /// \returns a new APInt value representing the original value of *this.
549 APInt operator--(int) {
550 APInt API(*this);
551 --(*this);
552 return API;
553 }
554
555 /// Prefix decrement operator.
556 ///
557 /// \returns *this decremented by one.
558 APInt &operator--();
559
560 /// Logical negation operation on this APInt returns true if zero, like normal
561 /// integers.
562 bool operator!() const { return isZero(); }
563
564 /// @}
565 /// \name Assignment Operators
566 /// @{
567
568 /// Copy assignment operator.
569 ///
570 /// \returns *this after assignment of RHS.
571 APInt &operator=(const APInt &RHS) {
572 // The common case (both source or dest being inline) doesn't require
573 // allocation or deallocation.
574 if (isSingleWord() && RHS.isSingleWord()) {
575 U.VAL = RHS.U.VAL;
576 BitWidth = RHS.BitWidth;
577 return *this;
578 }
579
580 assignSlowCase(RHS);
581 return *this;
582 }
583
584 /// Move assignment operator.
585 APInt &operator=(APInt &&that) {
586#ifdef EXPENSIVE_CHECKS
587 // Some std::shuffle implementations still do self-assignment.
588 if (this == &that)
589 return *this;
590#endif
591 assert(this != &that && "Self-move not supported");
592 if (!isSingleWord())
593 delete[] U.pVal;
594
595 // Use memcpy so that type based alias analysis sees both VAL and pVal
596 // as modified.
597 memcpy(&U, &that.U, sizeof(U));
598
599 BitWidth = that.BitWidth;
600 that.BitWidth = 0;
601 return *this;
602 }
603
604 /// Assignment operator.
605 ///
606 /// The RHS value is assigned to *this. If the significant bits in RHS exceed
607 /// the bit width, the excess bits are truncated. If the bit width is larger
608 /// than 64, the value is zero filled in the unspecified high order bits.
609 ///
610 /// \returns *this after assignment of RHS value.
611 APInt &operator=(uint64_t RHS) {
612 if (isSingleWord()) {
613 U.VAL = RHS;
614 return clearUnusedBits();
615 }
616 U.pVal[0] = RHS;
617 memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
618 return *this;
619 }
620
621 /// Bitwise AND assignment operator.
622 ///
623 /// Performs a bitwise AND operation on this APInt and RHS. The result is
624 /// assigned to *this.
625 ///
626 /// \returns *this after ANDing with RHS.
627 APInt &operator&=(const APInt &RHS) {
628 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
629 if (isSingleWord())
630 U.VAL &= RHS.U.VAL;
631 else
632 andAssignSlowCase(RHS);
633 return *this;
634 }
635
636 /// Bitwise AND assignment operator.
637 ///
638 /// Performs a bitwise AND operation on this APInt and RHS. RHS is
639 /// logically zero-extended or truncated to match the bit-width of
640 /// the LHS.
641 APInt &operator&=(uint64_t RHS) {
642 if (isSingleWord()) {
643 U.VAL &= RHS;
644 return *this;
645 }
646 U.pVal[0] &= RHS;
647 memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
648 return *this;
649 }
650
651 /// Bitwise OR assignment operator.
652 ///
653 /// Performs a bitwise OR operation on this APInt and RHS. The result is
654 /// assigned *this;
655 ///
656 /// \returns *this after ORing with RHS.
657 APInt &operator|=(const APInt &RHS) {
658 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
659 if (isSingleWord())
660 U.VAL |= RHS.U.VAL;
661 else
662 orAssignSlowCase(RHS);
663 return *this;
664 }
665
666 /// Bitwise OR assignment operator.
667 ///
668 /// Performs a bitwise OR operation on this APInt and RHS. RHS is
669 /// logically zero-extended or truncated to match the bit-width of
670 /// the LHS.
671 APInt &operator|=(uint64_t RHS) {
672 if (isSingleWord()) {
673 U.VAL |= RHS;
674 return clearUnusedBits();
675 }
676 U.pVal[0] |= RHS;
677 return *this;
678 }
679
680 /// Bitwise XOR assignment operator.
681 ///
682 /// Performs a bitwise XOR operation on this APInt and RHS. The result is
683 /// assigned to *this.
684 ///
685 /// \returns *this after XORing with RHS.
686 APInt &operator^=(const APInt &RHS) {
687 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
688 if (isSingleWord())
689 U.VAL ^= RHS.U.VAL;
690 else
691 xorAssignSlowCase(RHS);
692 return *this;
693 }
694
695 /// Bitwise XOR assignment operator.
696 ///
697 /// Performs a bitwise XOR operation on this APInt and RHS. RHS is
698 /// logically zero-extended or truncated to match the bit-width of
699 /// the LHS.
700 APInt &operator^=(uint64_t RHS) {
701 if (isSingleWord()) {
702 U.VAL ^= RHS;
703 return clearUnusedBits();
704 }
705 U.pVal[0] ^= RHS;
706 return *this;
707 }
708
709 /// Multiplication assignment operator.
710 ///
711 /// Multiplies this APInt by RHS and assigns the result to *this.
712 ///
713 /// \returns *this
714 APInt &operator*=(const APInt &RHS);
715 APInt &operator*=(uint64_t RHS);
716
717 /// Addition assignment operator.
718 ///
719 /// Adds RHS to *this and assigns the result to *this.
720 ///
721 /// \returns *this
722 APInt &operator+=(const APInt &RHS);
723 APInt &operator+=(uint64_t RHS);
724
725 /// Subtraction assignment operator.
726 ///
727 /// Subtracts RHS from *this and assigns the result to *this.
728 ///
729 /// \returns *this
730 APInt &operator-=(const APInt &RHS);
731 APInt &operator-=(uint64_t RHS);
732
733 /// Left-shift assignment function.
734 ///
735 /// Shifts *this left by shiftAmt and assigns the result to *this.
736 ///
737 /// \returns *this after shifting left by ShiftAmt
738 APInt &operator<<=(unsigned ShiftAmt) {
739 assert(ShiftAmt <= BitWidth && "Invalid shift amount");
740 if (isSingleWord()) {
741 if (ShiftAmt == BitWidth)
742 U.VAL = 0;
743 else
744 U.VAL <<= ShiftAmt;
745 return clearUnusedBits();
746 }
747 shlSlowCase(ShiftAmt);
748 return *this;
749 }
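[Editor's note, not part of the annotated source: a sketch of the ShiftAmt == BitWidth special case handled by operator<<= above; shiftWidthSketch is an illustrative name only.]

#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

void shiftWidthSketch() {
  // Shifting an APInt by exactly its bit width is handled explicitly above
  // and yields zero, whereas shifting a raw uint64_t by 64 would be
  // undefined behaviour in plain C++.
  APInt V(64, 1);
  assert(V.shl(64).isZero());
}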
750
751 /// Left-shift assignment function.
752 ///
753 /// Shifts *this left by shiftAmt and assigns the result to *this.
754 ///
755 /// \returns *this after shifting left by ShiftAmt
756 APInt &operator<<=(const APInt &ShiftAmt);
757
758 /// @}
759 /// \name Binary Operators
760 /// @{
761
762 /// Multiplication operator.
763 ///
764 /// Multiplies this APInt by RHS and returns the result.
765 APInt operator*(const APInt &RHS) const;
766
767 /// Left logical shift operator.
768 ///
769 /// Shifts this APInt left by \p Bits and returns the result.
770 APInt operator<<(unsigned Bits) const { return shl(Bits); }
771
772 /// Left logical shift operator.
773 ///
774 /// Shifts this APInt left by \p Bits and returns the result.
775 APInt operator<<(const APInt &Bits) const { return shl(Bits); }
776
777 /// Arithmetic right-shift function.
778 ///
779 /// Arithmetic right-shift this APInt by shiftAmt.
780 APInt ashr(unsigned ShiftAmt) const {
781 APInt R(*this);
782 R.ashrInPlace(ShiftAmt);
783 return R;
784 }
785
786 /// Arithmetic right-shift this APInt by ShiftAmt in place.
787 void ashrInPlace(unsigned ShiftAmt) {
788 assert(ShiftAmt <= BitWidth && "Invalid shift amount");
789 if (isSingleWord()) {
790 int64_t SExtVAL = SignExtend64(U.VAL, BitWidth);
791 if (ShiftAmt == BitWidth)
792 U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit.
793 else
794 U.VAL = SExtVAL >> ShiftAmt;
795 clearUnusedBits();
796 return;
797 }
798 ashrSlowCase(ShiftAmt);
799 }
800
801 /// Logical right-shift function.
802 ///
803 /// Logical right-shift this APInt by shiftAmt.
804 APInt lshr(unsigned shiftAmt) const {
805 APInt R(*this);
806 R.lshrInPlace(shiftAmt);
807 return R;
808 }
809
810 /// Logical right-shift this APInt by ShiftAmt in place.
811 void lshrInPlace(unsigned ShiftAmt) {
812 assert(ShiftAmt <= BitWidth && "Invalid shift amount");
813 if (isSingleWord()) {
814 if (ShiftAmt == BitWidth)
815 U.VAL = 0;
816 else
817 U.VAL >>= ShiftAmt;
818 return;
819 }
820 lshrSlowCase(ShiftAmt);
821 }
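[Editor's note, not part of the annotated source: a sketch contrasting the arithmetic and logical right shifts above; rightShiftSketch is an illustrative name only.]

#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

void rightShiftSketch() {
  APInt V(8, 0x80);          // sign bit set
  assert(V.ashr(4) == 0xF8); // arithmetic shift replicates the sign bit
  assert(V.lshr(4) == 0x08); // logical shift fills with zeros
}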
822
823 /// Left-shift function.
824 ///
825 /// Left-shift this APInt by shiftAmt.
826 APInt shl(unsigned shiftAmt) const {
827 APInt R(*this);
828 R <<= shiftAmt;
829 return R;
830 }
831
832 /// Rotate left by rotateAmt.
833 APInt rotl(unsigned rotateAmt) const;
834
835 /// Rotate right by rotateAmt.
836 APInt rotr(unsigned rotateAmt) const;
837
838 /// Arithmetic right-shift function.
839 ///
840 /// Arithmetic right-shift this APInt by shiftAmt.
841 APInt ashr(const APInt &ShiftAmt) const {
842 APInt R(*this);
843 R.ashrInPlace(ShiftAmt);
844 return R;
845 }
846
847 /// Arithmetic right-shift this APInt by shiftAmt in place.
848 void ashrInPlace(const APInt &shiftAmt);
849
850 /// Logical right-shift function.
851 ///
852 /// Logical right-shift this APInt by shiftAmt.
853 APInt lshr(const APInt &ShiftAmt) const {
854 APInt R(*this);
855 R.lshrInPlace(ShiftAmt);
856 return R;
857 }
858
859 /// Logical right-shift this APInt by ShiftAmt in place.
860 void lshrInPlace(const APInt &ShiftAmt);
861
862 /// Left-shift function.
863 ///
864 /// Left-shift this APInt by shiftAmt.
865 APInt shl(const APInt &ShiftAmt) const {
866 APInt R(*this);
867 R <<= ShiftAmt;
868 return R;
869 }
870
871 /// Rotate left by rotateAmt.
872 APInt rotl(const APInt &rotateAmt) const;
873
874 /// Rotate right by rotateAmt.
875 APInt rotr(const APInt &rotateAmt) const;
876
877 /// Concatenate the bits from "NewLSB" onto the bottom of *this. This is
878 /// equivalent to:
879 /// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth)
880 APInt concat(const APInt &NewLSB) const {
881 /// If the result will be small, then both the merged values are small.
882 unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth();
883 if (NewWidth <= APINT_BITS_PER_WORD)
884 return APInt(NewWidth, (U.VAL << NewLSB.getBitWidth()) | NewLSB.U.VAL);
885 return concatSlowCase(NewLSB);
886 }
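[Editor's note, not part of the annotated source: a sketch of concat, following the formula in its doc comment (*this supplies the high bits, NewLSB the low bits); concatSketch is an illustrative name only.]

#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

void concatSketch() {
  APInt Hi(8, 0xAB), Lo(8, 0xCD);
  APInt Joined = Hi.concat(Lo);
  assert(Joined.getBitWidth() == 16 && Joined == 0xABCD);
}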
887
888 /// Unsigned division operation.
889 ///
890 /// Perform an unsigned divide operation on this APInt by RHS. Both this and
891 /// RHS are treated as unsigned quantities for purposes of this division.
892 ///
893 /// \returns a new APInt value containing the division result, rounded towards
894 /// zero.
895 APInt udiv(const APInt &RHS) const;
896 APInt udiv(uint64_t RHS) const;
897
898 /// Signed division function for APInt.
899 ///
900 /// Signed divide this APInt by APInt RHS.
901 ///
902 /// The result is rounded towards zero.
903 APInt sdiv(const APInt &RHS) const;
904 APInt sdiv(int64_t RHS) const;
905
906 /// Unsigned remainder operation.
907 ///
908 /// Perform an unsigned remainder operation on this APInt with RHS being the
909 /// divisor. Both this and RHS are treated as unsigned quantities for purposes
910 /// of this operation. Note that this is a true remainder operation and not a
911 /// modulo operation because the sign follows the sign of the dividend which
912 /// is *this.
913 ///
914 /// \returns a new APInt value containing the remainder result
915 APInt urem(const APInt &RHS) const;
916 uint64_t urem(uint64_t RHS) const;
917
918 /// Function for signed remainder operation.
919 ///
920 /// Signed remainder operation on APInt.
921 APInt srem(const APInt &RHS) const;
922 int64_t srem(int64_t RHS) const;
923
924 /// Dual division/remainder interface.
925 ///
926 /// Sometimes it is convenient to divide two APInt values and obtain both the
927 /// quotient and remainder. This function does both operations in the same
928 /// computation making it a little more efficient. The pair of input arguments
929 /// may overlap with the pair of output arguments. It is safe to call
930 /// udivrem(X, Y, X, Y), for example.
931 static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
932 APInt &Remainder);
933 static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
934 uint64_t &Remainder);
935
936 static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
937 APInt &Remainder);
938 static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient,
939 int64_t &Remainder);
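[Editor's note, not part of the annotated source: a sketch of the signed/unsigned division and the dual divide/remainder interface above; divisionSketch is an illustrative name only.]

#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

void divisionSketch() {
  APInt A(8, 0xF8);                      // 248 unsigned, -8 when read as signed
  assert(A.udiv(APInt(8, 2)) == 124);
  assert(A.sdiv(APInt(8, 2)).getSExtValue() == -4);
  APInt Q(8, 0), R(8, 0);
  APInt::udivrem(A, APInt(8, 3), Q, R);  // quotient and remainder in one call
  assert(Q == 82 && R == 2);
}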
940
941 // Operations that return overflow indicators.
942 APInt sadd_ov(const APInt &RHS, bool &Overflow) const;
943 APInt uadd_ov(const APInt &RHS, bool &Overflow) const;
944 APInt ssub_ov(const APInt &RHS, bool &Overflow) const;
945 APInt usub_ov(const APInt &RHS, bool &Overflow) const;
946 APInt sdiv_ov(const APInt &RHS, bool &Overflow) const;
947 APInt smul_ov(const APInt &RHS, bool &Overflow) const;
948 APInt umul_ov(const APInt &RHS, bool &Overflow) const;
949 APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
950 APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
951
952 // Operations that saturate
953 APInt sadd_sat(const APInt &RHS) const;
954 APInt uadd_sat(const APInt &RHS) const;
955 APInt ssub_sat(const APInt &RHS) const;
956 APInt usub_sat(const APInt &RHS) const;
957 APInt smul_sat(const APInt &RHS) const;
958 APInt umul_sat(const APInt &RHS) const;
959 APInt sshl_sat(const APInt &RHS) const;
960 APInt ushl_sat(const APInt &RHS) const;
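[Editor's note, not part of the annotated source: a sketch contrasting the overflow-reporting and saturating additions listed above; overflowSketch is an illustrative name only.]

#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

void overflowSketch() {
  bool Overflow = false;
  APInt Sum = APInt(8, 200).uadd_ov(APInt(8, 100), Overflow);
  assert(Overflow && Sum == 44);                        // 300 wraps modulo 2^8
  assert(APInt(8, 200).uadd_sat(APInt(8, 100)) == 255); // saturates instead
}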
961
962 /// Array-indexing support.
963 ///
964 /// \returns the bit value at bitPosition
965 bool operator[](unsigned bitPosition) const {
966 assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
967 return (maskBit(bitPosition) & getWord(bitPosition)) != 0;
968 }
969
970 /// @}
971 /// \name Comparison Operators
972 /// @{
973
974 /// Equality operator.
975 ///
976 /// Compares this APInt with RHS for the validity of the equality
977 /// relationship.
978 bool operator==(const APInt &RHS) const {
979 assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths");
980 if (isSingleWord())
981 return U.VAL == RHS.U.VAL;
982 return equalSlowCase(RHS);
983 }
984
985 /// Equality operator.
986 ///
987 /// Compares this APInt with a uint64_t for the validity of the equality
988 /// relationship.
989 ///
990 /// \returns true if *this == Val
991 bool operator==(uint64_t Val) const {
992 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val;
993 }
994
995 /// Equality comparison.
996 ///
997 /// Compares this APInt with RHS for the validity of the equality
998 /// relationship.
999 ///
1000 /// \returns true if *this == Val
1001 bool eq(const APInt &RHS) const { return (*this) == RHS; }
1002
1003 /// Inequality operator.
1004 ///
1005 /// Compares this APInt with RHS for the validity of the inequality
1006 /// relationship.
1007 ///
1008 /// \returns true if *this != Val
1009 bool operator!=(const APInt &RHS) const { return !((*this) == RHS); }
1010
1011 /// Inequality operator.
1012 ///
1013 /// Compares this APInt with a uint64_t for the validity of the inequality
1014 /// relationship.
1015 ///
1016 /// \returns true if *this != Val
1017 bool operator!=(uint64_t Val) const { return !((*this) == Val); }
1018
1019 /// Inequality comparison
1020 ///
1021 /// Compares this APInt with RHS for the validity of the inequality
1022 /// relationship.
1023 ///
1024 /// \returns true if *this != Val
1025 bool ne(const APInt &RHS) const { return !((*this) == RHS); }
1026
1027 /// Unsigned less than comparison
1028 ///
1029 /// Regards both *this and RHS as unsigned quantities and compares them for
1030 /// the validity of the less-than relationship.
1031 ///
1032 /// \returns true if *this < RHS when both are considered unsigned.
1033 bool ult(const APInt &RHS) const { return compare(RHS) < 0; }
1034
1035 /// Unsigned less than comparison
1036 ///
1037 /// Regards both *this as an unsigned quantity and compares it with RHS for
1038 /// the validity of the less-than relationship.
1039 ///
1040 /// \returns true if *this < RHS when considered unsigned.
1041 bool ult(uint64_t RHS) const {
1042 // Only need to check active bits if not a single word.
1043 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS;
1044 }
1045
1046 /// Signed less than comparison
1047 ///
1048 /// Regards both *this and RHS as signed quantities and compares them for
1049 /// validity of the less-than relationship.
1050 ///
1051 /// \returns true if *this < RHS when both are considered signed.
1052 bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; }
1053
1054 /// Signed less than comparison
1055 ///
1056 /// Regards both *this as a signed quantity and compares it with RHS for
1057 /// the validity of the less-than relationship.
1058 ///
1059 /// \returns true if *this < RHS when considered signed.
1060 bool slt(int64_t RHS) const {
1061 return (!isSingleWord() && getMinSignedBits() > 64) ? isNegative()
1062 : getSExtValue() < RHS;
1063 }
1064
1065 /// Unsigned less or equal comparison
1066 ///
1067 /// Regards both *this and RHS as unsigned quantities and compares them for
1068 /// validity of the less-or-equal relationship.
1069 ///
1070 /// \returns true if *this <= RHS when both are considered unsigned.
1071 bool ule(const APInt &RHS) const { return compare(RHS) <= 0; }
1072
1073 /// Unsigned less or equal comparison
1074 ///
1075 /// Regards both *this as an unsigned quantity and compares it with RHS for
1076 /// the validity of the less-or-equal relationship.
1077 ///
1078 /// \returns true if *this <= RHS when considered unsigned.
1079 bool ule(uint64_t RHS) const { return !ugt(RHS); }
1080
1081 /// Signed less or equal comparison
1082 ///
1083 /// Regards both *this and RHS as signed quantities and compares them for
1084 /// validity of the less-or-equal relationship.
1085 ///
1086 /// \returns true if *this <= RHS when both are considered signed.
1087 bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; }
1088
1089 /// Signed less or equal comparison
1090 ///
1091 /// Regards both *this as a signed quantity and compares it with RHS for the
1092 /// validity of the less-or-equal relationship.
1093 ///
1094 /// \returns true if *this <= RHS when considered signed.
1095 bool sle(uint64_t RHS) const { return !sgt(RHS); }
1096
1097 /// Unsigned greater than comparison
1098 ///
1099 /// Regards both *this and RHS as unsigned quantities and compares them for
1100 /// the validity of the greater-than relationship.
1101 ///
1102 /// \returns true if *this > RHS when both are considered unsigned.
1103 bool ugt(const APInt &RHS) const { return !ule(RHS); }
1104
1105 /// Unsigned greater than comparison
1106 ///
1107 /// Regards both *this as an unsigned quantity and compares it with RHS for
1108 /// the validity of the greater-than relationship.
1109 ///
1110 /// \returns true if *this > RHS when considered unsigned.
1111 bool ugt(uint64_t RHS) const {
1112 // Only need to check active bits if not a single word.
1113 return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS;
1114 }
1115
1116 /// Signed greater than comparison
1117 ///
1118 /// Regards both *this and RHS as signed quantities and compares them for the
1119 /// validity of the greater-than relationship.
1120 ///
1121 /// \returns true if *this > RHS when both are considered signed.
1122 bool sgt(const APInt &RHS) const { return !sle(RHS); }
1123
1124 /// Signed greater than comparison
1125 ///
1126 /// Regards both *this as a signed quantity and compares it with RHS for
1127 /// the validity of the greater-than relationship.
1128 ///
1129 /// \returns true if *this > RHS when considered signed.
1130 bool sgt(int64_t RHS) const {
1131 return (!isSingleWord() && getMinSignedBits() > 64) ? !isNegative()
1132 : getSExtValue() > RHS;
1133 }
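[Editor's note, not part of the annotated source: a sketch showing why the signed and unsigned comparison predicates above can disagree on the same bit pattern; comparisonSketch is an illustrative name only.]

#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

void comparisonSketch() {
  APInt V(8, 200);                 // 200 unsigned, -56 when read as signed
  assert(V.ugt(100) && !V.sgt(100));
  assert(!V.ult(100) && V.slt(0));
}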
1134
1135 /// Unsigned greater or equal comparison
1136 ///
1137 /// Regards both *this and RHS as unsigned quantities and compares them for
1138 /// validity of the greater-or-equal relationship.
1139 ///
1140 /// \returns true if *this >= RHS when both are considered unsigned.
1141 bool uge(const APInt &RHS) const { return !ult(RHS); }
1142
1143 /// Unsigned greater or equal comparison
1144 ///
1145 /// Regards both *this as an unsigned quantity and compares it with RHS for
1146 /// the validity of the greater-or-equal relationship.
1147 ///
1148 /// \returns true if *this >= RHS when considered unsigned.
1149 bool uge(uint64_t RHS) const { return !ult(RHS); }
1150
1151 /// Signed greater or equal comparison
1152 ///
1153 /// Regards both *this and RHS as signed quantities and compares them for
1154 /// validity of the greater-or-equal relationship.
1155 ///
1156 /// \returns true if *this >= RHS when both are considered signed.
1157 bool sge(const APInt &RHS) const { return !slt(RHS); }
1158
1159 /// Signed greater or equal comparison
1160 ///
1161 /// Regards both *this as a signed quantity and compares it with RHS for
1162 /// the validity of the greater-or-equal relationship.
1163 ///
1164 /// \returns true if *this >= RHS when considered signed.
1165 bool sge(int64_t RHS) const { return !slt(RHS); }
1166
1167 /// This operation tests if there are any pairs of corresponding bits
1168 /// between this APInt and RHS that are both set.
1169 bool intersects(const APInt &RHS) const {
1170 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1171 if (isSingleWord())
1172 return (U.VAL & RHS.U.VAL) != 0;
1173 return intersectsSlowCase(RHS);
1174 }
1175
1176 /// This operation checks that all bits set in this APInt are also set in RHS.
1177 bool isSubsetOf(const APInt &RHS) const {
1178 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1179 if (isSingleWord())
1180 return (U.VAL & ~RHS.U.VAL) == 0;
1181 return isSubsetOfSlowCase(RHS);
1182 }
1183
1184 /// @}
1185 /// \name Resizing Operators
1186 /// @{
1187
1188 /// Truncate to new width.
1189 ///
1190 /// Truncate the APInt to a specified width. It is an error to specify a width
1191 /// that is greater than or equal to the current width.
1192 APInt trunc(unsigned width) const;
1193
1194 /// Truncate to new width with unsigned saturation.
1195 ///
1196 /// If the APInt, treated as unsigned integer, can be losslessly truncated to
1197 /// the new bitwidth, then return truncated APInt. Else, return max value.
1198 APInt truncUSat(unsigned width) const;
1199
1200 /// Truncate to new width with signed saturation.
1201 ///
1202 /// If this APInt, treated as signed integer, can be losslessly truncated to
1203 /// the new bitwidth, then return truncated APInt. Else, return either
1204 /// signed min value if the APInt was negative, or signed max value.
1205 APInt truncSSat(unsigned width) const;
1206
1207 /// Sign extend to a new width.
1208 ///
1209 /// This operation sign extends the APInt to a new width. If the high order
1210 /// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
1211 /// It is an error to specify a width that is less than or equal to the
1212 /// current width.
1213 APInt sext(unsigned width) const;
1214
1215 /// Zero extend to a new width.
1216 ///
1217 /// This operation zero extends the APInt to a new width. The high order bits
1218 /// are filled with 0 bits. It is an error to specify a width that is less
1219 /// than or equal to the current width.
1220 APInt zext(unsigned width) const;
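[Editor's note, not part of the annotated source: a sketch of the resizing operations above (trunc/sext/zext); extensionSketch is an illustrative name only.]

#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

void extensionSketch() {
  APInt V(8, 0x80);
  assert(V.zext(16) == 0x0080);       // high bits filled with zeros
  assert(V.sext(16) == 0xFF80);       // high bits copy the sign bit
  assert(V.zext(16).trunc(8) == 0x80);
}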
1221
1222 /// Sign extend or truncate to width
1223 ///
1224 /// Make this APInt have the bit width given by \p width. The value is sign
1225 /// extended, truncated, or left alone to make it that width.
1226 APInt sextOrTrunc(unsigned width) const;
1227
1228 /// Zero extend or truncate to width
1229 ///
1230 /// Make this APInt have the bit width given by \p width. The value is zero
1231 /// extended, truncated, or left alone to make it that width.
1232 APInt zextOrTrunc(unsigned width) const;
1233
1234 /// Truncate to width
1235 ///
1236 /// Make this APInt have the bit width given by \p width. The value is
1237 /// truncated or left alone to make it that width.
1238 APInt truncOrSelf(unsigned width) const;
1239
1240 /// Sign extend or truncate to width
1241 ///
1242 /// Make this APInt have the bit width given by \p width. The value is sign
1243 /// extended, or left alone to make it that width.
1244 APInt sextOrSelf(unsigned width) const;
1245
1246 /// Zero extend or truncate to width
1247 ///
1248 /// Make this APInt have the bit width given by \p width. The value is zero
1249 /// extended, or left alone to make it that width.
1250 APInt zextOrSelf(unsigned width) const;
1251
1252 /// @}
1253 /// \name Bit Manipulation Operators
1254 /// @{
1255
1256 /// Set every bit to 1.
1257 void setAllBits() {
1258 if (isSingleWord())
1259 U.VAL = WORDTYPE_MAX;
1260 else
1261 // Set all the bits in all the words.
1262 memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
1263 // Clear the unused ones
1264 clearUnusedBits();
1265 }
1266
1267 /// Set the given bit to 1 whose position is given as "bitPosition".
1268 void setBit(unsigned BitPosition) {
1269 assert(BitPosition < BitWidth && "BitPosition out of range");
1270 WordType Mask = maskBit(BitPosition);
1271 if (isSingleWord())
1272 U.VAL |= Mask;
1273 else
1274 U.pVal[whichWord(BitPosition)] |= Mask;
1275 }
1276
1277 /// Set the sign bit to 1.
1278 void setSignBit() { setBit(BitWidth - 1); }
1279
1280 /// Set a given bit to a given value.
1281 void setBitVal(unsigned BitPosition, bool BitValue) {
1282 if (BitValue)
1283 setBit(BitPosition);
1284 else
1285 clearBit(BitPosition);
1286 }
1287
1288 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1289 /// This function handles "wrap" case when \p loBit >= \p hiBit, and calls
1290 /// setBits when \p loBit < \p hiBit.
1291 /// For \p loBit == \p hiBit wrap case, set every bit to 1.
1292 void setBitsWithWrap(unsigned loBit, unsigned hiBit) {
1293 assert(hiBit <= BitWidth && "hiBit out of range");
1294 assert(loBit <= BitWidth && "loBit out of range");
1295 if (loBit < hiBit) {
1296 setBits(loBit, hiBit);
1297 return;
1298 }
1299 setLowBits(hiBit);
1300 setHighBits(BitWidth - loBit);
1301 }
1302
1303 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1304 /// This function handles case when \p loBit <= \p hiBit.
1305 void setBits(unsigned loBit, unsigned hiBit) {
1306 assert(hiBit <= BitWidth && "hiBit out of range");
1307 assert(loBit <= BitWidth && "loBit out of range");
1308 assert(loBit <= hiBit && "loBit greater than hiBit");
1309 if (loBit == hiBit)
1310 return;
1311 if (loBit < APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) {
1312 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
1313 mask <<= loBit;
1314 if (isSingleWord())
1315 U.VAL |= mask;
1316 else
1317 U.pVal[0] |= mask;
1318 } else {
1319 setBitsSlowCase(loBit, hiBit);
1320 }
1321 }
1322
1323 /// Set the top bits starting from loBit.
1324 void setBitsFrom(unsigned loBit) { return setBits(loBit, BitWidth); }
1325
1326 /// Set the bottom loBits bits.
1327 void setLowBits(unsigned loBits) { return setBits(0, loBits); }
1328
1329 /// Set the top hiBits bits.
1330 void setHighBits(unsigned hiBits) {
1331 return setBits(BitWidth - hiBits, BitWidth);
1332 }
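[Editor's note, not part of the annotated source: a sketch of the bit-setting helpers above; setBitsSketch is an illustrative name only.]

#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

void setBitsSketch() {
  APInt V(32, 0);
  V.setBits(8, 16);   // bits [8, 16)  -> 0x0000FF00
  V.setLowBits(4);    // bits [0, 4)   -> 0x0000FF0F
  V.setHighBits(4);   // bits [28, 32) -> 0xF000FF0F
  assert(V == 0xF000FF0Fu);
}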
1333
1334 /// Set every bit to 0.
1335 void clearAllBits() {
1336 if (isSingleWord())
1337 U.VAL = 0;
1338 else
1339 memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE);
1340 }
1341
1342 /// Set a given bit to 0.
1343 ///
1344 /// Set the given bit to 0 whose position is given as "bitPosition".
1345 void clearBit(unsigned BitPosition) {
1346 assert(BitPosition < BitWidth && "BitPosition out of range");
1347 WordType Mask = ~maskBit(BitPosition);
1348 if (isSingleWord())
1349 U.VAL &= Mask;
1350 else
1351 U.pVal[whichWord(BitPosition)] &= Mask;
1352 }
1353
1354 /// Set bottom loBits bits to 0.
1355 void clearLowBits(unsigned loBits) {
1356 assert(loBits <= BitWidth && "More bits than bitwidth");
1357 APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits);
1358 *this &= Keep;
1359 }
1360
1361 /// Set the sign bit to 0.
1362 void clearSignBit() { clearBit(BitWidth - 1); }
1363
1364 /// Toggle every bit to its opposite value.
1365 void flipAllBits() {
1366 if (isSingleWord()) {
1367 U.VAL ^= WORDTYPE_MAX;
1368 clearUnusedBits();
1369 } else {
1370 flipAllBitsSlowCase();
1371 }
1372 }
1373
1374 /// Toggles a given bit to its opposite value.
1375 ///
1376 /// Toggle a given bit to its opposite value whose position is given
1377 /// as "bitPosition".
1378 void flipBit(unsigned bitPosition);
1379
1380 /// Negate this APInt in place.
1381 void negate() {
1382 flipAllBits();
1383 ++(*this);
1384 }
1385
1386 /// Insert the bits from a smaller APInt starting at bitPosition.
1387 void insertBits(const APInt &SubBits, unsigned bitPosition);
1388 void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits);
1389
1390 /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
1391 APInt extractBits(unsigned numBits, unsigned bitPosition) const;
1392 uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const;
1393
1394 /// @}
1395 /// \name Value Characterization Functions
1396 /// @{
1397
1398 /// Return the number of bits in the APInt.
1399 unsigned getBitWidth() const { return BitWidth; }
1400
1401 /// Get the number of words.
1402 ///
1403 /// Here one word's bitwidth equals to that of uint64_t.
1404 ///
1405 /// \returns the number of words to hold the integer value of this APInt.
1406 unsigned getNumWords() const { return getNumWords(BitWidth); }
1407
1408 /// Get the number of words.
1409 ///
1410 /// *NOTE* Here one word's bitwidth equals to that of uint64_t.
1411 ///
1412 /// \returns the number of words to hold the integer value with a given bit
1413 /// width.
1414 static unsigned getNumWords(unsigned BitWidth) {
1415 return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
1416 }
1417
1418 /// Compute the number of active bits in the value
1419 ///
1420 /// This function returns the number of active bits which is defined as the
1421 /// bit width minus the number of leading zeros. This is used in several
1422 /// computations to see how "wide" the value is.
1423 unsigned getActiveBits() const { return BitWidth - countLeadingZeros(); }
1424
1425 /// Compute the number of active words in the value of this APInt.
1426 ///
1427 /// This is used in conjunction with getActiveData to extract the raw value of
1428 /// the APInt.
1429 unsigned getActiveWords() const {
1430 unsigned numActiveBits = getActiveBits();
1431 return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1;
1432 }
1433
1434 /// Get the minimum bit size for this signed APInt
1435 ///
1436 /// Computes the minimum bit width for this APInt while considering it to be a
1437 /// signed (and probably negative) value. If the value is not negative, this
1438 /// function returns the same value as getActiveBits()+1. Otherwise, it
1439 /// returns the smallest bit width that will retain the negative value. For
1440 /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so
1441 /// for -1, this function will always return 1.
1442 unsigned getMinSignedBits() const { return BitWidth - getNumSignBits() + 1; }
1443
1444 /// Get zero extended value
1445 ///
1446 /// This method attempts to return the value of this APInt as a zero extended
1447 /// uint64_t. The bitwidth must be <= 64 or the value must fit within a
1448 /// uint64_t. Otherwise an assertion will result.
1449 uint64_t getZExtValue() const {
1450 if (isSingleWord()) {
1451 assert(BitWidth && "zero width values not allowed");
1452 return U.VAL;
1453 }
1454 assert(getActiveBits() <= 64 && "Too many bits for uint64_t");
1455 return U.pVal[0];
1456 }
1457
1458 /// Get sign extended value
1459 ///
1460 /// This method attempts to return the value of this APInt as a sign extended
1461 /// int64_t. The bit width must be <= 64 or the value must fit within an
1462 /// int64_t. Otherwise an assertion will result.
1463 int64_t getSExtValue() const {
1464 if (isSingleWord())
1465 return SignExtend64(U.VAL, BitWidth);
1466 assert(getMinSignedBits() <= 64 && "Too many bits for int64_t");
1467 return int64_t(U.pVal[0]);
1468 }
1469
1470 /// Get bits required for string value.
1471 ///
1472 /// This method determines how many bits are required to hold the APInt
1473 /// equivalent of the string given by \p str.
1474 static unsigned getBitsNeeded(StringRef str, uint8_t radix);
1475
1476 /// The APInt version of the countLeadingZeros functions in
1477 /// MathExtras.h.
1478 ///
1479 /// It counts the number of zeros from the most significant bit to the first
1480 /// one bit.
1481 ///
1482 /// \returns BitWidth if the value is zero, otherwise returns the number of
1483/// zeros from the most significant bit to the first one bit.
1484 unsigned countLeadingZeros() const {
1485 if (isSingleWord()) {
1486 unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
1487 return llvm::countLeadingZeros(U.VAL) - unusedBits;
1488 }
1489 return countLeadingZerosSlowCase();
1490 }
1491
1492 /// Count the number of leading one bits.
1493 ///
1494 /// This function is an APInt version of the countLeadingOnes
1495 /// functions in MathExtras.h. It counts the number of ones from the most
1496 /// significant bit to the first zero bit.
1497 ///
1498 /// \returns 0 if the high order bit is not set, otherwise returns the number
1499 /// of 1 bits from the most significant to the least
1500 unsigned countLeadingOnes() const {
1501 if (isSingleWord()) {
1502 if (LLVM_UNLIKELY(BitWidth == 0))
1503 return 0;
1504 return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
1505 }
1506 return countLeadingOnesSlowCase();
1507 }
1508
1509 /// Computes the number of leading bits of this APInt that are equal to its
1510 /// sign bit.
1511 unsigned getNumSignBits() const {
1512 return isNegative() ? countLeadingOnes() : countLeadingZeros();
1513 }
1514
1515 /// Count the number of trailing zero bits.
1516 ///
1517 /// This function is an APInt version of the countTrailingZeros
1518 /// functions in MathExtras.h. It counts the number of zeros from the least
1519 /// significant bit to the first set bit.
1520 ///
1521 /// \returns BitWidth if the value is zero, otherwise returns the number of
1522 /// zeros from the least significant bit to the first one bit.
1523 unsigned countTrailingZeros() const {
1524 if (isSingleWord()) {
5: Calling 'APInt::isSingleWord'
7: Returning from 'APInt::isSingleWord'
8: Taking true branch
1525 unsigned TrailingZeros = llvm::countTrailingZeros(U.VAL);
9: Calling 'countTrailingZeros<unsigned long>'
15: Returning from 'countTrailingZeros<unsigned long>'
16: 'TrailingZeros' initialized to 64
1526 return (TrailingZeros > BitWidth ? BitWidth : TrailingZeros);
17: Assuming 'TrailingZeros' is <= field 'BitWidth'
18: '?' condition is false
19: Returning the value 64
1527 }
1528 return countTrailingZerosSlowCase();
1529 }
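
The analyzer path above hinges on this function returning 64 for a zero 64-bit value. A hedged sketch of the caller-side hazard (lowBitsMask is a hypothetical helper, not from LLVM):

    #include <cassert>
    #include <cstdint>

    // For a 64-bit APInt holding 0, countTrailingZeros() returns 64 (see the
    // path steps above). A caller that turns the count into a mask or shift
    // must guard that case; 1ULL << 64 is undefined behaviour.
    static uint64_t lowBitsMask(unsigned Count) {
      return Count >= 64 ? ~0ULL : ((1ULL << Count) - 1);
    }

    int main() {
      assert(lowBitsMask(3) == 0x7u);    // three low bits set
      assert(lowBitsMask(64) == ~0ULL);  // guarded: count equals the type width
    }
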
1530
1531 /// Count the number of trailing one bits.
1532 ///
1533 /// This function is an APInt version of the countTrailingOnes
1534 /// functions in MathExtras.h. It counts the number of ones from the least
1535 /// significant bit to the first zero bit.
1536 ///
1537 /// \returns BitWidth if the value is all ones, otherwise returns the number
1538 /// of ones from the least significant bit to the first zero bit.
1539 unsigned countTrailingOnes() const {
1540 if (isSingleWord())
1541 return llvm::countTrailingOnes(U.VAL);
1542 return countTrailingOnesSlowCase();
1543 }
1544
1545 /// Count the number of bits set.
1546 ///
1547 /// This function is an APInt version of the countPopulation functions
1548 /// in MathExtras.h. It counts the number of 1 bits in the APInt value.
1549 ///
1550 /// \returns 0 if the value is zero, otherwise returns the number of set bits.
1551 unsigned countPopulation() const {
1552 if (isSingleWord())
1553 return llvm::countPopulation(U.VAL);
1554 return countPopulationSlowCase();
1555 }
1556
1557 /// @}
1558 /// \name Conversion Functions
1559 /// @{
1560 void print(raw_ostream &OS, bool isSigned) const;
1561
1562 /// Converts an APInt to a string and append it to Str. Str is commonly a
1563 /// SmallString.
1564 void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
1565 bool formatAsCLiteral = false) const;
1566
1567 /// Considers the APInt to be unsigned and converts it into a string in the
1568/// radix given. The radix can be 2, 8, 10, 16, or 36.
1569 void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1570 toString(Str, Radix, false, false);
1571 }
1572
1573 /// Considers the APInt to be signed and converts it into a string in the
1574 /// radix given. The radix can be 2, 8, 10, 16, or 36.
1575 void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1576 toString(Str, Radix, true, false);
1577 }
1578
1579 /// \returns a byte-swapped representation of this APInt Value.
1580 APInt byteSwap() const;
1581
1582 /// \returns the value with the bit representation reversed of this APInt
1583 /// Value.
1584 APInt reverseBits() const;
1585
1586 /// Converts this APInt to a double value.
1587 double roundToDouble(bool isSigned) const;
1588
1589 /// Converts this unsigned APInt to a double value.
1590 double roundToDouble() const { return roundToDouble(false); }
1591
1592 /// Converts this signed APInt to a double value.
1593 double signedRoundToDouble() const { return roundToDouble(true); }
1594
1595 /// Converts APInt bits to a double
1596 ///
1597 /// The conversion does not do a translation from integer to double, it just
1598 /// re-interprets the bits as a double. Note that it is valid to do this on
1599 /// any bit width. Exactly 64 bits will be translated.
1600 double bitsToDouble() const { return BitsToDouble(getWord(0)); }
1601
1602 /// Converts APInt bits to a float
1603 ///
1604 /// The conversion does not do a translation from integer to float, it just
1605 /// re-interprets the bits as a float. Note that it is valid to do this on
1606 /// any bit width. Exactly 32 bits will be translated.
1607 float bitsToFloat() const {
1608 return BitsToFloat(static_cast<uint32_t>(getWord(0)));
1609 }
1610
1611 /// Converts a double to APInt bits.
1612 ///
1613 /// The conversion does not do a translation from double to integer, it just
1614 /// re-interprets the bits of the double.
1615 static APInt doubleToBits(double V) {
1616 return APInt(sizeof(double) * CHAR_BIT, DoubleToBits(V));
1617 }
1618
1619 /// Converts a float to APInt bits.
1620 ///
1621 /// The conversion does not do a translation from float to integer, it just
1622 /// re-interprets the bits of the float.
1623 static APInt floatToBits(float V) {
1624 return APInt(sizeof(float) * CHAR_BIT, FloatToBits(V));
1625 }
1626
1627 /// @}
1628 /// \name Mathematics Operations
1629 /// @{
1630
1631 /// \returns the floor log base 2 of this APInt.
1632 unsigned logBase2() const { return getActiveBits() - 1; }
1633
1634 /// \returns the ceil log base 2 of this APInt.
1635 unsigned ceilLogBase2() const {
1636 APInt temp(*this);
1637 --temp;
1638 return temp.getActiveBits();
1639 }
1640
1641 /// \returns the nearest log base 2 of this APInt. Ties round up.
1642 ///
1643 /// NOTE: When we have a BitWidth of 1, we define:
1644 ///
1645 /// log2(0) = UINT32_MAX
1646 /// log2(1) = 0
1647 ///
1648 /// to get around any mathematical concerns resulting from
1649/// referencing 2 in a space where 2 does not exist.
1650 unsigned nearestLogBase2() const;
1651
1652/// \returns the log base 2 of this APInt if it's an exact power of two, -1
1653 /// otherwise
1654 int32_t exactLogBase2() const {
1655 if (!isPowerOf2())
1656 return -1;
1657 return logBase2();
1658 }
1659
1660 /// Compute the square root.
1661 APInt sqrt() const;
1662
1663 /// Get the absolute value. If *this is < 0 then return -(*this), otherwise
1664 /// *this. Note that the "most negative" signed number (e.g. -128 for 8 bit
1665 /// wide APInt) is unchanged due to how negation works.
1666 APInt abs() const {
1667 if (isNegative())
1668 return -(*this);
1669 return *this;
1670 }
1671
1672 /// \returns the multiplicative inverse for a given modulo.
1673 APInt multiplicativeInverse(const APInt &modulo) const;
1674
1675 /// @}
1676 /// \name Building-block Operations for APInt and APFloat
1677 /// @{
1678
1679 // These building block operations operate on a representation of arbitrary
1680 // precision, two's-complement, bignum integer values. They should be
1681 // sufficient to implement APInt and APFloat bignum requirements. Inputs are
1682 // generally a pointer to the base of an array of integer parts, representing
1683 // an unsigned bignum, and a count of how many parts there are.
1684
1685 /// Sets the least significant part of a bignum to the input value, and zeroes
1686 /// out higher parts.
1687 static void tcSet(WordType *, WordType, unsigned);
1688
1689 /// Assign one bignum to another.
1690 static void tcAssign(WordType *, const WordType *, unsigned);
1691
1692 /// Returns true if a bignum is zero, false otherwise.
1693 static bool tcIsZero(const WordType *, unsigned);
1694
1695 /// Extract the given bit of a bignum; returns 0 or 1. Zero-based.
1696 static int tcExtractBit(const WordType *, unsigned bit);
1697
1698 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
1699 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
1700 /// significant bit of DST. All high bits above srcBITS in DST are
1701 /// zero-filled.
1702 static void tcExtract(WordType *, unsigned dstCount, const WordType *,
1703 unsigned srcBits, unsigned srcLSB);
1704
1705 /// Set the given bit of a bignum. Zero-based.
1706 static void tcSetBit(WordType *, unsigned bit);
1707
1708 /// Clear the given bit of a bignum. Zero-based.
1709 static void tcClearBit(WordType *, unsigned bit);
1710
1711 /// Returns the bit number of the least or most significant set bit of a
1712 /// number. If the input number has no bits set -1U is returned.
1713 static unsigned tcLSB(const WordType *, unsigned n);
1714 static unsigned tcMSB(const WordType *parts, unsigned n);
1715
1716 /// Negate a bignum in-place.
1717 static void tcNegate(WordType *, unsigned);
1718
1719 /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1720 static WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned);
1721 /// DST += RHS. Returns the carry flag.
1722 static WordType tcAddPart(WordType *, WordType, unsigned);
1723
1724 /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1725 static WordType tcSubtract(WordType *, const WordType *, WordType carry,
1726 unsigned);
1727 /// DST -= RHS. Returns the carry flag.
1728 static WordType tcSubtractPart(WordType *, WordType, unsigned);
1729
1730 /// DST += SRC * MULTIPLIER + PART if add is true
1731 /// DST = SRC * MULTIPLIER + PART if add is false
1732 ///
1733 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must
1734 /// start at the same point, i.e. DST == SRC.
1735 ///
1736 /// If DSTPARTS == SRC_PARTS + 1 no overflow occurs and zero is returned.
1737 /// Otherwise DST is filled with the least significant DSTPARTS parts of the
1738 /// result, and if all of the omitted higher parts were zero return zero,
1739 /// otherwise overflow occurred and return one.
1740 static int tcMultiplyPart(WordType *dst, const WordType *src,
1741 WordType multiplier, WordType carry,
1742 unsigned srcParts, unsigned dstParts, bool add);
1743
1744 /// DST = LHS * RHS, where DST has the same width as the operands and is
1745 /// filled with the least significant parts of the result. Returns one if
1746 /// overflow occurred, otherwise zero. DST must be disjoint from both
1747 /// operands.
1748 static int tcMultiply(WordType *, const WordType *, const WordType *,
1749 unsigned);
1750
1751 /// DST = LHS * RHS, where DST has width the sum of the widths of the
1752 /// operands. No overflow occurs. DST must be disjoint from both operands.
1753 static void tcFullMultiply(WordType *, const WordType *, const WordType *,
1754 unsigned, unsigned);
1755
1756 /// If RHS is zero LHS and REMAINDER are left unchanged, return one.
1757 /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set
1758 /// REMAINDER to the remainder, return zero. i.e.
1759 ///
1760 /// OLD_LHS = RHS * LHS + REMAINDER
1761 ///
1762 /// SCRATCH is a bignum of the same size as the operands and result for use by
1763 /// the routine; its contents need not be initialized and are destroyed. LHS,
1764 /// REMAINDER and SCRATCH must be distinct.
1765 static int tcDivide(WordType *lhs, const WordType *rhs, WordType *remainder,
1766 WordType *scratch, unsigned parts);
1767
1768 /// Shift a bignum left Count bits. Shifted in bits are zero. There are no
1769 /// restrictions on Count.
1770 static void tcShiftLeft(WordType *, unsigned Words, unsigned Count);
1771
1772 /// Shift a bignum right Count bits. Shifted in bits are zero. There are no
1773 /// restrictions on Count.
1774 static void tcShiftRight(WordType *, unsigned Words, unsigned Count);
1775
1776 /// Comparison (unsigned) of two bignums.
1777 static int tcCompare(const WordType *, const WordType *, unsigned);
1778
1779 /// Increment a bignum in-place. Return the carry flag.
1780 static WordType tcIncrement(WordType *dst, unsigned parts) {
1781 return tcAddPart(dst, 1, parts);
1782 }
1783
1784 /// Decrement a bignum in-place. Return the borrow flag.
1785 static WordType tcDecrement(WordType *dst, unsigned parts) {
1786 return tcSubtractPart(dst, 1, parts);
1787 }
1788
1789 /// Used to insert APInt objects, or objects that contain APInt objects, into
1790 /// FoldingSets.
1791 void Profile(FoldingSetNodeID &id) const;
1792
1793 /// debug method
1794 void dump() const;
1795
1796 /// Returns whether this instance allocated memory.
1797 bool needsCleanup() const { return !isSingleWord(); }
1798
1799private:
1800 /// This union is used to store the integer value. When the
1801 /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
1802 union {
1803 uint64_t VAL; ///< Used to store the <= 64 bits integer value.
1804 uint64_t *pVal; ///< Used to store the >64 bits integer value.
1805 } U;
1806
1807 unsigned BitWidth; ///< The number of bits in this APInt.
1808
1809 friend struct DenseMapInfo<APInt>;
1810 friend class APSInt;
1811
1812 /// This constructor is used only internally for speed of construction of
1813 /// temporaries. It is unsafe since it takes ownership of the pointer, so it
1814 /// is not public.
1815 APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { U.pVal = val; }
1816
1817 /// Determine which word a bit is in.
1818 ///
1819 /// \returns the word position for the specified bit position.
1820 static unsigned whichWord(unsigned bitPosition) {
1821 return bitPosition / APINT_BITS_PER_WORD;
1822 }
1823
1824 /// Determine which bit in a word the specified bit position is in.
1825 static unsigned whichBit(unsigned bitPosition) {
1826 return bitPosition % APINT_BITS_PER_WORD;
1827 }
1828
1829 /// Get a single bit mask.
1830 ///
1831 /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
1832 /// This method generates and returns a uint64_t (word) mask for a single
1833 /// bit at a specific bit position. This is used to mask the bit in the
1834 /// corresponding word.
1835 static uint64_t maskBit(unsigned bitPosition) {
1836 return 1ULL << whichBit(bitPosition);
1837 }
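
Worked example of why maskBit cannot shift by 64: whichBit reduces the position modulo the word size first (standalone sketch with the constants written out):

    #include <cassert>
    #include <cstdint>

    // whichWord / whichBit keep the shift in maskBit strictly below 64,
    // so 1ULL << bit is always defined.
    int main() {
      unsigned bitPosition = 70;
      unsigned word = bitPosition / 64;   // whichWord(70) == 1
      unsigned bit = bitPosition % 64;    // whichBit(70)  == 6
      uint64_t mask = 1ULL << bit;        // shift amount is always < 64
      assert(word == 1 && mask == 0x40u);
    }
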
1838
1839 /// Clear unused high order bits
1840 ///
1841 /// This method is used internally to clear the top "N" bits in the high order
1842 /// word that are not used by the APInt. This is needed after the most
1843 /// significant word is assigned a value to ensure that those bits are
1844 /// zero'd out.
1845 APInt &clearUnusedBits() {
1846 // Compute how many bits are used in the final word.
1847 unsigned WordBits = ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1;
1848
1849 // Mask out the high bits.
1850 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
1851 if (LLVM_UNLIKELY(BitWidth == 0))
1852 mask = 0;
1853
1854 if (isSingleWord())
1855 U.VAL &= mask;
1856 else
1857 U.pVal[getNumWords() - 1] &= mask;
1858 return *this;
1859 }
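
A small standalone check of the mask arithmetic in clearUnusedBits, assuming 64-bit words as in the header (it simply re-runs the two lines above for BitWidth = 12):

    #include <cassert>
    #include <cstdint>

    // Re-runs the clearUnusedBits() mask computation for BitWidth = 12,
    // with WORDTYPE_MAX written as ~0ULL. WordBits is always in [1, 64],
    // so the shift count stays below 64.
    int main() {
      unsigned BitWidth = 12;
      unsigned WordBits = ((BitWidth - 1) % 64) + 1;  // 12: bits used in top word
      uint64_t mask = ~0ULL >> (64 - WordBits);       // low 12 bits set
      assert(mask == 0xFFFu);
      assert((0xABCDu & mask) == 0xBCDu);             // unused high bits cleared
    }
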
1860
1861 /// Get the word corresponding to a bit position
1862 /// \returns the corresponding word for the specified bit position.
1863 uint64_t getWord(unsigned bitPosition) const {
1864 return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
1865 }
1866
1867 /// Utility method to change the bit width of this APInt to new bit width,
1868 /// allocating and/or deallocating as necessary. There is no guarantee on the
1869 /// value of any bits upon return. Caller should populate the bits after.
1870 void reallocate(unsigned NewBitWidth);
1871
1872 /// Convert a char array into an APInt
1873 ///
1874 /// \param radix 2, 8, 10, 16, or 36
1875 /// Converts a string into a number. The string must be non-empty
1876 /// and well-formed as a number of the given base. The bit-width
1877 /// must be sufficient to hold the result.
1878 ///
1879 /// This is used by the constructors that take string arguments.
1880 ///
1881 /// StringRef::getAsInteger is superficially similar but (1) does
1882 /// not assume that the string is well-formed and (2) grows the
1883 /// result to hold the input.
1884 void fromString(unsigned numBits, StringRef str, uint8_t radix);
1885
1886 /// An internal division function for dividing APInts.
1887 ///
1888 /// This is used by the toString method to divide by the radix. It simply
1889 /// provides a more convenient form of divide for internal use since KnuthDiv
1890 /// has specific constraints on its inputs. If those constraints are not met
1891 /// then it provides a simpler form of divide.
1892 static void divide(const WordType *LHS, unsigned lhsWords,
1893 const WordType *RHS, unsigned rhsWords, WordType *Quotient,
1894 WordType *Remainder);
1895
1896 /// out-of-line slow case for inline constructor
1897 void initSlowCase(uint64_t val, bool isSigned);
1898
1899 /// shared code between two array constructors
1900 void initFromArray(ArrayRef<uint64_t> array);
1901
1902 /// out-of-line slow case for inline copy constructor
1903 void initSlowCase(const APInt &that);
1904
1905 /// out-of-line slow case for shl
1906 void shlSlowCase(unsigned ShiftAmt);
1907
1908 /// out-of-line slow case for lshr.
1909 void lshrSlowCase(unsigned ShiftAmt);
1910
1911 /// out-of-line slow case for ashr.
1912 void ashrSlowCase(unsigned ShiftAmt);
1913
1914 /// out-of-line slow case for operator=
1915 void assignSlowCase(const APInt &RHS);
1916
1917 /// out-of-line slow case for operator==
1918 bool equalSlowCase(const APInt &RHS) const LLVM_READONLY;
1919
1920 /// out-of-line slow case for countLeadingZeros
1921 unsigned countLeadingZerosSlowCase() const LLVM_READONLY;
1922
1923 /// out-of-line slow case for countLeadingOnes.
1924 unsigned countLeadingOnesSlowCase() const LLVM_READONLY;
1925
1926 /// out-of-line slow case for countTrailingZeros.
1927 unsigned countTrailingZerosSlowCase() const LLVM_READONLY;
1928
1929 /// out-of-line slow case for countTrailingOnes
1930 unsigned countTrailingOnesSlowCase() const LLVM_READONLY;
1931
1932 /// out-of-line slow case for countPopulation
1933 unsigned countPopulationSlowCase() const LLVM_READONLY;
1934
1935 /// out-of-line slow case for intersects.
1936 bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY;
1937
1938 /// out-of-line slow case for isSubsetOf.
1939 bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY;
1940
1941 /// out-of-line slow case for setBits.
1942 void setBitsSlowCase(unsigned loBit, unsigned hiBit);
1943
1944 /// out-of-line slow case for flipAllBits.
1945 void flipAllBitsSlowCase();
1946
1947 /// out-of-line slow case for concat.
1948 APInt concatSlowCase(const APInt &NewLSB) const;
1949
1950 /// out-of-line slow case for operator&=.
1951 void andAssignSlowCase(const APInt &RHS);
1952
1953 /// out-of-line slow case for operator|=.
1954 void orAssignSlowCase(const APInt &RHS);
1955
1956 /// out-of-line slow case for operator^=.
1957 void xorAssignSlowCase(const APInt &RHS);
1958
1959 /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
1960 /// to, or greater than RHS.
1961 int compare(const APInt &RHS) const LLVM_READONLY;
1962
1963 /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
1964 /// to, or greater than RHS.
1965 int compareSigned(const APInt &RHS) const LLVM_READONLY;
1966
1967 /// @}
1968};
1969
1970inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
1971
1972inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; }
1973
1974/// Unary bitwise complement operator.
1975///
1976/// \returns an APInt that is the bitwise complement of \p v.
1977inline APInt operator~(APInt v) {
1978 v.flipAllBits();
1979 return v;
1980}
1981
1982inline APInt operator&(APInt a, const APInt &b) {
1983 a &= b;
1984 return a;
1985}
1986
1987inline APInt operator&(const APInt &a, APInt &&b) {
1988 b &= a;
1989 return std::move(b);
1990}
1991
1992inline APInt operator&(APInt a, uint64_t RHS) {
1993 a &= RHS;
1994 return a;
1995}
1996
1997inline APInt operator&(uint64_t LHS, APInt b) {
1998 b &= LHS;
1999 return b;
2000}
2001
2002inline APInt operator|(APInt a, const APInt &b) {
2003 a |= b;
2004 return a;
2005}
2006
2007inline APInt operator|(const APInt &a, APInt &&b) {
2008 b |= a;
2009 return std::move(b);
2010}
2011
2012inline APInt operator|(APInt a, uint64_t RHS) {
2013 a |= RHS;
2014 return a;
2015}
2016
2017inline APInt operator|(uint64_t LHS, APInt b) {
2018 b |= LHS;
2019 return b;
2020}
2021
2022inline APInt operator^(APInt a, const APInt &b) {
2023 a ^= b;
2024 return a;
2025}
2026
2027inline APInt operator^(const APInt &a, APInt &&b) {
2028 b ^= a;
2029 return std::move(b);
2030}
2031
2032inline APInt operator^(APInt a, uint64_t RHS) {
2033 a ^= RHS;
2034 return a;
2035}
2036
2037inline APInt operator^(uint64_t LHS, APInt b) {
2038 b ^= LHS;
2039 return b;
2040}
2041
2042inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
2043 I.print(OS, true);
2044 return OS;
2045}
2046
2047inline APInt operator-(APInt v) {
2048 v.negate();
2049 return v;
2050}
2051
2052inline APInt operator+(APInt a, const APInt &b) {
2053 a += b;
2054 return a;
2055}
2056
2057inline APInt operator+(const APInt &a, APInt &&b) {
2058 b += a;
2059 return std::move(b);
2060}
2061
2062inline APInt operator+(APInt a, uint64_t RHS) {
2063 a += RHS;
2064 return a;
2065}
2066
2067inline APInt operator+(uint64_t LHS, APInt b) {
2068 b += LHS;
2069 return b;
2070}
2071
2072inline APInt operator-(APInt a, const APInt &b) {
2073 a -= b;
2074 return a;
2075}
2076
2077inline APInt operator-(const APInt &a, APInt &&b) {
2078 b.negate();
2079 b += a;
2080 return std::move(b);
2081}
2082
2083inline APInt operator-(APInt a, uint64_t RHS) {
2084 a -= RHS;
2085 return a;
2086}
2087
2088inline APInt operator-(uint64_t LHS, APInt b) {
2089 b.negate();
2090 b += LHS;
2091 return b;
2092}
2093
2094inline APInt operator*(APInt a, uint64_t RHS) {
2095 a *= RHS;
2096 return a;
2097}
2098
2099inline APInt operator*(uint64_t LHS, APInt b) {
2100 b *= LHS;
2101 return b;
2102}
2103
2104namespace APIntOps {
2105
2106/// Determine the smaller of two APInts considered to be signed.
2107inline const APInt &smin(const APInt &A, const APInt &B) {
2108 return A.slt(B) ? A : B;
2109}
2110
2111/// Determine the larger of two APInts considered to be signed.
2112inline const APInt &smax(const APInt &A, const APInt &B) {
2113 return A.sgt(B) ? A : B;
2114}
2115
2116/// Determine the smaller of two APInts considered to be unsigned.
2117inline const APInt &umin(const APInt &A, const APInt &B) {
2118 return A.ult(B) ? A : B;
2119}
2120
2121/// Determine the larger of two APInts considered to be unsigned.
2122inline const APInt &umax(const APInt &A, const APInt &B) {
2123 return A.ugt(B) ? A : B;
2124}
2125
2126/// Compute GCD of two unsigned APInt values.
2127///
2128/// This function returns the greatest common divisor of the two APInt values
2129/// using Stein's algorithm.
2130///
2131/// \returns the greatest common divisor of A and B.
2132APInt GreatestCommonDivisor(APInt A, APInt B);
2133
2134/// Converts the given APInt to a double value.
2135///
2136/// Treats the APInt as an unsigned value for conversion purposes.
2137inline double RoundAPIntToDouble(const APInt &APIVal) {
2138 return APIVal.roundToDouble();
2139}
2140
2141/// Converts the given APInt to a double value.
2142///
2143/// Treats the APInt as a signed value for conversion purposes.
2144inline double RoundSignedAPIntToDouble(const APInt &APIVal) {
2145 return APIVal.signedRoundToDouble();
2146}
2147
2148/// Converts the given APInt to a float value.
2149inline float RoundAPIntToFloat(const APInt &APIVal) {
2150 return float(RoundAPIntToDouble(APIVal));
2151}
2152
2153/// Converts the given APInt to a float value.
2154///
2155/// Treats the APInt as a signed value for conversion purposes.
2156inline float RoundSignedAPIntToFloat(const APInt &APIVal) {
2157 return float(APIVal.signedRoundToDouble());
2158}
2159
2160/// Converts the given double value into a APInt.
2161///
2162/// This function converts a double value to an APInt value.
2163APInt RoundDoubleToAPInt(double Double, unsigned width);
2164
2165/// Converts a float value into an APInt.
2166///
2167/// Converts a float value into an APInt value.
2168inline APInt RoundFloatToAPInt(float Float, unsigned width) {
2169 return RoundDoubleToAPInt(double(Float), width);
2170}
2171
2172/// Return A unsign-divided by B, rounded by the given rounding mode.
2173APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2174
2175/// Return A sign-divided by B, rounded by the given rounding mode.
2176APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2177
2178/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
2179/// (e.g. 32 for i32).
2180/// This function finds the smallest number n, such that
2181/// (a) n >= 0 and q(n) = 0, or
2182/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
2183/// integers, belong to two different intervals [Rk, Rk+R),
2184/// where R = 2^BW, and k is an integer.
2185/// The idea here is to find when q(n) "overflows" 2^BW, while at the
2186/// same time "allowing" subtraction. In unsigned modulo arithmetic a
2187/// subtraction (treated as addition of negated numbers) would always
2188/// count as an overflow, but here we want to allow values to decrease
2189/// and increase as long as they are within the same interval.
2190/// Specifically, adding of two negative numbers should not cause an
2191/// overflow (as long as the magnitude does not exceed the bit width).
2192/// On the other hand, given a positive number, adding a negative
2193/// number to it can give a negative result, which would cause the
2194/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
2195/// treated as a special case of an overflow.
2196///
2197/// This function returns None if after finding k that minimizes the
2198/// positive solution to q(n) = kR, both solutions are contained between
2199/// two consecutive integers.
2200///
2201/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
2202/// in arithmetic modulo 2^BW, and treating the values as signed) by the
2203/// virtue of *signed* overflow. This function will *not* find such an n,
2204/// however it may find a value of n satisfying the inequalities due to
2205/// an *unsigned* overflow (if the values are treated as unsigned).
2206/// To find a solution for a signed overflow, treat it as a problem of
2207/// finding an unsigned overflow with a range width of BW-1.
2208///
2209/// The returned value may have a different bit width from the input
2210/// coefficients.
2211Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2212 unsigned RangeWidth);
2213
2214/// Compare two values, and if they are different, return the position of the
2215/// most significant bit that is different in the values.
2216Optional<unsigned> GetMostSignificantDifferentBit(const APInt &A,
2217 const APInt &B);
2218
2219/// Splat/Merge neighboring bits to widen/narrow the bitmask represented
2220/// by \param A to \param NewBitWidth bits.
2221///
2222/// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011
2223/// e.g. ScaleBitMask(0b00011011, 4) -> 0b0111
2224/// A.getBitwidth() or NewBitWidth must be a whole multiple of the other.
2225///
2226/// TODO: Do we need a mode where all bits must be set when merging down?
2227APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth);
2228} // namespace APIntOps
2229
2230// See friend declaration above. This additional declaration is required in
2231// order to compile LLVM with IBM xlC compiler.
2232hash_code hash_value(const APInt &Arg);
2233
2234/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2235/// with the integer held in IntVal.
2236void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
2237
2238/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2239/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2240void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes);
2241
2242/// Provide DenseMapInfo for APInt.
2243template <> struct DenseMapInfo<APInt> {
2244 static inline APInt getEmptyKey() {
2245 APInt V(nullptr, 0);
2246 V.U.VAL = 0;
2247 return V;
2248 }
2249
2250 static inline APInt getTombstoneKey() {
2251 APInt V(nullptr, 0);
2252 V.U.VAL = 1;
2253 return V;
2254 }
2255
2256 static unsigned getHashValue(const APInt &Key);
2257
2258 static bool isEqual(const APInt &LHS, const APInt &RHS) {
2259 return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
2260 }
2261};
2262
2263} // namespace llvm
2264
2265#endif

/build/llvm-toolchain-snapshot-14~++20211019111405+8fbac4e88ac3/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/Support/Compiler.h"
17#include <cassert>
18#include <climits>
19#include <cmath>
20#include <cstdint>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25#ifdef __ANDROID_NDK__
26#include <android/api-level.h>
27#endif
28
29#ifdef _MSC_VER
30// Declare these intrinsics manually rather including intrin.h. It's very
31// expensive, and MathExtras.h is popular.
32// #include <intrin.h>
33extern "C" {
34unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
35unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
36unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
37unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
38}
39#endif
40
41namespace llvm {
42
43/// The behavior an operation has on an input of 0.
44enum ZeroBehavior {
45 /// The returned value is undefined.
46 ZB_Undefined,
47 /// The returned value is numeric_limits<T>::max()
48 ZB_Max,
49 /// The returned value is numeric_limits<T>::digits
50 ZB_Width
51};
52
53/// Mathematical constants.
54namespace numbers {
55// TODO: Track C++20 std::numbers.
56// TODO: Favor using the hexadecimal FP constants (requires C++17).
57constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
58 egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
59 ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
60 ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392
61 log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
62 log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
63 pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
64 inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
65 sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
66 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
67 sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A00219
68 inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
69 sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
70 inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
71 phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
72constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
73 egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
74 ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
75 ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
76 log2ef = 1.44269504F, // (0x1.715476P+0)
77 log10ef = .434294482F, // (0x1.bcb7b2P-2)
78 pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
79 inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
80 sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
81 inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
82 sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
83 inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
84 sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
85 inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
86 phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
87} // namespace numbers
88
89namespace detail {
90template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
91 static unsigned count(T Val, ZeroBehavior) {
92 if (!Val)
93 return std::numeric_limits<T>::digits;
94 if (Val & 0x1)
95 return 0;
96
97 // Bisection method.
98 unsigned ZeroBits = 0;
99 T Shift = std::numeric_limits<T>::digits >> 1;
100 T Mask = std::numeric_limits<T>::max() >> Shift;
101 while (Shift) {
102 if ((Val & Mask) == 0) {
103 Val >>= Shift;
104 ZeroBits |= Shift;
105 }
106 Shift >>= 1;
107 Mask >>= Shift;
108 }
109 return ZeroBits;
110 }
111};
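
To see the bisection loop at work, a standalone copy fixed to 32-bit values (ctzBisection is an illustrative name; the logic mirrors the generic count() above):

    #include <cassert>
    #include <cstdint>
    #include <limits>

    // Standalone copy of the bisection loop, specialized to 32 bits so the
    // trace is easy to follow by hand.
    static unsigned ctzBisection(uint32_t Val) {
      if (!Val)
        return 32;                       // ZB_Width-style result for zero
      if (Val & 0x1)
        return 0;
      unsigned ZeroBits = 0;
      uint32_t Shift = 16;
      uint32_t Mask = std::numeric_limits<uint32_t>::max() >> Shift;
      while (Shift) {
        if ((Val & Mask) == 0) {
          Val >>= Shift;
          ZeroBits |= Shift;
        }
        Shift >>= 1;
        Mask >>= Shift;
      }
      return ZeroBits;
    }

    int main() {
      assert(ctzBisection(40) == 3);     // 0b101000 has three trailing zeros
      assert(ctzBisection(0) == 32);     // zero maps to the full type width
    }
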
112
113#if defined(__GNUC__) || defined(_MSC_VER)
114template <typename T> struct TrailingZerosCounter<T, 4> {
115 static unsigned count(T Val, ZeroBehavior ZB) {
116 if (ZB != ZB_Undefined && Val == 0)
117 return 32;
118
119#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
120 return __builtin_ctz(Val);
121#elif defined(_MSC_VER)
122 unsigned long Index;
123 _BitScanForward(&Index, Val);
124 return Index;
125#endif
126 }
127};
128
129#if !defined(_MSC_VER) || defined(_M_X64)
130template <typename T> struct TrailingZerosCounter<T, 8> {
131 static unsigned count(T Val, ZeroBehavior ZB) {
132 if (ZB != ZB_Undefined && Val == 0)
10.1: 'ZB' is not equal to ZB_Undefined
10.2: 'Val' is equal to 0
11: Taking true branch
133 return 64;
12: Returning the value 64
134
135#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
136 return __builtin_ctzll(Val);
137#elif defined(_MSC_VER)
138 unsigned long Index;
139 _BitScanForward64(&Index, Val);
140 return Index;
141#endif
142 }
143};
144#endif
145#endif
146} // namespace detail
147
148/// Count number of 0's from the least significant bit to the most
149/// stopping at the first 1.
150///
151/// Only unsigned integral types are allowed.
152///
153/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
154/// valid arguments.
155template <typename T>
156unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
157 static_assert(std::numeric_limits<T>::is_integer &&
158 !std::numeric_limits<T>::is_signed,
159 "Only unsigned integral types are allowed.");
160 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
10: Calling 'TrailingZerosCounter::count'
13: Returning from 'TrailingZerosCounter::count'
14: Returning the value 64
161}
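
Usage sketch tying this back to the report: with the default ZB_Width, a zero uint64_t yields the type width, matching the "Returning the value 64" step above (needs LLVM's include path; otherwise treat it as illustrative only):

    #include <cstdint>
    #include "llvm/Support/MathExtras.h"

    // A later 'x << N' with this N is undefined for uint64_t, which is the
    // pattern the warning at the top of the report points at.
    int main() {
      uint64_t Zero = 0;
      unsigned N = llvm::countTrailingZeros(Zero);  // 64 under ZB_Width
      return N == 64 ? 0 : 1;
    }
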
162
163namespace detail {
164template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
165 static unsigned count(T Val, ZeroBehavior) {
166 if (!Val)
167 return std::numeric_limits<T>::digits;
168
169 // Bisection method.
170 unsigned ZeroBits = 0;
171 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
172 T Tmp = Val >> Shift;
173 if (Tmp)
174 Val = Tmp;
175 else
176 ZeroBits |= Shift;
177 }
178 return ZeroBits;
179 }
180};
181
182#if defined(__GNUC__) || defined(_MSC_VER)
183template <typename T> struct LeadingZerosCounter<T, 4> {
184 static unsigned count(T Val, ZeroBehavior ZB) {
185 if (ZB != ZB_Undefined && Val == 0)
186 return 32;
187
188#if __has_builtin(__builtin_clz) || defined(__GNUC__)
189 return __builtin_clz(Val);
190#elif defined(_MSC_VER)
191 unsigned long Index;
192 _BitScanReverse(&Index, Val);
193 return Index ^ 31;
194#endif
195 }
196};
197
198#if !defined(_MSC_VER) || defined(_M_X64)
199template <typename T> struct LeadingZerosCounter<T, 8> {
200 static unsigned count(T Val, ZeroBehavior ZB) {
201 if (ZB != ZB_Undefined && Val == 0)
202 return 64;
203
204#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
205 return __builtin_clzll(Val);
206#elif defined(_MSC_VER)
207 unsigned long Index;
208 _BitScanReverse64(&Index, Val);
209 return Index ^ 63;
210#endif
211 }
212};
213#endif
214#endif
215} // namespace detail
216
217/// Count number of 0's from the most significant bit to the least
218/// stopping at the first 1.
219///
220/// Only unsigned integral types are allowed.
221///
222/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
223/// valid arguments.
224template <typename T>
225unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
226 static_assert(std::numeric_limits<T>::is_integer &&
227 !std::numeric_limits<T>::is_signed,
228 "Only unsigned integral types are allowed.");
229 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
230}
231
232/// Get the index of the first set bit starting from the least
233/// significant bit.
234///
235/// Only unsigned integral types are allowed.
236///
237/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
238/// valid arguments.
239template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
240 if (ZB == ZB_Max && Val == 0)
241 return std::numeric_limits<T>::max();
242
243 return countTrailingZeros(Val, ZB_Undefined);
244}
245
246/// Create a bitmask with the N right-most bits set to 1, and all other
247/// bits set to 0. Only unsigned types are allowed.
248template <typename T> T maskTrailingOnes(unsigned N) {
249 static_assert(std::is_unsigned<T>::value, "Invalid type!");
250 const unsigned Bits = CHAR_BIT * sizeof(T);
251 assert(N <= Bits && "Invalid bit index");
252 return N == 0 ? 0 : (T(-1) >> (Bits - N));
253}
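
The N == 0 special case above is what keeps the shift amount below the type width. A restated sketch (trailingOnes is an illustrative helper, not the LLVM template):

    #include <cassert>
    #include <cstdint>

    // Restated maskTrailingOnes-style helper showing why the N == 0 branch
    // matters: without it, T(-1) >> Bits would shift by the full type width.
    template <typename T> static T trailingOnes(unsigned N) {
      const unsigned Bits = 8 * sizeof(T);
      assert(N <= Bits && "Invalid bit index");
      return N == 0 ? 0 : (T(-1) >> (Bits - N));
    }

    int main() {
      assert(trailingOnes<uint8_t>(3) == 0x07);
      assert(trailingOnes<uint32_t>(0) == 0);        // guarded special case
      assert(trailingOnes<uint64_t>(64) == ~0ULL);   // Bits - N == 0, well defined
    }
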
254
255/// Create a bitmask with the N left-most bits set to 1, and all other
256/// bits set to 0. Only unsigned types are allowed.
257template <typename T> T maskLeadingOnes(unsigned N) {
258 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
259}
260
261/// Create a bitmask with the N right-most bits set to 0, and all other
262/// bits set to 1. Only unsigned types are allowed.
263template <typename T> T maskTrailingZeros(unsigned N) {
264 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
265}
266
267/// Create a bitmask with the N left-most bits set to 0, and all other
268/// bits set to 1. Only unsigned types are allowed.
269template <typename T> T maskLeadingZeros(unsigned N) {
270 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
271}
272
273/// Get the index of the last set bit starting from the least
274/// significant bit.
275///
276/// Only unsigned integral types are allowed.
277///
278/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
279/// valid arguments.
280template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
281 if (ZB == ZB_Max && Val == 0)
282 return std::numeric_limits<T>::max();
283
284 // Use ^ instead of - because both gcc and llvm can remove the associated ^
285 // in the __builtin_clz intrinsic on x86.
286 return countLeadingZeros(Val, ZB_Undefined) ^
287 (std::numeric_limits<T>::digits - 1);
288}
289
290/// Macro compressed bit reversal table for 256 bits.
291///
292/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
293static const unsigned char BitReverseTable256[256] = {
294#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
295#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
296#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
297 R6(0), R6(2), R6(1), R6(3)
298#undef R2
299#undef R4
300#undef R6
301};
302
303/// Reverse the bits in \p Val.
304template <typename T>
305T reverseBits(T Val) {
306 unsigned char in[sizeof(Val)];
307 unsigned char out[sizeof(Val)];
308 std::memcpy(in, &Val, sizeof(Val));
309 for (unsigned i = 0; i < sizeof(Val); ++i)
310 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
311 std::memcpy(&Val, out, sizeof(Val));
312 return Val;
313}
314
315#if __has_builtin(__builtin_bitreverse8)
316template<>
317inline uint8_t reverseBits<uint8_t>(uint8_t Val) {
318 return __builtin_bitreverse8(Val);
319}
320#endif
321
322#if __has_builtin(__builtin_bitreverse16)
323template<>
324inline uint16_t reverseBits<uint16_t>(uint16_t Val) {
325 return __builtin_bitreverse16(Val);
326}
327#endif
328
329#if __has_builtin(__builtin_bitreverse32)
330template<>
331inline uint32_t reverseBits<uint32_t>(uint32_t Val) {
332 return __builtin_bitreverse32(Val);
333}
334#endif
335
336#if __has_builtin(__builtin_bitreverse64)
337template<>
338inline uint64_t reverseBits<uint64_t>(uint64_t Val) {
339 return __builtin_bitreverse64(Val);
340}
341#endif
342
343// NOTE: The following support functions use the _32/_64 extensions instead of
344// type overloading so that signed and unsigned integers can be used without
345// ambiguity.
346
347/// Return the high 32 bits of a 64 bit value.
348constexpr inline uint32_t Hi_32(uint64_t Value) {
349 return static_cast<uint32_t>(Value >> 32);
350}
351
352/// Return the low 32 bits of a 64 bit value.
353constexpr inline uint32_t Lo_32(uint64_t Value) {
354 return static_cast<uint32_t>(Value);
355}
356
357/// Make a 64-bit integer from a high / low pair of 32-bit integers.
358constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
359 return ((uint64_t)High << 32) | (uint64_t)Low;
360}
361
362/// Checks if an integer fits into the given bit width.
363template <unsigned N> constexpr inline bool isInt(int64_t x) {
364 return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
365}
366// Template specializations to get better code for common cases.
367template <> constexpr inline bool isInt<8>(int64_t x) {
368 return static_cast<int8_t>(x) == x;
369}
370template <> constexpr inline bool isInt<16>(int64_t x) {
371 return static_cast<int16_t>(x) == x;
372}
373template <> constexpr inline bool isInt<32>(int64_t x) {
374 return static_cast<int32_t>(x) == x;
375}
376
377/// Checks if a signed integer is an N bit number shifted left by S.
378template <unsigned N, unsigned S>
379constexpr inline bool isShiftedInt(int64_t x) {
380 static_assert(
381 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.");
382 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
383 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
384}
385
386/// Checks if an unsigned integer fits into the given bit width.
387///
388/// This is written as two functions rather than as simply
389///
390/// return N >= 64 || X < (UINT64_C(1) << N);
391///
392/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
393/// left too many places.
394template <unsigned N>
395constexpr inline std::enable_if_t<(N < 64), bool> isUInt(uint64_t X) {
396 static_assert(N > 0, "isUInt<0> doesn't make sense");
397 return X < (UINT64_C(1) << (N));
398}
399template <unsigned N>
400constexpr inline std::enable_if_t<N >= 64, bool> isUInt(uint64_t) {
401 return true;
402}
403
404// Template specializations to get better code for common cases.
405template <> constexpr inline bool isUInt<8>(uint64_t x) {
406 return static_cast<uint8_t>(x) == x;
407}
408template <> constexpr inline bool isUInt<16>(uint64_t x) {
409 return static_cast<uint16_t>(x) == x;
410}
411template <> constexpr inline bool isUInt<32>(uint64_t x) {
412 return static_cast<uint32_t>(x) == x;
413}
414
415/// Checks if an unsigned integer is an N bit number shifted left by S.
416template <unsigned N, unsigned S>
417constexpr inline bool isShiftedUInt(uint64_t x) {
418 static_assert(
419 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
420 static_assert(N + S <= 64,
421 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
422 // Per the two static_asserts above, S must be strictly less than 64. So
423 // 1 << S is not undefined behavior.
424 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
425}
426
427/// Gets the maximum value for a N-bit unsigned integer.
428inline uint64_t maxUIntN(uint64_t N) {
429 assert(N > 0 && N <= 64 && "integer width out of range");
430
431 // uint64_t(1) << 64 is undefined behavior, so we can't do
432 // (uint64_t(1) << N) - 1
433 // without checking first that N != 64. But this works and doesn't have a
434 // branch.
435 return UINT64_MAX >> (64 - N);
436}
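
Edge-case check of the branch-free form used by maxUIntN (plain asserts, no LLVM dependencies):

    #include <cassert>
    #include <cstdint>

    // N == 64 shifts by zero, so the undefined 1 << 64 is never needed.
    int main() {
      assert((UINT64_MAX >> (64 - 1)) == 1);                      // maxUIntN(1)
      assert((UINT64_MAX >> (64 - 8)) == 0xFFu);                  // maxUIntN(8)
      assert((UINT64_MAX >> (64 - 64)) == 0xFFFFFFFFFFFFFFFFull); // maxUIntN(64)
    }
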
437
438/// Gets the minimum value for a N-bit signed integer.
439inline int64_t minIntN(int64_t N) {
440 assert(N > 0 && N <= 64 && "integer width out of range");
441
442 return UINT64_C(1) + ~(UINT64_C(1) << (N - 1));
443}
444
445/// Gets the maximum value for a N-bit signed integer.
446inline int64_t maxIntN(int64_t N) {
447 assert(N > 0 && N <= 64 && "integer width out of range");
448
449 // This relies on two's complement wraparound when N == 64, so we convert to
450 // int64_t only at the very end to avoid UB.
451 return (UINT64_C(1) << (N - 1)) - 1;
452}
453
454/// Checks if an unsigned integer fits into the given (dynamic) bit width.
455inline bool isUIntN(unsigned N, uint64_t x) {
456 return N >= 64 || x <= maxUIntN(N);
457}
458
459/// Checks if a signed integer fits into the given (dynamic) bit width.
460inline bool isIntN(unsigned N, int64_t x) {
461 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
462}
463
464/// Return true if the argument is a non-empty sequence of ones starting at the
465/// least significant bit with the remainder zero (32 bit version).
466/// Ex. isMask_32(0x0000FFFFU) == true.
467constexpr inline bool isMask_32(uint32_t Value) {
468 return Value && ((Value + 1) & Value) == 0;
469}
470
471/// Return true if the argument is a non-empty sequence of ones starting at the
472/// least significant bit with the remainder zero (64 bit version).
473constexpr inline bool isMask_64(uint64_t Value) {
474 return Value && ((Value + 1) & Value) == 0;
475}
476
477/// Return true if the argument contains a non-empty sequence of ones with the
478/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
479constexpr inline bool isShiftedMask_32(uint32_t Value) {
480 return Value && isMask_32((Value - 1) | Value);
481}
482
483/// Return true if the argument contains a non-empty sequence of ones with the
484/// remainder zero (64 bit version.)
485constexpr inline bool isShiftedMask_64(uint64_t Value) {
486 return Value && isMask_64((Value - 1) | Value);
487}
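
Quick numeric check of the two bit tricks above (standalone sketch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // isMask: adding 1 to a low run of ones clears it completely.
      uint32_t Mask = 0x0000FFFFu;
      assert(Mask && ((Mask + 1) & Mask) == 0);
      // isShiftedMask: (V - 1) | V extends the run down to bit 0 first.
      uint32_t V = 0x0000FF00u;
      uint32_t Low = (V - 1) | V;          // 0x0000FFFF
      assert(Low && ((Low + 1) & Low) == 0);
    }
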
488
489/// Return true if the argument is a power of two > 0.
490/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
491constexpr inline bool isPowerOf2_32(uint32_t Value) {
492 return Value && !(Value & (Value - 1));
493}
494
495/// Return true if the argument is a power of two > 0 (64 bit edition.)
496constexpr inline bool isPowerOf2_64(uint64_t Value) {
497 return Value && !(Value & (Value - 1));
498}
499
500/// Count the number of ones from the most significant bit to the first
501/// zero bit.
502///
503/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
504/// Only unsigned integral types are allowed.
505///
506/// \param ZB the behavior on an input of all ones. Only ZB_Width and
507/// ZB_Undefined are valid arguments.
508template <typename T>
509unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
510 static_assert(std::numeric_limits<T>::is_integer &&
511 !std::numeric_limits<T>::is_signed,
512 "Only unsigned integral types are allowed.");
513 return countLeadingZeros<T>(~Value, ZB);
514}
515
516/// Count the number of ones from the least significant bit to the first
517/// zero bit.
518///
519/// Ex. countTrailingOnes(0x00FF00FF) == 8.
520/// Only unsigned integral types are allowed.
521///
522/// \param ZB the behavior on an input of all ones. Only ZB_Width and
523/// ZB_Undefined are valid arguments.
524template <typename T>
525unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
526 static_assert(std::numeric_limits<T>::is_integer &&
527 !std::numeric_limits<T>::is_signed,
528 "Only unsigned integral types are allowed.");
529 return countTrailingZeros<T>(~Value, ZB);
530}
531
532namespace detail {
533template <typename T, std::size_t SizeOfT> struct PopulationCounter {
534 static unsigned count(T Value) {
535 // Generic version, forward to 32 bits.
536 static_assert(SizeOfT <= 4, "Not implemented!");
537#if defined(__GNUC__)
538 return __builtin_popcount(Value);
539#else
540 uint32_t v = Value;
541 v = v - ((v >> 1) & 0x55555555);
542 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
543 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
544#endif
545 }
546};
547
548template <typename T> struct PopulationCounter<T, 8> {
549 static unsigned count(T Value) {
550#if defined(__GNUC__)
551 return __builtin_popcountll(Value);
552#else
553 uint64_t v = Value;
554 v = v - ((v >> 1) & 0x5555555555555555ULL);
555 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
556 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
557 return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
558#endif
559 }
560};
561} // namespace detail
562
563/// Count the number of set bits in a value.
564/// Ex. countPopulation(0xF000F000) = 8
565/// Returns 0 if the word is zero.
566template <typename T>
567inline unsigned countPopulation(T Value) {
568 static_assert(std::numeric_limits<T>::is_integer &&
569 !std::numeric_limits<T>::is_signed,
570 "Only unsigned integral types are allowed.");
571 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
572}
573
574/// Compile time Log2.
575/// Valid only for positive powers of two.
576template <size_t kValue> constexpr inline size_t CTLog2() {
577 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
578 "Value is not a valid power of 2");
579 return 1 + CTLog2<kValue / 2>();
580}
581
582template <> constexpr inline size_t CTLog2<1>() { return 0; }
583
584/// Return the log base 2 of the specified value.
585inline double Log2(double Value) {
586#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
587 return __builtin_log(Value) / __builtin_log(2.0);
588#else
589 return log2(Value);
590#endif
591}
592
593/// Return the floor log base 2 of the specified value, -1 if the value is zero.
594/// (32 bit edition.)
595/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
596inline unsigned Log2_32(uint32_t Value) {
597 return 31 - countLeadingZeros(Value);
598}
599
600/// Return the floor log base 2 of the specified value, -1 if the value is zero.
601/// (64 bit edition.)
602inline unsigned Log2_64(uint64_t Value) {
603 return 63 - countLeadingZeros(Value);
604}
605
606/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
607/// (32 bit edition).
608/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
609inline unsigned Log2_32_Ceil(uint32_t Value) {
610 return 32 - countLeadingZeros(Value - 1);
611}
612
613/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
614/// (64 bit edition.)
615inline unsigned Log2_64_Ceil(uint64_t Value) {
616 return 64 - countLeadingZeros(Value - 1);
617}
618
619/// Return the greatest common divisor of the values using Euclid's algorithm.
620template <typename T>
621inline T greatestCommonDivisor(T A, T B) {
622 while (B) {
623 T Tmp = B;
624 B = A % B;
625 A = Tmp;
626 }
627 return A;
628}
629
630inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
631 return greatestCommonDivisor<uint64_t>(A, B);
632}
633
634/// This function takes a 64-bit integer and returns the bit equivalent double.
635inline double BitsToDouble(uint64_t Bits) {
636 double D;
637 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
638 memcpy(&D, &Bits, sizeof(Bits));
639 return D;
640}
641
642/// This function takes a 32-bit integer and returns the bit equivalent float.
643inline float BitsToFloat(uint32_t Bits) {
644 float F;
645 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
646 memcpy(&F, &Bits, sizeof(Bits));
647 return F;
648}
649
650/// This function takes a double and returns the bit equivalent 64-bit integer.
651/// Note that copying doubles around changes the bits of NaNs on some hosts,
652/// notably x86, so this routine cannot be used if these bits are needed.
653inline uint64_t DoubleToBits(double Double) {
654 uint64_t Bits;
655 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
656 memcpy(&Bits, &Double, sizeof(Double));
657 return Bits;
658}
659
660/// This function takes a float and returns the bit equivalent 32-bit integer.
661/// Note that copying floats around changes the bits of NaNs on some hosts,
662/// notably x86, so this routine cannot be used if these bits are needed.
663inline uint32_t FloatToBits(float Float) {
664 uint32_t Bits;
665 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
666 memcpy(&Bits, &Float, sizeof(Float));
667 return Bits;
668}
669
670/// A and B are either alignments or offsets. Return the minimum alignment that
671/// may be assumed after adding the two together.
672constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
673 // The largest power of 2 that divides both A and B.
674 //
675 // Replace "-Value" by "1+~Value" in the following commented code to avoid
676 // MSVC warning C4146
677 // return (A | B) & -(A | B);
678 return (A | B) & (1 + ~(A | B));
679}
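
The expression isolates the lowest set bit of A | B. A standalone sketch (minAlign here is a local re-statement, not the LLVM function):

    #include <cassert>
    #include <cstdint>

    // (A | B) & (1 + ~(A | B)) keeps only the lowest set bit of A | B,
    // i.e. the largest power of two dividing both values.
    static uint64_t minAlign(uint64_t A, uint64_t B) {
      return (A | B) & (1 + ~(A | B));
    }

    int main() {
      assert(minAlign(16, 24) == 8);   // 16 = 2^4, 24 = 8 * 3, common factor 8
      assert(minAlign(32, 0) == 32);   // a zero offset adds no constraint
    }
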
680
681/// Returns the next power of two (in 64-bits) that is strictly greater than A.
682/// Returns zero on overflow.
683inline uint64_t NextPowerOf2(uint64_t A) {
684 A |= (A >> 1);
685 A |= (A >> 2);
686 A |= (A >> 4);
687 A |= (A >> 8);
688 A |= (A >> 16);
689 A |= (A >> 32);
690 return A + 1;
691}
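
A worked trace of the OR-cascade for A = 40 (standalone, mirrors the body above):

    #include <cassert>
    #include <cstdint>

    // The highest set bit is smeared downward, giving all-ones below it,
    // and A + 1 is then the next strictly larger power of two.
    int main() {
      uint64_t A = 40;                  // 0b101000
      A |= (A >> 1);  A |= (A >> 2);  A |= (A >> 4);
      A |= (A >> 8);  A |= (A >> 16); A |= (A >> 32);
      assert(A == 63);
      assert(A + 1 == 64);              // NextPowerOf2(40) == 64
    }
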
692
693/// Returns the power of two which is less than or equal to the given value.
694/// Essentially, it is a floor operation across the domain of powers of two.
695inline uint64_t PowerOf2Floor(uint64_t A) {
696 if (!A) return 0;
697 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
698}
699
700/// Returns the power of two which is greater than or equal to the given value.
701/// Essentially, it is a ceil operation across the domain of powers of two.
702inline uint64_t PowerOf2Ceil(uint64_t A) {
703 if (!A)
704 return 0;
705 return NextPowerOf2(A - 1);
706}
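
A minimal sketch contrasting the three power-of-two helpers above (assumes the header is included; wrapper name is illustrative):

#include "llvm/Support/MathExtras.h"
#include <cassert>

void powerOfTwoExamples() {
  assert(llvm::NextPowerOf2(5) == 8);  // strictly greater than the input
  assert(llvm::NextPowerOf2(8) == 16); // strictly greater even for exact powers of two
  assert(llvm::PowerOf2Floor(5) == 4); // largest power of two <= 5
  assert(llvm::PowerOf2Ceil(5) == 8);  // smallest power of two >= 5
  assert(llvm::PowerOf2Ceil(8) == 8);  // already a power of two
}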
707
708/// Returns the next integer (mod 2**64) that is greater than or equal to
709/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
710///
711/// If non-zero \p Skew is specified, the return value will be a minimal
712/// integer that is greater than or equal to \p Value and equal to
713/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
714/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
715///
716/// Examples:
717/// \code
718/// alignTo(5, 8) = 8
719/// alignTo(17, 8) = 24
720/// alignTo(~0LL, 8) = 0
721/// alignTo(321, 255) = 510
722///
723/// alignTo(5, 8, 7) = 7
724/// alignTo(17, 8, 1) = 17
725/// alignTo(~0LL, 8, 3) = 3
726/// alignTo(321, 255, 42) = 552
727/// \endcode
728inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
729 assert(Align != 0u && "Align can't be 0.");
730 Skew %= Align;
731 return (Value + Align - 1 - Skew) / Align * Align + Skew;
732}
733
734/// Returns the next integer (mod 2**64) that is greater than or equal to
735/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
736template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
737 static_assert(Align != 0u, "Align must be non-zero");
738 return (Value + Align - 1) / Align * Align;
739}
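
A minimal sketch showing the runtime, skewed, and compile-time forms of alignTo (assumes the header is included; wrapper name is illustrative):

#include "llvm/Support/MathExtras.h"
#include <cassert>

void alignToExamples() {
  assert(llvm::alignTo(17, 8) == 24);    // next multiple of 8 at or above 17
  assert(llvm::alignTo(17, 8, 1) == 17); // skewed: 17 == 8 * 2 + 1
  static_assert(llvm::alignTo<8>(17) == 24, "template form folds at compile time");
}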
740
741/// Returns the integer ceil(Numerator / Denominator).
742inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
743 return alignTo(Numerator, Denominator) / Denominator;
744}
745
746/// Returns the integer nearest(Numerator / Denominator).
747inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
748 return (Numerator + (Denominator / 2)) / Denominator;
749}
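
A minimal sketch of the two rounding divisions above (assumes the header is included; wrapper name is illustrative):

#include "llvm/Support/MathExtras.h"
#include <cassert>

void divideExamples() {
  assert(llvm::divideCeil(7, 2) == 4);    // ceil(3.5)
  assert(llvm::divideNearest(7, 2) == 4); // 3.5 rounds up
  assert(llvm::divideNearest(9, 4) == 2); // 2.25 rounds down
}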
750
751/// Returns the largest uint64_t less than or equal to \p Value and is
752/// \p Skew mod \p Align. \p Align must be non-zero
753inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
754 assert(Align != 0u && "Align can't be 0.");
755 Skew %= Align;
756 return (Value - Skew) / Align * Align + Skew;
757}
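
A minimal sketch of alignDown, the rounding-down counterpart of alignTo (assumes the header is included; wrapper name is illustrative):

#include "llvm/Support/MathExtras.h"
#include <cassert>

void alignDownExamples() {
  assert(llvm::alignDown(17, 8) == 16);    // largest multiple of 8 that is <= 17
  assert(llvm::alignDown(17, 8, 3) == 11); // largest value <= 17 that is 3 (mod 8)
}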
758
759/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
760/// Requires 0 < B <= 32.
761template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
762 static_assert(B > 0, "Bit width can't be 0.");
763 static_assert(B <= 32, "Bit width out of range.");
764 return int32_t(X << (32 - B)) >> (32 - B);
765}
766
767/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
768/// Requires 0 < B <= 32.
769inline int32_t SignExtend32(uint32_t X, unsigned B) {
770 assert(B > 0 && "Bit width can't be 0.");
771 assert(B <= 32 && "Bit width out of range.");
772 return int32_t(X << (32 - B)) >> (32 - B);
773}
774
775/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
776/// Requires 0 < B <= 64.
777template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
778 static_assert(B > 0, "Bit width can't be 0.");
779 static_assert(B <= 64, "Bit width out of range.");
780 return int64_t(x << (64 - B)) >> (64 - B);
781}
782
783/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
784/// Requires 0 < B <= 64.
785inline int64_t SignExtend64(uint64_t X, unsigned B) {
786 assert(B > 0 && "Bit width can't be 0.");
787 assert(B <= 64 && "Bit width out of range.");
788 return int64_t(X << (64 - B)) >> (64 - B);
789}
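
A minimal sketch of the sign-extension helpers above (assumes the header is included; wrapper name is illustrative):

#include "llvm/Support/MathExtras.h"
#include <cassert>

void signExtendExamples() {
  assert(llvm::SignExtend32<4>(0xF) == -1);    // 0xF is -1 when read as a 4-bit value
  assert(llvm::SignExtend32(0xF, 5) == 15);    // but +15 when read as a 5-bit value
  assert(llvm::SignExtend64<8>(0x80) == -128); // 0x80 is -128 as an 8-bit value
}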
790
791/// Subtract two unsigned integers, X and Y, of type T and return the absolute
792/// value of the result.
793template <typename T>
794std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
795 return X > Y ? (X - Y) : (Y - X);
796}
797
798/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
799/// maximum representable value of T on overflow. ResultOverflowed indicates if
800/// the result is larger than the maximum representable value of type T.
801template <typename T>
802std::enable_if_t<std::is_unsigned<T>::value, T>
803SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
804 bool Dummy;
805 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
806 // Hacker's Delight, p. 29
807 T Z = X + Y;
808 Overflowed = (Z < X || Z < Y);
809 if (Overflowed)
810 return std::numeric_limits<T>::max();
811 else
812 return Z;
813}
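
A minimal sketch of AbsoluteDifference and SaturatingAdd (assumes the header is included; wrapper name is illustrative):

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <limits>

void saturatingAddExamples() {
  assert(llvm::AbsoluteDifference(3u, 10u) == 7u);
  bool Overflowed = false;
  std::uint8_t Sum = llvm::SaturatingAdd<std::uint8_t>(200, 100, &Overflowed);
  assert(Overflowed && Sum == std::numeric_limits<std::uint8_t>::max()); // clamped to 255
}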
814
815/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
816/// maximum representable value of T on overflow. ResultOverflowed indicates if
817/// the result is larger than the maximum representable value of type T.
818template <typename T>
819std::enable_if_t<std::is_unsigned<T>::value, T>
820SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
821 bool Dummy;
822 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
823
824 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
825 // because it fails for uint16_t (where multiplication can have undefined
826 // behavior due to promotion to int), and requires a division in addition
827 // to the multiplication.
828
829 Overflowed = false;
830
831 // Log2(Z) would be either Log2Z or Log2Z + 1.
832 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
833 // will necessarily be less than Log2Max as desired.
834 int Log2Z = Log2_64(X) + Log2_64(Y);
835 const T Max = std::numeric_limits<T>::max();
836 int Log2Max = Log2_64(Max);
837 if (Log2Z < Log2Max) {
838 return X * Y;
839 }
840 if (Log2Z > Log2Max) {
841 Overflowed = true;
842 return Max;
843 }
844
845 // We're going to use the top bit, and maybe overflow one
846 // bit past it. Multiply all but the bottom bit then add
847 // that on at the end.
848 T Z = (X >> 1) * Y;
849 if (Z & ~(Max >> 1)) {
850 Overflowed = true;
851 return Max;
852 }
853 Z <<= 1;
854 if (X & 1)
855 return SaturatingAdd(Z, Y, ResultOverflowed);
856
857 return Z;
858}
859
860/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
861/// the product. Clamp the result to the maximum representable value of T on
862/// overflow. ResultOverflowed indicates if the result is larger than the
863/// maximum representable value of type T.
864template <typename T>
865std::enable_if_t<std::is_unsigned<T>::value, T>
866SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
867 bool Dummy;
868 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
869
870 T Product = SaturatingMultiply(X, Y, &Overflowed);
871 if (Overflowed)
872 return Product;
873
874 return SaturatingAdd(A, Product, &Overflowed);
875}
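
A minimal sketch of the saturating multiply helpers (assumes the header is included; wrapper name is illustrative):

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

void saturatingMultiplyExamples() {
  bool Overflowed = false;
  assert(llvm::SaturatingMultiply<std::uint16_t>(300, 200, &Overflowed) == 60000 && !Overflowed);
  assert(llvm::SaturatingMultiply<std::uint16_t>(300, 300, &Overflowed) == 65535 && Overflowed);
  // Multiply then add, saturating at each step: 300 * 200 + 5000 == 65000 fits in uint16_t.
  assert(llvm::SaturatingMultiplyAdd<std::uint16_t>(300, 200, 5000) == 65000);
}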
876
877/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
878extern const float huge_valf;
879
880
881/// Add two signed integers, computing the two's complement truncated result,
882/// returning true if overflow occurred.
883template <typename T>
884std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
885#if __has_builtin(__builtin_add_overflow)
886 return __builtin_add_overflow(X, Y, &Result);
887#else
888 // Perform the unsigned addition.
889 using U = std::make_unsigned_t<T>;
890 const U UX = static_cast<U>(X);
891 const U UY = static_cast<U>(Y);
892 const U UResult = UX + UY;
893
894 // Convert to signed.
895 Result = static_cast<T>(UResult);
896
897 // Adding two positive numbers should result in a positive number.
898 if (X > 0 && Y > 0)
899 return Result <= 0;
900 // Adding two negatives should result in a negative number.
901 if (X < 0 && Y < 0)
902 return Result >= 0;
903 return false;
904#endif
905}
906
907/// Subtract two signed integers, computing the two's complement truncated
908/// result, returning true if an overflow occurred.
909template <typename T>
910std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) {
911#if __has_builtin(__builtin_sub_overflow)
912 return __builtin_sub_overflow(X, Y, &Result);
913#else
914 // Perform the unsigned subtraction.
915 using U = std::make_unsigned_t<T>;
916 const U UX = static_cast<U>(X);
917 const U UY = static_cast<U>(Y);
918 const U UResult = UX - UY;
919
920 // Convert to signed.
921 Result = static_cast<T>(UResult);
922
923 // Subtracting a positive number from a negative results in a negative number.
924 if (X <= 0 && Y > 0)
925 return Result >= 0;
926 // Subtracting a negative number from a positive results in a positive number.
927 if (X >= 0 && Y < 0)
928 return Result <= 0;
929 return false;
930#endif
931}
932
933/// Multiply two signed integers, computing the two's complement truncated
934/// result, returning true if an overflow occurred.
935template <typename T>
936std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) {
937 // Perform the unsigned multiplication on absolute values.
938 using U = std::make_unsigned_t<T>;
939 const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
940 const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
941 const U UResult = UX * UY;
942
943 // Convert to signed.
944 const bool IsNegative = (X < 0) ^ (Y < 0);
945 Result = IsNegative ? (0 - UResult) : UResult;
946
947 // If any of the args was 0, result is 0 and no overflow occurs.
948 if (UX == 0 || UY == 0)
949 return false;
950
951 // UX and UY are in [1, 2^n], where n is the number of digits.
952 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
953 // positive) divided by an argument compares to the other.
954 if (IsNegative)
955 return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
956 else
957 return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
958}
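
A minimal sketch of the checked signed arithmetic helpers above (assumes the header is included; wrapper name is illustrative). Each call reports overflow through its (nonzero) return value and writes the truncated result into R:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

void checkedArithmeticExamples() {
  std::int8_t R;
  assert(llvm::AddOverflow<std::int8_t>(100, 100, R));            // 200 does not fit in int8_t
  assert(!llvm::SubOverflow<std::int8_t>(100, 100, R) && R == 0); // 0 fits, no overflow
  assert(llvm::MulOverflow<std::int8_t>(-100, 2, R));             // -200 does not fit in int8_t
}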
959
960} // End llvm namespace
961
962#endif