74#include "llvm/IR/IntrinsicsAArch64.h"
109#define DEBUG_TYPE "aarch64-lower"
112STATISTIC(NumOptimizedImms,
"Number of times immediates were optimized");
119 cl::desc(
"Allow AArch64 Local Dynamic TLS code generation"),
124 cl::desc(
"Enable AArch64 logical imm instruction "
134 cl::desc(
"Combine extends of AArch64 masked "
135 "gather intrinsics"),
139 cl::desc(
"Combine ext and trunc to TBL"),
154 cl::desc(
"Enable / disable SVE scalable vectors in Global ISel"),
161 cl::desc(
"Generate ISD::PTRADD nodes for pointer arithmetic in "
162 "SelectionDAG for FEAT_CPA"),
172 AArch64::X3, AArch64::X4, AArch64::X5,
173 AArch64::X6, AArch64::X7};
175 AArch64::Q3, AArch64::Q4, AArch64::Q5,
176 AArch64::Q6, AArch64::Q7};
201 return MVT::nxv8bf16;
208 switch (EC.getKnownMinValue()) {
224 "Expected scalable predicate vector type!");
246 "Expected legal vector type!");
253 "Expected legal type!");
254 return VT == MVT::nxv16i1;
267 "Unexpected fixed-size unpacked type.");
277 case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
278 case AArch64ISD::BSWAP_MERGE_PASSTHRU:
279 case AArch64ISD::REVH_MERGE_PASSTHRU:
280 case AArch64ISD::REVW_MERGE_PASSTHRU:
281 case AArch64ISD::REVD_MERGE_PASSTHRU:
282 case AArch64ISD::CTLZ_MERGE_PASSTHRU:
283 case AArch64ISD::CTPOP_MERGE_PASSTHRU:
284 case AArch64ISD::DUP_MERGE_PASSTHRU:
285 case AArch64ISD::ABS_MERGE_PASSTHRU:
286 case AArch64ISD::NEG_MERGE_PASSTHRU:
287 case AArch64ISD::FNEG_MERGE_PASSTHRU:
288 case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
289 case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
290 case AArch64ISD::FCEIL_MERGE_PASSTHRU:
291 case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
292 case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
293 case AArch64ISD::FRINT_MERGE_PASSTHRU:
294 case AArch64ISD::FRINT32_MERGE_PASSTHRU:
295 case AArch64ISD::FRINT64_MERGE_PASSTHRU:
296 case AArch64ISD::FROUND_MERGE_PASSTHRU:
297 case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
298 case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
299 case AArch64ISD::FTRUNC32_MERGE_PASSTHRU:
300 case AArch64ISD::FTRUNC64_MERGE_PASSTHRU:
301 case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
302 case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
303 case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
304 case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
305 case AArch64ISD::FCVTX_MERGE_PASSTHRU:
306 case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
307 case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
308 case AArch64ISD::FSQRT_MERGE_PASSTHRU:
309 case AArch64ISD::FRECPX_MERGE_PASSTHRU:
310 case AArch64ISD::FABS_MERGE_PASSTHRU:
317 switch (
Op.getOpcode()) {
323 case AArch64ISD::PTRUE:
324 case AArch64ISD::SETCC_MERGE_ZERO:
327 switch (
Op.getConstantOperandVal(0)) {
330 case Intrinsic::aarch64_sve_ptrue:
331 case Intrinsic::aarch64_sve_pnext:
332 case Intrinsic::aarch64_sve_cmpeq:
333 case Intrinsic::aarch64_sve_cmpne:
334 case Intrinsic::aarch64_sve_cmpge:
335 case Intrinsic::aarch64_sve_cmpgt:
336 case Intrinsic::aarch64_sve_cmphs:
337 case Intrinsic::aarch64_sve_cmphi:
338 case Intrinsic::aarch64_sve_cmpeq_wide:
339 case Intrinsic::aarch64_sve_cmpne_wide:
340 case Intrinsic::aarch64_sve_cmpge_wide:
341 case Intrinsic::aarch64_sve_cmpgt_wide:
342 case Intrinsic::aarch64_sve_cmplt_wide:
343 case Intrinsic::aarch64_sve_cmple_wide:
344 case Intrinsic::aarch64_sve_cmphs_wide:
345 case Intrinsic::aarch64_sve_cmphi_wide:
346 case Intrinsic::aarch64_sve_cmplo_wide:
347 case Intrinsic::aarch64_sve_cmpls_wide:
348 case Intrinsic::aarch64_sve_fcmpeq:
349 case Intrinsic::aarch64_sve_fcmpne:
350 case Intrinsic::aarch64_sve_fcmpge:
351 case Intrinsic::aarch64_sve_fcmpgt:
352 case Intrinsic::aarch64_sve_fcmpuo:
353 case Intrinsic::aarch64_sve_facgt:
354 case Intrinsic::aarch64_sve_facge:
355 case Intrinsic::aarch64_sve_whilege:
356 case Intrinsic::aarch64_sve_whilegt:
357 case Intrinsic::aarch64_sve_whilehi:
358 case Intrinsic::aarch64_sve_whilehs:
359 case Intrinsic::aarch64_sve_whilele:
360 case Intrinsic::aarch64_sve_whilelo:
361 case Intrinsic::aarch64_sve_whilels:
362 case Intrinsic::aarch64_sve_whilelt:
363 case Intrinsic::aarch64_sve_match:
364 case Intrinsic::aarch64_sve_nmatch:
365 case Intrinsic::aarch64_sve_whilege_x2:
366 case Intrinsic::aarch64_sve_whilegt_x2:
367 case Intrinsic::aarch64_sve_whilehi_x2:
368 case Intrinsic::aarch64_sve_whilehs_x2:
369 case Intrinsic::aarch64_sve_whilele_x2:
370 case Intrinsic::aarch64_sve_whilelo_x2:
371 case Intrinsic::aarch64_sve_whilels_x2:
372 case Intrinsic::aarch64_sve_whilelt_x2:
378static std::tuple<SDValue, SDValue>
399 if (!ConstDiscN || !
isUInt<16>(ConstDiscN->getZExtValue()))
405 AddrDisc = DAG->
getRegister(AArch64::NoRegister, MVT::i64);
407 return std::make_tuple(
426 if (Subtarget->hasLS64()) {
432 if (Subtarget->hasFPARMv8()) {
441 if (Subtarget->hasNEON()) {
445 addDRType(MVT::v2f32);
446 addDRType(MVT::v8i8);
447 addDRType(MVT::v4i16);
448 addDRType(MVT::v2i32);
449 addDRType(MVT::v1i64);
450 addDRType(MVT::v1f64);
451 addDRType(MVT::v4f16);
452 addDRType(MVT::v4bf16);
454 addQRType(MVT::v4f32);
455 addQRType(MVT::v2f64);
456 addQRType(MVT::v16i8);
457 addQRType(MVT::v8i16);
458 addQRType(MVT::v4i32);
459 addQRType(MVT::v2i64);
460 addQRType(MVT::v8f16);
461 addQRType(MVT::v8bf16);
464 if (Subtarget->isSVEorStreamingSVEAvailable()) {
492 if (Subtarget->useSVEForFixedLengthVectors()) {
534 if (Subtarget->hasFPARMv8()) {
625 if (Subtarget->hasFPARMv8()) {
631 if (Subtarget->hasFPARMv8()) {
685 if (Subtarget->hasCSSC()) {
764 if (Subtarget->hasFullFP16()) {
796 if (Subtarget->hasFullFP16()) {
809 auto LegalizeNarrowFP = [
this](
MVT ScalarVT) {
917 if (!Subtarget->hasFullFP16()) {
918 LegalizeNarrowFP(MVT::f16);
920 LegalizeNarrowFP(MVT::bf16);
938 for (
MVT Ty : {MVT::f32, MVT::f64})
940 if (Subtarget->hasFullFP16())
948 for (
MVT Ty : {MVT::f32, MVT::f64})
950 if (Subtarget->hasFullFP16())
963 if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
975 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
1003 if (Subtarget->outlineAtomics() && !Subtarget->hasLSFE()) {
1030 if (Subtarget->hasLSE128()) {
1044 if (Subtarget->hasLSE2()) {
1101 if (WideVT.getScalarSizeInBits() > NarrowVT.getScalarSizeInBits()) {
1107 if (Subtarget->hasFPARMv8()) {
1234 if (!Subtarget->isTargetWindows())
1250 if (Subtarget->hasSME())
1253 if (Subtarget->isNeonAvailable()) {
1298 for (
auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
1301 if (Subtarget->hasFullFP16()) {
1330 for (
auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
1339 for (
auto VT : {MVT::v1i64, MVT::v2i64}) {
1355 for (
MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64,
1356 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1363 for (
MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1374 for (
MVT VT : { MVT::v4f16, MVT::v2f32,
1375 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1376 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1385 if (Subtarget->hasFullFP16())
1388 for (
MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1389 MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1411 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
1421 if (VT == MVT::v4i16 || VT == MVT::v8i16 || VT == MVT::v2i32 ||
1422 VT == MVT::v4i32 || VT == MVT::v2i64)
1427 if (VT == MVT::v8i8 || VT == MVT::v16i8 || VT == MVT::v8i16 ||
1428 VT == MVT::v4i16 || VT == MVT::v2i32 || VT == MVT::v4i32)
1447 for (
MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
1449 if (Subtarget->hasFullFP16())
1450 for (
MVT Ty : {MVT::v4f16, MVT::v8f16})
1456 for (
MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
1458 if (Subtarget->hasFullFP16())
1459 for (
MVT Ty : {MVT::v4f16, MVT::v8f16})
1494 for (
MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1497 for (
MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
1500 if (Subtarget->hasDotProd()) {
1509 if (Subtarget->hasMatMulInt8()) {
1520 if (Subtarget->hasF16F32DOT() || Subtarget->hasFP16FML()) {
1527 if (Subtarget->hasBF16())
1529 MVT::v8bf16,
Legal);
1533 if (Subtarget->hasAES()) {
1543 if (VT.is128BitVector() || VT.is64BitVector()) {
1558 for (
MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
1564 if (Subtarget->hasSME()) {
1570 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1572 {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
1578 for (
auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
1585 if (Subtarget->isSVEorStreamingSVEAvailable() &&
1586 (Subtarget->hasSVE2p1() || Subtarget->hasSME2()))
1589 for (
auto VT : {MVT::v16i8, MVT::v8i8, MVT::v4i16, MVT::v2i32})
1592 for (
auto VT : {MVT::v8f16, MVT::v4f32, MVT::v2f64})
1596 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1597 for (
auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
1666 if (!Subtarget->isLittleEndian())
1669 if (Subtarget->hasSVE2() ||
1670 (Subtarget->hasSME() && Subtarget->isStreaming()))
1675 for (
auto VT : {MVT::nxv4i32, MVT::nxv2i64}) {
1681 for (
auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
1687 for (
auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32})
1691 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
1692 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
1704 {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
1718 if (VT != MVT::nxv16i1) {
1728 {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
1729 MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1730 MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1769 for (
auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
1770 MVT::nxv4f32, MVT::nxv2f64}) {
1852 for (
auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1871 if (Subtarget->hasSVEB16B16() &&
1872 Subtarget->isNonStreamingSVEorSME2Available()) {
1874 for (
auto VT : {MVT::v4bf16, MVT::v8bf16, MVT::nxv2bf16, MVT::nxv4bf16,
1897 if (!Subtarget->hasSVEB16B16() ||
1898 !Subtarget->isNonStreamingSVEorSME2Available()) {
1899 for (
MVT VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1909 if (VT != MVT::nxv2bf16 && Subtarget->hasBF16())
1915 if (Subtarget->hasBF16() && Subtarget->isNeonAvailable())
1924 for (
auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
1925 MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1939 if (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable()) {
1948 if (Subtarget->useSVEForFixedLengthVectors()) {
1951 VT, !Subtarget->isNeonAvailable()))
1952 addTypeForFixedLengthSVE(VT);
1956 VT, !Subtarget->isNeonAvailable()))
1957 addTypeForFixedLengthSVE(VT);
1961 for (
auto VT : {MVT::v8i8, MVT::v4i16})
1966 for (
auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
1968 for (
auto VT : {MVT::v8f16, MVT::v4f32, MVT::v8bf16})
1990 for (
auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1991 MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
2000 for (
auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
2023 for (
auto VT : {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1})
2030 if (Subtarget->isSVEorStreamingSVEAvailable()) {
2040 if (Subtarget->hasMatMulInt8()) {
2042 MVT::nxv16i8,
Legal);
2047 if (Subtarget->hasSVE2() || Subtarget->hasSME()) {
2056 MVT::nxv8f16,
Legal);
2060 if (!Subtarget->isNeonAvailable() ||
2061 (!Subtarget->hasF16F32DOT() && !Subtarget->hasFP16FML())) {
2069 if (Subtarget->hasBF16())
2071 MVT::nxv8bf16,
Legal);
2074 if (Subtarget->hasSVEAES() &&
2075 (Subtarget->isSVEAvailable() || Subtarget->hasSSVE_AES()))
2079 if (Subtarget->hasSVE2() ||
2080 (Subtarget->hasSME() && Subtarget->isStreaming())) {
2082 for (
auto VT : {MVT::v2i32, MVT::v4i16, MVT::v8i8, MVT::v16i8}) {
2086 for (
auto VT : {MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, MVT::nxv16i1}) {
2093 if (Subtarget->isSVEAvailable()) {
2094 for (
auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64,
2095 MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
2096 MVT::nxv4f32, MVT::nxv2f64, MVT::nxv2bf16, MVT::nxv4bf16,
2097 MVT::nxv8bf16, MVT::v4f16, MVT::v8f16, MVT::v2f32,
2098 MVT::v4f32, MVT::v1f64, MVT::v2f64, MVT::v8i8,
2099 MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
2100 MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
2105 for (
auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
2106 MVT::nxv4f32, MVT::nxv2f64, MVT::v4f16, MVT::v8f16,
2107 MVT::v2f32, MVT::v4f32, MVT::v2f64})
2112 {MVT::nxv4i32, MVT::nxv2i64, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv2f64})
2117 for (
auto VT : {MVT::v2i32, MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32,
2127 for (
auto VT : {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64,
2128 MVT::nxv2f32, MVT::nxv2f64, MVT::nxv4i8, MVT::nxv4i16,
2129 MVT::nxv4i32, MVT::nxv4f32}) {
2137 if (Subtarget->hasSVE2()) {
2155 if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
2162 if (Subtarget->hasSVE()) {
2176 if (Subtarget->isTargetWindows()) {
2196void AArch64TargetLowering::addTypeForNEON(
MVT VT) {
2206 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
2228 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
2229 ((VT == MVT::v4bf16 || VT == MVT::v8bf16 || VT == MVT::v4f16 ||
2230 VT == MVT::v8f16) &&
2231 Subtarget->hasFullFP16()))
2256 if (VT != MVT::v8i8 && VT != MVT::v16i8)
2265 for (
unsigned Opcode :
2283 for (
unsigned Opcode :
2314 if (Subtarget->isLittleEndian()) {
2325 if (Subtarget->hasD128()) {
2343 if (!Subtarget->isSVEorStreamingSVEAvailable() ||
2350 (OpVT != MVT::i32 && OpVT != MVT::i64))))
2362 if (!Subtarget->isSVEorStreamingSVEAvailable())
2367 return VT != MVT::nxv16i1 && VT != MVT::nxv8i1 && VT != MVT::nxv4i1 &&
2368 VT != MVT::nxv2i1 && VT != MVT::v16i1 && VT != MVT::v8i1 &&
2369 VT != MVT::v4i1 && VT != MVT::v2i1;
2373 unsigned SearchSize)
const {
2375 if (!Subtarget->hasSVE2() || !Subtarget->isSVEAvailable())
2378 if (VT == MVT::nxv8i16 || VT == MVT::v8i16)
2379 return SearchSize != 8;
2380 if (VT == MVT::nxv16i8 || VT == MVT::v16i8 || VT == MVT::v8i8)
2381 return SearchSize != 8 && SearchSize != 16;
2385void AArch64TargetLowering::addTypeForFixedLengthSVE(
MVT VT) {
2410 while (InnerVT != VT) {
2424 while (InnerVT != VT) {
2433 bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();
2454 if (Subtarget->hasMatMulInt8()) {
2569void AArch64TargetLowering::addDRType(
MVT VT) {
2571 if (Subtarget->isNeonAvailable())
2575void AArch64TargetLowering::addQRType(
MVT VT) {
2577 if (Subtarget->isNeonAvailable())
2594 Imm =
C->getZExtValue();
2602 case AArch64ISD::SQDMULH:
2614 return N->getOpcode() ==
Opc &&
2619 const APInt &Demanded,
2622 uint64_t OldImm = Imm, NewImm, Enc;
2627 if (Imm == 0 || Imm == Mask ||
2631 unsigned EltSize =
Size;
2648 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
2650 uint64_t Sum = RotatedImm + NonDemandedBits;
2651 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
2652 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
2653 NewImm = (Imm | Ones) & Mask;
2681 while (EltSize <
Size) {
2682 NewImm |= NewImm << EltSize;
2688 "demanded bits should never be altered");
2689 assert(OldImm != NewImm &&
"the new imm shouldn't be equal to the old imm");
2692 EVT VT =
Op.getValueType();
2698 if (NewImm == 0 || NewImm == OrigMask) {
2723 EVT VT =
Op.getValueType();
2737 switch (
Op.getOpcode()) {
2741 NewOpc =
Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
2744 NewOpc =
Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
2747 NewOpc =
Size == 32 ? AArch64::EORWri : AArch64::EORXri;
2762 switch (
Op.getOpcode()) {
2765 case AArch64ISD::DUP: {
2768 if (
SrcOp.getValueSizeInBits() !=
Op.getScalarValueSizeInBits()) {
2769 assert(
SrcOp.getValueSizeInBits() >
Op.getScalarValueSizeInBits() &&
2770 "Expected DUP implicit truncation");
2771 Known = Known.
trunc(
Op.getScalarValueSizeInBits());
2775 case AArch64ISD::CSEL: {
2782 case AArch64ISD::CSNEG:
2783 case AArch64ISD::CSINC:
2784 case AArch64ISD::CSINV: {
2792 if (
Op.getOpcode() == AArch64ISD::CSINC)
2796 else if (
Op.getOpcode() == AArch64ISD::CSINV)
2798 else if (
Op.getOpcode() == AArch64ISD::CSNEG)
2801 Op.getScalarValueSizeInBits())));
2806 case AArch64ISD::BICi: {
2809 ~(
Op->getConstantOperandAPInt(1) <<
Op->getConstantOperandAPInt(2))
2815 case AArch64ISD::VLSHR: {
2822 case AArch64ISD::VASHR: {
2829 case AArch64ISD::VSHL: {
2836 case AArch64ISD::MOVI: {
2841 case AArch64ISD::MOVIshift: {
2844 <<
Op->getConstantOperandVal(1)));
2847 case AArch64ISD::MOVImsl: {
2850 Known.
getBitWidth(), ~(~
Op->getConstantOperandVal(0) << ShiftAmt)));
2853 case AArch64ISD::MOVIedit: {
2859 case AArch64ISD::MVNIshift: {
2862 ~(
Op->getConstantOperandVal(0) <<
Op->getConstantOperandVal(1)),
2866 case AArch64ISD::MVNImsl: {
2873 case AArch64ISD::LOADgot:
2874 case AArch64ISD::ADDlow: {
2875 if (!Subtarget->isTargetILP32())
2881 case AArch64ISD::ASSERT_ZEXT_BOOL: {
2891 case Intrinsic::aarch64_ldaxr:
2892 case Intrinsic::aarch64_ldxr: {
2904 unsigned IntNo =
Op.getConstantOperandVal(0);
2908 case Intrinsic::aarch64_neon_uaddlv: {
2909 MVT VT =
Op.getOperand(1).getValueType().getSimpleVT();
2911 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
2912 unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
2919 case Intrinsic::aarch64_neon_umaxv:
2920 case Intrinsic::aarch64_neon_uminv: {
2925 MVT VT =
Op.getOperand(1).getValueType().getSimpleVT();
2927 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
2931 }
else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
2945 unsigned Depth)
const {
2946 EVT VT =
Op.getValueType();
2948 unsigned Opcode =
Op.getOpcode();
2950 case AArch64ISD::FCMEQ:
2951 case AArch64ISD::FCMGE:
2952 case AArch64ISD::FCMGT:
2955 case AArch64ISD::VASHR: {
2958 return std::min<uint64_t>(Tmp +
Op.getConstantOperandVal(1), VTBits);
2972 unsigned *
Fast)
const {
2982 if (ElementSizeBits % 8 == 0 && Alignment >=
Align(ElementSizeBits / 8))
2986 if (Subtarget->requiresStrictAlign())
2991 *
Fast = !Subtarget->isMisaligned128StoreSlow() || VT.
getStoreSize() != 16 ||
3010 unsigned *
Fast)
const {
3011 if (Subtarget->requiresStrictAlign())
3016 *
Fast = !Subtarget->isMisaligned128StoreSlow() ||
3017 Ty.getSizeInBytes() != 16 ||
3060 Register DestReg =
MI.getOperand(0).getReg();
3061 Register IfTrueReg =
MI.getOperand(1).getReg();
3062 Register IfFalseReg =
MI.getOperand(2).getReg();
3063 unsigned CondCode =
MI.getOperand(3).getImm();
3064 bool NZCVKilled =
MI.getOperand(4).isKill();
3078 MBB->addSuccessor(TrueBB);
3079 MBB->addSuccessor(EndBB);
3095 MI.eraseFromParent();
3103 "SEH does not use catchret!");
3114 Register TargetReg =
MI.getOperand(0).getReg();
3116 TII.probedStackAlloc(
MBBI, TargetReg,
false);
3118 MI.eraseFromParent();
3119 return NextInst->getParent();
3172 MBB->addSuccessor(TrapBB);
3173 MBB->addSuccessor(PassBB);
3175 MI.eraseFromParent();
3187 MIB.
add(
MI.getOperand(1));
3188 MIB.
add(
MI.getOperand(2));
3189 MIB.
add(
MI.getOperand(3));
3190 MIB.
add(
MI.getOperand(4));
3191 MIB.
add(
MI.getOperand(5));
3193 MI.eraseFromParent();
3204 MIB.
add(
MI.getOperand(0));
3205 MIB.
add(
MI.getOperand(1));
3206 MIB.
add(
MI.getOperand(2));
3207 MIB.
add(
MI.getOperand(1));
3209 MI.eraseFromParent();
3216 bool Op0IsDef)
const {
3222 for (
unsigned I = 1;
I <
MI.getNumOperands(); ++
I)
3223 MIB.
add(
MI.getOperand(
I));
3225 MI.eraseFromParent();
3235 unsigned StartIdx = 0;
3237 bool HasTile = BaseReg != AArch64::ZA;
3238 bool HasZPROut = HasTile &&
MI.getOperand(0).isReg();
3240 MIB.
add(
MI.getOperand(StartIdx));
3244 MIB.
addReg(BaseReg +
MI.getOperand(StartIdx).getImm(),
3246 MIB.
addReg(BaseReg +
MI.getOperand(StartIdx).getImm());
3250 if (
MI.getOperand(0).isReg() && !
MI.getOperand(1).isImm()) {
3251 MIB.
add(
MI.getOperand(StartIdx));
3256 for (
unsigned I = StartIdx;
I <
MI.getNumOperands(); ++
I)
3257 MIB.
add(
MI.getOperand(
I));
3259 MI.eraseFromParent();
3268 MIB.
add(
MI.getOperand(0));
3270 unsigned Mask =
MI.getOperand(0).getImm();
3271 for (
unsigned I = 0;
I < 8;
I++) {
3272 if (Mask & (1 <<
I))
3276 MI.eraseFromParent();
3286 Register ResultReg =
MI.getOperand(0).getReg();
3289 }
else if (Subtarget->hasSME()) {
3291 .
addImm(AArch64SysReg::SVCR)
3294 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;
3303 MI.eraseFromParent();
3311 while (
Reg.isVirtual()) {
3313 assert(
DefMI &&
"Virtual register definition not found");
3314 unsigned Opcode =
DefMI->getOpcode();
3316 if (Opcode == AArch64::COPY) {
3317 Reg =
DefMI->getOperand(1).getReg();
3319 if (
Reg.isPhysical())
3323 if (Opcode == AArch64::SUBREG_TO_REG) {
3324 Reg =
DefMI->getOperand(1).getReg();
3341 int64_t IntDisc = IntDiscOp.
getImm();
3342 assert(IntDisc == 0 &&
"Blend components are already expanded");
3347 case AArch64::MOVKXi:
3356 case AArch64::MOVi32imm:
3357 case AArch64::MOVi64imm:
3361 AddrDisc = AArch64::NoRegister;
3370 if (AddrDisc == AArch64::XZR)
3371 AddrDisc = AArch64::NoRegister;
3374 if (AddrDisc && MRI.
getRegClass(AddrDisc) != AddrDiscRC) {
3380 AddrDiscOp.
setReg(AddrDisc);
3381 IntDiscOp.
setImm(IntDisc);
3388 if (SMEOrigInstr != -1) {
3392 switch (SMEMatrixType) {
3408 switch (
MI.getOpcode()) {
3414 case AArch64::EntryPStateSM:
3416 case AArch64::F128CSEL:
3418 case TargetOpcode::STATEPOINT:
3424 MI.addOperand(*
MI.getMF(),
3430 case TargetOpcode::STACKMAP:
3431 case TargetOpcode::PATCHPOINT:
3434 case TargetOpcode::PATCHABLE_EVENT_CALL:
3435 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
3438 case AArch64::CATCHRET:
3441 case AArch64::PROBED_STACKALLOC_DYN:
3444 case AArch64::CHECK_MATCHING_VL_PSEUDO:
3447 case AArch64::LD1_MXIPXX_H_PSEUDO_B:
3448 return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0,
MI, BB);
3449 case AArch64::LD1_MXIPXX_H_PSEUDO_H:
3450 return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0,
MI, BB);
3451 case AArch64::LD1_MXIPXX_H_PSEUDO_S:
3452 return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0,
MI, BB);
3453 case AArch64::LD1_MXIPXX_H_PSEUDO_D:
3454 return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0,
MI, BB);
3455 case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
3456 return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0,
MI, BB);
3457 case AArch64::LD1_MXIPXX_V_PSEUDO_B:
3458 return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0,
MI, BB);
3459 case AArch64::LD1_MXIPXX_V_PSEUDO_H:
3460 return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0,
MI, BB);
3461 case AArch64::LD1_MXIPXX_V_PSEUDO_S:
3462 return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0,
MI, BB);
3463 case AArch64::LD1_MXIPXX_V_PSEUDO_D:
3464 return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0,
MI, BB);
3465 case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
3466 return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0,
MI, BB);
3467 case AArch64::LDR_ZA_PSEUDO:
3469 case AArch64::LDR_TX_PSEUDO:
3471 case AArch64::STR_TX_PSEUDO:
3473 case AArch64::ZERO_M_PSEUDO:
3475 case AArch64::ZERO_T_PSEUDO:
3477 case AArch64::MOVT_TIZ_PSEUDO:
3482 &AArch64::GPR64noipRegClass);
3510 N =
N->getOperand(0).getNode();
3515 if (
N->getOpcode() != AArch64ISD::DUP)
3518 auto Opnd0 =
N->getOperand(0);
3524 (V.getOpcode() == AArch64ISD::DUP &&
isOneConstant(V.getOperand(0)));
3685 CondCode, CondCode2);
3698 bool IsLegal = (
C >> 12 == 0) || ((
C & 0xFFFULL) == 0 &&
C >> 24 == 0);
3700 <<
" legal: " << (IsLegal ?
"yes\n" :
"no\n"));
3719 if (
Op->getFlags().hasNoSignedWrap())
3745 (isIntEqualitySetCC(CC) ||
3753 EVT VT =
LHS.getValueType();
3758 if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {
3763 Chain =
RHS.getValue(1);
3766 IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
3772 EVT VT =
LHS.getValueType();
3777 if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {
3788 unsigned Opcode = AArch64ISD::SUBS;
3792 Opcode = AArch64ISD::ADDS;
3795 isIntEqualitySetCC(CC)) {
3798 Opcode = AArch64ISD::ADDS;
3807 LHS.getOperand(0),
LHS.getOperand(1));
3811 }
else if (
LHS.getOpcode() == AArch64ISD::ANDS) {
3813 return LHS.getValue(1);
3879 unsigned Opcode = 0;
3882 if (
LHS.getValueType().isFloatingPoint()) {
3883 assert(
LHS.getValueType() != MVT::f128);
3884 if ((
LHS.getValueType() == MVT::f16 && !FullFP16) ||
3885 LHS.getValueType() == MVT::bf16) {
3889 Opcode = AArch64ISD::FCCMP;
3891 APInt Imm = Const->getAPIntValue();
3892 if (Imm.isNegative() && Imm.sgt(-32)) {
3893 Opcode = AArch64ISD::CCMN;
3897 Opcode = AArch64ISD::CCMN;
3900 isIntEqualitySetCC(CC)) {
3903 Opcode = AArch64ISD::CCMN;
3907 Opcode = AArch64ISD::CCMP;
3933 bool &CanNegate,
bool &MustBeFirst,
3934 bool &PreferFirst,
bool WillNegate,
3935 unsigned Depth = 0) {
3941 if (VT == MVT::f128)
3944 MustBeFirst =
false;
3948 {Val->getOperand(0), Val->getOperand(1)});
3955 bool IsOR = Opcode ==
ISD::OR;
3971 if (MustBeFirstL && MustBeFirstR)
3977 if (!CanNegateL && !CanNegateR)
3981 CanNegate = WillNegate && CanNegateL && CanNegateR;
3984 MustBeFirst = !CanNegate;
3989 MustBeFirst = MustBeFirstL || MustBeFirstR;
3991 PreferFirst = PreferFirstL || PreferFirstR;
4014 bool isInteger =
LHS.getValueType().isInteger();
4016 CC = getSetCCInverse(CC,
LHS.getValueType());
4022 assert(
LHS.getValueType().isFloatingPoint());
4048 bool IsOR = Opcode ==
ISD::OR;
4055 PreferFirstL, IsOR);
4056 assert(ValidL &&
"Valid conjunction/disjunction tree");
4064 PreferFirstR, IsOR);
4065 assert(ValidR &&
"Valid conjunction/disjunction tree");
4068 bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR;
4071 if (MustBeFirstL || ShouldFirstL) {
4072 assert(!MustBeFirstR &&
"Valid conjunction/disjunction tree");
4081 bool NegateAfterAll;
4085 assert(CanNegateR &&
"at least one side must be negatable");
4086 assert(!MustBeFirstR &&
"invalid conjunction/disjunction tree");
4090 NegateAfterR =
true;
4093 NegateR = CanNegateR;
4094 NegateAfterR = !CanNegateR;
4097 NegateAfterAll = !Negate;
4099 assert(Opcode ==
ISD::AND &&
"Valid conjunction/disjunction tree");
4100 assert(!Negate &&
"Valid conjunction/disjunction tree");
4104 NegateAfterR =
false;
4105 NegateAfterAll =
false;
4125 bool DummyCanNegate;
4126 bool DummyMustBeFirst;
4127 bool DummyPreferFirst;
4129 DummyPreferFirst,
false))
4140 auto isSupportedExtend = [&](
SDValue V) {
4146 uint64_t Mask = MaskCst->getZExtValue();
4147 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
4153 if (!
Op.hasOneUse())
4156 if (isSupportedExtend(
Op))
4159 unsigned Opc =
Op.getOpcode();
4162 uint64_t Shift = ShiftCst->getZExtValue();
4163 if (isSupportedExtend(
Op.getOperand(0)))
4164 return (Shift <= 4) ? 2 : 1;
4165 EVT VT =
Op.getValueType();
4166 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
4178 if (
LHS.getOpcode() !=
ISD::AND &&
LHS.getOpcode() != AArch64ISD::ANDS)
4198 EVT VT =
RHS.getValueType();
4199 APInt C = RHSC->getAPIntValue();
4214 if (!
C.isMinSignedValue()) {
4226 assert(!
C.isZero() &&
"C should not be zero here");
4237 if (!
C.isMaxSignedValue()) {
4248 if (!
C.isAllOnes()) {
4273 bool LHSIsCMN =
isCMN(
LHS, CC, DAG);
4274 bool RHSIsCMN =
isCMN(
RHS, CC, DAG);
4309 LHS.getNode()->hasNUsesOfValue(1, 0)) {
4310 int16_t ValueofRHS =
RHS->getAsZExtVal();
4338static std::pair<SDValue, SDValue>
4340 assert((
Op.getValueType() == MVT::i32 ||
Op.getValueType() == MVT::i64) &&
4341 "Unsupported value type");
4347 switch (
Op.getOpcode()) {
4351 Opc = AArch64ISD::ADDS;
4355 Opc = AArch64ISD::ADDS;
4359 Opc = AArch64ISD::SUBS;
4363 Opc = AArch64ISD::SUBS;
4371 if (
Op.getValueType() == MVT::i32) {
4394 assert(
Op.getValueType() == MVT::i64 &&
"Expected an i64 value type");
4404 Overflow = DAG.
getNode(AArch64ISD::SUBS,
DL, VTs, UpperBits, LowerBits)
4423 Overflow =
Value.getValue(1);
4425 return std::make_pair(
Value, Overflow);
4430 !Subtarget->isNeonAvailable()))
4431 return LowerToScalableOp(
Op, DAG);
4455 return DAG.
getNode(AArch64ISD::CSEL,
DL,
Op.getValueType(), TVal, FVal,
4478 if (
LHS.getValueType() != MVT::i32 &&
LHS.getValueType() != MVT::i64)
4485 if (!CFVal || !CTVal)
4522 return Cmp.getValue(1);
4535 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, One, Zero, CC, Glue);
4545 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, One, Zero, CC, Glue);
4551 unsigned Opcode,
bool IsSigned) {
4552 EVT VT0 =
Op.getValue(0).getValueType();
4553 EVT VT1 =
Op.getValue(1).getValueType();
4555 if (VT0 != MVT::i32 && VT0 != MVT::i64)
4558 bool InvertCarry = Opcode == AArch64ISD::SBCS;
4577 bool LastOperandIsImm =
false) {
4578 if (
Op.getValueType().isVector())
4583 const unsigned NumOperands =
Op.getNumOperands();
4584 auto getFloatVT = [](
EVT VT) {
4585 assert((VT == MVT::i32 || VT == MVT::i64) &&
"Unexpected VT");
4586 return VT == MVT::i32 ? MVT::f32 : MVT::f64;
4588 auto bitcastToFloat = [&](
SDValue Val) {
4589 return DAG.
getBitcast(getFloatVT(Val.getValueType()), Val);
4593 for (
unsigned I = 1;
I < NumOperands; ++
I) {
4595 const bool KeepInt = LastOperandIsImm && (
I == NumOperands - 1);
4596 NewOps.
push_back(KeepInt ? Val : bitcastToFloat(Val));
4598 EVT OrigVT =
Op.getValueType();
4623 DAG.
getNode(AArch64ISD::CSEL,
DL, MVT::i32, FVal, TVal, CCVal, Overflow);
4635 unsigned IsWrite =
Op.getConstantOperandVal(2);
4636 unsigned Locality =
Op.getConstantOperandVal(3);
4637 unsigned IsData =
Op.getConstantOperandVal(4);
4639 bool IsStream = !Locality;
4643 assert(Locality <= 3 &&
"Prefetch locality out-of-range");
4647 Locality = 3 - Locality;
4651 unsigned PrfOp = (IsWrite << 4) |
4655 return DAG.
getNode(AArch64ISD::PREFETCH,
DL, MVT::Other,
Op.getOperand(0),
4668 if (LHSConstOp && RHSConst) {
4672 uint64_t NewMaskValue = LHSConstValue & ~(RHSConstant - 1);
4685 EVT VT =
Op.getValueType();
4689 if (VT == MVT::nxv2f64 && SrcVal.
getValueType() == MVT::nxv2bf16) {
4697 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
4701 return LowerFixedLengthFPExtendToSVE(
Op, DAG);
4703 bool IsStrict =
Op->isStrictFPOpcode();
4704 SDValue Op0 =
Op.getOperand(IsStrict ? 1 : 0);
4706 if (VT == MVT::f64) {
4708 if (Op0VT == MVT::f32 || Op0VT == MVT::f16)
4711 if (Op0VT == MVT::bf16 && IsStrict) {
4714 {Op0,
Op.getOperand(0)});
4718 if (Op0VT == MVT::bf16)
4724 assert(
Op.getValueType() == MVT::f128 &&
"Unexpected lowering");
4730 EVT VT =
Op.getValueType();
4731 bool IsStrict =
Op->isStrictFPOpcode();
4732 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
4734 bool Trunc =
Op.getConstantOperandVal(IsStrict ? 2 : 1) == 1;
4735 SDNodeFlags
Flags =
Op->getFlags();
4739 if (SrcVT == MVT::nxv8f32)
4743 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
4746 constexpr EVT
I32 = MVT::nxv4i32;
4752 if (SrcVT == MVT::nxv2f32 || SrcVT == MVT::nxv4f32) {
4753 if (Subtarget->hasBF16())
4754 return LowerToPredicatedOp(
Op, DAG,
4755 AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
4757 Narrow = getSVESafeBitCast(I32, SrcVal, DAG);
4762 }
else if (SrcVT == MVT::nxv2f64 &&
4763 (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())) {
4766 Narrow = DAG.
getNode(AArch64ISD::FCVTX_MERGE_PASSTHRU,
DL, MVT::nxv2f32,
4767 Pg, SrcVal, DAG.
getPOISON(MVT::nxv2f32));
4773 NewOps.
push_back(
Op.getOperand(IsStrict ? 2 : 1));
4774 return DAG.
getNode(
Op.getOpcode(),
DL, VT, NewOps,
Op->getFlags());
4791 IsNaN = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, I1, IsNaN);
4792 Narrow = DAG.
getSelect(
DL, I32, IsNaN, NaN, Narrow);
4797 return getSVESafeBitCast(VT, Narrow, DAG);
4801 return LowerFixedLengthFPRoundToSVE(
Op, DAG);
4806 !((Subtarget->hasNEON() || Subtarget->hasSME()) &&
4807 Subtarget->hasBF16())) {
4822 Narrow = DAG.
getNode(AArch64ISD::FCVTXN,
DL,
F32, Narrow);
4843 Narrow = DAG.
getSelect(
DL, I32, IsNaN, NaN, Narrow);
4860 if (SrcVT != MVT::f128) {
4877 bool IsStrict =
Op->isStrictFPOpcode();
4878 EVT InVT =
Op.getOperand(IsStrict ? 1 : 0).getValueType();
4879 EVT VT =
Op.getValueType();
4882 "Unimplemented SVE support for STRICT_FP_to_INT!");
4891 {
Op.getOperand(0),
Op.getOperand(1)});
4892 return DAG.
getNode(
Op.getOpcode(),
DL, {VT, MVT::Other},
4893 {Ext.getValue(1), Ext.getValue(0)});
4896 Op.getOpcode(),
DL,
Op.getValueType(),
4910 if (InVT == MVT::nxv8f32)
4914 ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
4915 : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
4916 return LowerToPredicatedOp(
Op, DAG, Opcode);
4921 return LowerFixedLengthFPToIntToSVE(
Op, DAG);
4925 if (VTSize < InVTSize) {
4930 {Op.getOperand(0), Op.getOperand(1)});
4940 if (VTSize > InVTSize) {
4947 {
Op.getOperand(0),
Op.getOperand(1)});
4948 return DAG.
getNode(
Op.getOpcode(),
DL, {VT, MVT::Other},
4949 {Ext.getValue(1), Ext.getValue(0)});
4964 return DAG.
getNode(
Op.getOpcode(),
DL, {ScalarVT, MVT::Other},
4965 {Op.getOperand(0), Extract});
4966 return DAG.
getNode(
Op.getOpcode(),
DL, ScalarVT, Extract);
4975 bool IsStrict =
Op->isStrictFPOpcode();
4976 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
4979 return LowerVectorFP_TO_INT(
Op, DAG);
4982 if ((SrcVal.
getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
4988 {
Op.getOperand(0), SrcVal});
4989 return DAG.
getNode(
Op.getOpcode(),
DL, {Op.getValueType(), MVT::Other},
4990 {Ext.getValue(1), Ext.getValue(0)});
5005AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(
SDValue Op,
5011 EVT DstVT =
Op.getValueType();
5017 assert(SatWidth <= DstElementWidth &&
5018 "Saturation width cannot exceed result width");
5031 if ((SrcElementVT == MVT::f16 &&
5032 (!Subtarget->hasFullFP16() || DstElementWidth > 16)) ||
5033 SrcElementVT == MVT::bf16) {
5043 SrcElementVT = MVT::f32;
5044 SrcElementWidth = 32;
5045 }
else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
5046 SrcElementVT != MVT::f16 && SrcElementVT != MVT::bf16)
5051 if (SatWidth == 64 && SrcElementWidth < 64) {
5055 SrcElementVT = MVT::f64;
5056 SrcElementWidth = 64;
5059 if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth) {
5074 if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
5081 SrcVal2 ? DAG.
getNode(
Op.getOpcode(),
DL, IntVT, SrcVal2,
5117 return LowerVectorFP_TO_INT_SAT(
Op, DAG);
5119 EVT DstVT =
Op.getValueType();
5123 assert(SatWidth <= DstWidth &&
"Saturation width cannot exceed result width");
5126 if ((SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) || SrcVT == MVT::bf16) {
5129 }
else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16 &&
5135 if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
5136 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
5137 DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
5138 return DAG.
getNode(
Op.getOpcode(),
DL, DstVT, SrcVal,
5144 if (DstWidth < SatWidth)
5147 if (SrcVT == MVT::f16 && SatVT == MVT::i16 && DstVT == MVT::i32) {
5150 DAG.
getNode(AArch64ISD::FCVTZS_HALF,
DL, MVT::f32, SrcVal);
5155 SDValue CVTf32 = DAG.
getNode(AArch64ISD::FCVTZU_HALF,
DL, MVT::f32, SrcVal);
5180 EVT VT =
Op.getValueType();
5187 *DAG.
getContext(), Src.getValueType().getVectorElementType());
5203 bool IsStrict =
Op->isStrictFPOpcode();
5204 EVT VT =
Op.getValueType();
5207 EVT InVT =
In.getValueType();
5208 unsigned Opc =
Op.getOpcode();
5212 "Unimplemented SVE support for ISD:::STRICT_INT_TO_FP!");
5227 {Op.getOperand(0), In});
5229 {
Op.getValueType(), MVT::Other},
5240 if (VT == MVT::nxv8f32)
5243 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
5244 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
5245 return LowerToPredicatedOp(
Op, DAG, Opcode);
5250 return LowerFixedLengthIntToFPToSVE(
Op, DAG);
5254 if (VTSize < InVTSize) {
5260 bool IsTargetf16 =
false;
5261 if (
Op.hasOneUse() &&
5266 SDNode *
U = *
Op->user_begin();
5267 if (
U->hasOneUse() &&
U->user_begin()->getOpcode() ==
ISD::FP_ROUND) {
5268 EVT TmpVT =
U->user_begin()->getValueType(0);
5274 if (IsTargetf32 && !IsTargetf16) {
5284 {
In.getValue(1),
In.getValue(0),
5292 if (VTSize > InVTSize) {
5309 return DAG.
getNode(
Op.getOpcode(),
DL, {ScalarVT, MVT::Other},
5310 {Op.getOperand(0), Extract});
5311 return DAG.
getNode(
Op.getOpcode(),
DL, ScalarVT, Extract);
5319 if (
Op.getValueType().isVector())
5320 return LowerVectorINT_TO_FP(
Op, DAG);
5322 bool IsStrict =
Op->isStrictFPOpcode();
5323 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
5328 auto IntToFpViaPromotion = [&](EVT PromoteVT) {
5332 {Op.getOperand(0), SrcVal});
5334 {
Op.getValueType(), MVT::Other},
5339 DAG.
getNode(
Op.getOpcode(),
DL, PromoteVT, SrcVal),
5343 if (
Op.getValueType() == MVT::bf16) {
5344 unsigned MaxWidth = IsSigned
5348 if (MaxWidth <= 24) {
5349 return IntToFpViaPromotion(MVT::f32);
5353 if (MaxWidth <= 53) {
5354 return IntToFpViaPromotion(MVT::f64);
5405 IsStrict ? DAG.
getNode(
Op.getOpcode(),
DL, {MVT::f64, MVT::Other},
5406 {Op.getOperand(0), ToRound})
5407 : DAG.
getNode(
Op.getOpcode(),
DL, MVT::f64, ToRound);
5434 {
Op.getValueType(), MVT::Other},
5438 DAG.getIntPtrConstant(0,
DL,
true));
5443 if (
Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
5444 return IntToFpViaPromotion(MVT::f32);
5453 if (
Op.getValueType() != MVT::f128)
5461AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(
SDValue Op,
5463 assert((Subtarget->hasSVE2() ||
5464 (Subtarget->hasSME() && Subtarget->isStreaming())) &&
5465 "Lowering loop_dependence_raw_mask or loop_dependence_war_mask "
5466 "requires SVE or SME");
5469 EVT VT =
Op.getValueType();
5470 unsigned LaneOffset =
Op.getConstantOperandVal(3);
5472 uint64_t EltSizeInBytes =
Op.getConstantOperandVal(2);
5473 EVT AddrTy =
Op->getOperand(0).getValueType();
5476 if (LaneOffset != 0 || !
is_contained({1u, 2u, 4u, 8u}, EltSizeInBytes))
5485 if (AddrTy == MVT::i64)
5489 assert(AddrTy == MVT::i32 &&
"Only expected i32 to be legal!");
5491 Op.getOpcode(),
DL, VT,
5517 EVT OpVT =
Op.getValueType();
5518 EVT ArgVT =
Op.getOperand(0).getValueType();
5521 return LowerFixedLengthBitcastToSVE(
Op, DAG);
5529 "Expected int->fp bitcast!");
5542 return getSVESafeBitCast(OpVT, ExtResult, DAG);
5553 return getSVESafeBitCast(OpVT,
Op.getOperand(0), DAG);
5556 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
5560 if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
5570 assert(ArgVT == MVT::i16);
5579static std::optional<uint64_t>
5583 return std::nullopt;
5588 return std::nullopt;
5590 return C->getZExtValue();
5595 EVT VT =
N.getValueType();
5600 for (
const SDValue &Elt :
N->op_values()) {
5603 unsigned HalfSize = EltSize / 2;
5605 if (!
isIntN(HalfSize,
C->getSExtValue()))
5608 if (!
isUIntN(HalfSize,
C->getZExtValue()))
5620 EVT VT =
N.getValueType();
5642 unsigned Opcode =
N.getOpcode();
5653 unsigned Opcode =
N.getOpcode();
5795 if (IsN0SExt && IsN1SExt)
5796 return AArch64ISD::SMULL;
5801 if (IsN0ZExt && IsN1ZExt)
5802 return AArch64ISD::UMULL;
5808 if (IsN0ZExt || IsN1ZExt) {
5810 return AArch64ISD::UMULL;
5815 return AArch64ISD::UMULL;
5818 if (IsN0SExt || IsN1SExt) {
5820 return AArch64ISD::SMULL;
5823 return AArch64ISD::SMULL;
5826 if (!IsN1SExt && !IsN1ZExt)
5833 return AArch64ISD::SMULL;
5837 return AArch64ISD::UMULL;
5842 return AArch64ISD::UMULL;
5863 const SDNode *Operand =
N->getOperand(1).getNode();
5872 SplatValue = SplatValue.
abs();
5881 if ((SplatValue - 1).isPowerOf2())
5883 else if ((SplatValue + 1).isPowerOf2())
5892 EVT VT =
N->getValueType(0);
5895 unsigned ShiftAmt = MathOp ==
ISD::ADD ? (SplatValue - 1).logBase2()
5896 : (SplatValue + 1).logBase2();
5914 EVT VT =
Op.getValueType();
5916 bool OverrideNEON = !Subtarget->isNeonAvailable();
5918 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MUL_PRED);
5923 "unexpected type for custom-lowering ISD::MUL");
5939 if (VT == MVT::v1i64) {
5940 if (Subtarget->hasSVE())
5941 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MUL_PRED);
5957 if (Subtarget->hasSVE())
5958 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MUL_PRED);
5976 "unexpected types for extended operands to VMULL");
5999 if (
Pattern == AArch64SVEPredPattern::all)
6008 if (PatNumElts == (NumElts * VScale))
6012 return DAG.
getNode(AArch64ISD::PTRUE,
DL, VT,
6017 bool IsSigned,
bool IsEqual) {
6021 if (!
N->getValueType(0).isScalableVector() ||
6026 APInt Y =
N->getConstantOperandAPInt(Op1);
6031 if (IsSigned ?
Y.isMaxSignedValue() :
Y.isMaxValue())
6037 APInt X =
N->getConstantOperandAPInt(Op0);
6040 APInt NumActiveElems =
6041 IsSigned ?
Y.ssub_ov(
X, Overflow) :
Y.usub_ov(
X, Overflow);
6048 NumActiveElems = IsSigned ? NumActiveElems.
sadd_ov(One, Overflow)
6049 : NumActiveElems.
uadd_ov(One, Overflow);
6054 std::optional<unsigned> PredPattern =
6056 unsigned MinSVEVectorSize = std::max(
6058 unsigned ElementSize = 128 /
N->getValueType(0).getVectorMinNumElements();
6059 if (PredPattern != std::nullopt &&
6060 NumActiveElems.
getZExtValue() <= (MinSVEVectorSize / ElementSize))
6061 return getPTrue(DAG,
DL,
N->getValueType(0), *PredPattern);
6070 EVT VT =
N->getValueType(0);
6078 unsigned BrkID = Intrinsic::aarch64_sve_brkb_z;
6081 BrkID = Intrinsic::aarch64_sve_brka_z;
6088 Upper.getOperand(0).getValueType() != VT)
6097 DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, MVT::nxv16i1, Mask);
6101 return DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, VT, Brk);
6108 EVT InVT =
Op.getValueType();
6112 "Expected a predicate-to-predicate bitcast");
6116 "Only expect to cast between legal scalable predicate types!");
6126 Op.getConstantOperandVal(0) == Intrinsic::aarch64_sve_convert_to_svbool &&
6127 Op.getOperand(1).getValueType().bitsGT(VT))
6128 Op =
Op.getOperand(1);
6146 Mask = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, VT, Mask);
6153 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;
6159 TargetLowering::CallLoweringInfo CLI(DAG);
6161 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
6164 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
6210 SDValue TileSlice =
N->getOperand(2);
6213 int32_t ConstAddend = 0;
6222 ConstAddend = ImmNode->getSExtValue();
6226 int32_t ImmAddend = ConstAddend % 16;
6227 if (int32_t
C = (ConstAddend - ImmAddend)) {
6229 VarAddend = VarAddend
6236 auto SVL = DAG.
getNode(AArch64ISD::RDSVL,
DL, MVT::i64,
6248 return DAG.
getNode(IsLoad ? AArch64ISD::SME_ZA_LDR : AArch64ISD::SME_ZA_STR,
6250 {
N.getOperand(0), TileSlice,
Base,
6259 auto Op1 =
Op.getOperand(1);
6260 auto Op2 =
Op.getOperand(2);
6261 auto Mask =
Op.getOperand(3);
6264 EVT Op2VT = Op2.getValueType();
6265 EVT ResVT =
Op.getValueType();
6269 "Expected 8-bit or 16-bit characters.");
6283 Op2 = DAG.
getNode(AArch64ISD::DUPLANE128,
DL, OpContainerVT, Op2,
6311 ID, Mask, Op1, Op2);
6322 unsigned IntNo =
Op.getConstantOperandVal(1);
6327 case Intrinsic::aarch64_prefetch: {
6331 unsigned IsWrite =
Op.getConstantOperandVal(3);
6332 unsigned Locality =
Op.getConstantOperandVal(4);
6333 unsigned IsStream =
Op.getConstantOperandVal(5);
6334 unsigned IsData =
Op.getConstantOperandVal(6);
6335 unsigned PrfOp = (IsWrite << 4) |
6340 return DAG.
getNode(AArch64ISD::PREFETCH,
DL, MVT::Other, Chain,
6343 case Intrinsic::aarch64_range_prefetch: {
6347 unsigned IsWrite =
Op.getConstantOperandVal(3);
6348 unsigned IsStream =
Op.getConstantOperandVal(4);
6349 unsigned PrfOp = (IsStream << 2) | IsWrite;
6352 return DAG.
getNode(AArch64ISD::RANGE_PREFETCH,
DL, MVT::Other, Chain,
6356 case Intrinsic::aarch64_prefetch_ir:
6357 return DAG.
getNode(AArch64ISD::PREFETCH,
DL, MVT::Other,
6361 case Intrinsic::aarch64_sme_str:
6362 case Intrinsic::aarch64_sme_ldr: {
6365 case Intrinsic::aarch64_sme_za_enable:
6367 AArch64ISD::SMSTART,
DL, DAG.
getVTList(MVT::Other, MVT::Glue),
6370 case Intrinsic::aarch64_sme_za_disable:
6372 AArch64ISD::SMSTOP,
DL, DAG.
getVTList(MVT::Other, MVT::Glue),
6380 unsigned IntNo =
Op.getConstantOperandVal(1);
6385 case Intrinsic::aarch64_mops_memset_tag: {
6392 auto Alignment =
Node->getMemOperand()->getAlign();
6393 bool IsVol =
Node->isVolatile();
6394 auto DstPtrInfo =
Node->getPointerInfo();
6398 SDValue MS = SDI.EmitMOPS(AArch64::MOPSMemorySetTaggingPseudo, DAG,
DL,
6399 Chain, Dst, Val,
Size, Alignment, IsVol,
6400 DstPtrInfo, MachinePointerInfo{});
6413 unsigned IntNo =
Op.getConstantOperandVal(0);
6417 case Intrinsic::thread_pointer: {
6419 return DAG.
getNode(AArch64ISD::THREAD_POINTER,
DL, PtrVT);
6421 case Intrinsic::aarch64_sve_whilewr_b:
6423 Op.getOperand(1),
Op.getOperand(2),
6426 case Intrinsic::aarch64_sve_whilewr_h:
6428 Op.getOperand(1),
Op.getOperand(2),
6431 case Intrinsic::aarch64_sve_whilewr_s:
6433 Op.getOperand(1),
Op.getOperand(2),
6436 case Intrinsic::aarch64_sve_whilewr_d:
6438 Op.getOperand(1),
Op.getOperand(2),
6441 case Intrinsic::aarch64_sve_whilerw_b:
6443 Op.getOperand(1),
Op.getOperand(2),
6446 case Intrinsic::aarch64_sve_whilerw_h:
6448 Op.getOperand(1),
Op.getOperand(2),
6451 case Intrinsic::aarch64_sve_whilerw_s:
6453 Op.getOperand(1),
Op.getOperand(2),
6456 case Intrinsic::aarch64_sve_whilerw_d:
6458 Op.getOperand(1),
Op.getOperand(2),
6461 case Intrinsic::aarch64_neon_abs: {
6462 EVT Ty =
Op.getValueType();
6463 if (Ty == MVT::i64) {
6475 case Intrinsic::aarch64_neon_pmull64: {
6479 std::optional<uint64_t> LHSLane =
6481 std::optional<uint64_t> RHSLane =
6484 assert((!LHSLane || *LHSLane < 2) &&
"Expect lane to be None or 0 or 1");
6485 assert((!RHSLane || *RHSLane < 2) &&
"Expect lane to be None or 0 or 1");
6491 auto TryVectorizeOperand = [](
SDValue N, std::optional<uint64_t> NLane,
6492 std::optional<uint64_t> OtherLane,
6494 SelectionDAG &DAG) ->
SDValue {
6503 if (OtherLane == 1) {
6512 DAG.
getNode(AArch64ISD::DUPLANE64,
DL, MVT::v2i64,
6518 return DAG.
getNode(AArch64ISD::DUP,
DL, MVT::v1i64,
N);
6523 assert(
N.getValueType() == MVT::i64 &&
6524 "Intrinsic aarch64_neon_pmull64 requires i64 parameters");
6528 LHS = TryVectorizeOperand(
LHS, LHSLane, RHSLane,
DL, DAG);
6529 RHS = TryVectorizeOperand(
RHS, RHSLane, LHSLane,
DL, DAG);
6533 case Intrinsic::aarch64_neon_smax:
6536 case Intrinsic::aarch64_neon_umax:
6539 case Intrinsic::aarch64_neon_smin:
6542 case Intrinsic::aarch64_neon_umin:
6545 case Intrinsic::aarch64_neon_scalar_sqxtn:
6546 case Intrinsic::aarch64_neon_scalar_sqxtun:
6547 case Intrinsic::aarch64_neon_scalar_uqxtn: {
6548 assert(
Op.getValueType() == MVT::i32 ||
Op.getValueType() == MVT::f32);
6549 if (
Op.getValueType() == MVT::i32)
6554 Op.getOperand(1))));
6557 case Intrinsic::aarch64_neon_sqxtn:
6560 case Intrinsic::aarch64_neon_sqxtun:
6563 case Intrinsic::aarch64_neon_uqxtn:
6566 case Intrinsic::aarch64_neon_sqshrn:
6567 if (
Op.getValueType().isVector())
6570 Op.getOperand(1).getValueType(),
6571 Op.getOperand(1),
Op.getOperand(2)));
6574 case Intrinsic::aarch64_neon_sqshrun:
6575 if (
Op.getValueType().isVector())
6578 Op.getOperand(1).getValueType(),
6579 Op.getOperand(1),
Op.getOperand(2)));
6582 case Intrinsic::aarch64_neon_uqshrn:
6583 if (
Op.getValueType().isVector())
6586 Op.getOperand(1).getValueType(),
6587 Op.getOperand(1),
Op.getOperand(2)));
6590 case Intrinsic::aarch64_neon_sqrshrn:
6591 if (
Op.getValueType().isVector())
6594 Op.getOperand(1).getValueType(),
6595 Op.getOperand(1),
Op.getOperand(2)));
6598 case Intrinsic::aarch64_neon_sqrshrun:
6599 if (
Op.getValueType().isVector())
6602 Op.getOperand(1).getValueType(),
6603 Op.getOperand(1),
Op.getOperand(2)));
6606 case Intrinsic::aarch64_neon_uqrshrn:
6607 if (
Op.getValueType().isVector())
6610 Op.getOperand(1).getValueType(),
6611 Op.getOperand(1),
Op.getOperand(2)));
6614 case Intrinsic::aarch64_neon_sqdmulh:
6616 case Intrinsic::aarch64_neon_sqrdmulh:
6618 case Intrinsic::aarch64_neon_sqrdmlah:
6620 case Intrinsic::aarch64_neon_sqrdmlsh:
6622 case Intrinsic::aarch64_neon_sqrshl:
6624 case Intrinsic::aarch64_neon_sqshl:
6626 case Intrinsic::aarch64_neon_uqrshl:
6628 case Intrinsic::aarch64_neon_uqshl:
6630 case Intrinsic::aarch64_neon_sqadd:
6631 if (
Op.getValueType().isVector())
6636 case Intrinsic::aarch64_neon_sqsub:
6637 if (
Op.getValueType().isVector())
6642 case Intrinsic::aarch64_neon_uqadd:
6643 if (
Op.getValueType().isVector())
6647 case Intrinsic::aarch64_neon_suqadd:
6649 case Intrinsic::aarch64_neon_usqadd:
6651 case Intrinsic::aarch64_neon_uqsub:
6652 if (
Op.getValueType().isVector())
6656 case Intrinsic::aarch64_neon_sqdmulls_scalar:
6658 case Intrinsic::aarch64_neon_sqabs:
6660 case Intrinsic::aarch64_neon_sqneg:
6662 case Intrinsic::aarch64_sve_whilelt:
6665 case Intrinsic::aarch64_sve_whilels:
6668 case Intrinsic::aarch64_sve_whilele:
6671 case Intrinsic::aarch64_sve_sunpkhi:
6672 return DAG.
getNode(AArch64ISD::SUNPKHI,
DL,
Op.getValueType(),
6674 case Intrinsic::aarch64_sve_sunpklo:
6675 return DAG.
getNode(AArch64ISD::SUNPKLO,
DL,
Op.getValueType(),
6677 case Intrinsic::aarch64_sve_uunpkhi:
6678 return DAG.
getNode(AArch64ISD::UUNPKHI,
DL,
Op.getValueType(),
6680 case Intrinsic::aarch64_sve_uunpklo:
6681 return DAG.
getNode(AArch64ISD::UUNPKLO,
DL,
Op.getValueType(),
6683 case Intrinsic::aarch64_sve_clasta_n:
6684 return DAG.
getNode(AArch64ISD::CLASTA_N,
DL,
Op.getValueType(),
6685 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6686 case Intrinsic::aarch64_sve_clastb_n:
6687 return DAG.
getNode(AArch64ISD::CLASTB_N,
DL,
Op.getValueType(),
6688 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6689 case Intrinsic::aarch64_sve_lasta:
6690 return DAG.
getNode(AArch64ISD::LASTA,
DL,
Op.getValueType(),
6691 Op.getOperand(1),
Op.getOperand(2));
6692 case Intrinsic::aarch64_sve_lastb:
6693 return DAG.
getNode(AArch64ISD::LASTB,
DL,
Op.getValueType(),
6694 Op.getOperand(1),
Op.getOperand(2));
6695 case Intrinsic::aarch64_sve_tbl:
6696 return DAG.
getNode(AArch64ISD::TBL,
DL,
Op.getValueType(),
Op.getOperand(1),
6698 case Intrinsic::aarch64_sve_trn1:
6699 return DAG.
getNode(AArch64ISD::TRN1,
DL,
Op.getValueType(),
6700 Op.getOperand(1),
Op.getOperand(2));
6701 case Intrinsic::aarch64_sve_trn2:
6702 return DAG.
getNode(AArch64ISD::TRN2,
DL,
Op.getValueType(),
6703 Op.getOperand(1),
Op.getOperand(2));
6704 case Intrinsic::aarch64_sve_uzp1:
6705 return DAG.
getNode(AArch64ISD::UZP1,
DL,
Op.getValueType(),
6706 Op.getOperand(1),
Op.getOperand(2));
6707 case Intrinsic::aarch64_sve_uzp2:
6708 return DAG.
getNode(AArch64ISD::UZP2,
DL,
Op.getValueType(),
6709 Op.getOperand(1),
Op.getOperand(2));
6710 case Intrinsic::aarch64_sve_zip1:
6711 return DAG.
getNode(AArch64ISD::ZIP1,
DL,
Op.getValueType(),
6712 Op.getOperand(1),
Op.getOperand(2));
6713 case Intrinsic::aarch64_sve_zip2:
6714 return DAG.
getNode(AArch64ISD::ZIP2,
DL,
Op.getValueType(),
6715 Op.getOperand(1),
Op.getOperand(2));
6716 case Intrinsic::aarch64_sve_splice:
6717 return DAG.
getNode(AArch64ISD::SPLICE,
DL,
Op.getValueType(),
6718 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6719 case Intrinsic::aarch64_sve_ptrue:
6720 return getPTrue(DAG,
DL,
Op.getValueType(),
Op.getConstantOperandVal(1));
6721 case Intrinsic::aarch64_sve_clz:
6722 return DAG.
getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6723 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6724 case Intrinsic::aarch64_sme_cntsd: {
6730 case Intrinsic::aarch64_sve_cnt: {
6733 if (
Data.getValueType().isFloatingPoint())
6735 return DAG.
getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6736 Op.getOperand(2),
Data,
Op.getOperand(1));
6738 case Intrinsic::aarch64_sve_dupq_lane:
6739 return LowerDUPQLane(
Op, DAG);
6740 case Intrinsic::aarch64_sve_convert_from_svbool:
6741 if (
Op.getValueType() == MVT::aarch64svcount)
6744 case Intrinsic::aarch64_sve_convert_to_svbool:
6745 if (
Op.getOperand(1).getValueType() == MVT::aarch64svcount)
6748 case Intrinsic::aarch64_sve_fneg:
6749 return DAG.
getNode(AArch64ISD::FNEG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6750 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6751 case Intrinsic::aarch64_sve_frintp:
6752 return DAG.
getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6753 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6754 case Intrinsic::aarch64_sve_frintm:
6755 return DAG.
getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6756 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6757 case Intrinsic::aarch64_sve_frinti:
6758 return DAG.
getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU,
DL,
6759 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6761 case Intrinsic::aarch64_sve_frintx:
6762 return DAG.
getNode(AArch64ISD::FRINT_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6763 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6764 case Intrinsic::aarch64_sve_frint32x:
6765 return DAG.
getNode(AArch64ISD::FRINT32_MERGE_PASSTHRU,
DL,
6766 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6768 case Intrinsic::aarch64_sve_frint64x:
6769 return DAG.
getNode(AArch64ISD::FRINT64_MERGE_PASSTHRU,
DL,
6770 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6772 case Intrinsic::aarch64_sve_frinta:
6773 return DAG.
getNode(AArch64ISD::FROUND_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6774 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6775 case Intrinsic::aarch64_sve_frintn:
6776 return DAG.
getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU,
DL,
6777 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6779 case Intrinsic::aarch64_sve_frintz:
6780 return DAG.
getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6781 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6782 case Intrinsic::aarch64_sve_frint32z:
6783 return DAG.
getNode(AArch64ISD::FTRUNC32_MERGE_PASSTHRU,
DL,
6784 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6786 case Intrinsic::aarch64_sve_frint64z:
6787 return DAG.
getNode(AArch64ISD::FTRUNC64_MERGE_PASSTHRU,
DL,
6788 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6790 case Intrinsic::aarch64_sve_ucvtf:
6791 return DAG.
getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU,
DL,
6792 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6794 case Intrinsic::aarch64_sve_scvtf:
6795 return DAG.
getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU,
DL,
6796 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6798 case Intrinsic::aarch64_sve_fcvtzu:
6799 return DAG.
getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6800 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6801 case Intrinsic::aarch64_sve_fcvtzs:
6802 return DAG.
getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6803 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6804 case Intrinsic::aarch64_sve_fsqrt:
6805 return DAG.
getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6806 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6807 case Intrinsic::aarch64_sve_frecpx:
6808 return DAG.
getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6809 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6810 case Intrinsic::aarch64_sve_frecpe_x:
6811 return DAG.
getNode(AArch64ISD::FRECPE,
DL,
Op.getValueType(),
6813 case Intrinsic::aarch64_sve_frecps_x:
6814 return DAG.
getNode(AArch64ISD::FRECPS,
DL,
Op.getValueType(),
6815 Op.getOperand(1),
Op.getOperand(2));
6816 case Intrinsic::aarch64_sve_frsqrte_x:
6817 return DAG.
getNode(AArch64ISD::FRSQRTE,
DL,
Op.getValueType(),
6819 case Intrinsic::aarch64_sve_frsqrts_x:
6820 return DAG.
getNode(AArch64ISD::FRSQRTS,
DL,
Op.getValueType(),
6821 Op.getOperand(1),
Op.getOperand(2));
6822 case Intrinsic::aarch64_sve_fabs:
6823 return DAG.
getNode(AArch64ISD::FABS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6824 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6825 case Intrinsic::aarch64_sve_abs:
6826 return DAG.
getNode(AArch64ISD::ABS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6827 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6828 case Intrinsic::aarch64_sve_neg:
6829 return DAG.
getNode(AArch64ISD::NEG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6830 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6831 case Intrinsic::aarch64_sve_insr: {
6833 EVT ScalarTy =
Scalar.getValueType();
6834 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
6837 return DAG.
getNode(AArch64ISD::INSR,
DL,
Op.getValueType(),
6838 Op.getOperand(1), Scalar);
6840 case Intrinsic::aarch64_sve_rbit:
6841 return DAG.
getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
DL,
6842 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6844 case Intrinsic::aarch64_sve_revb:
6845 return DAG.
getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6846 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6847 case Intrinsic::aarch64_sve_revh:
6848 return DAG.
getNode(AArch64ISD::REVH_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6849 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6850 case Intrinsic::aarch64_sve_revw:
6851 return DAG.
getNode(AArch64ISD::REVW_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6852 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6853 case Intrinsic::aarch64_sve_revd:
6854 return DAG.
getNode(AArch64ISD::REVD_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6855 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6856 case Intrinsic::aarch64_sve_sxtb:
6858 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6859 Op.getOperand(2),
Op.getOperand(3),
6863 case Intrinsic::aarch64_sve_sxth:
6865 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6866 Op.getOperand(2),
Op.getOperand(3),
6870 case Intrinsic::aarch64_sve_sxtw:
6872 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6873 Op.getOperand(2),
Op.getOperand(3),
6877 case Intrinsic::aarch64_sve_uxtb:
6879 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6880 Op.getOperand(2),
Op.getOperand(3),
6884 case Intrinsic::aarch64_sve_uxth:
6886 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6887 Op.getOperand(2),
Op.getOperand(3),
6891 case Intrinsic::aarch64_sve_uxtw:
6893 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6894 Op.getOperand(2),
Op.getOperand(3),
6898 case Intrinsic::localaddress: {
6900 const auto *RegInfo = Subtarget->getRegisterInfo();
6901 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
6903 Op.getSimpleValueType());
6906 case Intrinsic::eh_recoverfp: {
6911 SDValue IncomingFPOp =
Op.getOperand(2);
6916 "llvm.eh.recoverfp must take a function as the first argument");
6917 return IncomingFPOp;
6919 case Intrinsic::aarch64_neon_vsri:
6920 case Intrinsic::aarch64_neon_vsli:
6921 case Intrinsic::aarch64_sve_sri:
6922 case Intrinsic::aarch64_sve_sli: {
6923 EVT Ty =
Op.getValueType();
6930 bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri ||
6931 IntNo == Intrinsic::aarch64_sve_sri;
6932 unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
6933 return DAG.
getNode(Opcode,
DL, Ty,
Op.getOperand(1),
Op.getOperand(2),
6937 case Intrinsic::aarch64_neon_srhadd:
6938 case Intrinsic::aarch64_neon_urhadd:
6939 case Intrinsic::aarch64_neon_shadd:
6940 case Intrinsic::aarch64_neon_uhadd: {
6941 bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
6942 IntNo == Intrinsic::aarch64_neon_shadd);
6943 bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
6944 IntNo == Intrinsic::aarch64_neon_urhadd);
6945 unsigned Opcode = IsSignedAdd
6948 return DAG.
getNode(Opcode,
DL,
Op.getValueType(),
Op.getOperand(1),
6951 case Intrinsic::aarch64_neon_saddlp:
6952 case Intrinsic::aarch64_neon_uaddlp: {
6953 unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
6954 ? AArch64ISD::UADDLP
6955 : AArch64ISD::SADDLP;
6956 return DAG.
getNode(Opcode,
DL,
Op.getValueType(),
Op.getOperand(1));
6958 case Intrinsic::aarch64_neon_sdot:
6959 case Intrinsic::aarch64_neon_udot:
6960 case Intrinsic::aarch64_sve_sdot:
6961 case Intrinsic::aarch64_sve_udot: {
6962 unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
6963 IntNo == Intrinsic::aarch64_sve_udot)
6966 return DAG.
getNode(Opcode,
DL,
Op.getValueType(),
Op.getOperand(1),
6967 Op.getOperand(2),
Op.getOperand(3));
6969 case Intrinsic::aarch64_neon_usdot:
6970 case Intrinsic::aarch64_sve_usdot: {
6971 return DAG.
getNode(AArch64ISD::USDOT,
DL,
Op.getValueType(),
6972 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6974 case Intrinsic::aarch64_neon_saddlv:
6975 case Intrinsic::aarch64_neon_uaddlv: {
6976 EVT OpVT =
Op.getOperand(1).getValueType();
6977 EVT ResVT =
Op.getValueType();
6979 ((ResVT == MVT::i32 && (OpVT == MVT::v8i8 || OpVT == MVT::v16i8 ||
6980 OpVT == MVT::v8i16 || OpVT == MVT::v4i16)) ||
6981 (ResVT == MVT::i64 && (OpVT == MVT::v4i32 || OpVT == MVT::v2i32))) &&
6982 "Unexpected aarch64_neon_u/saddlv type");
6986 IntNo == Intrinsic::aarch64_neon_uaddlv ? AArch64ISD::UADDLV
6987 : AArch64ISD::SADDLV,
6988 DL, ResVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64,
Op.getOperand(1));
6992 return EXTRACT_VEC_ELT;
6994 case Intrinsic::experimental_vector_match: {
6997 case Intrinsic::aarch64_cls:
6998 case Intrinsic::aarch64_cls64: {
7003 case Intrinsic::aarch64_neon_cls: {
7007 case Intrinsic::aarch64_sve_pmul:
7008 case Intrinsic::aarch64_neon_pmul:
7014bool AArch64TargetLowering::shouldExtendGSIndex(
EVT VT,
EVT &EltTy)
const {
7023bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(
SDValue Extend,
7044 if (LD->isVolatile())
7047 EVT MemVT = LD->getMemoryVT();
7048 if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8 && MemVT != MVT::v2i16)
7051 Align Alignment = LD->getAlign();
7053 if (Subtarget.requiresStrictAlign() && Alignment < RequiredAlignment)
7059bool AArch64TargetLowering::isVectorLoadExtDesirable(
SDValue ExtVal)
const {
7067 if (!ExtVT.
isScalableVector() && !Subtarget->useSVEForFixedLengthVectors())
7083 unsigned NumExtMaskedLoads = 0;
7084 for (
auto *U : Ld->getMask()->users())
7086 NumExtMaskedLoads++;
7088 if (NumExtMaskedLoads <= 1)
7094 return PreExtScalarVT == MVT::i8 || PreExtScalarVT == MVT::i16 ||
7095 PreExtScalarVT == MVT::i32 || PreExtScalarVT == MVT::i64;
7099 std::map<std::tuple<bool, bool, bool>,
unsigned> AddrModes = {
7100 {std::make_tuple(
false,
false,
false),
7101 AArch64ISD::GLD1_MERGE_ZERO},
7102 {std::make_tuple(
false,
false,
true),
7103 AArch64ISD::GLD1_UXTW_MERGE_ZERO},
7104 {std::make_tuple(
false,
true,
false),
7105 AArch64ISD::GLD1_MERGE_ZERO},
7106 {std::make_tuple(
false,
true,
true),
7107 AArch64ISD::GLD1_SXTW_MERGE_ZERO},
7108 {std::make_tuple(
true,
false,
false),
7109 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
7110 {std::make_tuple(
true,
false,
true),
7111 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
7112 {std::make_tuple(
true,
true,
false),
7113 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
7114 {std::make_tuple(
true,
true,
true),
7115 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
7117 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
7118 return AddrModes.find(
Key)->second;
7126 case AArch64ISD::GLD1_MERGE_ZERO:
7127 return AArch64ISD::GLD1S_MERGE_ZERO;
7128 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
7129 return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
7130 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
7131 return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
7132 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
7133 return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
7134 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
7135 return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
7136 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
7137 return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
7138 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
7139 return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
7154 EVT VT =
Op.getValueType();
7178 EVT IndexVT =
Index.getValueType();
7190 assert(Subtarget->useSVEForFixedLengthVectors() &&
7191 "Cannot lower when not using SVE for fixed vectors!");
7200 Index.getValueType().getVectorElementType() == MVT::i64 ||
7201 Mask.getValueType().getVectorElementType() == MVT::i64)
7267 EVT IndexVT =
Index.getValueType();
7279 assert(Subtarget->useSVEForFixedLengthVectors() &&
7280 "Cannot lower when not using SVE for fixed vectors!");
7292 Index.getValueType().getVectorElementType() == MVT::i64 ||
7293 Mask.getValueType().getVectorElementType() == MVT::i64)
7303 if (PromotedVT != VT)
7328 assert(LoadNode &&
"Expected custom lowering of a masked load node");
7329 EVT VT =
Op->getValueType(0);
7332 return LowerFixedLengthVectorMLoadToSVE(
Op, DAG);
7352 if ((!Subtarget->isSVEAvailable() || !Subtarget->hasSVE2p2()) &&
7353 (!Subtarget->isSVEorStreamingSVEAvailable() || !Subtarget->hasSME2p2()))
7392 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
7415 return DAG.
getStore(ST->getChain(),
DL, ExtractTrunc,
7416 ST->getBasePtr(), ST->getMemOperand());
7422 MVT DestVT =
Op.getSimpleValueType();
7426 unsigned SrcAS =
N->getSrcAddressSpace();
7427 unsigned DestAS =
N->getDestAddressSpace();
7428 assert(SrcAS != DestAS &&
7429 "addrspacecast must be between different address spaces");
7432 "addrspacecast must be between different ptr sizes");
7457 if (!
DL.isLittleEndian())
7461 if (DataType->isIntegerTy(64))
7469 unsigned NumElements = DataTypeTy->getNumElements();
7470 unsigned EltSizeBits = DataTypeTy->getElementType()->getScalarSizeInBits();
7476 unsigned TotalSizeBits = DataTypeTy->getPrimitiveSizeInBits().getFixedValue();
7480 if (TotalSizeBits == 64u || TotalSizeBits == 128u)
7484 if (TotalSizeBits == 256u && (EltSizeBits == 8u || EltSizeBits == 16u ||
7485 EltSizeBits == 32u || EltSizeBits == 64u))
7498 assert(StoreNode &&
"Expected a store operation");
7531 {StoreNode->getChain(), DAG.getBitcast(MVT::v2i64, Lo),
7532 DAG.getBitcast(MVT::v2i64, Hi), StoreNode->getBasePtr()},
7536 "Lowering should be consistent with legality");
7550 assert (StoreNode &&
"Can only custom lower store nodes");
7554 EVT VT =
Value.getValueType();
7558 if (
auto MaybeSTNP =
LowerNTStore(StoreNode, VT, MemVT, Dl, DAG))
7565 Subtarget->useSVEForFixedLengthVectors()))
7566 return LowerFixedLengthVectorStoreToSVE(
Op, DAG);
7578 MemVT == MVT::v4i8) {
7581 }
else if (MemVT == MVT::i128 && StoreNode->
isVolatile()) {
7582 return LowerStore128(
Op, DAG);
7583 }
else if (MemVT == MVT::i64x8) {
7588 EVT PtrVT =
Base.getValueType();
7589 for (
unsigned i = 0; i < 8; i++) {
7610 bool IsStoreRelease =
7613 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
7614 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
7624 unsigned Opcode = IsStoreRelease ? AArch64ISD::STILP : AArch64ISD::STP;
7626 std::swap(StoreValue.first, StoreValue.second);
7629 {StoreNode->getChain(), StoreValue.first, StoreValue.second,
7630 StoreNode->getBasePtr()},
7642 EVT MemVT = Load->getMemoryVT();
7643 EVT ResVT = Load->getValueType(0);
7649 switch (Load->getExtensionType()) {
7662 SDValue Chain = Load->getChain();
7663 SDValue BasePtr = Load->getBasePtr();
7665 Align Alignment = Load->getAlign();
7671 DAG.
getLoad(ScalarLoadType,
DL, Chain, BasePtr, PtrInfo, Alignment);
7683 while (CurrentEltBits < DstEltBits) {
7685 CurrentNumElts = CurrentNumElts / 2;
7691 CurrentEltBits = CurrentEltBits * 2;
7694 Res = DAG.
getNode(ExtOpcode,
DL, ExtVT, Res);
7697 if (CurrentNumElts != NumElts) {
7710 assert(LoadNode &&
"Expected custom lowering of a load node");
7719 EVT PtrVT =
Base.getValueType();
7720 for (
unsigned i = 0; i < 8; i++) {
7726 Ops.push_back(Part);
7736SDValue AArch64TargetLowering::LowerFixedLengthVectorCompressToSVE(
7739 EVT VT =
Op.getValueType();
7754 EVT VT =
Op.getValueType();
7755 if (!Subtarget->isSVEAvailable())
7759 return LowerFixedLengthVectorCompressToSVE(
Op, DAG);
7765 EVT MaskVT =
Mask.getValueType();
7792 MVT VT =
Op.getSimpleValueType();
7795 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
7803 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT,
Op.getOperand(0), Neg,
7816 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
7831 MVT VT =
Op.getSimpleValueType();
7835 if (NewShiftNo == 0)
7836 return Op.getOperand(0);
7845 if (NewShiftNo == 0)
7846 return Op.getOperand(1);
7848 if (ShiftNo->getZExtValue() == NewShiftNo)
7863 EVT XScalarTy =
X.getValueType();
7868 switch (
Op.getSimpleValueType().SimpleTy) {
7877 ExpVT = MVT::nxv4i32;
7881 ExpVT = MVT::nxv2i64;
7899 if (
X.getValueType() != XScalarTy)
7907 return Op.getOperand(0);
7942 const char FptrReg = 0x11;
7948 Chain,
DL, DAG.
getConstant(0x58000080u | NestReg,
DL, MVT::i32), Addr,
7949 MachinePointerInfo(TrmpAddr));
7954 Chain,
DL, DAG.
getConstant(0x580000b0u | FptrReg,
DL, MVT::i32), Addr,
7955 MachinePointerInfo(TrmpAddr, 4));
7961 MachinePointerInfo(TrmpAddr, 8));
7966 DAG.
getStore(Chain,
DL, Nest, Addr, MachinePointerInfo(TrmpAddr, 16));
7971 DAG.
getStore(Chain,
DL, FPtr, Addr, MachinePointerInfo(TrmpAddr, 24));
7985 EVT VT =
Op.getValueType();
7987 (Subtarget->hasSVEB16B16() &&
7988 Subtarget->isNonStreamingSVEorSME2Available()))
7989 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMUL_PRED);
7991 assert(Subtarget->hasBF16() &&
"Expected +bf16 for custom FMUL lowering");
7992 assert((VT == MVT::nxv4bf16 || VT == MVT::nxv8bf16 || VT == MVT::v8bf16) &&
7993 "Unexpected FMUL VT");
7996 return [&, IID](EVT VT,
auto...
Ops) {
8003 EVT SrcVT =
Value.getValueType();
8014 auto FCVT = MakeGetIntrinsic(Intrinsic::aarch64_sve_fcvt_bf16f32_v2);
8015 auto FCVTNT = MakeGetIntrinsic(Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2);
8020 MakeGetIntrinsic(UseSVEBFMLAL ? Intrinsic::aarch64_sve_bfmlalb
8021 : Intrinsic::aarch64_neon_bfmlalb);
8023 MakeGetIntrinsic(UseSVEBFMLAL ? Intrinsic::aarch64_sve_bfmlalt
8024 : Intrinsic::aarch64_neon_bfmlalt);
8026 EVT AccVT = UseSVEBFMLAL ? MVT::nxv4f32 : MVT::v4f32;
8039 LHS = Reinterpret(
LHS, MVT::nxv8bf16);
8040 RHS = Reinterpret(
RHS, MVT::nxv8bf16);
8043 SDValue BottomF32 = Reinterpret(BFMLALB(AccVT, Zero,
LHS,
RHS), MVT::nxv4f32);
8045 FCVT(MVT::nxv8bf16, DAG.
getPOISON(MVT::nxv8bf16), Pg, BottomF32);
8047 if (VT == MVT::nxv4bf16)
8048 return Reinterpret(BottomBF16, VT);
8050 SDValue TopF32 = Reinterpret(BFMLALT(AccVT, Zero,
LHS,
RHS), MVT::nxv4f32);
8051 SDValue TopBF16 = FCVTNT(MVT::nxv8bf16, BottomBF16, Pg, TopF32);
8052 return Reinterpret(TopBF16, VT);
8059 EVT VT =
Op.getValueType();
8062 assert(VT.
isVector() &&
"Scalar fma lowering should be handled by patterns");
8065 if (VT != MVT::v8f16 && VT != MVT::v4f32 && VT != MVT::v2f64)
8066 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMA_PRED);
8070 ? LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMA_PRED)
8080 auto ConvertToScalableFnegMt = [&](
SDValue Op) {
8082 Op = LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
8086 OpA = ConvertToScalableFnegMt(OpA);
8087 OpB = ConvertToScalableFnegMt(OpB);
8088 OpC = ConvertToScalableFnegMt(OpC);
8091 DAG.
getNode(AArch64ISD::FMA_PRED,
DL, ContainerVT, Pg, OpA, OpB, OpC);
8096 EVT VT =
Op.getValueType();
8098 (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) &&
8102 EVT CLMULTy = VT == MVT::i8 ? MVT::v8i8 : MVT::v1i64;
8103 EVT ExtractTy = VT == MVT::i64 ? MVT::i64 : MVT::i32;
8109 if (VecVT != CLMULTy) {
8114 if (ExtractTy == MVT::i32)
8119 if (ExtractTy != VT)
8121 return ExtractVecElt;
8129 switch (
Op.getOpcode()) {
8135 return LowerLOOP_DEPENDENCE_MASK(
Op, DAG);
8137 return LowerBITCAST(
Op, DAG);
8139 return LowerGlobalAddress(
Op, DAG);
8141 return LowerGlobalTLSAddress(
Op, DAG);
8143 return LowerPtrAuthGlobalAddress(
Op, DAG);
8145 return LowerADJUST_TRAMPOLINE(
Op, DAG);
8147 return LowerINIT_TRAMPOLINE(
Op, DAG);
8151 return LowerSETCC(
Op, DAG);
8153 return LowerSETCCCARRY(
Op, DAG);
8157 return LowerBR_CC(
Op, DAG);
8159 return LowerSELECT(
Op, DAG);
8161 return LowerSELECT_CC(
Op, DAG);
8163 return LowerJumpTable(
Op, DAG);
8165 return LowerBR_JT(
Op, DAG);
8167 return LowerBRIND(
Op, DAG);
8169 return LowerConstantPool(
Op, DAG);
8171 return LowerBlockAddress(
Op, DAG);
8173 return LowerVASTART(
Op, DAG);
8175 return LowerVACOPY(
Op, DAG);
8177 return LowerVAARG(
Op, DAG);
8194 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FADD_PRED);
8196 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FSUB_PRED);
8198 return LowerFMUL(
Op, DAG);
8200 return LowerFMA(
Op, DAG);
8202 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FDIV_PRED);
8204 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
8206 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
8208 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
8210 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
8212 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
8214 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
8216 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
8218 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
8220 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
8222 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
8225 return LowerFP_ROUND(
Op, DAG);
8228 return LowerFP_EXTEND(
Op, DAG);
8230 return LowerFRAMEADDR(
Op, DAG);
8232 return LowerSPONENTRY(
Op, DAG);
8234 return LowerRETURNADDR(
Op, DAG);
8236 return LowerADDROFRETURNADDR(
Op, DAG);
8238 return LowerCONCAT_VECTORS(
Op, DAG);
8240 return LowerINSERT_VECTOR_ELT(
Op, DAG);
8242 return LowerEXTRACT_VECTOR_ELT(
Op, DAG);
8244 return LowerBUILD_VECTOR(
Op, DAG);
8247 return LowerEXTEND_VECTOR_INREG(
Op, DAG);
8249 return LowerZERO_EXTEND_VECTOR_INREG(
Op, DAG);
8251 return LowerVECTOR_SHUFFLE(
Op, DAG);
8253 return LowerSPLAT_VECTOR(
Op, DAG);
8255 return LowerEXTRACT_SUBVECTOR(
Op, DAG);
8257 return LowerINSERT_SUBVECTOR(
Op, DAG);
8262 return LowerDIV(
Op, DAG);
8267 return LowerMinMax(
Op, DAG);
8271 return LowerVectorSRA_SRL_SHL(
Op, DAG);
8275 return LowerShiftParts(
Op, DAG);
8278 return LowerCTPOP_PARITY(
Op, DAG);
8280 return LowerFCOPYSIGN(
Op, DAG);
8282 return LowerVectorOR(
Op, DAG);
8284 return LowerXOR(
Op, DAG);
8291 return LowerINT_TO_FP(
Op, DAG);
8296 return LowerFP_TO_INT(
Op, DAG);
8299 return LowerFP_TO_INT_SAT(
Op, DAG);
8301 return LowerGET_ROUNDING(
Op, DAG);
8303 return LowerSET_ROUNDING(
Op, DAG);
8305 return LowerGET_FPMODE(
Op, DAG);
8307 return LowerSET_FPMODE(
Op, DAG);
8309 return LowerRESET_FPMODE(
Op, DAG);
8311 return LowerMUL(
Op, DAG);
8313 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MULHS_PRED);
8315 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MULHU_PRED);
8317 return LowerINTRINSIC_W_CHAIN(
Op, DAG);
8319 return LowerINTRINSIC_WO_CHAIN(
Op, DAG);
8321 return LowerINTRINSIC_VOID(
Op, DAG);
8324 assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
8325 return LowerStore128(
Op, DAG);
8329 return LowerSTORE(
Op, DAG);
8331 return LowerMSTORE(
Op, DAG);
8333 return LowerMGATHER(
Op, DAG);
8335 return LowerMSCATTER(
Op, DAG);
8337 return LowerVECREDUCE_SEQ_FADD(
Op, DAG);
8351 return LowerVECREDUCE(
Op, DAG);
8354 return LowerVECREDUCE_MUL(
Op, DAG);
8356 return LowerATOMIC_LOAD_AND(
Op, DAG);
8358 return LowerDYNAMIC_STACKALLOC(
Op, DAG);
8360 return LowerVSCALE(
Op, DAG);
8362 return LowerVECTOR_COMPRESS(
Op, DAG);
8366 return LowerFixedLengthVectorIntExtendToSVE(
Op, DAG);
8373 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
8374 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
8377 return LowerToPredicatedOp(
Op, DAG,
8378 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
8381 return LowerTRUNCATE(
Op, DAG);
8383 return LowerMLOAD(
Op, DAG);
8386 !Subtarget->isNeonAvailable()))
8387 return LowerFixedLengthVectorLoadToSVE(
Op, DAG);
8388 return LowerLOAD(
Op, DAG);
8392 return LowerToScalableOp(
Op, DAG);
8394 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMAX_PRED);
8397 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMAXNM_PRED);
8399 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMIN_PRED);
8402 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMINNM_PRED);
8404 return LowerFixedLengthVectorSelectToSVE(
Op, DAG);
8406 return LowerABS(
Op, DAG);
8408 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABDS_PRED);
8410 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABDU_PRED);
8412 return LowerAVG(
Op, DAG, AArch64ISD::HADDS_PRED);
8414 return LowerAVG(
Op, DAG, AArch64ISD::HADDU_PRED);
8416 return LowerAVG(
Op, DAG, AArch64ISD::RHADDS_PRED);
8418 return LowerAVG(
Op, DAG, AArch64ISD::RHADDU_PRED);
8420 return LowerBitreverse(
Op, DAG);
8422 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
8424 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU);
8426 return LowerCTTZ(
Op, DAG);
8429 return LowerVECTOR_SPLICE(
Op, DAG);
8431 return LowerVECTOR_DEINTERLEAVE(
Op, DAG);
8433 return LowerVECTOR_INTERLEAVE(
Op, DAG);
8435 return LowerGET_ACTIVE_LANE_MASK(
Op, DAG);
8438 if (
Op.getValueType().isVector())
8439 return LowerVectorXRINT(
Op, DAG);
8443 assert((
Op.getOperand(0).getValueType() == MVT::f16 ||
8444 Op.getOperand(0).getValueType() == MVT::bf16) &&
8445 "Expected custom lowering of rounding operations only for f16");
8448 return DAG.
getNode(
Op.getOpcode(),
DL,
Op.getValueType(), Ext);
8454 assert((
Op.getOperand(1).getValueType() == MVT::f16 ||
8455 Op.getOperand(1).getValueType() == MVT::bf16) &&
8456 "Expected custom lowering of rounding operations only for f16");
8459 {
Op.getOperand(0),
Op.getOperand(1)});
8460 return DAG.
getNode(
Op.getOpcode(),
DL, {Op.getValueType(), MVT::Other},
8461 {Ext.getValue(1), Ext.getValue(0)});
8464 assert(
Op.getOperand(2).getValueType() == MVT::i128 &&
8465 "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
8470 std::pair<SDValue, SDValue> Pair =
8475 SysRegName, Pair.first, Pair.second);
8485 return LowerVECTOR_HISTOGRAM(
Op, DAG);
8490 return LowerPARTIAL_REDUCE_MLA(
Op, DAG);
8494 return LowerFCANONICALIZE(
Op, DAG);
8512 DAG.
getNode(AArch64ISD::CTTZ_ELTS,
DL, MVT::i64, Pg, CttzOp);
8519 return !Subtarget->useSVEForFixedLengthVectors();
8523 EVT VT,
bool OverrideNEON)
const {
8546 return Subtarget->isSVEorStreamingSVEAvailable();
8553 if (!Subtarget->useSVEForFixedLengthVectors())
8573 unsigned Opcode =
N->getOpcode();
8578 unsigned IID =
N->getConstantOperandVal(0);
8579 if (IID < Intrinsic::num_intrinsics)
8593 if (IID == Intrinsic::aarch64_neon_umull ||
8595 IID == Intrinsic::aarch64_neon_smull ||
8604 bool IsVarArg)
const {
8627 if (Subtarget->isTargetWindows()) {
8629 if (Subtarget->isWindowsArm64EC())
8635 if (!Subtarget->isTargetDarwin())
8643 if (Subtarget->isWindowsArm64EC())
8649 if (Subtarget->isWindowsArm64EC())
8673 if (Subtarget->isWindowsArm64EC())
8690 auto &FuncInfo = *MF.
getInfo<AArch64FunctionInfo>();
8692 SMEAttrs SMEFnAttrs = FuncInfo.getSMEFnAttrs();
8711 if (SMEFnAttrs.hasStreamingInterfaceOrBody())
8714 else if (SMEFnAttrs.hasStreamingCompatibleInterface())
8720SDValue AArch64TargetLowering::LowerFormalArguments(
8728 Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
8730 (isVarArg && Subtarget->isWindowsArm64EC());
8731 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
8741 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.
getContext());
8749 unsigned NumArgs = Ins.
size();
8751 unsigned CurArgIdx = 0;
8752 bool UseVarArgCC =
false;
8754 UseVarArgCC = isVarArg;
8758 for (
unsigned i = 0; i != NumArgs; ++i) {
8759 MVT ValVT = Ins[i].VT;
8760 if (Ins[i].isOrigArg()) {
8761 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
8762 CurArgIdx = Ins[i].getOrigArgIndex();
8769 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
8771 else if (ActualMVT == MVT::i16)
8775 Ins[i].OrigTy, CCInfo);
8776 assert(!Res &&
"Call operand has unhandled type");
8781 bool IsLocallyStreaming =
8782 !
Attrs.hasStreamingInterface() &&
Attrs.hasStreamingBody();
8786 unsigned ExtraArgLocs = 0;
8787 for (
unsigned i = 0, e = Ins.
size(); i != e; ++i) {
8788 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
8790 if (Ins[i].
Flags.isByVal()) {
8794 int Size = Ins[i].Flags.getByValSize();
8795 unsigned NumRegs = (
Size + 7) / 8;
8807 if (Ins[i].
Flags.isSwiftAsync())
8808 MF.
getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(
true);
8814 const TargetRegisterClass *RC;
8816 if (RegVT == MVT::i32)
8817 RC = &AArch64::GPR32RegClass;
8818 else if (RegVT == MVT::i64)
8819 RC = &AArch64::GPR64RegClass;
8820 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
8821 RC = &AArch64::FPR16RegClass;
8822 else if (RegVT == MVT::f32)
8823 RC = &AArch64::FPR32RegClass;
8825 RC = &AArch64::FPR64RegClass;
8827 RC = &AArch64::FPR128RegClass;
8831 RC = &AArch64::PPRRegClass;
8832 }
else if (RegVT == MVT::aarch64svcount) {
8834 RC = &AArch64::PPRRegClass;
8837 RC = &AArch64::ZPRRegClass;
8844 if (IsLocallyStreaming) {
8859 DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
8878 "Indirect arguments should be scalable on most subtargets");
8900 uint32_t BEAlign = 0;
8901 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
8902 !Ins[i].Flags.isInConsecutiveRegs())
8903 BEAlign = 8 - ArgSize;
8906 MachinePointerInfo PtrInfo;
8912 unsigned ObjOffset = ArgOffset + BEAlign;
8942 "Indirect arguments should be scalable on most subtargets");
8962 Subtarget->isWindowsArm64EC()) &&
8963 "Indirect arguments should be scalable on most subtargets");
8966 unsigned NumParts = 1;
8967 if (Ins[i].
Flags.isInConsecutiveRegs()) {
8968 while (!Ins[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
8977 while (NumParts > 0) {
8978 ArgValue = DAG.
getLoad(PartLoad,
DL, Chain, Ptr, MachinePointerInfo());
8991 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
8997 if (Ins[i].isOrigArg()) {
8998 Argument *OrigArg =
F.getArg(Ins[i].getOrigArgIndex());
9000 if (!Ins[i].
Flags.isZExt()) {
9001 ArgValue = DAG.
getNode(AArch64ISD::ASSERT_ZEXT_BOOL,
DL,
9012 if (
Attrs.hasStreamingCompatibleInterface()) {
9014 DAG.
getNode(AArch64ISD::ENTRY_PSTATE_SM,
DL,
9015 DAG.
getVTList(MVT::i64, MVT::Other), {Chain});
9027 if (IsLocallyStreaming) {
9028 if (
Attrs.hasStreamingCompatibleInterface())
9037 for (
unsigned I=0;
I<InVals.
size(); ++
I) {
9049 if (!Subtarget->isTargetDarwin() || IsWin64) {
9055 saveVarArgRegisters(CCInfo, DAG,
DL, Chain);
9059 unsigned VarArgsOffset = CCInfo.getStackSize();
9062 alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
9073 SmallVectorImpl<ForwardedRegister> &Forwards =
9075 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
9079 if (!CCInfo.isAllocated(AArch64::X8)) {
9081 Forwards.
push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
9090 for (
unsigned I = 0,
E = Ins.
size();
I !=
E; ++
I) {
9092 Ins[
I].Flags.isInReg()) &&
9093 Ins[
I].Flags.isSRet()) {
9108 unsigned StackArgSize = CCInfo.getStackSize();
9110 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
9114 StackArgSize =
alignTo(StackArgSize, 16);
9128 if (Subtarget->hasCustomCallingConv())
9129 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
9133 if (
Attrs.hasZAState()) {
9137 }
else if (
Attrs.hasAgnosticZAInterface()) {
9138 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
9143 auto *RetTy = EVT(MVT::i64).getTypeForEVT(*DAG.
getContext());
9144 TargetLowering::CallLoweringInfo CLI(DAG);
9145 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
9153 {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
9159 Chain = DAG.
getNode(AArch64ISD::SME_STATE_ALLOC,
DL,
9167 for (
const ISD::InputArg &
I : Ins) {
9168 if (
I.Flags.isSwiftSelf() ||
I.Flags.isSwiftError() ||
9169 I.Flags.isSwiftAsync()) {
9173 "Swift attributes can't be used with preserve_none",
9183void AArch64TargetLowering::saveVarArgRegisters(
CCState &CCInfo,
9189 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9193 Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
9199 if (Subtarget->isWindowsArm64EC()) {
9206 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
9208 if (GPRSaveSize != 0) {
9211 if (GPRSaveSize & 15)
9218 if (Subtarget->isWindowsArm64EC()) {
9231 for (
unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
9237 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
9247 if (Subtarget->hasFPARMv8() && !IsWin64) {
9249 const unsigned NumFPRArgRegs =
FPRArgRegs.size();
9252 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
9254 if (FPRSaveSize != 0) {
9259 for (
unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
9274 if (!MemOps.
empty()) {
9281SDValue AArch64TargetLowering::LowerCallResult(
9285 SDValue ThisVal,
bool RequiresSMChange)
const {
9286 DenseMap<unsigned, SDValue> CopiedRegs;
9288 for (
unsigned i = 0; i != RVLocs.
size(); ++i) {
9289 CCValAssign VA = RVLocs[i];
9293 if (i == 0 && isThisReturn) {
9295 "unexpected return calling convention register assignment");
9331 Val = DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
9398 unsigned NumArgs = Outs.
size();
9399 for (
unsigned i = 0; i != NumArgs; ++i) {
9400 MVT ArgVT = Outs[i].VT;
9403 bool UseVarArgCC =
false;
9407 if (IsCalleeWin64) {
9421 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
9423 else if (ActualMVT == MVT::i16)
9431 Outs[i].OrigTy, CCInfo);
9432 assert(!Res &&
"Call operand has unhandled type");
9447bool AArch64TargetLowering::isEligibleForTailCallOptimization(
9448 const CallLoweringInfo &CLI)
const {
9454 bool IsVarArg = CLI.IsVarArg;
9458 const SelectionDAG &DAG = CLI.DAG;
9465 SMECallAttrs CallAttrs =
9479 MF.
getInfo<AArch64FunctionInfo>()->isSVECC())
9482 bool CCMatch = CallerCC == CalleeCC;
9497 if (i->hasByValAttr())
9506 if (i->hasInRegAttr()) {
9507 unsigned ArgIdx = i - CallerF.
arg_begin();
9508 if (!CLI.CB || CLI.CB->arg_size() <= ArgIdx)
9510 AttributeSet
Attrs = CLI.CB->getParamAttributes(ArgIdx);
9511 if (!
Attrs.hasAttribute(Attribute::InReg) ||
9512 !
Attrs.hasAttribute(Attribute::StructRet) || !i->hasStructRetAttr() ||
9513 CLI.CB->getArgOperand(ArgIdx) != i) {
9530 const GlobalValue *GV =
G->getGlobal();
9533 (!
TT.isOSWindows() ||
TT.isOSBinFormatELF() ||
TT.isOSBinFormatMachO()))
9553 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
9554 const uint32_t *CallerPreserved =
TRI->getCallPreservedMask(MF, CallerCC);
9556 const uint32_t *CalleePreserved =
TRI->getCallPreservedMask(MF, CalleeCC);
9557 if (Subtarget->hasCustomCallingConv()) {
9558 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
9559 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
9561 if (!
TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9570 CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs,
C);
9574 if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
9582 for (
const CCValAssign &ArgLoc : ArgLocs)
9583 if (!ArgLoc.isRegLoc())
9587 const AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9595 A.getValVT().isScalableVector() ||
9596 Subtarget->isWindowsArm64EC()) &&
9597 "Expected value to be scalable");
9607 const MachineRegisterInfo &MRI = MF.
getRegInfo();
9617 int ClobberedFI)
const {
9620 int64_t LastByte = FirstByte + MFI.
getObjectSize(ClobberedFI) - 1;
9631 if (FI->getIndex() < 0) {
9633 int64_t InLastByte = InFirstByte;
9636 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
9637 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
9645bool AArch64TargetLowering::DoesCalleeRestoreStack(
CallingConv::ID CallCC,
9646 bool TailCallOpt)
const {
9657 APInt RequiredZero(SizeInBits, 0xFE);
9659 bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero;
9663void AArch64TargetLowering::AdjustInstrPostInstrSelection(
MachineInstr &
MI,
9669 if (
MI.getOpcode() == AArch64::MSRpstatesvcrImm1 ||
9670 MI.getOpcode() == AArch64::MSRpstatePseudo) {
9671 for (
unsigned I =
MI.getNumOperands() - 1;
I > 0; --
I)
9672 if (MachineOperand &MO =
MI.getOperand(
I);
9673 MO.isReg() && MO.isImplicit() && MO.isDef() &&
9674 (AArch64::GPR32RegClass.contains(MO.getReg()) ||
9675 AArch64::GPR64RegClass.contains(MO.getReg())))
9676 MI.removeOperand(
I);
9680 if (
MI.getOperand(0).getImm() == AArch64SVCR::SVCRSM ||
9681 MI.getOperand(0).getImm() == AArch64SVCR::SVCRSMZA) {
9696 const MachineFunction &MF = *
MI.getMF();
9697 if (MF.
getInfo<AArch64FunctionInfo>()->hasStreamingModeChanges() &&
9698 (
MI.getOpcode() == AArch64::ADDXri ||
9699 MI.getOpcode() == AArch64::SUBXri)) {
9700 const MachineOperand &MO =
MI.getOperand(1);
9709 unsigned Condition,
bool InsertVectorLengthCheck)
const {
9717 Ops.push_back(InGlue);
9718 return DAG.
getNode(AArch64ISD::CHECK_MATCHING_VL,
DL,
9722 if (InsertVectorLengthCheck &&
Enable) {
9725 SDValue CheckVL = GetCheckVL(Chain, InGlue);
9738 assert(PStateReg.
isValid() &&
"PStateSM Register is invalid");
9745 Opcode =
Enable ? AArch64ISD::COND_SMSTART : AArch64ISD::COND_SMSTOP;
9746 Ops.push_back(ConditionOp);
9747 Ops.push_back(PStateSM);
9749 Opcode =
Enable ? AArch64ISD::SMSTART : AArch64ISD::SMSTOP;
9751 Ops.push_back(RegMask);
9754 Ops.push_back(InGlue);
9759 if (!InsertVectorLengthCheck ||
Enable)
9786 if (Flags.isZExt() || Flags.isSExt())
9793 Arg->
isAssert() ||
Op == AArch64ISD::ASSERT_ZEXT_BOOL) {
9805 int FI = FINode->getIndex();
9823AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
9825 SelectionDAG &DAG = CLI.DAG;
9832 bool &IsTailCall = CLI.IsTailCall;
9834 bool IsVarArg = CLI.IsVarArg;
9835 const CallBase *CB = CLI.CB;
9838 MachineFunction::CallSiteInfo CSInfo;
9839 bool IsThisReturn =
false;
9841 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9843 bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
9844 bool IsSibCall =
false;
9845 bool GuardWithBTI =
false;
9847 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
9848 !Subtarget->noBTIAtReturnTwice()) {
9854 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.
getContext());
9857 unsigned NumArgs = Outs.
size();
9859 for (
unsigned i = 0; i != NumArgs; ++i) {
9860 if (Outs[i].
Flags.isVarArg() && Outs[i].VT.isScalableVector())
9862 "currently not supported");
9873 RetCCInfo.AnalyzeCallResult(Ins, RetCC);
9881 auto HasSVERegLoc = [](CCValAssign &Loc) {
9882 if (!Loc.isRegLoc())
9884 return AArch64::ZPRRegClass.contains(Loc.getLocReg()) ||
9885 AArch64::PPRRegClass.contains(Loc.getLocReg());
9887 if (
any_of(RVLocs, HasSVERegLoc) ||
any_of(ArgLocs, HasSVERegLoc))
9892 SMECallAttrs CallAttrs =
9895 std::optional<unsigned> ZAMarkerNode;
9897 ZAMarkerNode = AArch64ISD::REQUIRES_ZA_SAVE;
9899 ZAMarkerNode = AArch64ISD::REQUIRES_ZT0_SAVE;
9901 ZAMarkerNode = AArch64ISD::INOUT_ZA_USE;
9905 IsTailCall = isEligibleForTailCallOptimization(CLI);
9909 if (!ZAMarkerNode && !TailCallOpt && IsTailCall &&
9917 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
9919 "site marked musttail");
9937 if (IsTailCall && !IsSibCall) {
9942 NumBytes =
alignTo(NumBytes, 16);
9947 FPDiff = NumReusableBytes - NumBytes;
9951 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (
unsigned)-FPDiff)
9959 assert(FPDiff % 16 == 0 &&
"unaligned stack on tail call");
9962 auto DescribeCallsite =
9963 [&](OptimizationRemarkAnalysis &
R) -> OptimizationRemarkAnalysis & {
9966 R <<
ore::NV(
"Callee", ES->getSymbol());
9967 else if (CLI.CB && CLI.CB->getCalledFunction())
9968 R <<
ore::NV(
"Callee", CLI.CB->getCalledFunction()->getName());
9970 R <<
"unknown callee";
9976 if (RequiresSMChange) {
9979 auto R = CLI.CB ? OptimizationRemarkAnalysis(
"sme",
"SMETransition",
9981 : OptimizationRemarkAnalysis(
"sme",
"SMETransition",
9983 DescribeCallsite(R) <<
" requires a streaming mode transition";
9990 assert((!IsSibCall || !ZAMarkerNode) &&
"ZA markers require CALLSEQ_START");
10000 {Chain, Chain.getValue(1)});
10008 SmallSet<unsigned, 8> RegsUsed;
10012 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
10014 for (
const auto &
F : Forwards) {
10021 unsigned ExtraArgLocs = 0;
10022 for (
unsigned i = 0, e = Outs.
size(); i != e; ++i) {
10023 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
10025 ISD::ArgFlagsTy
Flags = Outs[i].Flags;
10040 if (Outs[i].ArgVT == MVT::i1) {
10062 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
10078 assert((isScalable || Subtarget->isWindowsArm64EC()) &&
10079 "Indirect arguments should be scalable on most subtargets");
10082 TypeSize PartSize = StoreSize;
10083 unsigned NumParts = 1;
10084 if (Outs[i].
Flags.isInConsecutiveRegs()) {
10085 while (!Outs[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
10087 StoreSize *= NumParts;
10096 bool IsPred = VA.
getValVT() == MVT::aarch64svcount ||
10114 if (NumParts > 0) {
10130 if (i == 0 &&
Flags.isReturned() && !
Flags.isSwiftSelf() &&
10131 Outs[0].VT == MVT::i64) {
10133 "unexpected calling convention register assignment");
10135 "unexpected use of 'returned'");
10136 IsThisReturn =
true;
10145 [=](
const std::pair<unsigned, SDValue> &Elt) {
10154 [&VA](MachineFunction::ArgRegPair ArgReg) {
10155 return ArgReg.Reg == VA.getLocReg();
10162 Arg = DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
10167 if (
Options.EmitCallSiteInfo)
10174 MachinePointerInfo DstInfo;
10178 uint32_t BEAlign = 0;
10184 OpSize =
Flags.isByVal() ?
Flags.getByValSize() * 8
10186 OpSize = (OpSize + 7) / 8;
10187 if (!Subtarget->isLittleEndian() && !
Flags.isByVal() &&
10188 !
Flags.isInConsecutiveRegs()) {
10190 BEAlign = 8 - OpSize;
10193 int32_t
Offset = LocMemOffset + BEAlign;
10210 Chain = addTokenForArgument(Chain, DAG, MF.
getFrameInfo(), FI);
10218 if (Outs[i].
Flags.isByVal()) {
10222 Chain,
DL, DstAddr, Arg, SizeNode,
10223 Outs[i].
Flags.getNonZeroByValAlign(),
10225 nullptr, std::nullopt, DstInfo, MachinePointerInfo());
10242 if (IsVarArg && Subtarget->isWindowsArm64EC() &&
10243 !(CLI.CB && CLI.CB->isMustTailCall())) {
10261 if (!MemOpChains.
empty())
10265 if (RequiresSMChange) {
10266 bool InsertVectorLengthCheck =
10276 for (
auto &RegToPass : RegsToPass) {
10278 RegToPass.second, InGlue);
10285 const GlobalValue *CalledGlobal =
nullptr;
10286 unsigned OpFlags = 0;
10288 CalledGlobal =
G->getGlobal();
10289 OpFlags = Subtarget->classifyGlobalFunctionReference(CalledGlobal,
10294 }
else if (!CLI.PAI || !IsTailCall) {
10295 const GlobalValue *GV =
G->getGlobal();
10300 Subtarget->isTargetMachO()) ||
10302 const char *Sym = S->getSymbol();
10315 if (IsTailCall && !IsSibCall) {
10320 unsigned Opc = IsTailCall ? AArch64ISD::TC_RETURN : AArch64ISD::CALL;
10322 std::vector<SDValue>
Ops;
10323 Ops.push_back(Chain);
10324 Ops.push_back(Callee);
10331 "tail calls cannot be marked with clang.arc.attachedcall");
10332 Opc = AArch64ISD::CALL_RVMARKER;
10338 Ops.insert(
Ops.begin() + 1, GA);
10345 Ops.insert(
Ops.begin() + 2, DoEmitMarker);
10347 Opc = AArch64ISD::CALL_ARM64EC_TO_X64;
10348 }
else if (GuardWithBTI) {
10349 Opc = AArch64ISD::CALL_BTI;
10360 const uint64_t
Key = CLI.PAI->Key;
10362 "Invalid auth call key");
10366 std::tie(IntDisc, AddrDisc) =
10369 if (
Opc == AArch64ISD::CALL_RVMARKER)
10370 Opc = AArch64ISD::AUTH_CALL_RVMARKER;
10372 Opc = IsTailCall ? AArch64ISD::AUTH_TC_RETURN : AArch64ISD::AUTH_CALL;
10374 Ops.push_back(IntDisc);
10375 Ops.push_back(AddrDisc);
10380 for (
auto &RegToPass : RegsToPass)
10382 RegToPass.second.getValueType()));
10385 const uint32_t *
Mask;
10386 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10387 if (IsThisReturn) {
10389 Mask =
TRI->getThisReturnPreservedMask(MF, CallConv);
10391 IsThisReturn =
false;
10392 Mask =
TRI->getCallPreservedMask(MF, CallConv);
10395 Mask =
TRI->getCallPreservedMask(MF, CallConv);
10397 if (Subtarget->hasCustomCallingConv())
10398 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
10400 if (
TRI->isAnyArgRegReserved(MF))
10401 TRI->emitReservedArgRegCallError(MF);
10403 assert(Mask &&
"Missing call preserved mask for calling convention");
10407 Ops.push_back(InGlue);
10409 if (CLI.DeactivationSymbol)
10422 if (CalledGlobal &&
10436 if (CalledGlobal &&
10440 uint64_t CalleePopBytes =
10441 DoesCalleeRestoreStack(CallConv, TailCallOpt) ?
alignTo(NumBytes, 16) : 0;
10449 Chain, InGlue, CallConv, IsVarArg, RVLocs,
DL, DAG, InVals, IsThisReturn,
10450 IsThisReturn ? OutVals[0] :
SDValue(), RequiresSMChange);
10455 if (RequiresSMChange) {
10461 if (RequiresSMChange) {
10462 for (
unsigned I = 0;
I < InVals.
size(); ++
I) {
10477 for (
const ISD::OutputArg &O : Outs) {
10478 if (
O.Flags.isSwiftSelf() ||
O.Flags.isSwiftError() ||
10479 O.Flags.isSwiftAsync()) {
10483 "Swift attributes can't be used with preserve_none",
10484 DL.getDebugLoc()));
10493bool AArch64TargetLowering::CanLowerReturn(
10496 const Type *RetTy)
const {
10499 CCState CCInfo(CallConv, isVarArg, MF, RVLocs,
Context);
10510 auto *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
10514 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.
getContext());
10520 SmallSet<unsigned, 4> RegsUsed;
10521 for (
unsigned i = 0, realRVLocIdx = 0; i != RVLocs.
size();
10522 ++i, ++realRVLocIdx) {
10523 CCValAssign &VA = RVLocs[i];
10525 SDValue Arg = OutVals[realRVLocIdx];
10531 if (Outs[i].ArgVT == MVT::i1) {
10547 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
10556 llvm::find_if(RetVals, [=](
const std::pair<unsigned, SDValue> &Elt) {
10566 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10582 for (
auto &RetVal : RetVals) {
10586 DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
10587 DAG.
getVTList(RetVal.second.getValueType(), MVT::Glue),
10589 Chain = DAG.
getCopyToReg(Chain,
DL, RetVal.first, RetVal.second, Glue);
10592 DAG.
getRegister(RetVal.first, RetVal.second.getValueType()));
10603 unsigned RetValReg = AArch64::X0;
10605 RetValReg = AArch64::X8;
10616 if (AArch64::GPR64RegClass.
contains(*
I))
10618 else if (AArch64::FPR64RegClass.
contains(*
I))
10629 RetOps.push_back(Glue);
10640 MachinePointerInfo());
10641 RetOps.insert(RetOps.begin() + 1, Arm64ECRetDest);
10643 return DAG.
getNode(AArch64ISD::TC_RETURN,
DL, MVT::Other, RetOps);
10646 return DAG.
getNode(AArch64ISD::RET_GLUE,
DL, MVT::Other, RetOps);
10655 unsigned Flag)
const {
10657 N->getOffset(), Flag);
10662 unsigned Flag)
const {
10668 unsigned Flag)
const {
10670 N->getOffset(), Flag);
10675 unsigned Flag)
const {
10681 unsigned Flag)
const {
10686template <
class NodeTy>
10688 unsigned Flags)
const {
10696 .
getInfo<AArch64FunctionInfo>()
10697 ->hasELFSignedGOT())
10700 return DAG.
getNode(AArch64ISD::LOADgot,
DL, Ty, GotAddr);
10704template <
class NodeTy>
10706 unsigned Flags)
const {
10712 AArch64ISD::WrapperLarge,
DL, Ty,
10720template <
class NodeTy>
10722 unsigned Flags)
const {
10730 return DAG.
getNode(AArch64ISD::ADDlow,
DL, Ty, ADRP,
Lo);
10734template <
class NodeTy>
10736 unsigned Flags)
const {
10740 SDValue Sym = getTargetNode(
N, Ty, DAG, Flags);
10741 return DAG.
getNode(AArch64ISD::ADR,
DL, Ty, Sym);
10747 const GlobalValue *GV = GN->
getGlobal();
10748 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV,
getTargetMachine());
10752 "unexpected offset in global node");
10757 return getGOT(GN, DAG, OpFlags);
10763 Result = getAddrLarge(GN, DAG, OpFlags);
10765 Result = getAddrTiny(GN, DAG, OpFlags);
10767 Result = getAddr(GN, DAG, OpFlags);
10806AArch64TargetLowering::LowerDarwinGlobalTLSAddress(
SDValue Op,
10808 assert(Subtarget->isTargetDarwin() &&
10809 "This function expects a Darwin target");
10824 PtrMemVT,
DL, Chain, DescAddr,
10839 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10840 const uint32_t *
Mask =
TRI->getTLSCallPreservedMask();
10841 if (Subtarget->hasCustomCallingConv())
10849 unsigned Opcode = AArch64ISD::CALL;
10851 Ops.push_back(Chain);
10852 Ops.push_back(FuncTLVGet);
10856 Opcode = AArch64ISD::AUTH_CALL;
10978SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(
SDValue SymAddr,
10983 auto *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
10987 SDVTList NodeTys = DAG.
getVTList(MVT::Other, MVT::Glue);
10990 bool RequiresSMChange = TLSCallAttrs.requiresSMChange();
10992 auto ChainAndGlue = [](
SDValue Chain) -> std::pair<SDValue, SDValue> {
10993 return {Chain, Chain.
getValue(1)};
10996 if (RequiresSMChange)
10997 std::tie(Chain, Glue) =
11003 ? AArch64ISD::TLSDESC_AUTH_CALLSEQ
11004 : AArch64ISD::TLSDESC_CALLSEQ;
11006 std::tie(Chain, Glue) = ChainAndGlue(DAG.
getNode(
11009 if (TLSCallAttrs.requiresLazySave())
11010 std::tie(Chain, Glue) = ChainAndGlue(DAG.
getNode(
11011 AArch64ISD::REQUIRES_ZA_SAVE,
DL, NodeTys, {Chain, Chain.getValue(1)}));
11013 if (RequiresSMChange)
11014 std::tie(Chain, Glue) =
11022AArch64TargetLowering::LowerELFGlobalTLSAddress(
SDValue Op,
11024 assert(Subtarget->isTargetELF() &&
"This function expects an ELF target");
11027 AArch64FunctionInfo *MFI =
11042 "in local exec TLS model");
11053 const GlobalValue *GV = GA->
getGlobal();
11058 return LowerELFTLSLocalExec(GV, ThreadBase,
DL, DAG);
11061 TPOff = DAG.
getNode(AArch64ISD::LOADgot,
DL, PtrVT, TPOff);
11079 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
11086 GV,
DL, MVT::i64, 0,
11103 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
11111AArch64TargetLowering::LowerWindowsGlobalTLSAddress(
SDValue Op,
11113 assert(Subtarget->isTargetWindows() &&
"Windows specific TLS lowering");
11125 TLSArray = DAG.
getLoad(PtrVT,
DL, Chain, TLSArray, MachinePointerInfo());
11138 DAG.
getNode(AArch64ISD::ADDlow,
DL, PtrVT, ADRP, TLSIndexLo);
11139 TLSIndex = DAG.
getLoad(MVT::i32,
DL, Chain, TLSIndex, MachinePointerInfo());
11149 MachinePointerInfo());
11150 Chain =
TLS.getValue(1);
11153 const GlobalValue *GV = GA->
getGlobal();
11165 Addr = DAG.
getNode(AArch64ISD::ADDlow,
DL, PtrVT, Addr, TGALo);
11175 if (Subtarget->isTargetDarwin())
11176 return LowerDarwinGlobalTLSAddress(
Op, DAG);
11177 if (Subtarget->isTargetELF())
11178 return LowerELFGlobalTLSAddress(
Op, DAG);
11179 if (Subtarget->isTargetWindows())
11180 return LowerWindowsGlobalTLSAddress(
Op, DAG);
11218 assert(TGN->getGlobal()->hasExternalWeakLinkage());
11224 if (TGN->getOffset() != 0)
11226 "unsupported non-zero offset in weak ptrauth global reference");
11233 {TGA, Key, Discriminator}),
11238AArch64TargetLowering::LowerPtrAuthGlobalAddress(
SDValue Op,
11241 uint64_t KeyC =
Op.getConstantOperandVal(1);
11242 SDValue AddrDiscriminator =
Op.getOperand(2);
11243 uint64_t DiscriminatorC =
Op.getConstantOperandVal(3);
11244 EVT VT =
Op.getValueType();
11254 "constant discriminator in ptrauth global out of range [0, 0xffff]");
11257 if (!Subtarget->isTargetELF() && !Subtarget->isTargetMachO())
11260 int64_t PtrOffsetC = 0;
11266 const GlobalValue *PtrGV = PtrN->getGlobal();
11269 const unsigned OpFlags =
11273 "unsupported non-GOT op flags on ptrauth global reference");
11276 PtrOffsetC += PtrN->getOffset();
11279 assert(PtrN->getTargetFlags() == 0 &&
11280 "unsupported target flags on ptrauth global");
11285 ? AddrDiscriminator
11289 if (!NeedsGOTLoad) {
11293 {TPtr, Key, TAddrDiscriminator, Discriminator}),
11302 {TPtr, Key, TAddrDiscriminator, Discriminator}),
11330 SDValue Dest,
unsigned Opcode,
11342 uint64_t Mask =
Op.getConstantOperandVal(1);
11347 if (
Op.getOperand(0).getOpcode() ==
ISD::SHL) {
11348 auto Op00 =
Op.getOperand(0).getOperand(0);
11351 Op.getOperand(1),
Op.getOperand(0).getOperand(1));
11352 return DAG.
getNode(Opcode,
DL, MVT::Other, Chain, Shr,
11372 bool ProduceNonFlagSettingCondBr =
11378 if (
LHS.getValueType() == MVT::f128) {
11383 if (!
RHS.getNode()) {
11403 OFCC = getInvertedCondCode(OFCC);
11406 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
11410 if (
LHS.getValueType().isInteger()) {
11412 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
11417 if (RHSC && RHSC->
getZExtValue() == 1 && ProduceNonFlagSettingCondBr &&
11421 LHS.getResNo() == 0 &&
11425 bool CanNegate, MustBeFirst, PreferFirst;
11458 if (RHSC && RHSC->
getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
11464 return DAG.
getNode(AArch64ISD::CBZ,
DL, MVT::Other, Chain,
LHS, Dest);
11470 return DAG.
getNode(AArch64ISD::CBNZ,
DL, MVT::Other, Chain,
LHS, Dest);
11475 uint64_t SignBitPos;
11477 return DAG.
getNode(AArch64ISD::TBNZ,
DL, MVT::Other, Chain,
LHS,
11482 LHS.getOpcode() !=
ISD::AND && ProduceNonFlagSettingCondBr) {
11486 uint64_t SignBitPos;
11488 return DAG.
getNode(AArch64ISD::TBZ,
DL, MVT::Other, Chain,
LHS,
11494 if (Subtarget->hasCMPBR() &&
11496 ProduceNonFlagSettingCondBr) {
11505 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
11509 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::bf16 ||
11510 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
11519 DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CC1Val, Cmp);
11522 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, BR1, Dest, CC2Val,
11531 if (!Subtarget->isNeonAvailable() &&
11532 !Subtarget->useSVEForFixedLengthVectors())
11535 EVT VT =
Op.getValueType();
11563 if (!VT.
isVector() && !Subtarget->isNeonAvailable() &&
11564 Subtarget->isSVEorStreamingSVEAvailable()) {
11565 if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64 && VT != MVT::bf16)
11579 auto BitCast = [
this](EVT VT,
SDValue Op, SelectionDAG &DAG) {
11581 return getSVESafeBitCast(VT,
Op, DAG);
11588 auto SetVecVal = [&](
int Idx = -1) {
11594 VecVal1 = BitCast(VecVT, In1, DAG);
11595 VecVal2 = BitCast(VecVT, In2, DAG);
11601 }
else if (VT == MVT::f64) {
11602 VecVT = MVT::v2i64;
11603 SetVecVal(AArch64::dsub);
11604 }
else if (VT == MVT::f32) {
11605 VecVT = MVT::v4i32;
11606 SetVecVal(AArch64::ssub);
11607 }
else if (VT == MVT::f16 || VT == MVT::bf16) {
11608 VecVT = MVT::v8i16;
11609 SetVecVal(AArch64::hsub);
11620 if (VT == MVT::f64 || VT == MVT::v2f64) {
11628 DAG.
getNode(AArch64ISD::BSP,
DL, VecVT, SignMaskV, VecVal1, VecVal2);
11629 if (VT == MVT::f16 || VT == MVT::bf16)
11631 if (VT == MVT::f32)
11633 if (VT == MVT::f64)
11636 return BitCast(VT, BSP, DAG);
11642 Attribute::NoImplicitFloat))
11645 EVT VT =
Op.getValueType();
11648 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
11656 if (VT == MVT::i32 && IsParity)
11659 if (Subtarget->isSVEorStreamingSVEAvailable()) {
11660 if (VT == MVT::i32 || VT == MVT::i64) {
11661 EVT ContainerVT = VT == MVT::i32 ? MVT::nxv4i32 : MVT::nxv2i64;
11673 if (VT == MVT::i128) {
11686 if (!Subtarget->isNeonAvailable())
11697 if (VT == MVT::i32 || VT == MVT::i64) {
11698 if (VT == MVT::i32)
11704 AddV = DAG.
getNode(AArch64ISD::NVCAST,
DL,
11705 VT == MVT::i32 ? MVT::v2i32 : MVT::v1i64, AddV);
11711 }
else if (VT == MVT::i128) {
11717 DAG.
getNode(AArch64ISD::NVCAST,
DL, MVT::v2i64, AddV),
11725 assert(!IsParity &&
"ISD::PARITY of vector types not supported");
11727 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
11728 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
11729 "Unexpected type for custom ctpop lowering");
11737 EVT DT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
11741 if (VT == MVT::v2i64) {
11742 Val = DAG.
getNode(AArch64ISD::UDOT,
DL, DT, Zeros, Ones, Val);
11743 Val = DAG.
getNode(AArch64ISD::UADDLP,
DL, VT, Val);
11744 }
else if (VT == MVT::v2i32) {
11745 Val = DAG.
getNode(AArch64ISD::UDOT,
DL, DT, Zeros, Ones, Val);
11746 }
else if (VT == MVT::v4i32) {
11747 Val = DAG.
getNode(AArch64ISD::UDOT,
DL, DT, Zeros, Ones, Val);
11756 unsigned EltSize = 8;
11762 Val = DAG.
getNode(AArch64ISD::UADDLP,
DL, WidenVT, Val);
11769 EVT VT =
Op.getValueType();
11778 EVT VT =
Op.getValueType();
11780 unsigned Opcode =
Op.getOpcode();
11807 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SMAX_PRED);
11809 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SMIN_PRED);
11811 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::UMAX_PRED);
11813 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::UMIN_PRED);
11825 EVT VT =
Op.getValueType();
11829 VT, Subtarget->useSVEForFixedLengthVectors()))
11830 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU);
11842 REVB = DAG.
getNode(AArch64ISD::REV32,
DL, VST,
Op.getOperand(0));
11849 REVB = DAG.
getNode(AArch64ISD::REV32,
DL, VST,
Op.getOperand(0));
11856 REVB = DAG.
getNode(AArch64ISD::REV64,
DL, VST,
Op.getOperand(0));
11863 REVB = DAG.
getNode(AArch64ISD::REV64,
DL, VST,
Op.getOperand(0));
11869 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT,
11876 SmallVector<std::pair<SDValue, SDValue>, 16> &WorkList) {
11882 N =
N->getOperand(0);
11886 WorkList.push_back(std::make_pair(
N->getOperand(0),
N->getOperand(1)));
11892 if (
N->getOpcode() !=
ISD::OR || !
N->hasOneUse())
11906 EVT VT =
N->getValueType(0);
11916 unsigned NumXors = 0;
11921 std::tie(XOR0, XOR1) = WorkList[0];
11924 for (
unsigned I = 1;
I < WorkList.
size();
I++) {
11925 std::tie(XOR0, XOR1) = WorkList[
I];
11927 Cmp = DAG.
getNode(LogicOp,
DL, VT, Cmp, CmpChain);
11939 if (
Op.getValueType().isVector())
11940 return LowerVSETCC(
Op, DAG);
11942 bool IsStrict =
Op->isStrictFPOpcode();
11944 unsigned OpNo = IsStrict ? 1 : 0;
11947 Chain =
Op.getOperand(0);
11954 EVT VT =
Op.getValueType();
11960 if (
LHS.getValueType() == MVT::f128) {
11965 if (!
RHS.getNode()) {
11966 assert(
LHS.getValueType() ==
Op.getValueType() &&
11967 "Unexpected setcc expansion!");
11972 if (
LHS.getValueType().isInteger()) {
11988 SDValue Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, FVal, TVal, CCVal, Cmp);
11993 assert(
LHS.getValueType() == MVT::bf16 ||
LHS.getValueType() == MVT::f16 ||
11994 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
12015 Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, FVal, TVal, CC1Val, Cmp);
12025 DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, FVal, CC1Val, Cmp);
12028 Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, CS1, CC2Val, Cmp);
12038 EVT VT =
LHS.getValueType();
12039 if (VT != MVT::i32 && VT != MVT::i64)
12049 EVT OpVT =
Op.getValueType();
12058 return DAG.
getNode(AArch64ISD::CSEL,
DL, OpVT, FVal, TVal, CCVal,
12067 "function only supposed to emit natural comparisons");
12076 if (!
LHS.getValueType().isVector()) {
12115 assert(!
LHS.getValueType().isVector());
12116 assert(!
RHS.getValueType().isVector());
12120 if (!CTVal || !CFVal)
12134 bool OneNaN =
false;
12150 bool ShouldInvert =
false;
12159 if (!Cmp2 && !ShouldInvert)
12176SDValue AArch64TargetLowering::LowerSELECT_CC(
12182 if (
LHS.getValueType() == MVT::f128) {
12187 if (!
RHS.getNode()) {
12194 if ((
LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
12195 LHS.getValueType() == MVT::bf16) {
12201 if (
LHS.getValueType().isInteger()) {
12203 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
12215 LHS.getValueType() ==
RHS.getValueType()) {
12216 EVT VT =
LHS.getValueType();
12222 Shift = DAG.
getNOT(
DL, Shift, VT);
12236 uint64_t SignBitPos;
12238 EVT TestVT =
LHS.getValueType();
12242 LHS, SignBitConst);
12270 unsigned Opcode = AArch64ISD::CSEL;
12278 }
else if (CTVal && CFVal && CTVal->
isOne() && CFVal->
isZero()) {
12298 }
else if (CTVal && CFVal) {
12306 if (TrueVal == ~FalseVal) {
12307 Opcode = AArch64ISD::CSINV;
12308 }
else if (FalseVal > std::numeric_limits<int64_t>::min() &&
12309 TrueVal == -FalseVal) {
12310 Opcode = AArch64ISD::CSNEG;
12320 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
12321 Opcode = AArch64ISD::CSINC;
12323 if (TrueVal32 > FalseVal32) {
12329 const uint64_t TrueVal64 =
TrueVal;
12330 const uint64_t FalseVal64 =
FalseVal;
12332 if ((TrueVal64 == FalseVal64 + 1) || (TrueVal64 + 1 == FalseVal64)) {
12333 Opcode = AArch64ISD::CSINC;
12335 if (TrueVal > FalseVal) {
12348 if (Opcode != AArch64ISD::CSEL) {
12361 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->
isOne() &&
12366 if (CTVal && CTVal == RHSVal && AArch64CC ==
AArch64CC::EQ)
12368 else if (CFVal && CFVal == RHSVal && AArch64CC ==
AArch64CC::NE)
12370 }
else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->
isOne()) {
12371 assert (CTVal && CFVal &&
"Expected constant operands for CSNEG.");
12376 Opcode = AArch64ISD::CSINV;
12385 return DAG.
getNode(Opcode,
DL, VT, TVal, FVal, CCVal, Cmp);
12389 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::f32 ||
12390 LHS.getValueType() == MVT::f64);
12397 if (Subtarget->isNeonAvailable() &&
all_of(
Users, [](
const SDNode *U) {
12398 switch (
U->getOpcode()) {
12403 case AArch64ISD::DUP:
12421 if (
Flags.hasNoSignedZeros()) {
12425 if (RHSVal && RHSVal->
isZero()) {
12433 CFVal && CFVal->
isZero() &&
12441 SDValue CS1 = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, FVal, CC1Val, Cmp);
12447 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, CS1, CC2Val, Cmp);
12456 EVT Ty =
Op.getValueType();
12459 auto Idx =
Op.getConstantOperandAPInt(2);
12460 int64_t IdxVal = Idx.getSExtValue();
12462 "Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
12471 std::optional<unsigned> PredPattern;
12483 return DAG.
getNode(AArch64ISD::SPLICE,
DL, Ty, Pred,
Op.getOperand(0),
12503 SDNodeFlags
Flags =
Op->getFlags();
12505 return LowerSELECT_CC(CC,
LHS,
RHS, TVal, FVal,
Op->users(), Flags,
DL, DAG);
12515 EVT Ty =
Op.getValueType();
12516 if (Ty == MVT::aarch64svcount) {
12553 return DAG.
getNode(AArch64ISD::CSEL,
DL,
Op.getValueType(), TVal, FVal,
12572 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
12579 Op->getFlags(),
DL, DAG);
12581 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
12596 !Subtarget->isTargetMachO())
12597 return getAddrLarge(JT, DAG);
12599 return getAddrTiny(JT, DAG);
12600 return getAddr(JT, DAG);
12613 AFI->setJumpTableEntryInfo(JTI, 4,
nullptr);
12618 "aarch64-jump-table-hardening")) {
12620 if (Subtarget->isTargetMachO()) {
12625 assert(Subtarget->isTargetELF() &&
12626 "jump table hardening only supported on MachO/ELF");
12657 std::optional<uint16_t> BADisc =
12658 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(MF.
getFunction());
12669 {Dest,
Key, Disc, AddrDisc, Chain});
12679 if (Subtarget->isTargetMachO()) {
12680 return getGOT(CP, DAG);
12683 return getAddrLarge(CP, DAG);
12685 return getAddrTiny(CP, DAG);
12687 return getAddr(CP, DAG);
12695 if (std::optional<uint16_t> BADisc =
12696 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(
12710 {TargetBA,
Key, AddrDisc, Disc});
12718 return getAddrLarge(BAN, DAG);
12720 return getAddrTiny(BAN, DAG);
12722 return getAddr(BAN, DAG);
12727 AArch64FunctionInfo *FuncInfo =
12736 MachinePointerInfo(SV));
12742 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
12746 if (Subtarget->isWindowsArm64EC()) {
12752 uint64_t StackOffset;
12767 MachinePointerInfo(SV));
12775 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
12776 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
12791 MachinePointerInfo(SV),
Align(PtrSize)));
12808 MachinePointerInfo(SV,
Offset),
12826 MachinePointerInfo(SV,
Offset),
12836 GROffsAddr, MachinePointerInfo(SV,
Offset),
Align(4)));
12844 VROffsAddr, MachinePointerInfo(SV,
Offset),
Align(4)));
12854 if (Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg()))
12855 return LowerWin64_VASTART(
Op, DAG);
12856 else if (Subtarget->isTargetDarwin())
12857 return LowerDarwin_VASTART(
Op, DAG);
12859 return LowerAAPCS_VASTART(
Op, DAG);
12867 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
12868 unsigned VaListSize =
12869 (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
12871 : Subtarget->isTargetILP32() ? 20 : 32;
12877 Align(PtrSize),
false,
false,
nullptr,
12878 std::nullopt, MachinePointerInfo(DestSV),
12879 MachinePointerInfo(SrcSV));
12883 assert(Subtarget->isTargetDarwin() &&
12884 "automatic va_arg instruction only works on Darwin");
12887 EVT VT =
Op.getValueType();
12891 MaybeAlign
Align(
Op.getConstantOperandVal(3));
12892 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
12896 DAG.
getLoad(PtrMemVT,
DL, Chain, Addr, MachinePointerInfo(V));
12902 "currently not supported");
12904 if (Align && *Align > MinSlotSize) {
12920 ArgSize = std::max(ArgSize, MinSlotSize);
12921 bool NeedFPTrunc =
false;
12924 NeedFPTrunc =
true;
12934 DAG.
getStore(Chain,
DL, VANext, Addr, MachinePointerInfo(V));
12940 DAG.
getLoad(MVT::f64,
DL, APStore, VAList, MachinePointerInfo());
12950 return DAG.
getLoad(VT,
DL, APStore, VAList, MachinePointerInfo());
12958 EVT VT =
Op.getValueType();
12960 unsigned Depth =
Op.getConstantOperandVal(0);
12965 MachinePointerInfo());
12967 if (Subtarget->isTargetILP32())
12983#define GET_REGISTER_MATCHER
12984#include "AArch64GenAsmMatcher.inc"
12991 if (AArch64::X1 <=
Reg &&
Reg <= AArch64::X28) {
12993 unsigned DwarfRegNum = MRI->getDwarfRegNum(
Reg,
false);
13005 EVT VT =
Op.getValueType();
13021 EVT VT =
Op.getValueType();
13023 unsigned Depth =
Op.getConstantOperandVal(0);
13026 SDValue FrameAddr = LowerFRAMEADDR(
Op, DAG);
13043 if (Subtarget->hasPAuth()) {
13080 const APInt ImmInt = Imm.bitcastToAPInt();
13082 if (VT == MVT::f64)
13085 if (VT == MVT::f32)
13088 if (VT == MVT::f16 || VT == MVT::bf16)
13096 bool OptForSize)
const {
13098 const APInt ImmInt = Imm.bitcastToAPInt();
13105 if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
13115 "Should be able to build any value with at most 4 moves");
13116 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 4 : 2));
13117 IsLegal = Insn.
size() <= Limit;
13121 <<
" imm value: "; Imm.dump(););
13133 if ((ST->hasNEON() &&
13134 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
13135 VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
13136 VT == MVT::v4f32)) ||
13138 (VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
13145 constexpr unsigned AccurateBits = 8;
13147 ExtraSteps = DesiredBits <= AccurateBits
13152 return DAG.
getNode(Opcode,
SDLoc(Operand), VT, Operand);
13162 EVT VT =
Op.getValueType();
13170AArch64TargetLowering::getSqrtResultForDenormInput(
SDValue Op,
13179 bool Reciprocal)
const {
13183 DAG, ExtraSteps)) {
13188 SDNodeFlags
Flags =
13193 for (
int i = ExtraSteps; i > 0; --i) {
13196 Step = DAG.
getNode(AArch64ISD::FRSQRTS,
DL, VT, Operand, Step, Flags);
13211 int &ExtraSteps)
const {
13214 DAG, ExtraSteps)) {
13222 for (
int i = ExtraSteps; i > 0; --i) {
13262const char *AArch64TargetLowering::LowerXConstraint(
EVT ConstraintVT)
const {
13270 if (!Subtarget->hasFPARMv8())
13295static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
13298 (Constraint[1] !=
'p' && Constraint[1] !=
'z'))
13299 return std::nullopt;
13301 bool IsPredicate = Constraint[1] ==
'p';
13302 Constraint = Constraint.
substr(2, Constraint.
size() - 3);
13303 bool IsPredicateAsCount = IsPredicate && Constraint.
starts_with(
"n");
13304 if (IsPredicateAsCount)
13309 return std::nullopt;
13311 if (IsPredicateAsCount)
13312 return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
13314 return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
13315 return std::make_pair(AArch64::Z0 + V, &AArch64::ZPRRegClass);
13318static std::optional<PredicateConstraint>
13329 if (VT != MVT::aarch64svcount &&
13333 switch (Constraint) {
13335 return VT == MVT::aarch64svcount ? &AArch64::PNR_p8to15RegClass
13336 : &AArch64::PPR_p8to15RegClass;
13338 return VT == MVT::aarch64svcount ? &AArch64::PNR_3bRegClass
13339 : &AArch64::PPR_3bRegClass;
13341 return VT == MVT::aarch64svcount ? &AArch64::PNRRegClass
13342 : &AArch64::PPRRegClass;
13350static std::optional<ReducedGprConstraint>
13363 switch (Constraint) {
13365 return &AArch64::MatrixIndexGPR32_8_11RegClass;
13367 return &AArch64::MatrixIndexGPR32_12_15RegClass;
13401 return DAG.
getNode(AArch64ISD::CSINC,
DL, MVT::i32,
13404 getCondCode(DAG, getInvertedCondCode(CC)), NZCV);
13408SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
13410 const AsmOperandInfo &OpInfo,
SelectionDAG &DAG)
const {
13415 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
13416 OpInfo.ConstraintVT.getSizeInBits() < 8)
13431 if (OpInfo.ConstraintVT.getSizeInBits() <= 32)
13442AArch64TargetLowering::getConstraintType(
StringRef Constraint)
const {
13443 if (Constraint.
size() == 1) {
13444 switch (Constraint[0]) {
13481AArch64TargetLowering::getSingleConstraintMatchWeight(
13482 AsmOperandInfo &
info,
const char *constraint)
const {
13484 Value *CallOperandVal =
info.CallOperandVal;
13487 if (!CallOperandVal)
13491 switch (*constraint) {
13513std::pair<unsigned, const TargetRegisterClass *>
13514AArch64TargetLowering::getRegForInlineAsmConstraint(
13516 if (Constraint.
size() == 1) {
13517 switch (Constraint[0]) {
13520 return std::make_pair(0U,
nullptr);
13522 return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
13524 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
13525 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
13527 if (!Subtarget->hasFPARMv8())
13531 return std::make_pair(0U, &AArch64::ZPRRegClass);
13532 return std::make_pair(0U,
nullptr);
13534 if (VT == MVT::Other)
13538 return std::make_pair(0U, &AArch64::FPR16RegClass);
13540 return std::make_pair(0U, &AArch64::FPR32RegClass);
13542 return std::make_pair(0U, &AArch64::FPR64RegClass);
13544 return std::make_pair(0U, &AArch64::FPR128RegClass);
13550 if (!Subtarget->hasFPARMv8())
13553 return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
13555 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
13558 if (!Subtarget->hasFPARMv8())
13561 return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
13569 if (AArch64::ZPRRegClass.hasSubClassEq(
P->second) &&
13570 !Subtarget->isSVEorStreamingSVEAvailable())
13571 return std::make_pair(
TRI->getSubReg(
P->first, AArch64::zsub),
13572 &AArch64::FPR128RegClass);
13577 return std::make_pair(0U, RegClass);
13581 return std::make_pair(0U, RegClass);
13583 if (StringRef(
"{cc}").equals_insensitive(Constraint) ||
13585 return std::make_pair(
unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
13587 if (Constraint ==
"{za}") {
13588 return std::make_pair(
unsigned(AArch64::ZA), &AArch64::MPRRegClass);
13591 if (Constraint ==
"{zt0}") {
13592 return std::make_pair(
unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
13597 std::pair<unsigned, const TargetRegisterClass *> Res;
13602 unsigned Size = Constraint.
size();
13603 if ((
Size == 4 ||
Size == 5) && Constraint[0] ==
'{' &&
13604 tolower(Constraint[1]) ==
'v' && Constraint[
Size - 1] ==
'}') {
13607 if (!
Failed && RegNo >= 0 && RegNo <= 31) {
13611 if (VT != MVT::Other) {
13614 Res.first = AArch64::FPR16RegClass.getRegister(RegNo);
13615 Res.second = &AArch64::FPR16RegClass;
13618 Res.first = AArch64::FPR32RegClass.getRegister(RegNo);
13619 Res.second = &AArch64::FPR32RegClass;
13622 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
13623 Res.second = &AArch64::FPR64RegClass;
13626 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
13627 Res.second = &AArch64::FPR128RegClass;
13630 return std::make_pair(0U,
nullptr);
13633 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
13634 Res.second = &AArch64::FPR128RegClass;
13640 if (Res.second && !Subtarget->hasFPARMv8() &&
13641 !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
13642 !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
13643 return std::make_pair(0U,
nullptr);
13650 bool AllowUnknown)
const {
13651 if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
13652 return EVT(MVT::i64x8);
13659void AArch64TargetLowering::LowerAsmOperandForConstraint(
13665 if (Constraint.
size() != 1)
13668 char ConstraintLetter = Constraint[0];
13669 switch (ConstraintLetter) {
13680 if (
Op.getValueType() == MVT::i64)
13681 Result = DAG.
getRegister(AArch64::XZR, MVT::i64);
13683 Result = DAG.
getRegister(AArch64::WZR, MVT::i32);
13705 switch (ConstraintLetter) {
13719 CVal =
C->getSExtValue();
13750 if ((CVal & 0xFFFF) == CVal)
13752 if ((CVal & 0xFFFF0000ULL) == CVal)
13754 uint64_t NCVal = ~(uint32_t)CVal;
13755 if ((NCVal & 0xFFFFULL) == NCVal)
13757 if ((NCVal & 0xFFFF0000ULL) == NCVal)
13764 if ((CVal & 0xFFFFULL) == CVal)
13766 if ((CVal & 0xFFFF0000ULL) == CVal)
13768 if ((CVal & 0xFFFF00000000ULL) == CVal)
13770 if ((CVal & 0xFFFF000000000000ULL) == CVal)
13772 uint64_t NCVal = ~CVal;
13773 if ((NCVal & 0xFFFFULL) == NCVal)
13775 if ((NCVal & 0xFFFF0000ULL) == NCVal)
13777 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
13779 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
13793 Ops.push_back(Result);
13830 EVT VT =
Op.getValueType();
13832 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
13836 if (VT != MVT::v16i8 && VT != MVT::v8i8)
13840 assert((NumElts == 8 || NumElts == 16) &&
13841 "Need to have exactly 8 or 16 elements in vector.");
13847 for (
unsigned i = 0; i < NumElts; ++i) {
13854 SourceVec = OperandSourceVec;
13855 else if (SourceVec != OperandSourceVec)
13868 }
else if (!AndMaskConstants.
empty()) {
13888 if (!MaskSourceVec) {
13892 }
else if (MaskSourceVec != MaskSource->
getOperand(0)) {
13906 if (!AndMaskConstants.
empty())
13913 SourceVec, MaskSourceVec);
13921 LLVM_DEBUG(
dbgs() <<
"AArch64TargetLowering::ReconstructShuffle\n");
13923 EVT VT =
Op.getValueType();
13925 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
13928 struct ShuffleSourceInfo {
13943 ShuffleSourceInfo(
SDValue Vec)
13944 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
13945 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
13953 for (
unsigned i = 0; i < NumElts; ++i) {
13959 V.getOperand(0).getValueType().isScalableVector()) {
13961 dbgs() <<
"Reshuffle failed: "
13962 "a shuffle can only come from building a vector from "
13963 "various elements of other fixed-width vectors, provided "
13964 "their indices are constant\n");
13970 auto Source =
find(Sources, SourceVec);
13971 if (Source == Sources.
end())
13972 Source = Sources.
insert(Sources.
end(), ShuffleSourceInfo(SourceVec));
13975 unsigned EltNo = V.getConstantOperandVal(1);
13976 Source->MinElt = std::min(Source->MinElt, EltNo);
13977 Source->MaxElt = std::max(Source->MaxElt, EltNo);
13982 if ((Sources.
size() == 3 || Sources.
size() == 4) && NumElts > 4) {
13987 for (
unsigned I = 0;
I < NumElts; ++
I) {
13990 for (
unsigned OF = 0; OF < OutputFactor; OF++)
13991 Mask.push_back(-1);
13997 unsigned Lane = V.getConstantOperandVal(1);
13998 for (
unsigned S = 0; S < Sources.
size(); S++) {
13999 if (V.getOperand(0) == Sources[S].Vec) {
14000 unsigned InputSize = Sources[S].Vec.getScalarValueSizeInBits();
14001 unsigned InputBase = 16 * S + Lane * InputSize / 8;
14002 for (
unsigned OF = 0; OF < OutputFactor; OF++)
14003 Mask.push_back(InputBase + OF);
14013 ? Intrinsic::aarch64_neon_tbl3
14014 : Intrinsic::aarch64_neon_tbl4,
14016 for (
unsigned i = 0; i < Sources.
size(); i++) {
14017 SDValue Src = Sources[i].Vec;
14018 EVT SrcVT = Src.getValueType();
14021 "Expected a legally typed vector");
14029 for (
unsigned i = 0; i < Mask.size(); i++)
14031 assert((Mask.size() == 8 || Mask.size() == 16) &&
14032 "Expected a v8i8 or v16i8 Mask");
14034 Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8,
DL, TBLMask));
14038 Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
14042 if (Sources.
size() > 2) {
14043 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: currently only do something "
14044 <<
"sensible when at most two source vectors are "
14052 for (
auto &Source : Sources) {
14053 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
14054 if (SrcEltTy.
bitsLT(SmallestEltTy)) {
14055 SmallestEltTy = SrcEltTy;
14058 unsigned ResMultiplier =
14067 for (
auto &Src : Sources) {
14068 EVT SrcVT = Src.ShuffleVec.getValueType();
14081 assert(2 * SrcVTSize == VTSize);
14086 DAG.
getPOISON(Src.ShuffleVec.getValueType()));
14092 dbgs() <<
"Reshuffle failed: result vector too small to extract\n");
14096 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
14098 dbgs() <<
"Reshuffle failed: span too large for a VEXT to cope\n");
14102 if (Src.MinElt >= NumSrcElts) {
14107 Src.WindowBase = -NumSrcElts;
14108 }
else if (Src.MaxElt < NumSrcElts) {
14125 dbgs() <<
"Reshuffle failed: don't know how to lower AArch64ISD::EXT "
14126 "for SVE vectors.");
14131 DAG.
getNode(AArch64ISD::EXT,
DL, DestVT, VEXTSrc1, VEXTSrc2,
14133 Src.WindowBase = -Src.MinElt;
14140 for (
auto &Src : Sources) {
14142 if (SrcEltTy == SmallestEltTy)
14147 DAG.
getNode(AArch64ISD::NVCAST,
DL, ShuffleVT, Src.ShuffleVec);
14153 Src.WindowBase *= Src.WindowScale;
14158 for (
auto Src : Sources)
14159 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
14167 if (Entry.isUndef())
14170 auto Src =
find(Sources, Entry.getOperand(0));
14179 int LanesDefined = BitsDefined / BitsPerShuffleLane;
14183 int *LaneMask = &Mask[i * ResMultiplier];
14185 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
14186 ExtractBase += NumElts * (Src - Sources.
begin());
14187 for (
int j = 0; j < LanesDefined; ++j)
14188 LaneMask[j] = ExtractBase + j;
14193 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: illegal shuffle mask\n");
14199 for (
unsigned i = 0; i < Sources.
size(); ++i)
14206 V = DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Shuffle);
14212 dbgs() <<
"Reshuffle, creating node: "; V.dump(););
14231 unsigned ExpectedElt = Imm;
14232 for (
unsigned i = 1; i < NumElts; ++i) {
14236 if (ExpectedElt == NumElts)
14241 if (ExpectedElt !=
static_cast<unsigned>(M[i]))
14252 if (V.getValueType() != MVT::v16i8)
14254 assert(V.getNumOperands() == 16 &&
"Expected 16 operands on the BUILDVECTOR");
14256 for (
unsigned X = 0;
X < 4;
X++) {
14268 for (
unsigned Y = 1;
Y < 4;
Y++) {
14284 V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
14285 V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
14287 if (V.getValueType() == MVT::v4i32)
14303 unsigned &DupLaneOp) {
14305 "Only possible block sizes for wide DUP are: 16, 32, 64");
14324 for (
size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
14325 for (
size_t I = 0;
I < NumEltsPerBlock;
I++) {
14326 int Elt = M[BlockIndex * NumEltsPerBlock +
I];
14330 if ((
unsigned)Elt >= SingleVecNumElements)
14332 if (BlockElts[
I] < 0)
14333 BlockElts[
I] = Elt;
14334 else if (BlockElts[
I] != Elt)
14343 auto FirstRealEltIter =
find_if(BlockElts, [](
int Elt) {
return Elt >= 0; });
14344 assert(FirstRealEltIter != BlockElts.
end() &&
14345 "Shuffle with all-undefs must have been caught by previous cases, "
14347 if (FirstRealEltIter == BlockElts.
end()) {
14353 size_t FirstRealIndex = FirstRealEltIter - BlockElts.
begin();
14355 if ((
unsigned)*FirstRealEltIter < FirstRealIndex)
14358 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
14361 if (Elt0 % NumEltsPerBlock != 0)
14365 for (
size_t I = 0;
I < NumEltsPerBlock;
I++)
14366 if (BlockElts[
I] >= 0 && (
unsigned)BlockElts[
I] != Elt0 +
I)
14369 DupLaneOp = Elt0 / NumEltsPerBlock;
14378 const int *FirstRealElt =
find_if(M, [](
int Elt) {
return Elt >= 0; });
14383 APInt ExpectedElt =
APInt(MaskBits, *FirstRealElt + 1,
false,
14387 bool FoundWrongElt = std::any_of(FirstRealElt + 1, M.end(), [&](
int Elt) {
14388 return Elt != ExpectedElt++ && Elt >= 0;
14420 bool &ReverseEXT,
unsigned &Imm) {
14422 unsigned OtherBase = SplatOperand == 0 ? NumElts : 0;
14423 auto IsSplatElt = [=](
int Elt) {
14425 (SplatOperand == 0 ? Elt < (int)NumElts : Elt >= (int)NumElts);
14428 unsigned PrefixSplatElts = 0;
14429 while (PrefixSplatElts != NumElts && IsSplatElt(M[PrefixSplatElts]))
14432 if (PrefixSplatElts > 0 && PrefixSplatElts < NumElts) {
14434 for (
unsigned I = PrefixSplatElts;
I != NumElts; ++
I) {
14435 int Expected = OtherBase +
I - PrefixSplatElts;
14443 ReverseEXT = SplatOperand == 1;
14444 Imm = NumElts - PrefixSplatElts;
14449 unsigned SuffixSplatElts = 0;
14450 while (SuffixSplatElts != NumElts &&
14451 IsSplatElt(M[NumElts - 1 - SuffixSplatElts]))
14454 if (0 < SuffixSplatElts && SuffixSplatElts < NumElts) {
14456 for (
unsigned I = 0;
I != NumElts - SuffixSplatElts; ++
I) {
14457 int Expected = OtherBase +
I + SuffixSplatElts;
14465 ReverseEXT = SplatOperand == 0;
14466 Imm = SuffixSplatElts;
14479 if (NumElts % 2 != 0)
14481 WhichResult = (M[0] == 0 ? 0 : 1);
14482 unsigned Idx = WhichResult * NumElts / 2;
14483 for (
unsigned i = 0; i != NumElts; i += 2) {
14484 if ((M[i] >= 0 && (
unsigned)M[i] != Idx) ||
14485 (M[i + 1] >= 0 && (
unsigned)M[i + 1] != Idx))
14498 WhichResult = (M[0] == 0 ? 0 : 1);
14499 for (
unsigned j = 0; j != 2; ++j) {
14500 unsigned Idx = WhichResult;
14501 for (
unsigned i = 0; i != Half; ++i) {
14502 int MIdx = M[i + j * Half];
14503 if (MIdx >= 0 && (
unsigned)MIdx != Idx)
14517 if (NumElts % 2 != 0)
14519 WhichResult = (M[0] == 0 ? 0 : 1);
14520 for (
unsigned i = 0; i < NumElts; i += 2) {
14521 if ((M[i] >= 0 && (
unsigned)M[i] != i + WhichResult) ||
14522 (M[i + 1] >= 0 && (
unsigned)M[i + 1] != i + WhichResult))
14529 bool &DstIsLeft,
int &Anomaly) {
14530 if (M.size() !=
static_cast<size_t>(NumInputElements))
14533 int NumLHSMatch = 0, NumRHSMatch = 0;
14534 int LastLHSMismatch = -1, LastRHSMismatch = -1;
14536 for (
int i = 0; i < NumInputElements; ++i) {
14546 LastLHSMismatch = i;
14548 if (M[i] == i + NumInputElements)
14551 LastRHSMismatch = i;
14554 if (NumLHSMatch == NumInputElements - 1) {
14556 Anomaly = LastLHSMismatch;
14558 }
else if (NumRHSMatch == NumInputElements - 1) {
14560 Anomaly = LastRHSMismatch;
14573 for (
int I = 0,
E = NumElts / 2;
I !=
E;
I++) {
14578 int Offset = NumElts / 2;
14579 for (
int I = NumElts / 2,
E = NumElts;
I !=
E;
I++) {
14580 if (Mask[
I] !=
I + SplitLHS *
Offset)
14589 EVT VT =
Op.getValueType();
14624 unsigned OpNum = (PFEntry >> 26) & 0x0F;
14625 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
14626 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
14648 if (LHSID == (1 * 9 + 2) * 9 + 3)
14650 assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 &&
"Illegal OP_COPY!");
14654 if (OpNum == OP_MOVLANE) {
14656 auto getPFIDLane = [](
unsigned ID,
int Elt) ->
int {
14657 assert(Elt < 4 &&
"Expected Perfect Lanes to be less than 4");
14663 return (
ID % 9 == 8) ? -1 :
ID % 9;
14672 assert(RHSID < 8 &&
"Expected a lane index for RHSID!");
14673 unsigned ExtLane = 0;
14679 int MaskElt = getPFIDLane(
ID, (RHSID & 0x01) << 1) >> 1;
14681 MaskElt = (getPFIDLane(
ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
14682 assert(MaskElt >= 0 &&
"Didn't expect an undef movlane index!");
14683 ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
14684 Input = MaskElt < 2 ? V1 : V2;
14690 "Expected 16 or 32 bit shuffle elements");
14695 int MaskElt = getPFIDLane(
ID, RHSID);
14696 assert(MaskElt >= 0 &&
"Didn't expect an undef movlane index!");
14697 ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
14698 Input = MaskElt < 4 ? V1 : V2;
14700 if (VT == MVT::v4i16) {
14706 Input.getValueType().getVectorElementType(),
14728 return DAG.
getNode(AArch64ISD::REV64,
DL, VT, OpLHS);
14733 return DAG.
getNode(AArch64ISD::REV32,
DL, VT, OpLHS);
14735 assert(VT == MVT::v8i8 || VT == MVT::v16i8);
14736 EVT BSVT = VT == MVT::v8i8 ? MVT::v4i16 : MVT::v8i16;
14738 AArch64ISD::NVCAST,
DL, VT,
14740 DAG.
getNode(AArch64ISD::NVCAST,
DL, BSVT, OpLHS)));
14748 if (EltTy == MVT::i8)
14749 Opcode = AArch64ISD::DUPLANE8;
14750 else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
14751 Opcode = AArch64ISD::DUPLANE16;
14752 else if (EltTy == MVT::i32 || EltTy == MVT::f32)
14753 Opcode = AArch64ISD::DUPLANE32;
14754 else if (EltTy == MVT::i64 || EltTy == MVT::f64)
14755 Opcode = AArch64ISD::DUPLANE64;
14762 return DAG.
getNode(Opcode,
DL, VT, OpLHS, Lane);
14768 return DAG.
getNode(AArch64ISD::EXT,
DL, VT, OpLHS, OpRHS,
14772 return DAG.
getNode(AArch64ISD::UZP1,
DL, VT, OpLHS, OpRHS);
14774 return DAG.
getNode(AArch64ISD::UZP2,
DL, VT, OpLHS, OpRHS);
14776 return DAG.
getNode(AArch64ISD::ZIP1,
DL, VT, OpLHS, OpRHS);
14778 return DAG.
getNode(AArch64ISD::ZIP2,
DL, VT, OpLHS, OpRHS);
14780 return DAG.
getNode(AArch64ISD::TRN1,
DL, VT, OpLHS, OpRHS);
14782 return DAG.
getNode(AArch64ISD::TRN2,
DL, VT, OpLHS, OpRHS);
14793 EVT EltVT =
Op.getValueType().getVectorElementType();
14806 MVT IndexVT = MVT::v8i8;
14807 unsigned IndexLen = 8;
14808 if (
Op.getValueSizeInBits() == 128) {
14809 IndexVT = MVT::v16i8;
14814 for (
int Val : ShuffleMask) {
14815 for (
unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
14816 unsigned Offset = Byte + Val * BytesPerElt;
14819 if (IsUndefOrZero &&
Offset >= IndexLen)
14829 if (IsUndefOrZero) {
14838 if (IndexLen == 8) {
14863 if (EltType == MVT::i8)
14864 return AArch64ISD::DUPLANE8;
14865 if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
14866 return AArch64ISD::DUPLANE16;
14867 if (EltType == MVT::i32 || EltType == MVT::f32)
14868 return AArch64ISD::DUPLANE32;
14869 if (EltType == MVT::i64 || EltType == MVT::f64)
14870 return AArch64ISD::DUPLANE64;
14878 auto getScaledOffsetDup = [](
SDValue BitCast,
int &LaneC,
MVT &CastVT) {
14889 unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
14891 if (ExtIdxInBits % CastedEltBitWidth != 0)
14899 LaneC += ExtIdxInBits / CastedEltBitWidth;
14906 unsigned SrcVecNumElts =
14913 if (getScaledOffsetDup(V, Lane, CastVT)) {
14914 V = DAG.
getBitcast(CastVT, V.getOperand(0).getOperand(0));
14916 V.getOperand(0).getValueType().is128BitVector()) {
14919 Lane += V.getConstantOperandVal(1);
14920 V = V.getOperand(0);
14946 EVT VT =
Op.getValueType();
14956 if (ElementSize > 32 || ElementSize == 1)
14986 EVT VT =
Op.getValueType();
15003 for (
unsigned I = 0;
I < 16;
I++) {
15004 if (ShuffleMask[
I] < 16)
15010 TBLMaskParts[
I] = DAG.
getConstant(
C->getSExtValue() + 32,
DL, MVT::i32);
15024AArch64TargetLowering::LowerEXTEND_VECTOR_INREG(
SDValue Op,
15027 EVT VT =
Op.getValueType();
15031 unsigned UnpackOpcode =
Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
15039 Val = DAG.
getNode(UnpackOpcode,
DL, MVT::nxv8i16, Val);
15040 if (VT == MVT::nxv8i16)
15044 Val = DAG.
getNode(UnpackOpcode,
DL, MVT::nxv4i32, Val);
15045 if (VT == MVT::nxv4i32)
15049 Val = DAG.
getNode(UnpackOpcode,
DL, MVT::nxv2i64, Val);
15050 assert(VT == MVT::nxv2i64 &&
"Unexpected result type!");
15061AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(
SDValue Op,
15064 EVT VT =
Op.getValueType();
15067 return LowerEXTEND_VECTOR_INREG(
Op, DAG);
15072 "Unexpected extension factor.");
15079 DAG.
getNode(AArch64ISD::ZIP1,
DL, SrcVT, SrcOp, Zeros));
15085 EVT VT =
Op.getValueType();
15090 return LowerFixedLengthVECTOR_SHUFFLEToSVE(
Op, DAG);
15096 ArrayRef<int> ShuffleMask = SVN->
getMask();
15103 "Unexpected VECTOR_SHUFFLE mask size!");
15129 for (
unsigned LaneSize : {64U, 32U, 16U}) {
15132 unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
15133 : LaneSize == 32 ? AArch64ISD::DUPLANE32
15134 : AArch64ISD::DUPLANE16;
15149 if (
isREVMask(ShuffleMask, EltSize, NumElts, 64))
15151 if (
isREVMask(ShuffleMask, EltSize, NumElts, 32))
15153 if (
isREVMask(ShuffleMask, EltSize, NumElts, 16)) {
15155 assert(VT == MVT::v8i8 || VT == MVT::v16i8);
15156 EVT BSVT = VT == MVT::v8i8 ? MVT::v4i16 : MVT::v8i16;
15158 AArch64ISD::NVCAST,
DL, VT,
15160 DAG.
getNode(AArch64ISD::NVCAST,
DL, BSVT, V1)));
15163 if (((NumElts == 8 && EltSize == 16) || (NumElts == 16 && EltSize == 8)) &&
15166 return DAG.
getNode(AArch64ISD::EXT,
DL, VT, Rev, Rev,
15174 for (
unsigned SplatOperand : {0
U, 1U}) {
15175 if ((SplatOperand == 0 && !IsSplat1) || (SplatOperand == 1 && !IsSplat2))
15178 bool ReverseSplatEXT =
false;
15184 if (ReverseSplatEXT)
15187 return DAG.
getNode(AArch64ISD::EXT,
DL, VT, ExtOp1, ExtOp2,
15192 bool ReverseEXT =
false;
15194 if (
isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
15206 unsigned WhichResult;
15207 unsigned OperandOrder;
15208 if (
isZIPMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
15209 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
15211 OperandOrder == 0 ? V2 : V1);
15213 if (
isUZPMask(ShuffleMask, NumElts, WhichResult)) {
15214 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
15217 if (
isTRNMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
15218 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
15220 OperandOrder == 0 ? V2 : V1);
15224 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
15228 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
15232 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
15242 if (
isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
15243 SDValue DstVec = DstIsLeft ? V1 : V2;
15247 int SrcLane = ShuffleMask[Anomaly];
15248 if (SrcLane >= NumInputElements) {
15250 SrcLane -= NumElts;
15257 ScalarVT = MVT::i32;
15270 if (NumElts == 4) {
15271 unsigned PFIndexes[4];
15272 for (
unsigned i = 0; i != 4; ++i) {
15273 if (ShuffleMask[i] < 0)
15276 PFIndexes[i] = ShuffleMask[i];
15280 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
15281 PFIndexes[2] * 9 + PFIndexes[3];
15291 "Expected larger vector element sizes to be handled already");
15293 for (
int M : ShuffleMask)
15295 M >=
static_cast<int>(NumElts) ? 0 : 0xffffffff,
DL, MVT::i32));
15309 EVT VT =
Op.getValueType();
15312 return LowerToScalableOp(
Op, DAG);
15315 "Unexpected vector type!");
15330 if (VT == MVT::nxv1i1)
15342 EVT VT =
Op.getValueType();
15355 if (CIdx && (CIdx->getZExtValue() <= 3)) {
15357 return DAG.
getNode(AArch64ISD::DUPLANE128,
DL, VT,
Op.getOperand(1), CI);
15379 SDValue TBL = DAG.
getNode(AArch64ISD::TBL,
DL, MVT::nxv2i64, V, ShuffleMask);
15385 APInt &UndefBits) {
15387 APInt SplatBits, SplatUndef;
15388 unsigned SplatBitSize;
15390 if (BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
15393 for (
unsigned i = 0; i < NumSplats; ++i) {
15394 CnstBits <<= SplatBitSize;
15395 UndefBits <<= SplatBitSize;
15397 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.
getSizeInBits());
15408 const APInt &Bits) {
15409 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15411 EVT VT =
Op.getValueType();
15420 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15431 EVT VT =
Op.getValueType();
15436 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15439 bool isAdvSIMDModImm =
false;
15459 if (isAdvSIMDModImm) {
15473 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15484 EVT VT =
Op.getValueType();
15489 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15492 bool isAdvSIMDModImm =
false;
15504 if (isAdvSIMDModImm) {
15518 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15528 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15530 EVT VT =
Op.getValueType();
15532 bool isAdvSIMDModImm =
false;
15544 if (isAdvSIMDModImm) {
15549 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15558 const APInt &Bits) {
15559 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15561 EVT VT =
Op.getValueType();
15570 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15579 const APInt &Bits) {
15580 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15582 EVT VT =
Op.getValueType();
15585 bool isAdvSIMDModImm =
false;
15589 MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
15594 MovTy = MVT::v2f64;
15597 if (isAdvSIMDModImm) {
15601 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15621 for (
unsigned i = 1; i < NumElts; ++i)
15630 while (
N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
15631 N =
N.getOperand(0);
15637 unsigned NumElts =
N.getValueType().getVectorMinNumElements();
15640 while (
N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
15641 N =
N.getOperand(0);
15644 if (
N.getValueType().getVectorMinNumElements() < NumElts)
15654 if (
N.getOpcode() == AArch64ISD::PTRUE &&
15655 N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
15656 return N.getValueType().getVectorMinNumElements() >= NumElts;
15668 EVT VT =
N->getValueType(0);
15678 SDValue FirstOp =
N->getOperand(0);
15679 unsigned FirstOpc = FirstOp.
getOpcode();
15680 SDValue SecondOp =
N->getOperand(1);
15681 unsigned SecondOpc = SecondOp.
getOpcode();
15688 if ((FirstOpc ==
ISD::AND || FirstOpc == AArch64ISD::BICi) &&
15689 (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR ||
15690 SecondOpc == AArch64ISD::SHL_PRED ||
15691 SecondOpc == AArch64ISD::SRL_PRED)) {
15695 }
else if ((SecondOpc ==
ISD::AND || SecondOpc == AArch64ISD::BICi) &&
15696 (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR ||
15697 FirstOpc == AArch64ISD::SHL_PRED ||
15698 FirstOpc == AArch64ISD::SRL_PRED)) {
15705 bool IsShiftRight = Shift.
getOpcode() == AArch64ISD::VLSHR ||
15706 Shift.
getOpcode() == AArch64ISD::SRL_PRED;
15707 bool ShiftHasPredOp = Shift.
getOpcode() == AArch64ISD::SHL_PRED ||
15708 Shift.
getOpcode() == AArch64ISD::SRL_PRED;
15712 if (ShiftHasPredOp) {
15718 C2 =
C.getZExtValue();
15721 C2 = C2node->getZExtValue();
15735 assert(C1nodeImm && C1nodeShift);
15737 C1AsAPInt = C1AsAPInt.
zextOrTrunc(ElemSizeInBits);
15743 if (C2 > ElemSizeInBits)
15748 if (C1AsAPInt != RequiredC1)
15756 unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
15761 EVT VT =
N->getValueType(0);
15762 assert(VT.
isVector() &&
"Expected vector type in tryLowerToBSL\n");
15780 for (
int i = 1; i >= 0; --i) {
15781 for (
int j = 1; j >= 0; --j) {
15807 if (
Sub.getOperand(1) !=
Add.getOperand(0))
15810 return DAG.
getNode(AArch64ISD::BSP,
DL, VT,
Sub, SubSibling, AddSibling);
15818 for (
int i = 1; i >= 0; --i)
15819 for (
int j = 1; j >= 0; --j) {
15830 if (!BVN0 || !BVN1)
15833 bool FoundMatch =
true;
15837 if (!CN0 || !CN1 ||
15840 FoundMatch =
false;
15855 !Subtarget->isNeonAvailable()))
15856 return LowerToScalableOp(
Op, DAG);
15865 EVT VT =
Op.getValueType();
15870 BuildVectorSDNode *BVN =
15874 LHS =
Op.getOperand(1);
15892 UndefBits, &
LHS)) ||
15908 EVT VT =
Op.getValueType();
15922 CstLane->getAPIntValue().trunc(EltTy.
getSizeInBits()).getZExtValue(),
15926 }
else if (Lane.getOpcode() ==
ISD::UNDEF) {
15929 assert(Lane.getValueType() == MVT::i32 &&
15930 "Unexpected BUILD_VECTOR operand type");
15932 Ops.push_back(Lane);
15939 EVT VT =
Op.getValueType();
15947 int32_t ImmVal, ShiftVal;
15956 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, SplatVal);
15961 EVT VT =
Op.getValueType();
15963 "Expected a legal NEON vector");
15969 auto TryMOVIWithBits = [&](
APInt DefBits) {
15983 APInt NotDefBits = ~DefBits;
15993 if (
SDValue R = TryMOVIWithBits(DefBits))
15995 if (
SDValue R = TryMOVIWithBits(UndefBits))
16003 auto TryWithFNeg = [&](
APInt DefBits,
MVT FVT) {
16009 unsigned NumElts = VT.
getSizeInBits() / FVT.getScalarSizeInBits();
16010 for (
unsigned i = 0; i < NumElts; i++)
16011 NegBits |= Neg << (FVT.getScalarSizeInBits() * i);
16012 NegBits = DefBits ^ NegBits;
16016 if (
SDValue NewOp = TryMOVIWithBits(NegBits)) {
16020 AArch64ISD::NVCAST,
DL, VT,
16022 DAG.
getNode(AArch64ISD::NVCAST,
DL, VFVT, NewOp)));
16027 if ((R = TryWithFNeg(DefBits, MVT::f32)) ||
16028 (R = TryWithFNeg(DefBits, MVT::f64)) ||
16029 (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
16036SDValue AArch64TargetLowering::LowerFixedLengthBuildVectorToSVE(
16038 EVT VT =
Op.getValueType();
16062 NumElems -
count_if(
Op->op_values(), IsExtractElt) > 4)
16069 return Op.isUndef() ? Poison
16070 : DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
16071 ContainerVT, Poison, Op, ZeroI64);
16075 while (Intermediates.
size() > 1) {
16078 for (
unsigned I = 0;
I < Intermediates.
size();
I += 2) {
16081 Intermediates[
I / 2] =
16083 : DAG.
getNode(AArch64ISD::ZIP1,
DL, ZipVT, Op0, Op1);
16086 Intermediates.
resize(Intermediates.
size() / 2);
16097 EVT VT =
Op.getValueType();
16099 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
16102 return LowerFixedLengthBuildVectorToSVE(
Op, DAG);
16120 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
16121 if (Val.isZero() || (VT.
isInteger() && Val.isAllOnes()))
16125 if (
Const->isZero() && !
Const->isNegative())
16146 bool isOnlyLowElement =
true;
16147 bool usesOnlyOneValue =
true;
16148 bool usesOnlyOneConstantValue =
true;
16150 bool AllLanesExtractElt =
true;
16151 unsigned NumConstantLanes = 0;
16152 unsigned NumDifferentLanes = 0;
16153 unsigned NumUndefLanes = 0;
16156 SmallMapVector<SDValue, unsigned, 16> DifferentValueMap;
16157 unsigned ConsecutiveValCount = 0;
16162 bool MaybeLowHalfZeroHigh =
16164 unsigned HalfElts = MaybeLowHalfZeroHigh ? (NumElts >> 1) : 0;
16165 SDValue LowHalfFirstVal = MaybeLowHalfZeroHigh ?
Op.getOperand(0) :
SDValue();
16166 for (
unsigned i = 0; i < NumElts; ++i) {
16169 AllLanesExtractElt =
false;
16172 MaybeLowHalfZeroHigh =
false;
16176 isOnlyLowElement =
false;
16181 ++NumConstantLanes;
16182 if (!ConstantValue.
getNode())
16184 else if (ConstantValue != V)
16185 usesOnlyOneConstantValue =
false;
16188 if (!
Value.getNode())
16190 else if (V !=
Value) {
16191 usesOnlyOneValue =
false;
16192 ++NumDifferentLanes;
16195 if (PrevVal != V) {
16196 ConsecutiveValCount = 0;
16199 if (MaybeLowHalfZeroHigh) {
16200 if (i < HalfElts) {
16201 if (V != LowHalfFirstVal)
16202 MaybeLowHalfZeroHigh =
false;
16203 }
else if (!IsZero(V)) {
16204 MaybeLowHalfZeroHigh =
false;
16219 DifferentValueMap[
V] = ++ConsecutiveValCount;
16222 if (!
Value.getNode()) {
16224 dbgs() <<
"LowerBUILD_VECTOR: value undefined, creating undef node\n");
16232 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: only low element used, creating 1 "
16233 "SCALAR_TO_VECTOR node\n");
16237 if (MaybeLowHalfZeroHigh && LowHalfFirstVal.
getNode() &&
16248 : DAG.
getNode(AArch64ISD::DUP,
DL, HalfVT, LowHalfFirstVal);
16253 if (AllLanesExtractElt) {
16254 SDNode *
Vector =
nullptr;
16259 for (
unsigned i = 0; i < NumElts; ++i) {
16261 const SDNode *
N =
V.getNode();
16286 if (Val == 2 * i) {
16290 if (Val - 1 == 2 * i) {
16317 if (usesOnlyOneValue) {
16320 Value.getValueType() != VT) {
16322 dbgs() <<
"LowerBUILD_VECTOR: use DUP for non-constant splats\n");
16330 if (
Value.getValueSizeInBits() == 64) {
16332 dbgs() <<
"LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
16344 assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
16345 EltTy == MVT::f64) &&
"Unsupported floating-point vector type");
16347 dbgs() <<
"LowerBUILD_VECTOR: float constant splats, creating int "
16348 "BITCASTS, and try again\n");
16350 for (
unsigned i = 0; i < NumElts; ++i)
16354 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: trying to lower new vector: ";
16356 Val = LowerBUILD_VECTOR(Val, DAG);
16366 bool PreferDUPAndInsert =
16368 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
16369 NumDifferentLanes >= NumConstantLanes;
16375 if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
16379 APInt ConstantValueAPInt(1, 0);
16381 ConstantValueAPInt =
C->getAPIntValue().zextOrTrunc(BitSize);
16383 !ConstantValueAPInt.isAllOnes()) {
16387 Val = DAG.
getNode(AArch64ISD::DUP,
DL, VT, ConstantValue);
16391 for (
unsigned i = 0; i < NumElts; ++i) {
16408 const SDLoc
DL(
Op);
16409 APInt PackedVal(64, 0);
16410 unsigned BitPos = 0;
16417 LaneBits = APInt(EltSizeInBits, 0);
16419 LaneBits =
C->getAPIntValue();
16421 LaneBits = CFP->getValueAPF().bitcastToAPInt();
16426 BitPos += EltSizeInBits;
16433 if (Insns.
size() > 2)
16444 dbgs() <<
"LowerBUILD_VECTOR: all elements are constant, use default "
16456 if (NumElts >= 4) {
16464 if (PreferDUPAndInsert) {
16469 for (
unsigned I = 0;
I < NumElts; ++
I)
16480 if (DifferentValueMap.
size() == 2 && NumUndefLanes == 0) {
16492 bool canUseVECTOR_CONCAT =
true;
16493 for (
auto Pair : DifferentValueMap) {
16495 if (Pair.second != NumElts / 2)
16496 canUseVECTOR_CONCAT =
false;
16509 if (canUseVECTOR_CONCAT) {
16532 if (NumElts >= 8) {
16533 SmallVector<int, 16> MaskVec;
16535 SDValue FirstLaneVal =
Op.getOperand(0);
16536 for (
unsigned i = 0; i < NumElts; ++i) {
16538 if (FirstLaneVal == Val)
16562 dbgs() <<
"LowerBUILD_VECTOR: alternatives failed, creating sequence "
16563 "of INSERT_VECTOR_ELT\n");
16580 LLVM_DEBUG(
dbgs() <<
"Creating node for op0, it is not undefined:\n");
16586 dbgs() <<
"Creating nodes for the other vector elements:\n";
16588 for (; i < NumElts; ++i) {
16599 dbgs() <<
"LowerBUILD_VECTOR: use default expansion, failed to find "
16600 "better alternative\n");
16607 !Subtarget->isNeonAvailable()))
16608 return LowerFixedLengthConcatVectorsToSVE(
Op, DAG);
16610 assert(
Op.getValueType().isScalableVector() &&
16612 "Expected legal scalable vector type!");
16617 "Unexpected number of operands in CONCAT_VECTORS");
16619 if (NumOperands == 2)
16624 while (ConcatOps.size() > 1) {
16625 for (
unsigned I = 0,
E = ConcatOps.size();
I !=
E;
I += 2) {
16633 ConcatOps.resize(ConcatOps.size() / 2);
16635 return ConcatOps[0];
16646 !Subtarget->isNeonAvailable()))
16647 return LowerFixedLengthInsertVectorElt(
Op, DAG);
16649 EVT VT =
Op.getValueType();
16658 if (VT == MVT::nxv1i1) {
16662 WidenVec, Elt, Idx);
16671 PromoteVec, Elt, Idx);
16684AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(
SDValue Op,
16687 EVT VT =
Op.getOperand(0).getValueType();
16693 if (VT == MVT::nxv1i1) {
16697 WidenedPred,
Op.getOperand(1));
16704 MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
16706 Extend,
Op.getOperand(1));
16711 return LowerFixedLengthExtractVectorElt(
Op, DAG);
16719 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16720 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
16721 VT == MVT::v8f16 || VT == MVT::v8bf16)
16724 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
16725 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
16736 if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
16746 EVT VT =
Op.getValueType();
16748 "Only cases that extract a fixed length vector are supported!");
16749 EVT InVT =
Op.getOperand(0).getValueType();
16757 unsigned Idx =
Op.getConstantOperandVal(1);
16776 if (PackedVT != InVT) {
16800 assert(
Op.getValueType().isScalableVector() &&
16801 "Only expect to lower inserts into scalable vectors!");
16803 EVT InVT =
Op.getOperand(1).getValueType();
16804 unsigned Idx =
Op.getConstantOperandVal(2);
16809 EVT VT =
Op.getValueType();
16825 if (Idx < (NumElts / 2))
16851 Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
16852 Vec1 = getSVESafeBitCast(NarrowVT, Vec1, DAG);
16856 Vec1 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, Vec1);
16865 HiVec0 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, HiVec0);
16866 Narrow = DAG.
getNode(AArch64ISD::UZP1,
DL, NarrowVT, Vec1, HiVec0);
16869 "Invalid subvector index!");
16871 LoVec0 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, LoVec0);
16872 Narrow = DAG.
getNode(AArch64ISD::UZP1,
DL, NarrowVT, LoVec0, Vec1);
16875 return getSVESafeBitCast(VT, Narrow, DAG);
16883 std::optional<unsigned> PredPattern =
16895 if (
Op.getOpcode() != AArch64ISD::DUP &&
16908 SplatVal =
Op->getConstantOperandVal(0);
16909 if (
Op.getValueType().getVectorElementType() != MVT::i64)
16910 SplatVal = (int32_t)SplatVal;
16918 SplatVal = -SplatVal;
16926 EVT VT =
Op.getValueType();
16930 return LowerFixedLengthVectorIntDivideToSVE(
Op, DAG);
16932 unsigned Opc =
Op.getOpcode();
16944 DAG.
getNode(AArch64ISD::ASRD_MERGE_OP1,
DL, VT, Pg,
Op->getOperand(0),
16952 if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64) {
16954 return DAG.
getNode(MaskedOpcode,
DL, VT,
Op.getOperand(0),
Op.getOperand(1),
16961 if (VT == MVT::nxv16i8)
16962 WidenedVT = MVT::nxv8i16;
16963 else if (VT == MVT::nxv8i16)
16964 WidenedVT = MVT::nxv4i32;
16968 unsigned UnpkLo =
Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
16969 unsigned UnpkHi =
Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
16978 return DAG.
getNode(AArch64ISD::UZP1,
DL, VT, ResultLoCast, ResultHiCast);
16981bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
16982 EVT VT,
unsigned DefinedValues)
const {
16983 if (!Subtarget->isNeonAvailable())
17002 unsigned DummyUnsigned;
17010 isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
17012 isTRNMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
17013 isUZPMask(M, NumElts, DummyUnsigned) ||
17014 isZIPMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
17018 isINSMask(M, NumElts, DummyBool, DummyInt) ||
17034 Op =
Op.getOperand(0);
17036 APInt SplatBits, SplatUndef;
17037 unsigned SplatBitSize;
17039 if (!BVN || !BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
17040 HasAnyUndefs, ElementBits) ||
17041 SplatBitSize > ElementBits)
17052 assert(VT.
isVector() &&
"vector shift count is not a vector type");
17056 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
17063 assert(VT.
isVector() &&
"vector shift count is not a vector type");
17067 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
17072 EVT VT =
Op.getValueType();
17077 EVT OpVT =
Op.getOperand(0).getValueType();
17088 !Subtarget->isNeonAvailable()))
17089 return LowerFixedLengthVectorTruncateToSVE(
Op, DAG);
17092 if (VT.
is64BitVector() &&
Op.getOperand(0).getValueType().is128BitVector())
17103 unsigned &ShiftValue,
17116 ShiftValue = ShiftOp1->getZExtValue();
17125 "ResVT must be truncated or same type as the shift.");
17128 if (ShiftValue > ExtraBits && !
Add->getFlags().hasNoUnsignedWrap())
17135 uint64_t AddValue = AddOp1->getZExtValue();
17136 if (AddValue != 1ULL << (ShiftValue - 1))
17139 RShOperand =
Add->getOperand(0);
17145 EVT VT =
Op.getValueType();
17149 if (!
Op.getOperand(1).getValueType().isVector())
17153 switch (
Op.getOpcode()) {
17157 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SHL_PRED);
17159 if (
isVShiftLImm(
Op.getOperand(1), VT,
false, Cnt) && Cnt < EltSize)
17160 return DAG.
getNode(AArch64ISD::VSHL,
DL, VT,
Op.getOperand(0),
17165 Op.getOperand(0),
Op.getOperand(1));
17169 (Subtarget->hasSVE2() ||
17170 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
17172 unsigned ShiftValue;
17174 return DAG.
getNode(AArch64ISD::URSHR_I_PRED,
DL, VT,
17181 unsigned Opc =
Op.getOpcode() ==
ISD::SRA ? AArch64ISD::SRA_PRED
17182 : AArch64ISD::SRL_PRED;
17183 return LowerToPredicatedOp(
Op, DAG,
Opc);
17187 if (
isVShiftRImm(
Op.getOperand(1), VT,
false, Cnt) && Cnt < EltSize) {
17189 (
Op.getOpcode() ==
ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
17198 unsigned Opc = (
Op.getOpcode() ==
ISD::SRA) ? Intrinsic::aarch64_neon_sshl
17199 : Intrinsic::aarch64_neon_ushl;
17207 return NegShiftLeft;
17215 if (
Op.getValueType().isScalableVector())
17216 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
17219 !Subtarget->isNeonAvailable()))
17220 return LowerFixedLengthVectorSetccToSVE(
Op, DAG);
17225 EVT CmpVT =
LHS.getValueType().changeVectorElementTypeToInteger();
17228 if (
LHS.getValueType().getVectorElementType().isInteger())
17231 assert(((!Subtarget->hasFullFP16() &&
17232 LHS.getValueType().getVectorElementType() != MVT::f16) ||
17233 LHS.getValueType().getVectorElementType() != MVT::bf16 ||
17234 LHS.getValueType().getVectorElementType() != MVT::f128) &&
17235 "Unexpected type!");
17240 bool OneNaN =
false;
17261 bool NoNaNs =
Op->getFlags().hasNoNaNs();
17263 if (!
Cmp.getNode())
17292 unsigned ScalarOpcode;
17310 "Expected power-of-2 length vector");
17318 if (ElemVT == MVT::i1) {
17320 if (NumElems > 16) {
17323 EVT HalfVT =
Lo.getValueType();
17334 unsigned ExtendedWidth = 64;
17337 ExtendedWidth = 128;
17342 unsigned ExtendOp =
17351 NumElems == 2 && ExtendedWidth == 128) {
17352 Extended = DAG.
getBitcast(MVT::v4i32, Extended);
17353 ExtendedVT = MVT::i32;
17355 switch (ScalarOpcode) {
17376 VecVT =
Lo.getValueType();
17392 for (
unsigned Shift = NumElems / 2; Shift > 0; Shift /= 2) {
17397 Scalar = DAG.
getNode(ScalarOpcode,
DL, ScalarVT, Scalar, Shifted);
17410 EVT SrcVT = Src.getValueType();
17415 SrcVT == MVT::v2f16) {
17423 if (
SDValue Result = LowerReductionToSVE(
Op, DAG))
17427 switch (
Op.getOpcode()) {
17432 Op.getValueType(),
DL, DAG);
17452 EVT SrcVT = Src.getValueType();
17455 SDVTList SrcVTs = DAG.
getVTList(SrcVT, SrcVT);
17467 for (
unsigned I = 0;
I < Stages; ++
I) {
17469 Src = DAG.
getNode(BaseOpc,
DL, SrcVT, Src.getValue(0), Src.getValue(1));
17477 auto &Subtarget = DAG.
getSubtarget<AArch64Subtarget>();
17479 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
17484 MVT VT =
Op.getSimpleValueType();
17485 assert(VT != MVT::i128 &&
"Handled elsewhere, code replicated.");
17490 Op.getOperand(0),
Op.getOperand(1),
RHS,
17495AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
SDValue Op,
17500 SDNode *
Node =
Op.getNode();
17505 EVT VT =
Node->getValueType(0);
17508 "no-stack-arg-probe")) {
17510 Chain =
SP.getValue(1);
17520 RTLIB::LibcallImpl ChkStkImpl =
getLibcallImpl(RTLIB::STACK_PROBE);
17521 if (ChkStkImpl == RTLIB::Unsupported)
17530 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
17531 const uint32_t *
Mask =
TRI->getWindowsStackProbePreservedMask();
17532 if (Subtarget->hasCustomCallingConv())
17540 Chain, Callee, DAG.
getRegister(AArch64::X15, MVT::i64),
17551 Chain =
SP.getValue(1);
17565AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(
SDValue Op,
17568 SDNode *
Node =
Op.getNode();
17575 EVT VT =
Node->getValueType(0);
17579 Chain =
SP.getValue(1);
17586 Chain = DAG.
getNode(AArch64ISD::PROBED_ALLOCA,
DL, MVT::Other, Chain, SP);
17592AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(
SDValue Op,
17596 if (Subtarget->isTargetWindows())
17597 return LowerWindowsDYNAMIC_STACKALLOC(
Op, DAG);
17599 return LowerInlineDYNAMIC_STACKALLOC(
Op, DAG);
17605 unsigned NewOp)
const {
17606 if (Subtarget->hasSVE2())
17607 return LowerToPredicatedOp(
Op, DAG, NewOp);
17615 EVT VT =
Op.getValueType();
17616 assert(VT != MVT::i64 &&
"Expected illegal VSCALE node");
17619 APInt MulImm =
Op.getConstantOperandAPInt(0);
17625template <
unsigned NumVecs>
17635 for (
unsigned I = 0;
I < NumVecs; ++
I)
17644 Info.align.reset();
17655 auto &
DL =
I.getDataLayout();
17657 case Intrinsic::aarch64_sve_st2:
17661 case Intrinsic::aarch64_sve_st3:
17665 case Intrinsic::aarch64_sve_st4:
17669 case Intrinsic::aarch64_neon_ld2:
17670 case Intrinsic::aarch64_neon_ld3:
17671 case Intrinsic::aarch64_neon_ld4:
17672 case Intrinsic::aarch64_neon_ld1x2:
17673 case Intrinsic::aarch64_neon_ld1x3:
17674 case Intrinsic::aarch64_neon_ld1x4: {
17676 uint64_t NumElts =
DL.getTypeSizeInBits(
I.getType()) / 64;
17678 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17680 Info.align.reset();
17686 case Intrinsic::aarch64_neon_ld2lane:
17687 case Intrinsic::aarch64_neon_ld3lane:
17688 case Intrinsic::aarch64_neon_ld4lane:
17689 case Intrinsic::aarch64_neon_ld2r:
17690 case Intrinsic::aarch64_neon_ld3r:
17691 case Intrinsic::aarch64_neon_ld4r: {
17694 Type *RetTy =
I.getType();
17696 unsigned NumElts = StructTy->getNumElements();
17697 Type *VecTy = StructTy->getElementType(0);
17700 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17702 Info.align.reset();
17708 case Intrinsic::aarch64_neon_st2:
17709 case Intrinsic::aarch64_neon_st3:
17710 case Intrinsic::aarch64_neon_st4:
17711 case Intrinsic::aarch64_neon_st1x2:
17712 case Intrinsic::aarch64_neon_st1x3:
17713 case Intrinsic::aarch64_neon_st1x4: {
17715 unsigned NumElts = 0;
17716 for (
const Value *Arg :
I.args()) {
17717 Type *ArgTy = Arg->getType();
17720 NumElts +=
DL.getTypeSizeInBits(ArgTy) / 64;
17723 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17725 Info.align.reset();
17731 case Intrinsic::aarch64_neon_st2lane:
17732 case Intrinsic::aarch64_neon_st3lane:
17733 case Intrinsic::aarch64_neon_st4lane: {
17735 unsigned NumElts = 0;
17737 Type *VecTy =
I.getArgOperand(0)->getType();
17740 for (
const Value *Arg :
I.args()) {
17741 Type *ArgTy = Arg->getType();
17748 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17750 Info.align.reset();
17756 case Intrinsic::aarch64_ldaxr:
17757 case Intrinsic::aarch64_ldxr: {
17758 Type *ValTy =
I.getParamElementType(0);
17761 Info.ptrVal =
I.getArgOperand(0);
17763 Info.align =
DL.getABITypeAlign(ValTy);
17768 case Intrinsic::aarch64_stlxr:
17769 case Intrinsic::aarch64_stxr: {
17770 Type *ValTy =
I.getParamElementType(1);
17773 Info.ptrVal =
I.getArgOperand(1);
17775 Info.align =
DL.getABITypeAlign(ValTy);
17780 case Intrinsic::aarch64_ldaxp:
17781 case Intrinsic::aarch64_ldxp:
17783 Info.memVT = MVT::i128;
17784 Info.ptrVal =
I.getArgOperand(0);
17786 Info.align =
Align(16);
17790 case Intrinsic::aarch64_stlxp:
17791 case Intrinsic::aarch64_stxp:
17793 Info.memVT = MVT::i128;
17794 Info.ptrVal =
I.getArgOperand(2);
17796 Info.align =
Align(16);
17800 case Intrinsic::aarch64_sve_ldnt1: {
17804 Info.ptrVal =
I.getArgOperand(1);
17806 Info.align =
DL.getABITypeAlign(ElTy);
17811 case Intrinsic::aarch64_sve_stnt1: {
17815 Info.memVT =
MVT::getVT(
I.getOperand(0)->getType());
17816 Info.ptrVal =
I.getArgOperand(2);
17818 Info.align =
DL.getABITypeAlign(ElTy);
17823 case Intrinsic::aarch64_mops_memset_tag: {
17824 Value *Dst =
I.getArgOperand(0);
17825 Value *Val =
I.getArgOperand(1);
17830 Info.align =
I.getParamAlign(0).valueOrOne();
17844 std::optional<unsigned> ByteOffset)
const {
17861 Base.getOperand(1).hasOneUse() &&
17868 uint64_t ShiftAmount =
Base.getOperand(1).getConstantOperandVal(1);
17870 if (ShiftAmount ==
Log2_32(LoadBytes))
17880 if ((VT == MVT::i64 || VT == MVT::i32) && Extend->
use_size()) {
17899 return NumBits1 > NumBits2;
17906 return NumBits1 > NumBits2;
17913 if (
I->getOpcode() != Instruction::FMul)
17916 if (!
I->hasOneUse())
17921 if (!(
User->getOpcode() == Instruction::FSub ||
17922 User->getOpcode() == Instruction::FAdd))
17933 I->getFastMathFlags().allowContract()));
17943 return NumBits1 == 32 && NumBits2 == 64;
17950 return NumBits1 == 32 && NumBits2 == 64;
17968bool AArch64TargetLowering::isExtFreeImpl(
const Instruction *Ext)
const {
17976 for (
const Use &U : Ext->
uses()) {
17984 switch (Instr->getOpcode()) {
17985 case Instruction::Shl:
17989 case Instruction::GetElementPtr: {
17992 std::advance(GTI, U.getOperandNo()-1);
18005 if (ShiftAmt == 0 || ShiftAmt > 4)
18009 case Instruction::Trunc:
18026 unsigned NumElts,
bool IsLittleEndian,
18028 if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth > 64)
18031 assert(DstWidth % SrcWidth == 0 &&
18032 "TBL lowering is not supported for a conversion instruction with this "
18033 "source and destination element type.");
18035 unsigned Factor = DstWidth / SrcWidth;
18036 unsigned MaskLen = NumElts * Factor;
18039 Mask.resize(MaskLen, NumElts);
18041 unsigned SrcIndex = 0;
18042 for (
unsigned I = IsLittleEndian ? 0 : Factor - 1;
I < MaskLen;
I += Factor)
18043 Mask[
I] = SrcIndex++;
18051 bool IsLittleEndian) {
18053 unsigned NumElts = SrcTy->getNumElements();
18061 auto *FirstEltZero = Builder.CreateInsertElement(
18063 Value *Result = Builder.CreateShuffleVector(
Op, FirstEltZero, Mask);
18064 Result = Builder.CreateBitCast(Result, DstTy);
18065 if (DstTy != ZExtTy)
18066 Result = Builder.CreateZExt(Result, ZExtTy);
18072 bool IsLittleEndian) {
18079 !IsLittleEndian, Mask))
18082 auto *FirstEltZero = Builder.CreateInsertElement(
18085 return Builder.CreateShuffleVector(
Op, FirstEltZero, Mask);
18094 assert(SrcTy->getElementType()->isIntegerTy() &&
18095 "Non-integer type source vector element is not supported");
18096 assert(DstTy->getElementType()->isIntegerTy(8) &&
18097 "Unsupported destination vector element type");
18098 unsigned SrcElemTySz =
18100 unsigned DstElemTySz =
18102 assert((SrcElemTySz % DstElemTySz == 0) &&
18103 "Cannot lower truncate to tbl instructions for a source element size "
18104 "that is not divisible by the destination element size");
18105 unsigned TruncFactor = SrcElemTySz / DstElemTySz;
18106 assert((SrcElemTySz == 16 || SrcElemTySz == 32 || SrcElemTySz == 64) &&
18107 "Unsupported source vector element type size");
18115 for (
int Itr = 0; Itr < 16; Itr++) {
18116 if (Itr < NumElements)
18118 IsLittleEndian ? Itr * TruncFactor
18119 : Itr * TruncFactor + (TruncFactor - 1)));
18121 MaskConst.
push_back(Builder.getInt8(255));
18124 int MaxTblSz = 128 * 4;
18125 int MaxSrcSz = SrcElemTySz * NumElements;
18127 (MaxTblSz > MaxSrcSz) ? NumElements : (MaxTblSz / SrcElemTySz);
18128 assert(ElemsPerTbl <= 16 &&
18129 "Maximum elements selected using TBL instruction cannot exceed 16!");
18131 int ShuffleCount = 128 / SrcElemTySz;
18133 for (
int i = 0; i < ShuffleCount; ++i)
18140 while (ShuffleLanes.
back() < NumElements) {
18142 Builder.CreateShuffleVector(TI->
getOperand(0), ShuffleLanes), VecTy));
18144 if (Parts.
size() == 4) {
18147 Builder.CreateIntrinsic(Intrinsic::aarch64_neon_tbl4, VecTy, Parts));
18151 for (
int i = 0; i < ShuffleCount; ++i)
18152 ShuffleLanes[i] += ShuffleCount;
18156 "Lowering trunc for vectors requiring different TBL instructions is "
18160 if (!Parts.
empty()) {
18162 switch (Parts.
size()) {
18164 TblID = Intrinsic::aarch64_neon_tbl1;
18167 TblID = Intrinsic::aarch64_neon_tbl2;
18170 TblID = Intrinsic::aarch64_neon_tbl3;
18175 Results.push_back(Builder.CreateIntrinsic(TblID, VecTy, Parts));
18180 assert(
Results.size() <= 2 &&
"Trunc lowering does not support generation of "
18181 "more than 2 tbl instructions!");
18184 if (ElemsPerTbl < 16) {
18186 std::iota(FinalMask.
begin(), FinalMask.
end(), 0);
18187 FinalResult = Builder.CreateShuffleVector(
Results[0], FinalMask);
18191 if (ElemsPerTbl < 16) {
18192 std::iota(FinalMask.
begin(), FinalMask.
begin() + ElemsPerTbl, 0);
18193 std::iota(FinalMask.
begin() + ElemsPerTbl, FinalMask.
end(), 16);
18195 std::iota(FinalMask.
begin(), FinalMask.
end(), 0);
18209 if (!
EnableExtToTBL || Subtarget->useSVEForFixedLengthVectors())
18217 if (!L || L->getHeader() !=
I->getParent() ||
F->hasOptSize())
18222 if (!SrcTy || !DstTy)
18229 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
18230 auto DstWidth = DstTy->getElementType()->getScalarSizeInBits();
18231 if (DstWidth % 8 != 0)
18234 auto *TruncDstType =
18238 auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits();
18239 if (
TTI.getCastInstrCost(
I->getOpcode(), DstTy, TruncDstType,
18242 if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits())
18245 DstTy = TruncDstType;
18253 if (SrcWidth * 4 <= DstWidth) {
18254 if (
all_of(
I->users(), [&](
auto *U) {
18255 using namespace llvm::PatternMatch;
18256 auto *SingleUser = cast<Instruction>(&*U);
18257 if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
18259 if (match(SingleUser,
18260 m_Intrinsic<Intrinsic::vector_partial_reduce_add>(
18261 m_Value(), m_Specific(I))))
18268 if (DstTy->getScalarSizeInBits() >= 64)
18274 DstTy, Subtarget->isLittleEndian());
18277 ZExt->replaceAllUsesWith(Result);
18278 ZExt->eraseFromParent();
18283 if (UIToFP && ((SrcTy->getElementType()->isIntegerTy(8) &&
18284 DstTy->getElementType()->isFloatTy()) ||
18285 (SrcTy->getElementType()->isIntegerTy(16) &&
18286 DstTy->getElementType()->isDoubleTy()))) {
18291 assert(ZExt &&
"Cannot fail for the i8 to float conversion");
18292 auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
18293 I->replaceAllUsesWith(UI);
18294 I->eraseFromParent();
18299 if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
18300 DstTy->getElementType()->isFloatTy()) {
18304 Subtarget->isLittleEndian());
18305 assert(Shuffle &&
"Cannot fail for the i8 to float conversion");
18307 auto *AShr = Builder.CreateAShr(Cast, 24,
"",
true);
18308 auto *
SI = Builder.CreateSIToFP(AShr, DstTy);
18309 I->replaceAllUsesWith(
SI);
18310 I->eraseFromParent();
18318 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
18319 SrcTy->getElementType()->isFloatTy() &&
18320 DstTy->getElementType()->isIntegerTy(8)) {
18322 auto *WideConv = Builder.CreateFPToUI(FPToUI->getOperand(0),
18324 auto *TruncI = Builder.CreateTrunc(WideConv, DstTy);
18325 I->replaceAllUsesWith(TruncI);
18326 I->eraseFromParent();
18336 if (TI && DstTy->getElementType()->isIntegerTy(8) &&
18337 ((SrcTy->getElementType()->isIntegerTy(32) ||
18338 SrcTy->getElementType()->isIntegerTy(64)) &&
18339 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
18348 Align &RequiredAlignment)
const {
18353 RequiredAlignment =
Align(1);
18355 return NumBits == 32 || NumBits == 64;
18362 unsigned VecSize = 128;
18366 VecSize = std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
18367 return std::max<unsigned>(1, (MinElts * ElSize + 127) / VecSize);
18372 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
18382 unsigned MinElts = EC.getKnownMinValue();
18384 UseScalable =
false;
18387 (!Subtarget->useSVEForFixedLengthVectors() ||
18392 !Subtarget->isSVEorStreamingSVEAvailable())
18400 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
18403 if (EC.isScalable()) {
18404 UseScalable =
true;
18405 return isPowerOf2_32(MinElts) && (MinElts * ElSize) % 128 == 0;
18408 unsigned VecSize =
DL.getTypeSizeInBits(VecTy);
18409 if (Subtarget->useSVEForFixedLengthVectors()) {
18410 unsigned MinSVEVectorSize =
18411 std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
18412 if (VecSize % MinSVEVectorSize == 0 ||
18414 (!Subtarget->isNeonAvailable() || VecSize > 128))) {
18415 UseScalable =
true;
18422 return Subtarget->isNeonAvailable() && (VecSize == 64 || VecSize % 128 == 0);
18454 bool Scalable,
Type *LDVTy,
18456 assert(Factor >= 2 && Factor <= 4 &&
"Invalid interleave factor");
18457 static const Intrinsic::ID SVELoads[3] = {Intrinsic::aarch64_sve_ld2_sret,
18458 Intrinsic::aarch64_sve_ld3_sret,
18459 Intrinsic::aarch64_sve_ld4_sret};
18460 static const Intrinsic::ID NEONLoads[3] = {Intrinsic::aarch64_neon_ld2,
18461 Intrinsic::aarch64_neon_ld3,
18462 Intrinsic::aarch64_neon_ld4};
18472 bool Scalable,
Type *STVTy,
18474 assert(Factor >= 2 && Factor <= 4 &&
"Invalid interleave factor");
18475 static const Intrinsic::ID SVEStores[3] = {Intrinsic::aarch64_sve_st2,
18476 Intrinsic::aarch64_sve_st3,
18477 Intrinsic::aarch64_sve_st4};
18478 static const Intrinsic::ID NEONStores[3] = {Intrinsic::aarch64_neon_st2,
18479 Intrinsic::aarch64_neon_st3,
18480 Intrinsic::aarch64_neon_st4};
18504 "Invalid interleave factor");
18505 assert(!Shuffles.
empty() &&
"Empty shufflevector input");
18507 "Unmatched number of shufflevectors and indices");
18512 assert(!Mask && GapMask.
popcount() == Factor &&
"Unexpected mask on a load");
18531 SI->getType()->getScalarSizeInBits() * 4 ==
18532 SI->user_back()->getType()->getScalarSizeInBits();
18542 Type *EltTy = FVTy->getElementType();
18550 FVTy->getNumElements() / NumLoads);
18558 Value *BaseAddr = LI->getPointerOperand();
18560 Type *PtrTy = LI->getPointerOperandType();
18562 LDVTy->getElementCount());
18565 UseScalable, LDVTy, PtrTy);
18572 Value *PTrue =
nullptr;
18574 std::optional<unsigned> PgPattern =
18576 if (Subtarget->getMinSVEVectorSizeInBits() ==
18577 Subtarget->getMaxSVEVectorSizeInBits() &&
18578 Subtarget->getMinSVEVectorSizeInBits() ==
DL.getTypeSizeInBits(FVTy))
18579 PgPattern = AArch64SVEPredPattern::all;
18583 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
18587 for (
unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
18592 BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
18593 FVTy->getNumElements() * Factor);
18597 LdN = Builder.CreateCall(LdNFunc, {PTrue, BaseAddr},
"ldN");
18599 LdN = Builder.CreateCall(LdNFunc, BaseAddr,
"ldN");
18602 for (
unsigned i = 0; i < Shuffles.
size(); i++) {
18604 unsigned Index = Indices[i];
18606 Value *SubVec = Builder.CreateExtractValue(LdN, Index);
18609 SubVec = Builder.CreateExtractVector(FVTy, SubVec,
uint64_t(0));
18613 SubVec = Builder.CreateIntToPtr(
18615 FVTy->getNumElements()));
18617 SubVecs[SVI].push_back(SubVec);
18626 auto &SubVec = SubVecs[SVI];
18629 SVI->replaceAllUsesWith(WideVec);
18635template <
typename Iter>
18637 int MaxLookupDist = 20;
18638 unsigned IdxWidth =
DL.getIndexSizeInBits(0);
18639 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
18640 const Value *PtrA1 =
18643 while (++It != End) {
18644 if (It->isDebugOrPseudoInst())
18646 if (MaxLookupDist-- == 0)
18649 const Value *PtrB1 =
18650 SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(
18652 if (PtrA1 == PtrB1 &&
18653 (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.
sextOrTrunc(IdxWidth))
18692 const APInt &GapMask)
const {
18695 "Invalid interleave factor");
18700 "Unexpected mask on store");
18703 assert(VecTy->getNumElements() % Factor == 0 &&
"Invalid interleaved store");
18705 unsigned LaneLen = VecTy->getNumElements() / Factor;
18706 Type *EltTy = VecTy->getElementType();
18727 Type *IntTy =
DL.getIntPtrType(EltTy);
18728 unsigned NumOpElts =
18733 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
18734 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
18741 LaneLen /= NumStores;
18748 Value *BaseAddr =
SI->getPointerOperand();
18762 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
18781 if (Factor == 2 &&
SI->hasMetadata(LLVMContext::MD_nontemporal) &&
18782 !
F->hasOptSize() && !
F->hasMinSize() &&
18786 Type *PtrTy =
SI->getPointerOperandType();
18788 STVTy->getElementCount());
18791 UseScalable, STVTy, PtrTy);
18793 Value *PTrue =
nullptr;
18795 std::optional<unsigned> PgPattern =
18797 if (Subtarget->getMinSVEVectorSizeInBits() ==
18798 Subtarget->getMaxSVEVectorSizeInBits() &&
18799 Subtarget->getMinSVEVectorSizeInBits() ==
18800 DL.getTypeSizeInBits(SubVecTy))
18801 PgPattern = AArch64SVEPredPattern::all;
18805 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
18809 for (
unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
18814 for (
unsigned i = 0; i < Factor; i++) {
18816 unsigned IdxI = StoreCount * LaneLen * Factor + i;
18817 if (Mask[IdxI] >= 0) {
18818 Shuffle = Builder.CreateShuffleVector(
18821 unsigned StartMask = 0;
18822 for (
unsigned j = 1; j < LaneLen; j++) {
18823 unsigned IdxJ = StoreCount * LaneLen * Factor + j * Factor + i;
18824 if (Mask[IdxJ] >= 0) {
18825 StartMask = Mask[IdxJ] - j;
18834 Shuffle = Builder.CreateShuffleVector(
18842 Ops.push_back(Shuffle);
18846 Ops.push_back(PTrue);
18850 if (StoreCount > 0)
18851 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
18852 BaseAddr, LaneLen * Factor);
18854 Ops.push_back(BaseAddr);
18855 Builder.CreateCall(StNFunc,
Ops);
18863 if (Factor != 2 && Factor != 3 && Factor != 4) {
18864 LLVM_DEBUG(
dbgs() <<
"Matching ld2, ld3 and ld4 patterns failed\n");
18870 assert(!Mask &&
"Unexpected mask on a load\n");
18874 const DataLayout &
DL = LI->getModule()->getDataLayout();
18889 Type *PtrTy = LI->getPointerOperandType();
18891 UseScalable, LdTy, PtrTy);
18894 Value *Pred =
nullptr;
18897 Builder.CreateVectorSplat(LdTy->
getElementCount(), Builder.getTrue());
18899 Value *BaseAddr = LI->getPointerOperand();
18900 Value *Result =
nullptr;
18901 if (NumLoads > 1) {
18904 for (
unsigned I = 0;
I < NumLoads; ++
I) {
18908 Value *LdN =
nullptr;
18910 LdN = Builder.CreateCall(LdNFunc, {Pred,
Address},
"ldN");
18912 LdN = Builder.CreateCall(LdNFunc,
Address,
"ldN");
18915 for (
unsigned J = 0; J < Factor; ++J) {
18916 ExtractedLdValues[J] = Builder.CreateInsertVector(
18917 VTy, ExtractedLdValues[J], Builder.CreateExtractValue(LdN, J), Idx);
18924 for (
unsigned J = 0; J < Factor; ++J)
18925 Result = Builder.CreateInsertValue(Result, ExtractedLdValues[J], J);
18928 Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr},
"ldN");
18930 Result = Builder.CreateCall(LdNFunc, BaseAddr,
"ldN");
18941 unsigned Factor = InterleavedValues.
size();
18942 if (Factor != 2 && Factor != 3 && Factor != 4) {
18943 LLVM_DEBUG(
dbgs() <<
"Matching st2, st3 and st4 patterns failed\n");
18949 assert(!Mask &&
"Unexpected mask on plain store");
18969 Type *PtrTy =
SI->getPointerOperandType();
18971 UseScalable, StTy, PtrTy);
18975 Value *BaseAddr =
SI->getPointerOperand();
18976 Value *Pred =
nullptr;
18980 Builder.CreateVectorSplat(StTy->
getElementCount(), Builder.getTrue());
18982 auto ExtractedValues = InterleavedValues;
18987 for (
unsigned I = 0;
I < NumStores; ++
I) {
18989 if (NumStores > 1) {
18994 for (
unsigned J = 0; J < Factor; J++) {
18996 Builder.CreateExtractVector(StTy, ExtractedValues[J], Idx);
18999 StoreOperands[StoreOperands.
size() - 1] =
Address;
19001 Builder.CreateCall(StNFunc, StoreOperands);
19008 const AttributeList &FuncAttributes)
const {
19009 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
19010 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
19011 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
19016 bool IsSmallZeroMemset =
Op.isMemset() &&
Op.size() < 32 &&
Op.isZeroMemset();
19017 auto AlignmentIsAcceptable = [&](
EVT VT,
Align AlignCheck) {
19018 if (
Op.isAligned(AlignCheck))
19028 if (CanUseNEON &&
Op.isMemset() && !IsSmallZeroMemset &&
19029 AlignmentIsAcceptable(MVT::v16i8,
Align(1)))
19031 if (CanUseFP && !IsSmallZeroMemset &&
19032 AlignmentIsAcceptable(MVT::f128,
Align(16)))
19034 if (
Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64,
Align(8)))
19036 if (
Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32,
Align(4)))
19042 LLVMContext &Context, std::vector<EVT> &MemOps,
unsigned Limit,
19043 const MemOp &
Op,
unsigned DstAS,
unsigned SrcAS,
19044 const AttributeList &FuncAttributes,
EVT *LargestVT)
const {
19048 if (VT == MVT::v16i8 &&
Op.isMemset() && !
Op.isZeroMemset() &&
19050 unsigned Size =
Op.size();
19051 unsigned RemainingSize =
Size;
19057 while (RemainingSize > 0) {
19061 if (RemainingSize >= 8) {
19062 TargetVT = MVT::i64;
19063 RemainingSize -= 8;
19064 }
else if (RemainingSize >= 4) {
19065 TargetVT = MVT::i32;
19066 RemainingSize -= 4;
19067 }
else if (RemainingSize >= 2) {
19068 TargetVT = MVT::i16;
19069 RemainingSize -= 2;
19070 }
else if (RemainingSize >= 1) {
19071 TargetVT = MVT::i8;
19072 RemainingSize -= 1;
19078 MemOps.push_back(TargetVT);
19084 if (RemainingSize == 0 && !MemOps.empty()) {
19095 Context, MemOps, Limit,
Op, DstAS, SrcAS, FuncAttributes, LargestVT);
19099 const MemOp &
Op,
const AttributeList &FuncAttributes)
const {
19100 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
19101 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
19102 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
19107 bool IsSmallZeroMemset =
Op.isMemset() &&
Op.size() < 32 &&
Op.isZeroMemset();
19108 auto AlignmentIsAcceptable = [&](
EVT VT,
Align AlignCheck) {
19109 if (
Op.isAligned(AlignCheck))
19119 if (CanUseNEON &&
Op.isMemset() && !IsSmallZeroMemset &&
19120 AlignmentIsAcceptable(MVT::v16i8,
Align(1)))
19122 if (CanUseFP && !IsSmallZeroMemset &&
19123 AlignmentIsAcceptable(MVT::f128,
Align(16)))
19125 if (
Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64,
Align(8)))
19127 if (
Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32,
Align(4)))
19134 if (Immed == std::numeric_limits<int64_t>::min()) {
19143 if (!Subtarget->hasSVE2())
19162 return std::abs(Imm / 8) <= 16;
19165 return std::abs(Imm / 4) <= 16;
19168 return std::abs(Imm / 2) <= 16;
19195 if (Insn.
size() > 1)
19232 if (AM.
Scale == 1) {
19235 }
else if (AM.
Scale == 2) {
19247 if (Ty->isScalableTy()) {
19253 uint64_t VecNumBytes =
DL.getTypeSizeInBits(Ty).getKnownMinValue() / 8;
19275 if (Ty->isSized()) {
19276 uint64_t NumBits =
DL.getTypeSizeInBits(Ty);
19277 NumBytes = NumBits / 8;
19282 return Subtarget->getInstrInfo()->isLegalAddressingMode(NumBytes, AM.
BaseOffs,
19290 int64_t MaxOffset)
const {
19291 int64_t HighPart = MinOffset & ~0xfffULL;
19314 return Subtarget->hasFullFP16();
19320 Subtarget->isNonStreamingSVEorSME2Available();
19330 switch (Ty->getScalarType()->getTypeID()) {
19350 static const MCPhysReg ScratchRegs[] = {
19351 AArch64::X16, AArch64::X17, AArch64::LR, 0
19353 return ScratchRegs;
19357 static const MCPhysReg RCRegs[] = {AArch64::FPCR};
19366 "Expected shift op");
19368 SDValue ShiftLHS =
N->getOperand(0);
19369 EVT VT =
N->getValueType(0);
19390 return SRLC->getZExtValue() == SHLC->getZExtValue();
19402 (
N->getOperand(0).getOpcode() ==
ISD::SHL ||
19403 N->getOperand(0).getOpcode() ==
ISD::SRL) &&
19404 "Expected XOR(SHIFT) pattern");
19409 if (XorC && ShiftC) {
19410 unsigned MaskIdx, MaskLen;
19411 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
19412 unsigned ShiftAmt = ShiftC->getZExtValue();
19413 unsigned BitWidth =
N->getValueType(0).getScalarSizeInBits();
19414 if (
N->getOperand(0).getOpcode() ==
ISD::SHL)
19415 return MaskIdx == ShiftAmt && MaskLen == (
BitWidth - ShiftAmt);
19416 return MaskIdx == 0 && MaskLen == (
BitWidth - ShiftAmt);
19426 N->getOperand(0).getOpcode() ==
ISD::SRL) ||
19428 N->getOperand(0).getOpcode() ==
ISD::SHL)) &&
19429 "Expected shift-shift mask");
19431 if (!
N->getOperand(0)->hasOneUse())
19435 EVT VT =
N->getValueType(0);
19436 if (
N->getOpcode() ==
ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
19439 return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
19444 if (
N->getOpcode() ==
ISD::SHL &&
N->hasOneUse()) {
19446 unsigned ShlAmt = C2->getZExtValue();
19447 if (
auto ShouldADD = *
N->user_begin();
19448 ShouldADD->getOpcode() ==
ISD::ADD && ShouldADD->hasOneUse()) {
19450 EVT MemVT = Load->getMemoryVT();
19452 if (Load->getValueType(0).isScalableVector())
19466 unsigned BinOpcode,
EVT VT,
unsigned SelectOpcode,
SDValue X,
19474 assert(Ty->isIntegerTy());
19476 unsigned BitSize = Ty->getPrimitiveSizeInBits();
19480 int64_t Val = Imm.getSExtValue();
19487 Val &= (1LL << 32) - 1;
19495 unsigned Index)
const {
19517 EVT VT =
N->getValueType(0);
19518 if (!Subtarget->hasNEON() || !VT.
isVector())
19532 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.
getSizeInBits() - 1)
19567 if (
N->getValueType(0) != MVT::i32)
19570 SDValue VecReduceOp0 =
N->getOperand(0);
19571 bool SawTrailingZext =
false;
19577 SawTrailingZext =
true;
19582 MVT AbsInputVT = SawTrailingZext ? MVT::v16i16 : MVT::v16i32;
19584 unsigned Opcode = VecReduceOp0.
getOpcode();
19590 if (ABS->getOperand(0)->getOpcode() !=
ISD::SUB ||
19591 ABS->getOperand(0)->getValueType(0) != AbsInputVT)
19594 SDValue SUB = ABS->getOperand(0);
19595 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
19596 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
19598 if (SUB->getOperand(0)->getValueType(0) != AbsInputVT ||
19599 SUB->getOperand(1)->getValueType(0) != AbsInputVT)
19603 bool IsZExt =
false;
19611 SDValue EXT0 = SUB->getOperand(0);
19612 SDValue EXT1 = SUB->getOperand(1);
19629 UABDHigh8Op0, UABDHigh8Op1);
19640 UABDLo8Op0, UABDLo8Op1);
19664 if (!
N->getValueType(0).isScalableVector() ||
19665 !ST->isSVEorStreamingSVEAvailable() ||
19666 !(ST->hasSVE2p1() || ST->hasSME2()))
19671 return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR;
19674 auto MaskEC =
N->getValueType(0).getVectorElementCount();
19675 if (!MaskEC.isKnownMultipleOf(NumExts))
19689 if (
Use->getValueType(0).getVectorElementCount() != ExtMinEC)
19693 unsigned Offset =
Use->getConstantOperandVal(1);
19695 if (Extracts[Part] !=
nullptr)
19698 Extracts[Part] =
Use;
19714 EVT ExtVT = Extracts[0]->getValueType(0);
19718 DCI.
CombineTo(Extracts[0], R.getValue(0));
19719 DCI.
CombineTo(Extracts[1], R.getValue(1));
19723 if (NumExts == 2) {
19724 assert(
N->getValueType(0) == DoubleExtVT);
19730 for (
unsigned I = 2;
I < NumExts;
I += 2) {
19735 DCI.
CombineTo(Extracts[
I + 1], R.getValue(1));
19737 R.getValue(0), R.getValue(1)));
19769 if (!ST->isNeonAvailable())
19772 if (!ST->hasDotProd())
19790 unsigned DotOpcode;
19794 if (
A.getOperand(0).getValueType() !=
B.getOperand(0).getValueType())
19796 auto OpCodeA =
A.getOpcode();
19800 auto OpCodeB =
B.getOpcode();
19804 if (OpCodeA == OpCodeB) {
19809 if (!ST->hasMatMulInt8())
19811 DotOpcode = AArch64ISD::USDOT;
19816 DotOpcode = AArch64ISD::UDOT;
19818 DotOpcode = AArch64ISD::SDOT;
19823 EVT Op0VT =
A.getOperand(0).getValueType();
19826 if (!IsValidElementCount || !IsValidSize)
19835 B =
B.getOperand(0);
19838 unsigned NumOfVecReduce;
19840 if (IsMultipleOf16) {
19842 TargetType = MVT::v4i32;
19845 TargetType = MVT::v2i32;
19848 if (NumOfVecReduce == 1) {
19851 A.getOperand(0),
B);
19858 for (;
I < VecReduce16Num;
I += 1) {
19877 if (VecReduce8Num == 0)
19878 return VecReduceAdd16;
19900 auto DetectAddExtract = [&](
SDValue A) {
19904 EVT VT =
A.getValueType();
19929 : AArch64ISD::SADDLP;
19933 if (
SDValue R = DetectAddExtract(
A))
19936 if (
A.getOperand(0).getOpcode() ==
ISD::ADD &&
A.getOperand(0).hasOneUse())
19940 if (
A.getOperand(1).getOpcode() ==
ISD::ADD &&
A.getOperand(1).hasOneUse())
19953 EVT VT =
A.getValueType();
19954 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
19965 if (ExtVT0 != ExtVT1 ||
19980 return DAG.
getNode(AArch64ISD::NVCAST,
SDLoc(
A), MVT::v8i16, Uaddlv);
19997 MVT OpVT =
A.getSimpleValueType();
19998 assert(
N->getSimpleValueType(0) == OpVT &&
19999 "The operand type should be consistent with the result type of UADDV");
20003 if (KnownLeadingLanes.
isZero())
20013 APInt DemandedElts =
20032AArch64TargetLowering::BuildSDIVPow2(
SDNode *
N,
const APInt &Divisor,
20039 EVT VT =
N->getValueType(0);
20044 if (VT.
isVector() && Subtarget->isSVEorStreamingSVEAvailable())
20048 if ((VT != MVT::i32 && VT != MVT::i64) ||
20054 if (Divisor == 2 ||
20055 Divisor == APInt(Divisor.
getBitWidth(), -2,
true))
20062AArch64TargetLowering::BuildSREMPow2(
SDNode *
N,
const APInt &Divisor,
20069 EVT VT =
N->getValueType(0);
20077 if ((VT != MVT::i32 && VT != MVT::i64) ||
20093 CSNeg = DAG.
getNode(AArch64ISD::CSNEG,
DL, VT,
And,
And, CCVal, Cmp);
20104 CSNeg = DAG.
getNode(AArch64ISD::CSNEG,
DL, VT, AndPos, AndNeg, CCVal,
20119 case Intrinsic::aarch64_sve_cntb:
20120 case Intrinsic::aarch64_sve_cnth:
20121 case Intrinsic::aarch64_sve_cntw:
20122 case Intrinsic::aarch64_sve_cntd:
20132 if (IID == Intrinsic::aarch64_sve_cntp)
20133 return Op.getOperand(1).getValueType().getVectorElementCount();
20135 case Intrinsic::aarch64_sve_cntd:
20137 case Intrinsic::aarch64_sve_cntw:
20139 case Intrinsic::aarch64_sve_cnth:
20141 case Intrinsic::aarch64_sve_cntb:
20144 return std::nullopt;
20171 return TypeNode->
getVT();
20181 if (Mask == UCHAR_MAX)
20183 else if (Mask == USHRT_MAX)
20185 else if (Mask == UINT_MAX)
20207 unsigned ExtendOpcode = Extend.
getOpcode();
20224 if (PreExtendType == MVT::Other ||
20229 bool SeenZExtOrSExt = !IsAnyExt;
20237 unsigned Opc =
Op.getOpcode();
20248 if (SeenZExtOrSExt && OpcIsSExt != IsSExt)
20251 IsSExt = OpcIsSExt;
20252 SeenZExtOrSExt =
true;
20260 EVT PreExtendLegalType =
20266 PreExtendLegalType));
20277 unsigned ExtOpc = !SeenZExtOrSExt
20280 return DAG.
getNode(ExtOpc,
DL, VT, NBV);
20287 EVT VT =
Mul->getValueType(0);
20288 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
20299 return DAG.
getNode(
Mul->getOpcode(),
DL, VT, Op0 ? Op0 :
Mul->getOperand(0),
20300 Op1 ? Op1 :
Mul->getOperand(1));
20315 EVT VT =
Mul->getValueType(0);
20317 int ConstMultiplier =
20323 unsigned AbsConstValue =
abs(ConstMultiplier);
20324 unsigned OperandShift =
20333 unsigned B = ConstMultiplier < 0 ? 32 : 31;
20334 unsigned CeilAxOverB = (AbsConstValue + (
B - 1)) /
B;
20338 if (LowerBound > UpperBound)
20343 int Shift = std::min(std::max( 0, LowerBound), UpperBound);
20346 int32_t RdsvlMul = (AbsConstValue >> (OperandShift + Shift)) *
20347 (ConstMultiplier < 0 ? -1 : 1);
20348 auto Rdsvl = DAG.
getNode(AArch64ISD::RDSVL,
DL, MVT::i64,
20361 EVT VT =
N->getValueType(0);
20362 if (VT != MVT::v2i64 && VT != MVT::v1i64 && VT != MVT::v2i32 &&
20363 VT != MVT::v4i32 && VT != MVT::v4i16 && VT != MVT::v8i16)
20365 if (
N->getOperand(0).getOpcode() !=
ISD::AND ||
20366 N->getOperand(0).getOperand(0).getOpcode() !=
ISD::SRL)
20379 if (!V1.
isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
20380 V3 != (HalfSize - 1))
20391 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, CM);
20399 EVT VT =
N->getValueType(0);
20405 N->getOperand(0).getOperand(0).getValueType() !=
20406 N->getOperand(1).getOperand(0).getValueType())
20410 N->getOperand(0).getOpcode() !=
N->getOperand(1).getOpcode())
20413 SDValue N0 =
N->getOperand(0).getOperand(0);
20414 SDValue N1 =
N->getOperand(1).getOperand(0);
20419 if ((S2 == MVT::i32 &&
S1 == MVT::i8) ||
20420 (S2 == MVT::i64 && (
S1 == MVT::i8 ||
S1 == MVT::i16))) {
20452 EVT VT =
N->getValueType(0);
20456 unsigned AddSubOpc;
20458 auto IsAddSubWith1 = [&](
SDValue V) ->
bool {
20459 AddSubOpc = V->getOpcode();
20471 if (IsAddSubWith1(N0)) {
20473 return DAG.
getNode(AddSubOpc,
DL, VT, N1, MulVal);
20476 if (IsAddSubWith1(N1)) {
20478 return DAG.
getNode(AddSubOpc,
DL, VT, N0, MulVal);
20489 const APInt &ConstValue =
C->getAPIntValue();
20496 if (ConstValue.
sge(1) && ConstValue.
sle(16))
20511 unsigned TrailingZeroes = ConstValue.
countr_zero();
20512 if (TrailingZeroes) {
20520 if (
N->hasOneUse() && (
N->user_begin()->getOpcode() ==
ISD::ADD ||
20521 N->user_begin()->getOpcode() ==
ISD::SUB))
20526 APInt ShiftedConstValue = ConstValue.
ashr(TrailingZeroes);
20529 auto Shl = [&](
SDValue N0,
unsigned N1) {
20560 for (
unsigned i = 1; i <
BitWidth / 2; i++) {
20580 unsigned TrailingZeroes = CVMinus1.
countr_zero();
20581 APInt SCVMinus1 = CVMinus1.
ashr(TrailingZeroes) - 1;
20597 unsigned TrailingZeroes = CVMinus1.
countr_zero();
20598 APInt CVPlus1 = CVMinus1.
ashr(TrailingZeroes) + 1;
20618 APInt SCVMinus1 = ShiftedConstValue - 1;
20619 APInt SCVPlus1 = ShiftedConstValue + 1;
20620 APInt CVPlus1 = ConstValue + 1;
20624 return Shl(
Add(Shl(N0, ShiftAmt), N0), TrailingZeroes);
20626 ShiftAmt = CVPlus1.