74#include "llvm/IR/IntrinsicsAArch64.h"
109#define DEBUG_TYPE "aarch64-lower"
112STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
119 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
124 cl::desc("Enable AArch64 logical imm instruction "
134 cl::desc("Combine extends of AArch64 masked "
135 "gather intrinsics"),
139 cl::desc("Combine ext and trunc to TBL"),
154 cl::desc("Enable / disable SVE scalable vectors in Global ISel"),
161 cl::desc("Generate ISD::PTRADD nodes for pointer arithmetic in "
162 "SelectionDAG for FEAT_CPA"),
172 AArch64::X3, AArch64::X4, AArch64::X5,
173 AArch64::X6, AArch64::X7};
175 AArch64::Q3, AArch64::Q4, AArch64::Q5,
176 AArch64::Q6, AArch64::Q7};
201 return MVT::nxv8bf16;
208 switch (EC.getKnownMinValue()) {
224 "Expected scalable predicate vector type!");
246 "Expected legal vector type!");
253 "Expected legal type!");
254 return VT == MVT::nxv16i1;
267 "Unexpected fixed-size unpacked type.");
277 case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
278 case AArch64ISD::BSWAP_MERGE_PASSTHRU:
279 case AArch64ISD::REVH_MERGE_PASSTHRU:
280 case AArch64ISD::REVW_MERGE_PASSTHRU:
281 case AArch64ISD::REVD_MERGE_PASSTHRU:
282 case AArch64ISD::CTLZ_MERGE_PASSTHRU:
283 case AArch64ISD::CTPOP_MERGE_PASSTHRU:
284 case AArch64ISD::DUP_MERGE_PASSTHRU:
285 case AArch64ISD::ABS_MERGE_PASSTHRU:
286 case AArch64ISD::NEG_MERGE_PASSTHRU:
287 case AArch64ISD::FNEG_MERGE_PASSTHRU:
288 case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
289 case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
290 case AArch64ISD::FCEIL_MERGE_PASSTHRU:
291 case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
292 case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
293 case AArch64ISD::FRINT_MERGE_PASSTHRU:
294 case AArch64ISD::FRINT32_MERGE_PASSTHRU:
295 case AArch64ISD::FRINT64_MERGE_PASSTHRU:
296 case AArch64ISD::FROUND_MERGE_PASSTHRU:
297 case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
298 case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
299 case AArch64ISD::FTRUNC32_MERGE_PASSTHRU:
300 case AArch64ISD::FTRUNC64_MERGE_PASSTHRU:
301 case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
302 case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
303 case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
304 case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
305 case AArch64ISD::FCVTX_MERGE_PASSTHRU:
306 case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
307 case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
308 case AArch64ISD::FSQRT_MERGE_PASSTHRU:
309 case AArch64ISD::FRECPX_MERGE_PASSTHRU:
310 case AArch64ISD::FABS_MERGE_PASSTHRU:
317 switch (Op.getOpcode()) {
323 case AArch64ISD::PTRUE:
324 case AArch64ISD::SETCC_MERGE_ZERO:
327 switch (Op.getConstantOperandVal(0)) {
330 case Intrinsic::aarch64_sve_ptrue:
331 case Intrinsic::aarch64_sve_pnext:
332 case Intrinsic::aarch64_sve_cmpeq:
333 case Intrinsic::aarch64_sve_cmpne:
334 case Intrinsic::aarch64_sve_cmpge:
335 case Intrinsic::aarch64_sve_cmpgt:
336 case Intrinsic::aarch64_sve_cmphs:
337 case Intrinsic::aarch64_sve_cmphi:
338 case Intrinsic::aarch64_sve_cmpeq_wide:
339 case Intrinsic::aarch64_sve_cmpne_wide:
340 case Intrinsic::aarch64_sve_cmpge_wide:
341 case Intrinsic::aarch64_sve_cmpgt_wide:
342 case Intrinsic::aarch64_sve_cmplt_wide:
343 case Intrinsic::aarch64_sve_cmple_wide:
344 case Intrinsic::aarch64_sve_cmphs_wide:
345 case Intrinsic::aarch64_sve_cmphi_wide:
346 case Intrinsic::aarch64_sve_cmplo_wide:
347 case Intrinsic::aarch64_sve_cmpls_wide:
348 case Intrinsic::aarch64_sve_fcmpeq:
349 case Intrinsic::aarch64_sve_fcmpne:
350 case Intrinsic::aarch64_sve_fcmpge:
351 case Intrinsic::aarch64_sve_fcmpgt:
352 case Intrinsic::aarch64_sve_fcmpuo:
353 case Intrinsic::aarch64_sve_facgt:
354 case Intrinsic::aarch64_sve_facge:
355 case Intrinsic::aarch64_sve_whilege:
356 case Intrinsic::aarch64_sve_whilegt:
357 case Intrinsic::aarch64_sve_whilehi:
358 case Intrinsic::aarch64_sve_whilehs:
359 case Intrinsic::aarch64_sve_whilele:
360 case Intrinsic::aarch64_sve_whilelo:
361 case Intrinsic::aarch64_sve_whilels:
362 case Intrinsic::aarch64_sve_whilelt:
363 case Intrinsic::aarch64_sve_match:
364 case Intrinsic::aarch64_sve_nmatch:
365 case Intrinsic::aarch64_sve_whilege_x2:
366 case Intrinsic::aarch64_sve_whilegt_x2:
367 case Intrinsic::aarch64_sve_whilehi_x2:
368 case Intrinsic::aarch64_sve_whilehs_x2:
369 case Intrinsic::aarch64_sve_whilele_x2:
370 case Intrinsic::aarch64_sve_whilelo_x2:
371 case Intrinsic::aarch64_sve_whilels_x2:
372 case Intrinsic::aarch64_sve_whilelt_x2:
378static std::tuple<SDValue, SDValue>
399 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
405 AddrDisc = DAG->getRegister(AArch64::NoRegister, MVT::i64);
407 return std::make_tuple(
426 if (Subtarget->hasLS64()) {
432 if (Subtarget->hasFPARMv8()) {
441 if (Subtarget->hasNEON()) {
445 addDRType(MVT::v2f32);
446 addDRType(MVT::v8i8);
447 addDRType(MVT::v4i16);
448 addDRType(MVT::v2i32);
449 addDRType(MVT::v1i64);
450 addDRType(MVT::v1f64);
451 addDRType(MVT::v4f16);
452 addDRType(MVT::v4bf16);
454 addQRType(MVT::v4f32);
455 addQRType(MVT::v2f64);
456 addQRType(MVT::v16i8);
457 addQRType(MVT::v8i16);
458 addQRType(MVT::v4i32);
459 addQRType(MVT::v2i64);
460 addQRType(MVT::v8f16);
461 addQRType(MVT::v8bf16);
464 if (Subtarget->isSVEorStreamingSVEAvailable()) {
492 if (Subtarget->useSVEForFixedLengthVectors()) {
534 if (Subtarget->hasFPARMv8()) {
625 if (Subtarget->hasFPARMv8()) {
631 if (Subtarget->hasFPARMv8()) {
685 if (Subtarget->hasCSSC()) {
764 if (Subtarget->hasFullFP16()) {
796 if (Subtarget->hasFullFP16()) {
809 auto LegalizeNarrowFP = [this](MVT ScalarVT) {
915 if (!Subtarget->hasFullFP16()) {
916 LegalizeNarrowFP(MVT::f16);
918 LegalizeNarrowFP(MVT::bf16);
936 for (MVT Ty : {MVT::f32, MVT::f64})
938 if (Subtarget->hasFullFP16())
946 for (MVT Ty : {MVT::f32, MVT::f64})
948 if (Subtarget->hasFullFP16())
961 if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
973 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
1001 if (Subtarget->outlineAtomics() && !Subtarget->hasLSFE()) {
1028 if (Subtarget->hasLSE128()) {
1042 if (Subtarget->hasLSE2()) {
1099 if (WideVT.getScalarSizeInBits() > NarrowVT.getScalarSizeInBits()) {
1105 if (Subtarget->hasFPARMv8()) {
1229 if (!Subtarget->isTargetWindows())
1245 if (Subtarget->hasSME())
1248 if (Subtarget->isNeonAvailable()) {
1293 for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
1296 if (Subtarget->hasFullFP16()) {
1329 for (auto VT : {MVT::v1i64, MVT::v2i64}) {
1345 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64,
1346 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1353 for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1364 for (MVT VT : { MVT::v4f16, MVT::v2f32,
1365 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1366 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1375 if (Subtarget->hasFullFP16())
1378 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1379 MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1401 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
1428 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
1430 if (Subtarget->hasFullFP16())
1431 for (MVT Ty : {MVT::v4f16, MVT::v8f16})
1437 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
1439 if (Subtarget->hasFullFP16())
1440 for (MVT Ty : {MVT::v4f16, MVT::v8f16})
1475 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1478 for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
1481 if (Subtarget->hasDotProd()) {
1490 if (Subtarget->hasMatMulInt8()) {
1506 if (VT.is128BitVector() || VT.is64BitVector()) {
1521 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
1527 if (Subtarget->hasSME()) {
1533 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1535 {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
1541 for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
1546 if (Subtarget->hasSVE2p1() ||
1547 (Subtarget->hasSME2() && Subtarget->isStreaming()))
1550 for (auto VT : {MVT::v16i8, MVT::v8i8, MVT::v4i16, MVT::v2i32})
1553 for (auto VT : {MVT::v8f16, MVT::v4f32, MVT::v2f64})
1557 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1558 for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
1627 if (!Subtarget->isLittleEndian())
1630 if (Subtarget->hasSVE2() ||
1631 (Subtarget->hasSME() && Subtarget->isStreaming()))
1637 for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
1643 for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32})
1647 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
1648 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
1660 {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
1674 if (VT != MVT::nxv16i1) {
1684 {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
1685 MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1686 MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1725 for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
1726 MVT::nxv4f32, MVT::nxv2f64}) {
1804 for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1823 if (Subtarget->hasSVEB16B16() &&
1824 Subtarget->isNonStreamingSVEorSME2Available()) {
1826 for (auto VT : {MVT::v4bf16, MVT::v8bf16, MVT::nxv2bf16, MVT::nxv4bf16,
1849 if (!Subtarget->hasSVEB16B16() ||
1850 !Subtarget->isNonStreamingSVEorSME2Available()) {
1851 for (MVT VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1861 if (VT != MVT::nxv2bf16 && Subtarget->hasBF16())
1867 if (Subtarget->hasBF16() && Subtarget->isNeonAvailable())
1876 for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
1877 MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1890 if (Subtarget->useSVEForFixedLengthVectors()) {
1893 VT, !Subtarget->isNeonAvailable()))
1894 addTypeForFixedLengthSVE(VT);
1898 VT, !Subtarget->isNeonAvailable()))
1899 addTypeForFixedLengthSVE(VT);
1903 for (auto VT : {MVT::v8i8, MVT::v4i16})
1908 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
1910 for (auto VT : {MVT::v8f16, MVT::v4f32, MVT::v8bf16})
1932 for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1933 MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
1942 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
1965 for (auto VT : {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1})
1970 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1980 if (Subtarget->hasMatMulInt8()) {
1982 MVT::nxv16i8, Legal);
1988 if (Subtarget->hasSVE2() || Subtarget->hasSME()) {
1995 if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
1997 MVT::nxv8f16, Legal);
2005 if (Subtarget->hasSVE2() ||
2006 (Subtarget->hasSME() && Subtarget->isStreaming())) {
2008 for (auto VT : {MVT::v2i32, MVT::v4i16, MVT::v8i8, MVT::v16i8}) {
2012 for (auto VT : {MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, MVT::nxv16i1}) {
2019 if (Subtarget->isSVEAvailable()) {
2020 for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64,
2021 MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
2022 MVT::nxv4f32, MVT::nxv2f64, MVT::nxv2bf16, MVT::nxv4bf16,
2023 MVT::nxv8bf16, MVT::v4f16, MVT::v8f16, MVT::v2f32,
2024 MVT::v4f32, MVT::v1f64, MVT::v2f64, MVT::v8i8,
2025 MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
2026 MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
2031 for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
2032 MVT::nxv4f32, MVT::nxv2f64, MVT::v4f16, MVT::v8f16,
2033 MVT::v2f32, MVT::v4f32, MVT::v2f64})
2038 {MVT::nxv4i32, MVT::nxv2i64, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv2f64})
2043 for (auto VT : {MVT::v2i32, MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32,
2053 for (auto VT : {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64,
2054 MVT::nxv2f32, MVT::nxv2f64, MVT::nxv4i8, MVT::nxv4i16,
2055 MVT::nxv4i32, MVT::nxv4f32}) {
2063 if (Subtarget->hasSVE2()) {
2081 if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
2088 if (Subtarget->hasSVE()) {
2102 if (Subtarget->isTargetWindows()) {
2119void AArch64TargetLowering::addTypeForNEON(MVT VT) {
2129 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
2150 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
2151 ((VT == MVT::v4bf16 || VT == MVT::v8bf16 || VT == MVT::v4f16 ||
2152 VT == MVT::v8f16) &&
2153 Subtarget->hasFullFP16()))
2178 if (VT != MVT::v8i8 && VT != MVT::v16i8)
2187 for (unsigned Opcode :
2205 for (unsigned Opcode :
2236 if (Subtarget->isLittleEndian()) {
2247 if (Subtarget->hasD128()) {
2265 if (!Subtarget->isSVEorStreamingSVEAvailable() ||
2272 (OpVT != MVT::i32 && OpVT != MVT::i64))))
2284 if (!Subtarget->isSVEorStreamingSVEAvailable())
2289 return VT != MVT::nxv16i1 && VT != MVT::nxv8i1 && VT != MVT::nxv4i1 &&
2290 VT != MVT::nxv2i1 && VT != MVT::v16i1 && VT != MVT::v8i1 &&
2291 VT != MVT::v4i1 && VT != MVT::v2i1;
2295 unsigned SearchSize) const {
2297 if (!Subtarget->hasSVE2() || !Subtarget->isSVEAvailable())
2300 if (VT == MVT::nxv8i16 || VT == MVT::v8i16)
2301 return SearchSize != 8;
2302 if (VT == MVT::nxv16i8 || VT == MVT::v16i8 || VT == MVT::v8i8)
2303 return SearchSize != 8 && SearchSize != 16;
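// Reading of the fragment above (interpretive, not authoritative): these checks
// appear to gate use of the SVE2 MATCH instruction, which compares each element
// of one vector against a single 128-bit segment of another. A 128-bit segment
// holds 8 halfword values or 16 byte values (8 for a 64-bit fixed vector), so
// only those search sizes avoid expansion; the call would look something like
//   @llvm.experimental.vector.match(<vscale x 16 x i8> %data, <16 x i8> %needle, <vscale x 16 x i1> %mask)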
2307void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
2332 while (InnerVT != VT) {
2346 while (InnerVT != VT) {
2355 bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();
2376 if (Subtarget->hasMatMulInt8()) {
2486void AArch64TargetLowering::addDRType(MVT VT) {
2488 if (Subtarget->isNeonAvailable())
2492void AArch64TargetLowering::addQRType(MVT VT) {
2494 if (Subtarget->isNeonAvailable())
2511 Imm = C->getZExtValue();
2519 case AArch64ISD::SQDMULH:
2531 return N->getOpcode() == Opc &&
2536 const APInt &Demanded,
2539 uint64_t OldImm = Imm, NewImm, Enc;
2544 if (Imm == 0 || Imm == Mask ||
2548 unsigned EltSize = Size;
2565 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
2567 uint64_t Sum = RotatedImm + NonDemandedBits;
2568 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
2569 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
2570 NewImm = (Imm | Ones) & Mask;
2598 while (EltSize < Size) {
2599 NewImm |= NewImm << EltSize;
2605 "demanded bits should never be altered");
2606 assert(OldImm != NewImm &&
"the new imm shouldn't be equal to the old imm");
2609 EVT VT = Op.getValueType();
2615 if (NewImm == 0 || NewImm == OrigMask) {
2640 EVT VT = Op.getValueType();
2654 switch (Op.getOpcode()) {
2658 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
2661 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
2664 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
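// Worked example of the transform above (illustrative only): an AND with the
// 32-bit mask 0b1101 is not a rotated run of ones, so it cannot be encoded as
// an ANDWri logical immediate. If bit 1 of the result is not among the demanded
// bits, that bit of the mask may be set as well, giving 0b1111, which is a
// contiguous run of ones and encodes directly, keeping the AND a single
// instruction instead of materialising the constant separately.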
2679 switch (Op.getOpcode()) {
2682 case AArch64ISD::DUP: {
2685 if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
2686 assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
2687 "Expected DUP implicit truncation");
2688 Known = Known.trunc(Op.getScalarValueSizeInBits());
2692 case AArch64ISD::CSEL: {
2699 case AArch64ISD::CSNEG:
2700 case AArch64ISD::CSINC:
2701 case AArch64ISD::CSINV: {
2709 if (Op.getOpcode() == AArch64ISD::CSINC)
2713 else if (Op.getOpcode() == AArch64ISD::CSINV)
2715 else if (Op.getOpcode() == AArch64ISD::CSNEG)
2718 Op.getScalarValueSizeInBits())));
2723 case AArch64ISD::BICi: {
2726 ~(Op->getConstantOperandAPInt(1) << Op->getConstantOperandAPInt(2))
2732 case AArch64ISD::VLSHR: {
2739 case AArch64ISD::VASHR: {
2746 case AArch64ISD::VSHL: {
2753 case AArch64ISD::MOVI: {
2758 case AArch64ISD::MOVIshift: {
2761 << Op->getConstantOperandVal(1)));
2764 case AArch64ISD::MOVImsl: {
2767 Known.getBitWidth(), ~(~Op->getConstantOperandVal(0) << ShiftAmt)));
2770 case AArch64ISD::MOVIedit: {
2776 case AArch64ISD::MVNIshift: {
2779 ~(Op->getConstantOperandVal(0) << Op->getConstantOperandVal(1)),
2783 case AArch64ISD::MVNImsl: {
2790 case AArch64ISD::LOADgot:
2791 case AArch64ISD::ADDlow: {
2792 if (!Subtarget->isTargetILP32())
2798 case AArch64ISD::ASSERT_ZEXT_BOOL: {
2808 case Intrinsic::aarch64_ldaxr:
2809 case Intrinsic::aarch64_ldxr: {
2821 unsigned IntNo = Op.getConstantOperandVal(0);
2825 case Intrinsic::aarch64_neon_uaddlv: {
2826 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
2828 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
2829 unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
2836 case Intrinsic::aarch64_neon_umaxv:
2837 case Intrinsic::aarch64_neon_uminv: {
2842 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
2844 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
2848 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
2862 unsigned Depth) const {
2863 EVT VT = Op.getValueType();
2865 unsigned Opcode = Op.getOpcode();
2867 case AArch64ISD::FCMEQ:
2868 case AArch64ISD::FCMGE:
2869 case AArch64ISD::FCMGT:
2872 case AArch64ISD::VASHR: {
2875 return std::min<uint64_t>(Tmp + Op.getConstantOperandVal(1), VTBits);
2889 unsigned *Fast) const {
2899 if (ElementSizeBits % 8 == 0 && Alignment >= Align(ElementSizeBits / 8))
2903 if (Subtarget->requiresStrictAlign())
2908 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
2927 unsigned *Fast) const {
2928 if (Subtarget->requiresStrictAlign())
2933 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
2934 Ty.getSizeInBytes() != 16 ||
2977 Register DestReg = MI.getOperand(0).getReg();
2978 Register IfTrueReg = MI.getOperand(1).getReg();
2979 Register IfFalseReg = MI.getOperand(2).getReg();
2980 unsigned CondCode = MI.getOperand(3).getImm();
2981 bool NZCVKilled = MI.getOperand(4).isKill();
2995 MBB->addSuccessor(TrueBB);
2996 MBB->addSuccessor(EndBB);
3012 MI.eraseFromParent();
3020 "SEH does not use catchret!");
3031 Register TargetReg = MI.getOperand(0).getReg();
3033 TII.probedStackAlloc(MBBI, TargetReg, false);
3035 MI.eraseFromParent();
3036 return NextInst->getParent();
3048 Register RegVL_GPR = MRI.createVirtualRegister(RC_GPR);
3049 Register RegVL_GPRsp = MRI.createVirtualRegister(RC_GPRsp);
3050 Register RegSVL_GPR = MRI.createVirtualRegister(RC_GPR);
3051 Register RegSVL_GPRsp = MRI.createVirtualRegister(RC_GPRsp);
3089 MBB->addSuccessor(TrapBB);
3090 MBB->addSuccessor(PassBB);
3092 MI.eraseFromParent();
3104 MIB.add(MI.getOperand(1));
3105 MIB.add(MI.getOperand(2));
3106 MIB.add(MI.getOperand(3));
3107 MIB.add(MI.getOperand(4));
3108 MIB.add(MI.getOperand(5));
3110 MI.eraseFromParent();
3121 MIB.add(MI.getOperand(0));
3122 MIB.add(MI.getOperand(1));
3123 MIB.add(MI.getOperand(2));
3124 MIB.add(MI.getOperand(1));
3126 MI.eraseFromParent();
3133 bool Op0IsDef) const {
3139 for (unsigned I = 1; I < MI.getNumOperands(); ++I)
3140 MIB.add(MI.getOperand(I));
3142 MI.eraseFromParent();
3152 unsigned StartIdx = 0;
3154 bool HasTile = BaseReg != AArch64::ZA;
3155 bool HasZPROut = HasTile && MI.getOperand(0).isReg();
3157 MIB.add(MI.getOperand(StartIdx));
3161 MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm(),
3163 MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm());
3167 if (MI.getOperand(0).isReg() && !MI.getOperand(1).isImm()) {
3168 MIB.add(MI.getOperand(StartIdx));
3173 for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
3174 MIB.add(MI.getOperand(I));
3176 MI.eraseFromParent();
3185 MIB.add(MI.getOperand(0));
3187 unsigned Mask = MI.getOperand(0).getImm();
3188 for (unsigned I = 0; I < 8; I++) {
3189 if (Mask & (1 << I))
3193 MI.eraseFromParent();
3204 if (TPIDR2.Uses > 0) {
3207 if (!Subtarget->isLittleEndian())
3209 "TPIDR2 block initialization is not supported on big-endian targets");
3237 "Lazy ZA save is not yet supported on Windows");
3241 if (TPIDR2.Uses > 0) {
3247 Register SP = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3248 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), SP)
3252 auto Size = MI.getOperand(1).getReg();
3253 auto Dest = MI.getOperand(0).getReg();
3254 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::MSUBXrrr), Dest)
3278 "Lazy ZA save is not yet supported on Windows");
3283 auto Size = MI.getOperand(1).getReg();
3284 auto Dest = MI.getOperand(0).getReg();
3285 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::SUBXrx64), AArch64::SP)
3289 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), Dest)
3295 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
3296 MI.getOperand(0).getReg());
3310 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
3317 MI.getOperand(0).getReg())
3321 MI.getOperand(0).getReg())
3333 Register ResultReg = MI.getOperand(0).getReg();
3336 } else if (Subtarget->hasSME()) {
3338 .addImm(AArch64SysReg::SVCR)
3341 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;
3350 MI.eraseFromParent();
3358 while (Reg.isVirtual()) {
3360 assert(DefMI && "Virtual register definition not found");
3361 unsigned Opcode = DefMI->getOpcode();
3363 if (Opcode == AArch64::COPY) {
3364 Reg = DefMI->getOperand(1).getReg();
3366 if (Reg.isPhysical())
3370 if (Opcode == AArch64::SUBREG_TO_REG) {
3371 Reg = DefMI->getOperand(2).getReg();
3388 int64_t IntDisc = IntDiscOp.getImm();
3389 assert(IntDisc == 0 && "Blend components are already expanded");
3394 case AArch64::MOVKXi:
3403 case AArch64::MOVi32imm:
3404 case AArch64::MOVi64imm:
3408 AddrDisc = AArch64::NoRegister;
3417 if (AddrDisc == AArch64::XZR)
3418 AddrDisc = AArch64::NoRegister;
3421 if (AddrDisc && MRI.getRegClass(AddrDisc) != AddrDiscRC) {
3422 Register TmpReg = MRI.createVirtualRegister(AddrDiscRC);
3427 AddrDiscOp.setReg(AddrDisc);
3428 IntDiscOp.setImm(IntDisc);
3435 if (SMEOrigInstr != -1) {
3439 switch (SMEMatrixType) {
3455 switch (MI.getOpcode()) {
3461 case AArch64::InitTPIDR2Obj:
3463 case AArch64::AllocateZABuffer:
3465 case AArch64::AllocateSMESaveBuffer:
3467 case AArch64::GetSMESaveSize:
3469 case AArch64::EntryPStateSM:
3471 case AArch64::F128CSEL:
3473 case TargetOpcode::STATEPOINT:
3479 MI.addOperand(*MI.getMF(),
3485 case TargetOpcode::STACKMAP:
3486 case TargetOpcode::PATCHPOINT:
3489 case TargetOpcode::PATCHABLE_EVENT_CALL:
3490 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
3493 case AArch64::CATCHRET:
3496 case AArch64::PROBED_STACKALLOC_DYN:
3499 case AArch64::CHECK_MATCHING_VL_PSEUDO:
3502 case AArch64::LD1_MXIPXX_H_PSEUDO_B:
3503 return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
3504 case AArch64::LD1_MXIPXX_H_PSEUDO_H:
3505 return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB);
3506 case AArch64::LD1_MXIPXX_H_PSEUDO_S:
3507 return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB);
3508 case AArch64::LD1_MXIPXX_H_PSEUDO_D:
3509 return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB);
3510 case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
3511 return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB);
3512 case AArch64::LD1_MXIPXX_V_PSEUDO_B:
3513 return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB);
3514 case AArch64::LD1_MXIPXX_V_PSEUDO_H:
3515 return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB);
3516 case AArch64::LD1_MXIPXX_V_PSEUDO_S:
3517 return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB);
3518 case AArch64::LD1_MXIPXX_V_PSEUDO_D:
3519 return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB);
3520 case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
3521 return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
3522 case AArch64::LDR_ZA_PSEUDO:
3524 case AArch64::LDR_TX_PSEUDO:
3526 case AArch64::STR_TX_PSEUDO:
3528 case AArch64::ZERO_M_PSEUDO:
3530 case AArch64::ZERO_T_PSEUDO:
3532 case AArch64::MOVT_TIZ_PSEUDO:
3537 &AArch64::GPR64noipRegClass);
3564 N = N->getOperand(0).getNode();
3569 if (N->getOpcode() != AArch64ISD::DUP)
3572 auto Opnd0 = N->getOperand(0);
3726 CondCode, CondCode2);
3739 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
3741 << " legal: " << (IsLegal ? "yes\n" : "no\n"));
3760 if (Op->getFlags().hasNoSignedWrap())
3786 (isIntEqualitySetCC(CC) ||
3794 EVT VT = LHS.getValueType();
3799 if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {
3804 Chain = RHS.getValue(1);
3807 IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
3813 EVT VT = LHS.getValueType();
3818 if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {
3829 unsigned Opcode = AArch64ISD::SUBS;
3833 Opcode = AArch64ISD::ADDS;
3836 isIntEqualitySetCC(CC)) {
3839 Opcode = AArch64ISD::ADDS;
3848 LHS.getOperand(0), LHS.getOperand(1));
3852 } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
3854 return LHS.getValue(1);
3920 unsigned Opcode = 0;
3923 if (LHS.getValueType().isFloatingPoint()) {
3924 assert(LHS.getValueType() != MVT::f128);
3925 if ((LHS.getValueType() == MVT::f16 && !FullFP16) ||
3926 LHS.getValueType() == MVT::bf16) {
3930 Opcode = AArch64ISD::FCCMP;
3932 APInt Imm = Const->getAPIntValue();
3933 if (Imm.isNegative() && Imm.sgt(-32)) {
3934 Opcode = AArch64ISD::CCMN;
3938 Opcode = AArch64ISD::CCMN;
3941 isIntEqualitySetCC(CC)) {
3944 Opcode = AArch64ISD::CCMN;
3948 Opcode = AArch64ISD::CCMP;
3974 bool &CanNegate, bool &MustBeFirst,
3975 bool &PreferFirst, bool WillNegate,
3976 unsigned Depth = 0) {
3982 if (VT == MVT::f128)
3985 MustBeFirst = false;
3989 {Val->getOperand(0), Val->getOperand(1)});
3996 bool IsOR = Opcode == ISD::OR;
4012 if (MustBeFirstL && MustBeFirstR)
4018 if (!CanNegateL && !CanNegateR)
4022 CanNegate = WillNegate && CanNegateL && CanNegateR;
4025 MustBeFirst = !CanNegate;
4030 MustBeFirst = MustBeFirstL || MustBeFirstR;
4032 PreferFirst = PreferFirstL || PreferFirstR;
4055 bool isInteger = LHS.getValueType().isInteger();
4057 CC = getSetCCInverse(CC, LHS.getValueType());
4063 assert(LHS.getValueType().isFloatingPoint());
4089 bool IsOR = Opcode == ISD::OR;
4096 PreferFirstL, IsOR);
4097 assert(ValidL && "Valid conjunction/disjunction tree");
4105 PreferFirstR, IsOR);
4106 assert(ValidR && "Valid conjunction/disjunction tree");
4109 bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR;
4112 if (MustBeFirstL || ShouldFirstL) {
4113 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4122 bool NegateAfterAll;
4126 assert(CanNegateR && "at least one side must be negatable");
4127 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4131 NegateAfterR = true;
4134 NegateR = CanNegateR;
4135 NegateAfterR = !CanNegateR;
4138 NegateAfterAll = !Negate;
4140 assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
4141 assert(!Negate && "Valid conjunction/disjunction tree");
4145 NegateAfterR = false;
4146 NegateAfterAll = false;
4166 bool DummyCanNegate;
4167 bool DummyMustBeFirst;
4168 bool DummyPreferFirst;
4170 DummyPreferFirst, false))
4181 auto isSupportedExtend = [&](SDValue V) {
4187 uint64_t Mask = MaskCst->getZExtValue();
4188 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
4194 if (!Op.hasOneUse())
4197 if (isSupportedExtend(Op))
4200 unsigned Opc = Op.getOpcode();
4203 uint64_t Shift = ShiftCst->getZExtValue();
4204 if (isSupportedExtend(Op.getOperand(0)))
4205 return (Shift <= 4) ? 2 : 1;
4206 EVT VT = Op.getValueType();
4207 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
4219 if (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != AArch64ISD::ANDS)
4239 EVT VT = RHS.getValueType();
4240 APInt C = RHSC->getAPIntValue();
4255 if (!C.isMinSignedValue()) {
4267 assert(!C.isZero() && "C should not be zero here");
4278 if (!C.isMaxSignedValue()) {
4289 if (!C.isAllOnes()) {
4314 bool LHSIsCMN = isCMN(LHS, CC, DAG);
4315 bool RHSIsCMN = isCMN(RHS, CC, DAG);
4350 LHS.getNode()->hasNUsesOfValue(1, 0)) {
4351 int16_t ValueofRHS = RHS->getAsZExtVal();
4379static std::pair<SDValue, SDValue>
4381 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
4382 "Unsupported value type");
4388 switch (Op.getOpcode()) {
4392 Opc = AArch64ISD::ADDS;
4396 Opc = AArch64ISD::ADDS;
4400 Opc = AArch64ISD::SUBS;
4404 Opc = AArch64ISD::SUBS;
4412 if (Op.getValueType() == MVT::i32) {
4435 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
4445 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
4464 Overflow = Value.getValue(1);
4466 return std::make_pair(Value, Overflow);
4471 !Subtarget->isNeonAvailable()))
4472 return LowerToScalableOp(Op, DAG);
4496 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
4519 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
4526 if (!CFVal || !CTVal)
4563 return Cmp.getValue(1);
4576 return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Glue);
4586 return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Glue);
4592 unsigned Opcode, bool IsSigned) {
4593 EVT VT0 = Op.getValue(0).getValueType();
4594 EVT VT1 = Op.getValue(1).getValueType();
4596 if (VT0 != MVT::i32 && VT0 != MVT::i64)
4599 bool InvertCarry = Opcode == AArch64ISD::SBCS;
4618 bool LastOperandIsImm = false) {
4619 if (Op.getValueType().isVector())
4624 const unsigned NumOperands = Op.getNumOperands();
4625 auto getFloatVT = [](EVT VT) {
4626 assert((VT == MVT::i32 || VT == MVT::i64) && "Unexpected VT");
4627 return VT == MVT::i32 ? MVT::f32 : MVT::f64;
4629 auto bitcastToFloat = [&](SDValue Val) {
4630 return DAG.getBitcast(getFloatVT(Val.getValueType()), Val);
4634 for (unsigned I = 1; I < NumOperands; ++I) {
4636 const bool KeepInt = LastOperandIsImm && (I == NumOperands - 1);
4637 NewOps.push_back(KeepInt ? Val : bitcastToFloat(Val));
4639 EVT OrigVT = Op.getValueType();
4664 DAG.getNode(AArch64ISD::CSEL, DL, MVT::i32, FVal, TVal, CCVal, Overflow);
4676 unsigned IsWrite = Op.getConstantOperandVal(2);
4677 unsigned Locality = Op.getConstantOperandVal(3);
4678 unsigned IsData = Op.getConstantOperandVal(4);
4680 bool IsStream = !Locality;
4684 assert(Locality <= 3 && "Prefetch locality out-of-range");
4688 Locality = 3 - Locality;
4692 unsigned PrfOp = (IsWrite << 4) |
4696 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
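// Rough sketch of the PRFM operand assembled above (based on the surrounding
// fragments, not authoritative): bit 4 selects load vs. store prefetch, bit 3
// selects instruction vs. data cache, bits 2..1 hold the target cache level
// after the "3 - Locality" inversion, and bit 0 marks streaming (non-temporal)
// accesses. For example, a data read prefetch with locality 3 ends up as
// operand 0b00000, i.e. PLDL1KEEP.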
4709 if (LHSConstOp && RHSConst) {
4713 uint64_t NewMaskValue = LHSConstValue & ~(RHSConstant - 1);
4726 EVT VT = Op.getValueType();
4730 if (VT == MVT::nxv2f64 && SrcVal.getValueType() == MVT::nxv2bf16) {
4738 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
4742 return LowerFixedLengthFPExtendToSVE(Op, DAG);
4744 bool IsStrict = Op->isStrictFPOpcode();
4745 SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
4747 if (VT == MVT::f64) {
4749 if (Op0VT == MVT::f32 || Op0VT == MVT::f16)
4752 if (Op0VT == MVT::bf16 && IsStrict) {
4755 {Op0, Op.getOperand(0)});
4759 if (Op0VT == MVT::bf16)
4765 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
4771 EVT VT = Op.getValueType();
4772 bool IsStrict = Op->isStrictFPOpcode();
4773 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
4775 bool Trunc = Op.getConstantOperandVal(IsStrict ? 2 : 1) == 1;
4779 if (SrcVT == MVT::nxv8f32)
4783 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
4786 constexpr EVT I32 = MVT::nxv4i32;
4792 if (SrcVT == MVT::nxv2f32 || SrcVT == MVT::nxv4f32) {
4793 if (Subtarget->hasBF16())
4794 return LowerToPredicatedOp(Op, DAG,
4795 AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
4797 Narrow = getSVESafeBitCast(I32, SrcVal, DAG);
4802 } else if (SrcVT == MVT::nxv2f64 &&
4803 (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())) {
4806 Narrow = DAG.getNode(AArch64ISD::FCVTX_MERGE_PASSTHRU, DL, MVT::nxv2f32,
4807 Pg, SrcVal, DAG.getUNDEF(MVT::nxv2f32));
4813 NewOps.push_back(Op.getOperand(IsStrict ? 2 : 1));
4814 return DAG.getNode(Op.getOpcode(), DL, VT, NewOps, Op->getFlags());
4831 IsNaN = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, I1, IsNaN);
4832 Narrow = DAG.getSelect(DL, I32, IsNaN, NaN, Narrow);
4837 return getSVESafeBitCast(VT, Narrow, DAG);
4841 return LowerFixedLengthFPRoundToSVE(Op, DAG);
4846 !((Subtarget->hasNEON() || Subtarget->hasSME()) &&
4847 Subtarget->hasBF16())) {
4862 Narrow = DAG.getNode(AArch64ISD::FCVTXN, DL, F32, Narrow);
4883 Narrow = DAG.getSelect(DL, I32, IsNaN, NaN, Narrow);
4900 if (SrcVT != MVT::f128) {
4917 bool IsStrict = Op->isStrictFPOpcode();
4918 EVT InVT = Op.getOperand(IsStrict ? 1 : 0).getValueType();
4919 EVT VT = Op.getValueType();
4922 "Unimplemented SVE support for STRICT_FP_to_INT!");
4931 {Op.getOperand(0), Op.getOperand(1)});
4932 return DAG.getNode(Op.getOpcode(), DL, {VT, MVT::Other},
4933 {Ext.getValue(1), Ext.getValue(0)});
4936 Op.getOpcode(), DL, Op.getValueType(),
4950 if (InVT == MVT::nxv8f32)
4954 ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
4955 : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
4956 return LowerToPredicatedOp(Op, DAG, Opcode);
4961 return LowerFixedLengthFPToIntToSVE(Op, DAG);
4965 if (VTSize < InVTSize) {
4970 {Op.getOperand(0), Op.getOperand(1)});
4980 if (VTSize > InVTSize) {
4987 {Op.getOperand(0), Op.getOperand(1)});
4988 return DAG.getNode(Op.getOpcode(), DL, {VT, MVT::Other},
4989 {Ext.getValue(1), Ext.getValue(0)});
5004 return DAG.getNode(Op.getOpcode(), DL, {ScalarVT, MVT::Other},
5005 {Op.getOperand(0), Extract});
5006 return DAG.getNode(Op.getOpcode(), DL, ScalarVT, Extract);
5015 bool IsStrict = Op->isStrictFPOpcode();
5016 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5019 return LowerVectorFP_TO_INT(Op, DAG);
5022 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
5028 {Op.getOperand(0), SrcVal});
5029 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
5030 {Ext.getValue(1), Ext.getValue(0)});
5045AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
5051 EVT DstVT = Op.getValueType();
5057 assert(SatWidth <= DstElementWidth &&
5058 "Saturation width cannot exceed result width");
5071 if ((SrcElementVT == MVT::f16 &&
5072 (!Subtarget->hasFullFP16() || DstElementWidth > 16)) ||
5073 SrcElementVT == MVT::bf16) {
5083 SrcElementVT = MVT::f32;
5084 SrcElementWidth = 32;
5085 } else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
5086 SrcElementVT != MVT::f16 && SrcElementVT != MVT::bf16)
5091 if (SatWidth == 64 && SrcElementWidth < 64) {
5095 SrcElementVT = MVT::f64;
5096 SrcElementWidth = 64;
5099 if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth) {
5114 if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
5121 SrcVal2 ? DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal2,
5157 return LowerVectorFP_TO_INT_SAT(Op, DAG);
5159 EVT DstVT = Op.getValueType();
5163 assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
5166 if ((SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) || SrcVT == MVT::bf16) {
5169 } else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16 &&
5175 if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
5176 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
5177 DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
5178 return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
5184 if (DstWidth < SatWidth)
5187 if (SrcVT == MVT::f16 && SatVT == MVT::i16 && DstVT == MVT::i32) {
5190 DAG.getNode(AArch64ISD::FCVTZS_HALF, DL, MVT::f32, SrcVal);
5195 SDValue CVTf32 = DAG.getNode(AArch64ISD::FCVTZU_HALF, DL, MVT::f32, SrcVal);
5220 EVT VT = Op.getValueType();
5227 *DAG.getContext(), Src.getValueType().getVectorElementType());
5243 bool IsStrict = Op->isStrictFPOpcode();
5244 EVT VT = Op.getValueType();
5247 EVT InVT = In.getValueType();
5248 unsigned Opc = Op.getOpcode();
5252 "Unimplemented SVE support for ISD:::STRICT_INT_TO_FP!");
5267 {Op.getOperand(0), In});
5269 {Op.getValueType(), MVT::Other},
5280 if (VT == MVT::nxv8f32)
5283 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
5284 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
5285 return LowerToPredicatedOp(Op, DAG, Opcode);
5290 return LowerFixedLengthIntToFPToSVE(Op, DAG);
5294 if (VTSize < InVTSize) {
5300 bool IsTargetf16 = false;
5301 if (Op.hasOneUse() &&
5306 SDNode *U = *Op->user_begin();
5307 if (U->hasOneUse() && U->user_begin()->getOpcode() == ISD::FP_ROUND) {
5308 EVT TmpVT = U->user_begin()->getValueType(0);
5314 if (IsTargetf32 && !IsTargetf16) {
5324 {In.getValue(1), In.getValue(0),
5332 if (VTSize > InVTSize) {
5349 return DAG.getNode(Op.getOpcode(), DL, {ScalarVT, MVT::Other},
5350 {Op.getOperand(0), Extract});
5351 return DAG.getNode(Op.getOpcode(), DL, ScalarVT, Extract);
5359 if (Op.getValueType().isVector())
5360 return LowerVectorINT_TO_FP(Op, DAG);
5362 bool IsStrict = Op->isStrictFPOpcode();
5363 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5368 auto IntToFpViaPromotion = [&](EVT PromoteVT) {
5372 {Op.getOperand(0), SrcVal});
5374 {Op.getValueType(), MVT::Other},
5379 DAG.getNode(Op.getOpcode(), DL, PromoteVT, SrcVal),
5383 if (Op.getValueType() == MVT::bf16) {
5384 unsigned MaxWidth = IsSigned
5388 if (MaxWidth <= 24) {
5389 return IntToFpViaPromotion(MVT::f32);
5393 if (MaxWidth <= 53) {
5394 return IntToFpViaPromotion(MVT::f64);
5445 IsStrict ? DAG.getNode(Op.getOpcode(), DL, {MVT::f64, MVT::Other},
5446 {Op.getOperand(0), ToRound})
5447 : DAG.getNode(Op.getOpcode(), DL, MVT::f64, ToRound);
5474 {Op.getValueType(), MVT::Other},
5478 DAG.getIntPtrConstant(0, DL, true));
5483 if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
5484 return IntToFpViaPromotion(MVT::f32);
5493 if (Op.getValueType() != MVT::f128)
5501AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
5503 assert((Subtarget->hasSVE2() ||
5504 (Subtarget->hasSME() && Subtarget->isStreaming())) &&
5505 "Lowering loop_dependence_raw_mask or loop_dependence_war_mask "
5506 "requires SVE or SME");
5509 EVT VT = Op.getValueType();
5510 unsigned LaneOffset = Op.getConstantOperandVal(3);
5512 uint64_t EltSizeInBytes = Op.getConstantOperandVal(2);
5515 if (LaneOffset != 0 || !is_contained({1u, 2u, 4u, 8u}, EltSizeInBytes))
5546 EVT OpVT = Op.getValueType();
5547 EVT ArgVT = Op.getOperand(0).getValueType();
5550 return LowerFixedLengthBitcastToSVE(Op, DAG);
5558 "Expected int->fp bitcast!");
5571 return getSVESafeBitCast(OpVT, ExtResult, DAG);
5582 return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
5585 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
5589 if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
5592 assert(ArgVT == MVT::i16);
5602static std::optional<uint64_t>
5606 return std::nullopt;
5611 return std::nullopt;
5613 return C->getZExtValue();
5618 EVT VT = N.getValueType();
5623 for (const SDValue &Elt : N->op_values()) {
5626 unsigned HalfSize = EltSize / 2;
5628 if (!isIntN(HalfSize, C->getSExtValue()))
5631 if (!isUIntN(HalfSize, C->getZExtValue()))
5643 EVT VT = N.getValueType();
5665 unsigned Opcode = N.getOpcode();
5676 unsigned Opcode = N.getOpcode();
5818 if (IsN0SExt && IsN1SExt)
5819 return AArch64ISD::SMULL;
5824 if (IsN0ZExt && IsN1ZExt)
5825 return AArch64ISD::UMULL;
5831 if (IsN0ZExt || IsN1ZExt) {
5833 return AArch64ISD::UMULL;
5838 return AArch64ISD::UMULL;
5841 if (IsN0SExt || IsN1SExt) {
5843 return AArch64ISD::SMULL;
5846 return AArch64ISD::SMULL;
5849 if (!IsN1SExt && !IsN1ZExt)
5856 return AArch64ISD::SMULL;
5860 return AArch64ISD::UMULL;
5865 return AArch64ISD::UMULL;
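// Illustrative cases for the opcode selection above (not exhaustive): a v8i16
// multiply whose operands are both sign-extended from v8i8 maps to
// AArch64ISD::SMULL, both zero-extended maps to UMULL, and when only one
// operand is extended the other must be provably representable in the narrow
// type (for instance a build_vector of small constants) for the widened
// multiply to be used.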
5871 EVT VT = Op.getValueType();
5873 bool OverrideNEON = !Subtarget->isNeonAvailable();
5875 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
5880 "unexpected type for custom-lowering ISD::MUL");
5896 if (VT == MVT::v1i64) {
5897 if (Subtarget->hasSVE())
5898 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
5914 if (Subtarget->hasSVE())
5915 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
5930 "unexpected types for extended operands to VMULL");
5953 if (Pattern == AArch64SVEPredPattern::all)
5962 if (PatNumElts == (NumElts * VScale))
5966 return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
5971 bool IsSigned, bool IsEqual) {
5975 if (!N->getValueType(0).isScalableVector() ||
5980 APInt Y = N->getConstantOperandAPInt(Op1);
5985 if (IsSigned ? Y.isMaxSignedValue() : Y.isMaxValue())
5991 APInt X = N->getConstantOperandAPInt(Op0);
5994 APInt NumActiveElems =
5995 IsSigned ? Y.ssub_ov(X, Overflow) : Y.usub_ov(X, Overflow);
6002 NumActiveElems = IsSigned ? NumActiveElems.sadd_ov(One, Overflow)
6003 : NumActiveElems.uadd_ov(One, Overflow);
6008 std::optional<unsigned> PredPattern =
6010 unsigned MinSVEVectorSize = std::max(
6012 unsigned ElementSize = 128 /
N->getValueType(0).getVectorMinNumElements();
6013 if (PredPattern != std::nullopt &&
6014 NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
6015 return getPTrue(DAG, DL, N->getValueType(0), *PredPattern);
6024 EVT InVT = Op.getValueType();
6028 "Expected a predicate-to-predicate bitcast");
6032 "Only expect to cast between legal scalable predicate types!");
6042 Op.getConstantOperandVal(0) == Intrinsic::aarch64_sve_convert_to_svbool &&
6043 Op.getOperand(1).getValueType().bitsGT(VT))
6044 Op = Op.getOperand(1);
6062 Mask = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Mask);
6069 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;
6075 TargetLowering::CallLoweringInfo CLI(DAG);
6077 CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
6080 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
6126 SDValue TileSlice = N->getOperand(2);
6129 int32_t ConstAddend = 0;
6138 ConstAddend = ImmNode->getSExtValue();
6142 int32_t ImmAddend = ConstAddend % 16;
6143 if (int32_t C = (ConstAddend - ImmAddend)) {
6145 VarAddend = VarAddend
6152 auto SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
6164 return DAG.getNode(IsLoad ? AArch64ISD::SME_ZA_LDR : AArch64ISD::SME_ZA_STR,
6166 {N.getOperand(0), TileSlice, Base,
6175 auto Op1 = Op.getOperand(1);
6176 auto Op2 = Op.getOperand(2);
6177 auto Mask = Op.getOperand(3);
6180 EVT Op2VT = Op2.getValueType();
6181 EVT ResVT = Op.getValueType();
6185 "Expected 8-bit or 16-bit characters.");
6199 Op2 = DAG.getNode(AArch64ISD::DUPLANE128, DL, OpContainerVT, Op2,
6227 ID, Mask, Op1, Op2);
6238 unsigned IntNo = Op.getConstantOperandVal(1);
6243 case Intrinsic::aarch64_prefetch: {
6247 unsigned IsWrite = Op.getConstantOperandVal(3);
6248 unsigned Locality = Op.getConstantOperandVal(4);
6249 unsigned IsStream = Op.getConstantOperandVal(5);
6250 unsigned IsData = Op.getConstantOperandVal(6);
6251 unsigned PrfOp = (IsWrite << 4) |
6256 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Chain,
6259 case Intrinsic::aarch64_range_prefetch: {
6263 unsigned IsWrite = Op.getConstantOperandVal(3);
6264 unsigned IsStream = Op.getConstantOperandVal(4);
6265 unsigned PrfOp = (IsStream << 2) | IsWrite;
6268 return DAG.getNode(AArch64ISD::RANGE_PREFETCH, DL, MVT::Other, Chain,
6272 case Intrinsic::aarch64_sme_str:
6273 case Intrinsic::aarch64_sme_ldr: {
6276 case Intrinsic::aarch64_sme_za_enable:
6278 AArch64ISD::SMSTART, DL, DAG.getVTList(MVT::Other, MVT::Glue),
6281 case Intrinsic::aarch64_sme_za_disable:
6283 AArch64ISD::SMSTOP, DL, DAG.getVTList(MVT::Other, MVT::Glue),
6291 unsigned IntNo = Op.getConstantOperandVal(1);
6296 case Intrinsic::aarch64_mops_memset_tag: {
6303 auto Alignment = Node->getMemOperand()->getAlign();
6304 bool IsVol = Node->isVolatile();
6305 auto DstPtrInfo = Node->getPointerInfo();
6309 SDValue MS = SDI.EmitMOPS(AArch64::MOPSMemorySetTaggingPseudo, DAG, DL,
6310 Chain, Dst, Val, Size, Alignment, IsVol,
6311 DstPtrInfo, MachinePointerInfo{});
6324 unsigned IntNo = Op.getConstantOperandVal(0);
6328 case Intrinsic::thread_pointer: {
6330 return DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
6332 case Intrinsic::aarch64_sve_whilewr_b:
6334 Op.getOperand(1), Op.getOperand(2),
6337 case Intrinsic::aarch64_sve_whilewr_h:
6339 Op.getOperand(1), Op.getOperand(2),
6342 case Intrinsic::aarch64_sve_whilewr_s:
6344 Op.getOperand(1), Op.getOperand(2),
6347 case Intrinsic::aarch64_sve_whilewr_d:
6349 Op.getOperand(1), Op.getOperand(2),
6352 case Intrinsic::aarch64_sve_whilerw_b:
6354 Op.getOperand(1), Op.getOperand(2),
6357 case Intrinsic::aarch64_sve_whilerw_h:
6359 Op.getOperand(1), Op.getOperand(2),
6362 case Intrinsic::aarch64_sve_whilerw_s:
6364 Op.getOperand(1), Op.getOperand(2),
6367 case Intrinsic::aarch64_sve_whilerw_d:
6369 Op.getOperand(1), Op.getOperand(2),
6372 case Intrinsic::aarch64_neon_abs: {
6373 EVT Ty = Op.getValueType();
6374 if (Ty == MVT::i64) {
6385 case Intrinsic::aarch64_neon_pmull64: {
6389 std::optional<uint64_t> LHSLane =
6391 std::optional<uint64_t> RHSLane =
6394 assert((!LHSLane || *LHSLane < 2) && "Expect lane to be None or 0 or 1");
6395 assert((!RHSLane || *RHSLane < 2) && "Expect lane to be None or 0 or 1");
6401 auto TryVectorizeOperand = [](SDValue N, std::optional<uint64_t> NLane,
6402 std::optional<uint64_t> OtherLane,
6404 SelectionDAG &DAG) -> SDValue {
6413 if (OtherLane == 1) {
6422 DAG.getNode(AArch64ISD::DUPLANE64, DL, MVT::v2i64,
6428 return DAG.getNode(AArch64ISD::DUP, DL, MVT::v1i64, N);
6433 assert(N.getValueType() == MVT::i64 &&
6434 "Intrinsic aarch64_neon_pmull64 requires i64 parameters");
6438 LHS = TryVectorizeOperand(LHS, LHSLane, RHSLane, DL, DAG);
6439 RHS = TryVectorizeOperand(RHS, RHSLane, LHSLane, DL, DAG);
6443 case Intrinsic::aarch64_neon_smax:
6446 case Intrinsic::aarch64_neon_umax:
6449 case Intrinsic::aarch64_neon_smin:
6452 case Intrinsic::aarch64_neon_umin:
6455 case Intrinsic::aarch64_neon_scalar_sqxtn:
6456 case Intrinsic::aarch64_neon_scalar_sqxtun:
6457 case Intrinsic::aarch64_neon_scalar_uqxtn: {
6458 assert(Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::f32);
6459 if (Op.getValueType() == MVT::i32)
6464 Op.getOperand(1))));
6467 case Intrinsic::aarch64_neon_sqxtn:
6470 case Intrinsic::aarch64_neon_sqxtun:
6473 case Intrinsic::aarch64_neon_uqxtn:
6476 case Intrinsic::aarch64_neon_sqshrn:
6477 if (Op.getValueType().isVector())
6480 Op.getOperand(1).getValueType(),
6481 Op.getOperand(1), Op.getOperand(2)));
6484 case Intrinsic::aarch64_neon_sqshrun:
6485 if (Op.getValueType().isVector())
6488 Op.getOperand(1).getValueType(),
6489 Op.getOperand(1), Op.getOperand(2)));
6492 case Intrinsic::aarch64_neon_uqshrn:
6493 if (Op.getValueType().isVector())
6496 Op.getOperand(1).getValueType(),
6497 Op.getOperand(1), Op.getOperand(2)));
6500 case Intrinsic::aarch64_neon_sqrshrn:
6501 if (Op.getValueType().isVector())
6504 Op.getOperand(1).getValueType(),
6505 Op.getOperand(1), Op.getOperand(2)));
6508 case Intrinsic::aarch64_neon_sqrshrun:
6509 if (Op.getValueType().isVector())
6512 Op.getOperand(1).getValueType(),
6513 Op.getOperand(1), Op.getOperand(2)));
6516 case Intrinsic::aarch64_neon_uqrshrn:
6517 if (Op.getValueType().isVector())
6520 Op.getOperand(1).getValueType(),
6521 Op.getOperand(1), Op.getOperand(2)));
6524 case Intrinsic::aarch64_neon_sqdmulh:
6526 case Intrinsic::aarch64_neon_sqrdmulh:
6528 case Intrinsic::aarch64_neon_sqrdmlah:
6530 case Intrinsic::aarch64_neon_sqrdmlsh:
6532 case Intrinsic::aarch64_neon_sqrshl:
6534 case Intrinsic::aarch64_neon_sqshl:
6536 case Intrinsic::aarch64_neon_uqrshl:
6538 case Intrinsic::aarch64_neon_uqshl:
6540 case Intrinsic::aarch64_neon_sqadd:
6541 if (Op.getValueType().isVector())
6546 case Intrinsic::aarch64_neon_sqsub:
6547 if (Op.getValueType().isVector())
6552 case Intrinsic::aarch64_neon_uqadd:
6553 if (Op.getValueType().isVector())
6557 case Intrinsic::aarch64_neon_uqsub:
6558 if (Op.getValueType().isVector())
6562 case Intrinsic::aarch64_neon_sqdmulls_scalar:
6564 case Intrinsic::aarch64_sve_whilelt:
6567 case Intrinsic::aarch64_sve_whilels:
6570 case Intrinsic::aarch64_sve_whilele:
6573 case Intrinsic::aarch64_sve_sunpkhi:
6574 return DAG.getNode(AArch64ISD::SUNPKHI, DL, Op.getValueType(),
6576 case Intrinsic::aarch64_sve_sunpklo:
6577 return DAG.getNode(AArch64ISD::SUNPKLO, DL, Op.getValueType(),
6579 case Intrinsic::aarch64_sve_uunpkhi:
6580 return DAG.getNode(AArch64ISD::UUNPKHI, DL, Op.getValueType(),
6582 case Intrinsic::aarch64_sve_uunpklo:
6583 return DAG.getNode(AArch64ISD::UUNPKLO, DL, Op.getValueType(),
6585 case Intrinsic::aarch64_sve_clasta_n:
6586 return DAG.getNode(AArch64ISD::CLASTA_N, DL, Op.getValueType(),
6587 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
6588 case Intrinsic::aarch64_sve_clastb_n:
6589 return DAG.getNode(AArch64ISD::CLASTB_N, DL, Op.getValueType(),
6590 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
6591 case Intrinsic::aarch64_sve_lasta:
6592 return DAG.getNode(AArch64ISD::LASTA, DL, Op.getValueType(),
6593 Op.getOperand(1), Op.getOperand(2));
6594 case Intrinsic::aarch64_sve_lastb:
6595 return DAG.getNode(AArch64ISD::LASTB, DL, Op.getValueType(),
6596 Op.getOperand(1), Op.getOperand(2));
6597 case Intrinsic::aarch64_sve_tbl:
6598 return DAG.getNode(AArch64ISD::TBL, DL, Op.getValueType(), Op.getOperand(1),
6600 case Intrinsic::aarch64_sve_trn1:
6601 return DAG.getNode(AArch64ISD::TRN1, DL, Op.getValueType(),
6602 Op.getOperand(1), Op.getOperand(2));
6603 case Intrinsic::aarch64_sve_trn2:
6604 return DAG.getNode(AArch64ISD::TRN2, DL, Op.getValueType(),
6605 Op.getOperand(1), Op.getOperand(2));
6606 case Intrinsic::aarch64_sve_uzp1:
6607 return DAG.getNode(AArch64ISD::UZP1, DL, Op.getValueType(),
6608 Op.getOperand(1), Op.getOperand(2));
6609 case Intrinsic::aarch64_sve_uzp2:
6610 return DAG.getNode(AArch64ISD::UZP2, DL, Op.getValueType(),
6611 Op.getOperand(1), Op.getOperand(2));
6612 case Intrinsic::aarch64_sve_zip1:
6613 return DAG.getNode(AArch64ISD::ZIP1, DL, Op.getValueType(),
6614 Op.getOperand(1), Op.getOperand(2));
6615 case Intrinsic::aarch64_sve_zip2:
6616 return DAG.getNode(AArch64ISD::ZIP2, DL, Op.getValueType(),
6617 Op.getOperand(1), Op.getOperand(2));
6618 case Intrinsic::aarch64_sve_splice:
6619 return DAG.getNode(AArch64ISD::SPLICE, DL, Op.getValueType(),
6620 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
6621 case Intrinsic::aarch64_sve_ptrue:
6622 return getPTrue(DAG, DL, Op.getValueType(), Op.getConstantOperandVal(1));
6623 case Intrinsic::aarch64_sve_clz:
6624 return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, DL, Op.getValueType(),
6625 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6626 case Intrinsic::aarch64_sme_cntsd: {
6632 case Intrinsic::aarch64_sve_cnt: {
6635 if (Data.getValueType().isFloatingPoint())
6637 return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, DL, Op.getValueType(),
6638 Op.getOperand(2), Data, Op.getOperand(1));
6640 case Intrinsic::aarch64_sve_dupq_lane:
6641 return LowerDUPQLane(Op, DAG);
6642 case Intrinsic::aarch64_sve_convert_from_svbool:
6643 if (Op.getValueType() == MVT::aarch64svcount)
6646 case Intrinsic::aarch64_sve_convert_to_svbool:
6647 if (Op.getOperand(1).getValueType() == MVT::aarch64svcount)
6650 case Intrinsic::aarch64_sve_fneg:
6651 return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, DL, Op.getValueType(),
6652 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6653 case Intrinsic::aarch64_sve_frintp:
6654 return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, DL, Op.getValueType(),
6655 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6656 case Intrinsic::aarch64_sve_frintm:
6657 return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, DL, Op.getValueType(),
6658 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6659 case Intrinsic::aarch64_sve_frinti:
6660 return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, DL,
6661 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
6663 case Intrinsic::aarch64_sve_frintx:
6664 return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, DL, Op.getValueType(),
6665 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6666 case Intrinsic::aarch64_sve_frint32x:
6667 return DAG.getNode(AArch64ISD::FRINT32_MERGE_PASSTHRU, DL,
6668 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
6670 case Intrinsic::aarch64_sve_frint64x:
6671 return DAG.getNode(AArch64ISD::FRINT64_MERGE_PASSTHRU, DL,
6672 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
6674 case Intrinsic::aarch64_sve_frinta:
6675 return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, DL, Op.getValueType(),
6676 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6677 case Intrinsic::aarch64_sve_frintn:
6678 return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, DL,
6679 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
6681 case Intrinsic::aarch64_sve_frintz:
6682 return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, DL, Op.getValueType(),
6683 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6684 case Intrinsic::aarch64_sve_frint32z:
6685 return DAG.getNode(AArch64ISD::FTRUNC32_MERGE_PASSTHRU, DL,
6686 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
6688 case Intrinsic::aarch64_sve_frint64z:
6689 return DAG.getNode(AArch64ISD::FTRUNC64_MERGE_PASSTHRU, DL,
6690 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
6692 case Intrinsic::aarch64_sve_ucvtf:
6693 return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, DL,
6694 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
6696 case Intrinsic::aarch64_sve_scvtf:
6697 return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, DL,
6698 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
6700 case Intrinsic::aarch64_sve_fcvtzu:
6701 return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, DL, Op.getValueType(),
6702 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6703 case Intrinsic::aarch64_sve_fcvtzs:
6704 return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, DL, Op.getValueType(),
6705 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6706 case Intrinsic::aarch64_sve_fsqrt:
6707 return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, DL, Op.getValueType(),
6708 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6709 case Intrinsic::aarch64_sve_frecpx:
6710 return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, DL, Op.getValueType(),
6711 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6712 case Intrinsic::aarch64_sve_frecpe_x:
6713 return DAG.getNode(AArch64ISD::FRECPE, DL, Op.getValueType(),
6715 case Intrinsic::aarch64_sve_frecps_x:
6716 return DAG.getNode(AArch64ISD::FRECPS, DL, Op.getValueType(),
6717 Op.getOperand(1), Op.getOperand(2));
6718 case Intrinsic::aarch64_sve_frsqrte_x:
6719 return DAG.getNode(AArch64ISD::FRSQRTE, DL, Op.getValueType(),
6721 case Intrinsic::aarch64_sve_frsqrts_x:
6722 return DAG.getNode(AArch64ISD::FRSQRTS, DL, Op.getValueType(),
6723 Op.getOperand(1), Op.getOperand(2));
6724 case Intrinsic::aarch64_sve_fabs:
6725 return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, DL, Op.getValueType(),
6726 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6727 case Intrinsic::aarch64_sve_abs:
6728 return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, DL, Op.getValueType(),
6729 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6730 case Intrinsic::aarch64_sve_neg:
6731 return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, DL, Op.getValueType(),
6732 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6733 case Intrinsic::aarch64_sve_insr: {
6735 EVT ScalarTy = Scalar.getValueType();
6736 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
6739 return DAG.getNode(AArch64ISD::INSR, DL, Op.getValueType(),
6740 Op.getOperand(1), Scalar);
6742 case Intrinsic::aarch64_sve_rbit:
6743 return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, DL,
6744 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
6746 case Intrinsic::aarch64_sve_revb:
6747 return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, DL, Op.getValueType(),
6748 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6749 case Intrinsic::aarch64_sve_revh:
6750 return DAG.getNode(AArch64ISD::REVH_MERGE_PASSTHRU, DL, Op.getValueType(),
6751 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6752 case Intrinsic::aarch64_sve_revw:
6753 return DAG.getNode(AArch64ISD::REVW_MERGE_PASSTHRU, DL, Op.getValueType(),
6754 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6755 case Intrinsic::aarch64_sve_revd:
6756 return DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, DL, Op.getValueType(),
6757 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
6758 case Intrinsic::aarch64_sve_sxtb:
6760 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
6761 Op.getOperand(2), Op.getOperand(3),
6765 case Intrinsic::aarch64_sve_sxth:
6767 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
6768 Op.getOperand(2), Op.getOperand(3),
6772 case Intrinsic::aarch64_sve_sxtw:
6774 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
6775 Op.getOperand(2), Op.getOperand(3),
6779 case Intrinsic::aarch64_sve_uxtb:
6781 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
6782 Op.getOperand(2), Op.getOperand(3),
6786 case Intrinsic::aarch64_sve_uxth:
6788 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
6789 Op.getOperand(2), Op.getOperand(3),
6793 case Intrinsic::aarch64_sve_uxtw:
6795 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
6796 Op.getOperand(2), Op.getOperand(3),
6800 case Intrinsic::localaddress: {
6802 const auto *RegInfo = Subtarget->getRegisterInfo();
6803 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
6805 Op.getSimpleValueType());
6808 case Intrinsic::eh_recoverfp: {
6813 SDValue IncomingFPOp = Op.getOperand(2);
6818 "llvm.eh.recoverfp must take a function as the first argument");
6819 return IncomingFPOp;
  case Intrinsic::aarch64_neon_vsri:
  case Intrinsic::aarch64_neon_vsli:
  case Intrinsic::aarch64_sve_sri:
  case Intrinsic::aarch64_sve_sli: {
    EVT Ty = Op.getValueType();
    bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri ||
                        IntNo == Intrinsic::aarch64_sve_sri;
    unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
    return DAG.getNode(Opcode, DL, Ty, Op.getOperand(1), Op.getOperand(2),
                       Op.getOperand(3));
  }
  case Intrinsic::aarch64_neon_srhadd:
  case Intrinsic::aarch64_neon_urhadd:
  case Intrinsic::aarch64_neon_shadd:
  case Intrinsic::aarch64_neon_uhadd: {
    bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
                        IntNo == Intrinsic::aarch64_neon_shadd);
    bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
                          IntNo == Intrinsic::aarch64_neon_urhadd);
    unsigned Opcode = IsSignedAdd
                          ? (IsRoundingAdd ? ISD::AVGCEILS : ISD::AVGFLOORS)
                          : (IsRoundingAdd ? ISD::AVGCEILU : ISD::AVGFLOORU);
    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1),
                       Op.getOperand(2));
  }
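  // srhadd/urhadd are rounding halving adds and map to the AVGCEIL nodes;
  // shadd/uhadd truncate and map to AVGFLOOR. Signedness only selects the
  // S or U flavour.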
  case Intrinsic::aarch64_neon_saddlp:
  case Intrinsic::aarch64_neon_uaddlp: {
    unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
                          ? AArch64ISD::UADDLP
                          : AArch64ISD::SADDLP;
    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1));
  }
  case Intrinsic::aarch64_neon_sdot:
  case Intrinsic::aarch64_neon_udot:
  case Intrinsic::aarch64_sve_sdot:
  case Intrinsic::aarch64_sve_udot: {
    unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
                       IntNo == Intrinsic::aarch64_sve_udot)
                          ? AArch64ISD::UDOT
                          : AArch64ISD::SDOT;
    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1),
                       Op.getOperand(2), Op.getOperand(3));
  }
  case Intrinsic::aarch64_neon_usdot:
  case Intrinsic::aarch64_sve_usdot: {
    return DAG.getNode(AArch64ISD::USDOT, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  }
  case Intrinsic::aarch64_neon_saddlv:
  case Intrinsic::aarch64_neon_uaddlv: {
    EVT OpVT = Op.getOperand(1).getValueType();
    EVT ResVT = Op.getValueType();
    assert(
        ((ResVT == MVT::i32 && (OpVT == MVT::v8i8 || OpVT == MVT::v16i8 ||
                                OpVT == MVT::v8i16 || OpVT == MVT::v4i16)) ||
         (ResVT == MVT::i64 && (OpVT == MVT::v4i32 || OpVT == MVT::v2i32))) &&
        "Unexpected aarch64_neon_u/saddlv type");
    SDValue ADDLV = DAG.getNode(
        IntNo == Intrinsic::aarch64_neon_uaddlv ? AArch64ISD::UADDLV
                                                : AArch64ISD::SADDLV,
        DL, ResVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64, Op.getOperand(1));
    SDValue EXTRACT_VEC_ELT =
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, ADDLV,
                    DAG.getConstant(0, DL, MVT::i64));
    return EXTRACT_VEC_ELT;
  }
  case Intrinsic::experimental_cttz_elts: {
    SDValue NewCttzElts =
        DAG.getNode(AArch64ISD::CTTZ_ELTS, DL, MVT::i64, CttzOp);
6913 case Intrinsic::experimental_vector_match: {
6916 case Intrinsic::aarch64_cls:
6917 case Intrinsic::aarch64_cls64:
6924bool AArch64TargetLowering::shouldExtendGSIndex(
EVT VT,
EVT &EltTy)
const {
6933bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(
SDValue Extend,
6954 if (LD->isVolatile())
6957 EVT MemVT = LD->getMemoryVT();
6958 if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8 && MemVT != MVT::v2i16)
6961 Align Alignment = LD->getAlign();
6963 if (Subtarget.requiresStrictAlign() && Alignment < RequiredAlignment)
6969bool AArch64TargetLowering::isVectorLoadExtDesirable(
SDValue ExtVal)
const {
6977 if (!ExtVT.
isScalableVector() && !Subtarget->useSVEForFixedLengthVectors())
6992 unsigned NumExtMaskedLoads = 0;
6993 for (
auto *U : Ld->getMask()->users())
6995 NumExtMaskedLoads++;
6997 if (NumExtMaskedLoads <= 1)
7003 return PreExtScalarVT == MVT::i8 || PreExtScalarVT == MVT::i16 ||
7004 PreExtScalarVT == MVT::i32 || PreExtScalarVT == MVT::i64;
  std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
      {std::make_tuple(false, false, false),
       AArch64ISD::GLD1_MERGE_ZERO},
      {std::make_tuple(false, false, true),
       AArch64ISD::GLD1_UXTW_MERGE_ZERO},
      {std::make_tuple(false, true, false),
       AArch64ISD::GLD1_MERGE_ZERO},
      {std::make_tuple(false, true, true),
       AArch64ISD::GLD1_SXTW_MERGE_ZERO},
      {std::make_tuple(true, false, false),
       AArch64ISD::GLD1_SCALED_MERGE_ZERO},
      {std::make_tuple(true, false, true),
       AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
      {std::make_tuple(true, true, false),
       AArch64ISD::GLD1_SCALED_MERGE_ZERO},
      {std::make_tuple(true, true, true),
       AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
  };
  auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
  return AddrModes.find(Key)->second;
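  // In the table above the key is (IsScaled, IsSigned, NeedsExtend).
  // Signedness of the index only matters when a 32-bit index actually needs
  // extending, which is why the signed no-extend entries alias the unsigned
  // ones.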
  case AArch64ISD::GLD1_MERGE_ZERO:
    return AArch64ISD::GLD1S_MERGE_ZERO;
  case AArch64ISD::GLD1_IMM_MERGE_ZERO:
    return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
  case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
    return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
  case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
    return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
  case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
    return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
  case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
    return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
  case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
    return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
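  // Every zero-extending gather opcode has a sign-extending GLD1S twin;
  // swapping to it lets a following sign_extend be folded into the gather.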
7063 EVT VT =
Op.getValueType();
7087 EVT IndexVT =
Index.getValueType();
7099 assert(Subtarget->useSVEForFixedLengthVectors() &&
7100 "Cannot lower when not using SVE for fixed vectors!");
7109 Index.getValueType().getVectorElementType() == MVT::i64 ||
7110 Mask.getValueType().getVectorElementType() == MVT::i64)
7176 EVT IndexVT =
Index.getValueType();
7188 assert(Subtarget->useSVEForFixedLengthVectors() &&
7189 "Cannot lower when not using SVE for fixed vectors!");
7201 Index.getValueType().getVectorElementType() == MVT::i64 ||
7202 Mask.getValueType().getVectorElementType() == MVT::i64)
7212 if (PromotedVT != VT)
7237 assert(LoadNode &&
"Expected custom lowering of a masked load node");
7238 EVT VT =
Op->getValueType(0);
7241 return LowerFixedLengthVectorMLoadToSVE(
Op, DAG);
7265 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
7278 {Undef, Undef, Undef, Undef});
7288 return DAG.
getStore(ST->getChain(),
DL, ExtractTrunc,
7289 ST->getBasePtr(), ST->getMemOperand());
7295 MVT DestVT =
Op.getSimpleValueType();
7299 unsigned SrcAS =
N->getSrcAddressSpace();
7300 unsigned DestAS =
N->getDestAddressSpace();
7301 assert(SrcAS != DestAS &&
7302 "addrspacecast must be between different address spaces");
7305 "addrspacecast must be between different ptr sizes");
7327 assert(StoreNode &&
"Expected a store operation");
7360 {StoreNode->getChain(), DAG.getBitcast(MVT::v2i64, Lo),
7361 DAG.getBitcast(MVT::v2i64, Hi), StoreNode->getBasePtr()},
7376 assert (StoreNode &&
"Can only custom lower store nodes");
7380 EVT VT =
Value.getValueType();
7384 if (
auto MaybeSTNP =
LowerNTStore(StoreNode, VT, MemVT, Dl, DAG))
7391 Subtarget->useSVEForFixedLengthVectors()))
7392 return LowerFixedLengthVectorStoreToSVE(
Op, DAG);
7404 MemVT == MVT::v4i8) {
7407 }
else if (MemVT == MVT::i128 && StoreNode->
isVolatile()) {
7408 return LowerStore128(
Op, DAG);
7409 }
else if (MemVT == MVT::i64x8) {
7414 EVT PtrVT =
Base.getValueType();
7415 for (
unsigned i = 0; i < 8; i++) {
7436 bool IsStoreRelease =
7439 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
7440 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
7450 unsigned Opcode = IsStoreRelease ? AArch64ISD::STILP : AArch64ISD::STP;
7452 std::swap(StoreValue.first, StoreValue.second);
7455 {StoreNode->getChain(), StoreValue.first, StoreValue.second,
7456 StoreNode->getBasePtr()},
7468 EVT MemVT = Load->getMemoryVT();
7469 EVT ResVT = Load->getValueType(0);
7475 switch (Load->getExtensionType()) {
7488 SDValue Chain = Load->getChain();
7489 SDValue BasePtr = Load->getBasePtr();
7491 Align Alignment = Load->getAlign();
7497 DAG.
getLoad(ScalarLoadType,
DL, Chain, BasePtr, PtrInfo, Alignment);
7509 while (CurrentEltBits < DstEltBits) {
7511 CurrentNumElts = CurrentNumElts / 2;
7517 CurrentEltBits = CurrentEltBits * 2;
7520 Res = DAG.
getNode(ExtOpcode,
DL, ExtVT, Res);
7523 if (CurrentNumElts != NumElts) {
7536 assert(LoadNode &&
"Expected custom lowering of a load node");
7545 EVT PtrVT =
Base.getValueType();
7546 for (
unsigned i = 0; i < 8; i++) {
7552 Ops.push_back(Part);
7562SDValue AArch64TargetLowering::LowerFixedLengthVectorCompressToSVE(
7565 EVT VT =
Op.getValueType();
7580 EVT VT =
Op.getValueType();
7581 if (!Subtarget->isSVEAvailable())
7585 return LowerFixedLengthVectorCompressToSVE(
Op, DAG);
7591 EVT MaskVT =
Mask.getValueType();
7618 MVT VT =
Op.getSimpleValueType();
7621 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
7629 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT,
Op.getOperand(0), Neg,
7642 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
7657 MVT VT =
Op.getSimpleValueType();
7661 if (NewShiftNo == 0)
7662 return Op.getOperand(0);
7671 if (NewShiftNo == 0)
7672 return Op.getOperand(1);
7674 if (ShiftNo->getZExtValue() == NewShiftNo)
7689 EVT XScalarTy =
X.getValueType();
7694 switch (
Op.getSimpleValueType().SimpleTy) {
7703 ExpVT = MVT::nxv4i32;
7707 ExpVT = MVT::nxv2i64;
7725 if (
X.getValueType() != XScalarTy)
7733 return Op.getOperand(0);
7768 const char FptrReg = 0x11;
7774 Chain,
DL, DAG.
getConstant(0x58000080u | NestReg,
DL, MVT::i32), Addr,
7775 MachinePointerInfo(TrmpAddr));
7780 Chain,
DL, DAG.
getConstant(0x580000b0u | FptrReg,
DL, MVT::i32), Addr,
7781 MachinePointerInfo(TrmpAddr, 4));
7787 MachinePointerInfo(TrmpAddr, 8));
7792 DAG.
getStore(Chain,
DL, Nest, Addr, MachinePointerInfo(TrmpAddr, 16));
7797 DAG.
getStore(Chain,
DL, FPtr, Addr, MachinePointerInfo(TrmpAddr, 24));
7811 EVT VT =
Op.getValueType();
7813 (Subtarget->hasSVEB16B16() &&
7814 Subtarget->isNonStreamingSVEorSME2Available()))
7815 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMUL_PRED);
7817 assert(Subtarget->hasBF16() &&
"Expected +bf16 for custom FMUL lowering");
7818 assert((VT == MVT::nxv4bf16 || VT == MVT::nxv8bf16 || VT == MVT::v8bf16) &&
7819 "Unexpected FMUL VT");
7822 return [&, IID](EVT VT,
auto...
Ops) {
7829 EVT SrcVT =
Value.getValueType();
7840 auto FCVT = MakeGetIntrinsic(Intrinsic::aarch64_sve_fcvt_bf16f32_v2);
7841 auto FCVTNT = MakeGetIntrinsic(Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2);
7846 MakeGetIntrinsic(UseSVEBFMLAL ? Intrinsic::aarch64_sve_bfmlalb
7847 : Intrinsic::aarch64_neon_bfmlalb);
7849 MakeGetIntrinsic(UseSVEBFMLAL ? Intrinsic::aarch64_sve_bfmlalt
7850 : Intrinsic::aarch64_neon_bfmlalt);
7852 EVT AccVT = UseSVEBFMLAL ? MVT::nxv4f32 : MVT::v4f32;
7853 bool IgnoreZeroSign =
  LHS = Reinterpret(LHS, MVT::nxv8bf16);
  RHS = Reinterpret(RHS, MVT::nxv8bf16);

  SDValue BottomF32 =
      Reinterpret(BFMLALB(AccVT, Zero, LHS, RHS), MVT::nxv4f32);
  SDValue BottomBF16 =
      FCVT(MVT::nxv8bf16, DAG.getPOISON(MVT::nxv8bf16), Pg, BottomF32);

  if (VT == MVT::nxv4bf16)
    return Reinterpret(BottomBF16, VT);

  SDValue TopF32 =
      Reinterpret(BFMLALT(AccVT, Zero, LHS, RHS), MVT::nxv4f32);
  SDValue TopBF16 = FCVTNT(MVT::nxv8bf16, BottomBF16, Pg, TopF32);
  return Reinterpret(TopBF16, VT);
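  // Summary of the bf16 FMUL expansion above: the even and odd bf16 lanes are
  // multiplied through f32 using BFMLALB/BFMLALT against a zero accumulator,
  // then narrowed back to bf16 with FCVT (bottom half) and FCVTNT (top half).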
7886 EVT VT =
Op.getValueType();
7889 assert(VT.
isVector() &&
"Scalar fma lowering should be handled by patterns");
7892 if (VT != MVT::v8f16 && VT != MVT::v4f32 && VT != MVT::v2f64)
7893 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMA_PRED);
7897 ? LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMA_PRED)
7907 auto ConvertToScalableFnegMt = [&](
SDValue Op) {
7909 Op = LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
7913 OpA = ConvertToScalableFnegMt(OpA);
7914 OpB = ConvertToScalableFnegMt(OpB);
7915 OpC = ConvertToScalableFnegMt(OpC);
7918 DAG.
getNode(AArch64ISD::FMA_PRED,
DL, ContainerVT, Pg, OpA, OpB, OpC);
7927 switch (
Op.getOpcode()) {
7933 return LowerLOOP_DEPENDENCE_MASK(
Op, DAG);
7935 return LowerBITCAST(
Op, DAG);
7937 return LowerGlobalAddress(
Op, DAG);
7939 return LowerGlobalTLSAddress(
Op, DAG);
7941 return LowerPtrAuthGlobalAddress(
Op, DAG);
7943 return LowerADJUST_TRAMPOLINE(
Op, DAG);
7945 return LowerINIT_TRAMPOLINE(
Op, DAG);
7949 return LowerSETCC(
Op, DAG);
7951 return LowerSETCCCARRY(
Op, DAG);
7955 return LowerBR_CC(
Op, DAG);
7957 return LowerSELECT(
Op, DAG);
7959 return LowerSELECT_CC(
Op, DAG);
7961 return LowerJumpTable(
Op, DAG);
7963 return LowerBR_JT(
Op, DAG);
7965 return LowerBRIND(
Op, DAG);
7967 return LowerConstantPool(
Op, DAG);
7969 return LowerBlockAddress(
Op, DAG);
7971 return LowerVASTART(
Op, DAG);
7973 return LowerVACOPY(
Op, DAG);
7975 return LowerVAARG(
Op, DAG);
7992 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FADD_PRED);
7994 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FSUB_PRED);
7996 return LowerFMUL(
Op, DAG);
7998 return LowerFMA(
Op, DAG);
8000 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FDIV_PRED);
8002 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
8004 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
8006 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
8008 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
8010 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
8012 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
8014 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
8016 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
8018 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
8020 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
8023 return LowerFP_ROUND(
Op, DAG);
8026 return LowerFP_EXTEND(
Op, DAG);
8028 return LowerFRAMEADDR(
Op, DAG);
8030 return LowerSPONENTRY(
Op, DAG);
8032 return LowerRETURNADDR(
Op, DAG);
8034 return LowerADDROFRETURNADDR(
Op, DAG);
8036 return LowerCONCAT_VECTORS(
Op, DAG);
8038 return LowerINSERT_VECTOR_ELT(
Op, DAG);
8040 return LowerEXTRACT_VECTOR_ELT(
Op, DAG);
8042 return LowerBUILD_VECTOR(
Op, DAG);
8045 return LowerEXTEND_VECTOR_INREG(
Op, DAG);
8047 return LowerZERO_EXTEND_VECTOR_INREG(
Op, DAG);
8049 return LowerVECTOR_SHUFFLE(
Op, DAG);
8051 return LowerSPLAT_VECTOR(
Op, DAG);
8053 return LowerEXTRACT_SUBVECTOR(
Op, DAG);
8055 return LowerINSERT_SUBVECTOR(
Op, DAG);
8058 return LowerDIV(
Op, DAG);
8063 return LowerMinMax(
Op, DAG);
8067 return LowerVectorSRA_SRL_SHL(
Op, DAG);
8071 return LowerShiftParts(
Op, DAG);
8074 return LowerCTPOP_PARITY(
Op, DAG);
8076 return LowerFCOPYSIGN(
Op, DAG);
8078 return LowerVectorOR(
Op, DAG);
8080 return LowerXOR(
Op, DAG);
8087 return LowerINT_TO_FP(
Op, DAG);
8092 return LowerFP_TO_INT(
Op, DAG);
8095 return LowerFP_TO_INT_SAT(
Op, DAG);
8097 return LowerGET_ROUNDING(
Op, DAG);
8099 return LowerSET_ROUNDING(
Op, DAG);
8101 return LowerGET_FPMODE(
Op, DAG);
8103 return LowerSET_FPMODE(
Op, DAG);
8105 return LowerRESET_FPMODE(
Op, DAG);
8107 return LowerMUL(
Op, DAG);
8109 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MULHS_PRED);
8111 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MULHU_PRED);
8113 return LowerINTRINSIC_W_CHAIN(
Op, DAG);
8115 return LowerINTRINSIC_WO_CHAIN(
Op, DAG);
8117 return LowerINTRINSIC_VOID(
Op, DAG);
8120 assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
8121 return LowerStore128(
Op, DAG);
8125 return LowerSTORE(
Op, DAG);
8127 return LowerMSTORE(
Op, DAG);
8129 return LowerMGATHER(
Op, DAG);
8131 return LowerMSCATTER(
Op, DAG);
8133 return LowerVECREDUCE_SEQ_FADD(
Op, DAG);
8147 return LowerVECREDUCE(
Op, DAG);
8150 return LowerVECREDUCE_MUL(
Op, DAG);
8152 return LowerATOMIC_LOAD_AND(
Op, DAG);
8154 return LowerDYNAMIC_STACKALLOC(
Op, DAG);
8156 return LowerVSCALE(
Op, DAG);
8158 return LowerVECTOR_COMPRESS(
Op, DAG);
8162 return LowerFixedLengthVectorIntExtendToSVE(
Op, DAG);
8169 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
8170 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
8173 return LowerToPredicatedOp(
Op, DAG,
8174 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
8177 return LowerTRUNCATE(
Op, DAG);
8179 return LowerMLOAD(
Op, DAG);
8182 !Subtarget->isNeonAvailable()))
8183 return LowerFixedLengthVectorLoadToSVE(
Op, DAG);
8184 return LowerLOAD(
Op, DAG);
8188 return LowerToScalableOp(
Op, DAG);
8190 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMAX_PRED);
8192 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMAXNM_PRED);
8194 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMIN_PRED);
8196 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMINNM_PRED);
8198 return LowerFixedLengthVectorSelectToSVE(
Op, DAG);
8200 return LowerABS(
Op, DAG);
  case ISD::ABDS:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
  case ISD::ABDU:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
  case ISD::AVGFLOORS:
    return LowerAVG(Op, DAG, AArch64ISD::HADDS_PRED);
  case ISD::AVGFLOORU:
    return LowerAVG(Op, DAG, AArch64ISD::HADDU_PRED);
  case ISD::AVGCEILS:
    return LowerAVG(Op, DAG, AArch64ISD::RHADDS_PRED);
  case ISD::AVGCEILU:
    return LowerAVG(Op, DAG, AArch64ISD::RHADDU_PRED);
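  // When lowered via SVE predication, AVGFLOOR becomes the halving add
  // (HADD) and AVGCEIL the rounding halving add (RHADD).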
8214 return LowerBitreverse(
Op, DAG);
8216 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
8218 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU);
8220 return LowerCTTZ(
Op, DAG);
8223 return LowerVECTOR_SPLICE(
Op, DAG);
8225 return LowerVECTOR_DEINTERLEAVE(
Op, DAG);
8227 return LowerVECTOR_INTERLEAVE(
Op, DAG);
8229 return LowerGET_ACTIVE_LANE_MASK(
Op, DAG);
    if (Op.getValueType().isVector())
      return LowerVectorXRINT(Op, DAG);

    assert((Op.getOperand(0).getValueType() == MVT::f16 ||
            Op.getOperand(0).getValueType() == MVT::bf16) &&
           "Expected custom lowering of rounding operations only for f16");
    SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
    return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);

    assert((Op.getOperand(1).getValueType() == MVT::f16 ||
            Op.getOperand(1).getValueType() == MVT::bf16) &&
           "Expected custom lowering of rounding operations only for f16");
    SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL,
                              {MVT::f32, MVT::Other},
                              {Op.getOperand(0), Op.getOperand(1)});
    return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
                       {Ext.getValue(1), Ext.getValue(0)});
8258 assert(
Op.getOperand(2).getValueType() == MVT::i128 &&
8259 "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
8264 std::pair<SDValue, SDValue> Pair =
8269 SysRegName, Pair.first, Pair.second);
8279 return LowerVECTOR_HISTOGRAM(
Op, DAG);
8284 return LowerPARTIAL_REDUCE_MLA(
Op, DAG);
8289 return !Subtarget->useSVEForFixedLengthVectors();
8293 EVT VT,
bool OverrideNEON)
const {
8316 return Subtarget->isSVEorStreamingSVEAvailable();
8323 if (!Subtarget->useSVEForFixedLengthVectors())
8343 unsigned Opcode =
N->getOpcode();
8348 unsigned IID =
N->getConstantOperandVal(0);
8349 if (IID < Intrinsic::num_intrinsics)
8363 if (IID == Intrinsic::aarch64_neon_umull ||
8365 IID == Intrinsic::aarch64_neon_smull ||
8374 bool IsVarArg)
const {
8397 if (Subtarget->isTargetWindows()) {
8399 if (Subtarget->isWindowsArm64EC())
8405 if (!Subtarget->isTargetDarwin())
8413 if (Subtarget->isWindowsArm64EC())
8419 if (Subtarget->isWindowsArm64EC())
8443 if (Subtarget->isWindowsArm64EC())
8479 IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE;
8497 RTLIB::Libcall LC = RTLIB::SMEABI_TPIDR2_RESTORE;
8515 Chain = DAG.
getCopyToReg(Chain,
DL, AArch64::X0, TPIDR2Block, Glue);
8517 DAG.
getNode(AArch64ISD::RESTORE_ZA,
DL, MVT::Other,
8518 {Chain, TPIDR2_EL0, DAG.
getRegister(AArch64::X0, MVT::i64),
8519 RestoreRoutine, RegMask, Chain.
getValue(1)});
8535 auto &FuncInfo = *MF.
getInfo<AArch64FunctionInfo>();
8536 auto &Subtarget = DAG.
getSubtarget<AArch64Subtarget>();
8537 const AArch64RegisterInfo &
TRI = *Subtarget.getRegisterInfo();
8539 SMEAttrs SMEFnAttrs = FuncInfo.getSMEFnAttrs();
8565 if (
getTM().useNewSMEABILowering())
8575 AArch64ISD::SMSTART,
DL, DAG.
getVTList(MVT::Other, MVT::Glue), Chain,
8584 {Chain, DAG.getConstant(0, DL, MVT::i32), ZT0FrameIndex});
8595SDValue AArch64TargetLowering::LowerFormalArguments(
8603 Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
8605 (isVarArg && Subtarget->isWindowsArm64EC());
8606 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
8616 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.
getContext());
8624 unsigned NumArgs = Ins.
size();
8626 unsigned CurArgIdx = 0;
8627 bool UseVarArgCC =
false;
8629 UseVarArgCC = isVarArg;
8633 for (
unsigned i = 0; i != NumArgs; ++i) {
8634 MVT ValVT = Ins[i].VT;
8635 if (Ins[i].isOrigArg()) {
8636 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
8637 CurArgIdx = Ins[i].getOrigArgIndex();
8644 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
8646 else if (ActualMVT == MVT::i16)
8650 Ins[i].OrigTy, CCInfo);
8651 assert(!Res &&
"Call operand has unhandled type");
8656 bool IsLocallyStreaming =
8657 !
Attrs.hasStreamingInterface() &&
Attrs.hasStreamingBody();
8661 unsigned ExtraArgLocs = 0;
8662 for (
unsigned i = 0, e = Ins.
size(); i != e; ++i) {
8663 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
8665 if (Ins[i].
Flags.isByVal()) {
8669 int Size = Ins[i].Flags.getByValSize();
8670 unsigned NumRegs = (
Size + 7) / 8;
8682 if (Ins[i].
Flags.isSwiftAsync())
8683 MF.
getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(
true);
8689 const TargetRegisterClass *RC;
8691 if (RegVT == MVT::i32)
8692 RC = &AArch64::GPR32RegClass;
8693 else if (RegVT == MVT::i64)
8694 RC = &AArch64::GPR64RegClass;
8695 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
8696 RC = &AArch64::FPR16RegClass;
8697 else if (RegVT == MVT::f32)
8698 RC = &AArch64::FPR32RegClass;
8700 RC = &AArch64::FPR64RegClass;
8702 RC = &AArch64::FPR128RegClass;
8706 RC = &AArch64::PPRRegClass;
8707 }
else if (RegVT == MVT::aarch64svcount) {
8709 RC = &AArch64::PPRRegClass;
8712 RC = &AArch64::ZPRRegClass;
8719 if (IsLocallyStreaming) {
8734 DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
8753 "Indirect arguments should be scalable on most subtargets");
      uint32_t BEAlign = 0;
      if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
          !Ins[i].Flags.isInConsecutiveRegs())
        BEAlign = 8 - ArgSize;
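      // On big-endian targets a sub-8-byte stack argument is passed in the
      // most significant bytes of its slot, so the load address is bumped by
      // the unused byte count computed above.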
8781 MachinePointerInfo PtrInfo;
8787 unsigned ObjOffset = ArgOffset + BEAlign;
8817 "Indirect arguments should be scalable on most subtargets");
8837 Subtarget->isWindowsArm64EC()) &&
8838 "Indirect arguments should be scalable on most subtargets");
8841 unsigned NumParts = 1;
8842 if (Ins[i].
Flags.isInConsecutiveRegs()) {
8843 while (!Ins[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
8852 while (NumParts > 0) {
8853 ArgValue = DAG.
getLoad(PartLoad,
DL, Chain, Ptr, MachinePointerInfo());
8866 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
8872 if (Ins[i].isOrigArg()) {
8873 Argument *OrigArg =
F.getArg(Ins[i].getOrigArgIndex());
8875 if (!Ins[i].
Flags.isZExt()) {
8876 ArgValue = DAG.
getNode(AArch64ISD::ASSERT_ZEXT_BOOL,
DL,
8887 if (
Attrs.hasStreamingCompatibleInterface()) {
8889 DAG.
getNode(AArch64ISD::ENTRY_PSTATE_SM,
DL,
8890 DAG.
getVTList(MVT::i64, MVT::Other), {Chain});
8902 if (IsLocallyStreaming) {
8903 if (
Attrs.hasStreamingCompatibleInterface())
8912 for (
unsigned I=0;
I<InVals.
size(); ++
I) {
8924 if (!Subtarget->isTargetDarwin() || IsWin64) {
8930 saveVarArgRegisters(CCInfo, DAG,
DL, Chain);
8934 unsigned VarArgsOffset = CCInfo.getStackSize();
8937 alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
8948 SmallVectorImpl<ForwardedRegister> &Forwards =
8950 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
8954 if (!CCInfo.isAllocated(AArch64::X8)) {
8956 Forwards.
push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
8965 for (
unsigned I = 0,
E = Ins.
size();
I !=
E; ++
I) {
8967 Ins[
I].Flags.isInReg()) &&
8968 Ins[
I].Flags.isSRet()) {
8983 unsigned StackArgSize = CCInfo.getStackSize();
8985 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
8989 StackArgSize =
alignTo(StackArgSize, 16);
9003 if (Subtarget->hasCustomCallingConv())
9004 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
9006 if (
getTM().useNewSMEABILowering()) {
9009 if (
Attrs.hasZAState()) {
9013 }
else if (
Attrs.hasAgnosticZAInterface()) {
9014 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
9019 auto *RetTy = EVT(MVT::i64).getTypeForEVT(*DAG.
getContext());
9020 TargetLowering::CallLoweringInfo CLI(DAG);
9021 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
9029 {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
9035 Chain = DAG.
getNode(AArch64ISD::SME_STATE_ALLOC,
DL,
9046 if (
Attrs.hasZAState()) {
9053 Buffer = DAG.
getNode(AArch64ISD::ALLOCATE_ZA_BUFFER,
DL,
9054 DAG.
getVTList(MVT::i64, MVT::Other), {Chain, SVL});
9059 {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
9065 AArch64ISD::INIT_TPIDR2OBJ,
DL, DAG.
getVTList(MVT::Other),
9066 { Buffer.getValue(1), Buffer.getValue(0),
9068 }
else if (
Attrs.hasAgnosticZAInterface()) {
9071 DAG.
getNode(AArch64ISD::GET_SME_SAVE_SIZE,
DL,
9072 DAG.
getVTList(MVT::i64, MVT::Other), Chain);
9076 Buffer = DAG.
getNode(AArch64ISD::ALLOC_SME_SAVE_BUFFER,
DL,
9078 {Chain, BufferSize});
9083 {Chain, BufferSize, DAG.getConstant(1, DL, MVT::i64)});
9095 for (
const ISD::InputArg &
I : Ins) {
9096 if (
I.Flags.isSwiftSelf() ||
I.Flags.isSwiftError() ||
9097 I.Flags.isSwiftAsync()) {
9101 "Swift attributes can't be used with preserve_none",
9111void AArch64TargetLowering::saveVarArgRegisters(
CCState &CCInfo,
9117 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9121 Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
9127 if (Subtarget->isWindowsArm64EC()) {
9134 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
9136 if (GPRSaveSize != 0) {
9139 if (GPRSaveSize & 15)
9146 if (Subtarget->isWindowsArm64EC()) {
9159 for (
unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
9165 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
9175 if (Subtarget->hasFPARMv8() && !IsWin64) {
9177 const unsigned NumFPRArgRegs =
FPRArgRegs.size();
9180 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
9182 if (FPRSaveSize != 0) {
9187 for (
unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
9202 if (!MemOps.
empty()) {
9209SDValue AArch64TargetLowering::LowerCallResult(
9213 SDValue ThisVal,
bool RequiresSMChange)
const {
9214 DenseMap<unsigned, SDValue> CopiedRegs;
9216 for (
unsigned i = 0; i != RVLocs.
size(); ++i) {
9217 CCValAssign VA = RVLocs[i];
9221 if (i == 0 && isThisReturn) {
9223 "unexpected return calling convention register assignment");
9259 Val = DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
9326 unsigned NumArgs = Outs.
size();
9327 for (
unsigned i = 0; i != NumArgs; ++i) {
9328 MVT ArgVT = Outs[i].VT;
9331 bool UseVarArgCC =
false;
9335 if (IsCalleeWin64) {
9349 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
9351 else if (ActualMVT == MVT::i16)
9359 Outs[i].OrigTy, CCInfo);
9360 assert(!Res &&
"Call operand has unhandled type");
9375bool AArch64TargetLowering::isEligibleForTailCallOptimization(
9376 const CallLoweringInfo &CLI)
const {
9382 bool IsVarArg = CLI.IsVarArg;
9386 const SelectionDAG &DAG = CLI.DAG;
9393 SMECallAttrs CallAttrs =
9407 MF.
getInfo<AArch64FunctionInfo>()->isSVECC())
9410 bool CCMatch = CallerCC == CalleeCC;
9425 if (i->hasByValAttr())
9434 if (i->hasInRegAttr()) {
9435 unsigned ArgIdx = i - CallerF.
arg_begin();
9436 if (!CLI.CB || CLI.CB->arg_size() <= ArgIdx)
9438 AttributeSet
Attrs = CLI.CB->getParamAttributes(ArgIdx);
9439 if (!
Attrs.hasAttribute(Attribute::InReg) ||
9440 !
Attrs.hasAttribute(Attribute::StructRet) || !i->hasStructRetAttr() ||
9441 CLI.CB->getArgOperand(ArgIdx) != i) {
9458 const GlobalValue *GV =
G->getGlobal();
9461 (!
TT.isOSWindows() ||
TT.isOSBinFormatELF() ||
TT.isOSBinFormatMachO()))
9481 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
9482 const uint32_t *CallerPreserved =
TRI->getCallPreservedMask(MF, CallerCC);
9484 const uint32_t *CalleePreserved =
TRI->getCallPreservedMask(MF, CalleeCC);
9485 if (Subtarget->hasCustomCallingConv()) {
9486 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
9487 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
9489 if (!
TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9498 CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs,
C);
9502 if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
9510 for (
const CCValAssign &ArgLoc : ArgLocs)
9511 if (!ArgLoc.isRegLoc())
9515 const AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9523 A.getValVT().isScalableVector() ||
9524 Subtarget->isWindowsArm64EC()) &&
9525 "Expected value to be scalable");
9545 int ClobberedFI)
const {
9548 int64_t LastByte = FirstByte + MFI.
getObjectSize(ClobberedFI) - 1;
9559 if (FI->getIndex() < 0) {
9561 int64_t InLastByte = InFirstByte;
9564 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
9565 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
                                                   bool TailCallOpt) const {

  APInt RequiredZero(SizeInBits, 0xFE);
  bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero;
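  // The value counts as an already zero-extended bool when bits 1..7 are
  // known zero (the 0xFE mask); only the lowest bit may then be set.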
9591void AArch64TargetLowering::AdjustInstrPostInstrSelection(
MachineInstr &
MI,
9597 if (
MI.getOpcode() == AArch64::MSRpstatesvcrImm1 ||
9598 MI.getOpcode() == AArch64::MSRpstatePseudo) {
9599 for (
unsigned I =
MI.getNumOperands() - 1;
I > 0; --
I)
9600 if (MachineOperand &MO =
MI.getOperand(
I);
9601 MO.isReg() && MO.isImplicit() && MO.isDef() &&
9602 (AArch64::GPR32RegClass.contains(MO.getReg()) ||
9603 AArch64::GPR64RegClass.contains(MO.getReg())))
9604 MI.removeOperand(
I);
9608 if (
MI.getOperand(0).getImm() == AArch64SVCR::SVCRSM ||
9609 MI.getOperand(0).getImm() == AArch64SVCR::SVCRSMZA) {
9624 const MachineFunction &MF = *
MI.getMF();
9625 if (MF.
getInfo<AArch64FunctionInfo>()->hasStreamingModeChanges() &&
9626 (
MI.getOpcode() == AArch64::ADDXri ||
9627 MI.getOpcode() == AArch64::SUBXri)) {
9628 const MachineOperand &MO =
MI.getOperand(1);
9637 unsigned Condition,
bool InsertVectorLengthCheck)
const {
9645 Ops.push_back(InGlue);
9646 return DAG.
getNode(AArch64ISD::CHECK_MATCHING_VL,
DL,
9650 if (InsertVectorLengthCheck &&
Enable) {
9653 SDValue CheckVL = GetCheckVL(Chain, InGlue);
9666 assert(PStateReg.
isValid() &&
"PStateSM Register is invalid");
9673 Opcode =
Enable ? AArch64ISD::COND_SMSTART : AArch64ISD::COND_SMSTOP;
9674 Ops.push_back(ConditionOp);
9675 Ops.push_back(PStateSM);
9677 Opcode =
Enable ? AArch64ISD::SMSTART : AArch64ISD::SMSTOP;
9679 Ops.push_back(RegMask);
9682 Ops.push_back(InGlue);
9687 if (!InsertVectorLengthCheck ||
Enable)
9714 if (Flags.isZExt() || Flags.isSExt())
9721 Arg->
isAssert() ||
Op == AArch64ISD::ASSERT_ZEXT_BOOL) {
9733 int FI = FINode->getIndex();
9751AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
9753 SelectionDAG &DAG = CLI.DAG;
9760 bool &IsTailCall = CLI.IsTailCall;
9762 bool IsVarArg = CLI.IsVarArg;
9763 const CallBase *CB = CLI.CB;
9766 MachineFunction::CallSiteInfo CSInfo;
9767 bool IsThisReturn =
false;
9769 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9771 bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
9772 bool IsSibCall =
false;
9773 bool GuardWithBTI =
false;
9775 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
9776 !Subtarget->noBTIAtReturnTwice()) {
9782 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.
getContext());
9785 unsigned NumArgs = Outs.
size();
9787 for (
unsigned i = 0; i != NumArgs; ++i) {
9788 if (Outs[i].
Flags.isVarArg() && Outs[i].VT.isScalableVector())
9790 "currently not supported");
9801 RetCCInfo.AnalyzeCallResult(Ins, RetCC);
9805 CSInfo = MachineFunction::CallSiteInfo(*CB);
9810 auto HasSVERegLoc = [](CCValAssign &Loc) {
9811 if (!Loc.isRegLoc())
9813 return AArch64::ZPRRegClass.contains(Loc.getLocReg()) ||
9814 AArch64::PPRRegClass.contains(Loc.getLocReg());
9816 if (
any_of(RVLocs, HasSVERegLoc) ||
any_of(ArgLocs, HasSVERegLoc))
9821 SMECallAttrs CallAttrs =
9824 std::optional<unsigned> ZAMarkerNode;
9827 if (UseNewSMEABILowering) {
9830 ZAMarkerNode = AArch64ISD::REQUIRES_ZA_SAVE;
9832 ZAMarkerNode = AArch64ISD::REQUIRES_ZT0_SAVE;
9835 ZAMarkerNode = AArch64ISD::INOUT_ZA_USE;
9840 IsTailCall = isEligibleForTailCallOptimization(CLI);
9844 if (!ZAMarkerNode && !TailCallOpt && IsTailCall &&
9852 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
9854 "site marked musttail");
9872 if (IsTailCall && !IsSibCall) {
    NumBytes = alignTo(NumBytes, 16);
    FPDiff = NumReusableBytes - NumBytes;
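    // FPDiff is the difference between the bytes the caller reserved for
    // tail-call arguments and what this call needs; a negative value means
    // the callee requires more argument stack than is already available.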
9886 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (
unsigned)-FPDiff)
    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
9897 auto DescribeCallsite =
9898 [&](OptimizationRemarkAnalysis &
R) -> OptimizationRemarkAnalysis & {
9901 R <<
ore::NV(
"Callee", ES->getSymbol());
9902 else if (CLI.CB && CLI.CB->getCalledFunction())
9903 R <<
ore::NV(
"Callee", CLI.CB->getCalledFunction()->getName());
9905 R <<
"unknown callee";
9910 bool RequiresLazySave = !UseNewSMEABILowering && CallAttrs.
requiresLazySave();
9911 bool RequiresSaveAllZA =
9913 if (RequiresLazySave) {
9924 auto R = CLI.CB ? OptimizationRemarkAnalysis(
"sme",
"SMELazySaveZA",
9926 : OptimizationRemarkAnalysis(
"sme",
"SMELazySaveZA",
9928 return DescribeCallsite(R) <<
" sets up a lazy save for ZA";
9930 }
else if (RequiresSaveAllZA) {
9932 "Cannot share state that may not exist");
9938 if (RequiresSMChange) {
9941 auto R = CLI.CB ? OptimizationRemarkAnalysis(
"sme",
"SMETransition",
9943 : OptimizationRemarkAnalysis(
"sme",
"SMETransition",
9945 DescribeCallsite(R) <<
" requires a streaming mode transition";
9952 bool ShouldPreserveZT0 =
9957 if (ShouldPreserveZT0) {
9961 {Chain, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
9968 assert((!DisableZA || !RequiresLazySave) &&
9969 "Lazy-save should have PSTATE.SM=1 on entry to the function");
9973 AArch64ISD::SMSTOP,
DL, DAG.
getVTList(MVT::Other, MVT::Glue), Chain,
9978 assert((!IsSibCall || !ZAMarkerNode) &&
"ZA markers require CALLSEQ_START");
9988 {Chain, Chain.getValue(1)});
9996 SmallSet<unsigned, 8> RegsUsed;
10000 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
10002 for (
const auto &
F : Forwards) {
10009 unsigned ExtraArgLocs = 0;
10010 for (
unsigned i = 0, e = Outs.
size(); i != e; ++i) {
10011 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
10013 ISD::ArgFlagsTy
Flags = Outs[i].Flags;
10028 if (Outs[i].ArgVT == MVT::i1) {
10050 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
10066 assert((isScalable || Subtarget->isWindowsArm64EC()) &&
10067 "Indirect arguments should be scalable on most subtargets");
10070 TypeSize PartSize = StoreSize;
10071 unsigned NumParts = 1;
10072 if (Outs[i].
Flags.isInConsecutiveRegs()) {
10073 while (!Outs[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
10075 StoreSize *= NumParts;
10084 bool IsPred = VA.
getValVT() == MVT::aarch64svcount ||
10102 if (NumParts > 0) {
10118 if (i == 0 &&
Flags.isReturned() && !
Flags.isSwiftSelf() &&
10119 Outs[0].VT == MVT::i64) {
10121 "unexpected calling convention register assignment");
10123 "unexpected use of 'returned'");
10124 IsThisReturn =
true;
10133 [=](
const std::pair<unsigned, SDValue> &Elt) {
10142 [&VA](MachineFunction::ArgRegPair ArgReg) {
10143 return ArgReg.Reg == VA.getLocReg();
10150 Arg = DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
10155 if (
Options.EmitCallSiteInfo)
10162 MachinePointerInfo DstInfo;
10166 uint32_t BEAlign = 0;
10172 OpSize =
Flags.isByVal() ?
Flags.getByValSize() * 8
10174 OpSize = (OpSize + 7) / 8;
10175 if (!Subtarget->isLittleEndian() && !
Flags.isByVal() &&
10176 !
Flags.isInConsecutiveRegs()) {
10178 BEAlign = 8 - OpSize;
10181 int32_t
Offset = LocMemOffset + BEAlign;
10198 Chain = addTokenForArgument(Chain, DAG, MF.
getFrameInfo(), FI);
10206 if (Outs[i].
Flags.isByVal()) {
10210 Chain,
DL, DstAddr, Arg, SizeNode,
10211 Outs[i].
Flags.getNonZeroByValAlign(),
10213 nullptr, std::nullopt, DstInfo, MachinePointerInfo());
10230 if (IsVarArg && Subtarget->isWindowsArm64EC() &&
10231 !(CLI.CB && CLI.CB->isMustTailCall())) {
10249 if (!MemOpChains.
empty())
10253 if (RequiresSMChange) {
10254 bool InsertVectorLengthCheck =
10264 for (
auto &RegToPass : RegsToPass) {
10266 RegToPass.second, InGlue);
10273 const GlobalValue *CalledGlobal =
nullptr;
10274 unsigned OpFlags = 0;
10276 CalledGlobal =
G->getGlobal();
10277 OpFlags = Subtarget->classifyGlobalFunctionReference(CalledGlobal,
10283 const GlobalValue *GV =
G->getGlobal();
10288 Subtarget->isTargetMachO()) ||
10290 const char *Sym = S->getSymbol();
10303 if (IsTailCall && !IsSibCall) {
10308 unsigned Opc = IsTailCall ? AArch64ISD::TC_RETURN : AArch64ISD::CALL;
10310 std::vector<SDValue>
Ops;
10311 Ops.push_back(Chain);
10312 Ops.push_back(Callee);
10319 "tail calls cannot be marked with clang.arc.attachedcall");
10320 Opc = AArch64ISD::CALL_RVMARKER;
10326 Ops.insert(
Ops.begin() + 1, GA);
10333 Ops.insert(
Ops.begin() + 2, DoEmitMarker);
10335 Opc = AArch64ISD::CALL_ARM64EC_TO_X64;
10336 }
else if (GuardWithBTI) {
10337 Opc = AArch64ISD::CALL_BTI;
10348 const uint64_t
Key = CLI.PAI->Key;
10350 "Invalid auth call key");
10354 std::tie(IntDisc, AddrDisc) =
10357 if (
Opc == AArch64ISD::CALL_RVMARKER)
10358 Opc = AArch64ISD::AUTH_CALL_RVMARKER;
10360 Opc = IsTailCall ? AArch64ISD::AUTH_TC_RETURN : AArch64ISD::AUTH_CALL;
10362 Ops.push_back(IntDisc);
10363 Ops.push_back(AddrDisc);
10368 for (
auto &RegToPass : RegsToPass)
10370 RegToPass.second.getValueType()));
10373 const uint32_t *
Mask;
10374 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10375 if (IsThisReturn) {
10377 Mask =
TRI->getThisReturnPreservedMask(MF, CallConv);
10379 IsThisReturn =
false;
10380 Mask =
TRI->getCallPreservedMask(MF, CallConv);
10383 Mask =
TRI->getCallPreservedMask(MF, CallConv);
10385 if (Subtarget->hasCustomCallingConv())
10386 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
10388 if (
TRI->isAnyArgRegReserved(MF))
10389 TRI->emitReservedArgRegCallError(MF);
10391 assert(Mask &&
"Missing call preserved mask for calling convention");
10395 Ops.push_back(InGlue);
10397 if (CLI.DeactivationSymbol)
10410 if (CalledGlobal &&
10424 if (CalledGlobal &&
10428 uint64_t CalleePopBytes =
10429 DoesCalleeRestoreStack(CallConv, TailCallOpt) ?
alignTo(NumBytes, 16) : 0;
10437 Chain, InGlue, CallConv, IsVarArg, RVLocs,
DL, DAG, InVals, IsThisReturn,
10438 IsThisReturn ? OutVals[0] :
SDValue(), RequiresSMChange);
10443 if (RequiresSMChange) {
10449 if (!UseNewSMEABILowering &&
10453 AArch64ISD::SMSTART,
DL, DAG.
getVTList(MVT::Other, MVT::Glue), Result,
10456 if (ShouldPreserveZT0)
10459 {Result, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
10461 if (RequiresLazySave) {
10463 }
else if (RequiresSaveAllZA) {
10468 if (RequiresSMChange || RequiresLazySave || ShouldPreserveZT0 ||
10469 RequiresSaveAllZA) {
10470 for (
unsigned I = 0;
I < InVals.
size(); ++
I) {
10485 for (
const ISD::OutputArg &O : Outs) {
10486 if (
O.Flags.isSwiftSelf() ||
O.Flags.isSwiftError() ||
10487 O.Flags.isSwiftAsync()) {
10491 "Swift attributes can't be used with preserve_none",
10492 DL.getDebugLoc()));
10501bool AArch64TargetLowering::CanLowerReturn(
10504 const Type *RetTy)
const {
10507 CCState CCInfo(CallConv, isVarArg, MF, RVLocs,
Context);
10518 auto *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
10522 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.
getContext());
10528 SmallSet<unsigned, 4> RegsUsed;
10529 for (
unsigned i = 0, realRVLocIdx = 0; i != RVLocs.
size();
10530 ++i, ++realRVLocIdx) {
10531 CCValAssign &VA = RVLocs[i];
10533 SDValue Arg = OutVals[realRVLocIdx];
10539 if (Outs[i].ArgVT == MVT::i1) {
10555 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
10564 llvm::find_if(RetVals, [=](
const std::pair<unsigned, SDValue> &Elt) {
10574 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10590 for (
auto &RetVal : RetVals) {
10594 DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
10595 DAG.
getVTList(RetVal.second.getValueType(), MVT::Glue),
10597 Chain = DAG.
getCopyToReg(Chain,
DL, RetVal.first, RetVal.second, Glue);
10600 DAG.
getRegister(RetVal.first, RetVal.second.getValueType()));
10611 unsigned RetValReg = AArch64::X0;
10613 RetValReg = AArch64::X8;
10624 if (AArch64::GPR64RegClass.
contains(*
I))
10626 else if (AArch64::FPR64RegClass.
contains(*
I))
10637 RetOps.push_back(Glue);
10648 MachinePointerInfo());
10649 RetOps.insert(RetOps.begin() + 1, Arm64ECRetDest);
10651 return DAG.
getNode(AArch64ISD::TC_RETURN,
DL, MVT::Other, RetOps);
10654 return DAG.
getNode(AArch64ISD::RET_GLUE,
DL, MVT::Other, RetOps);
10663 unsigned Flag)
const {
10665 N->getOffset(), Flag);
10670 unsigned Flag)
const {
10676 unsigned Flag)
const {
10678 N->getOffset(), Flag);
10683 unsigned Flag)
const {
10689 unsigned Flag)
const {
10694template <
class NodeTy>
10696 unsigned Flags)
const {
10704 .
getInfo<AArch64FunctionInfo>()
10705 ->hasELFSignedGOT())
10708 return DAG.
getNode(AArch64ISD::LOADgot,
DL, Ty, GotAddr);
10712template <
class NodeTy>
10714 unsigned Flags)
const {
10720 AArch64ISD::WrapperLarge,
DL, Ty,
10728template <
class NodeTy>
10730 unsigned Flags)
const {
10738 return DAG.
getNode(AArch64ISD::ADDlow,
DL, Ty, ADRP,
Lo);
10742template <
class NodeTy>
10744 unsigned Flags)
const {
10748 SDValue Sym = getTargetNode(
N, Ty, DAG, Flags);
10749 return DAG.
getNode(AArch64ISD::ADR,
DL, Ty, Sym);
10755 const GlobalValue *GV = GN->
getGlobal();
10756 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV,
getTargetMachine());
10760 "unexpected offset in global node");
10765 return getGOT(GN, DAG, OpFlags);
10771 Result = getAddrLarge(GN, DAG, OpFlags);
10773 Result = getAddrTiny(GN, DAG, OpFlags);
10775 Result = getAddr(GN, DAG, OpFlags);
10814AArch64TargetLowering::LowerDarwinGlobalTLSAddress(
SDValue Op,
10816 assert(Subtarget->isTargetDarwin() &&
10817 "This function expects a Darwin target");
10832 PtrMemVT,
DL, Chain, DescAddr,
10847 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10848 const uint32_t *
Mask =
TRI->getTLSCallPreservedMask();
10849 if (Subtarget->hasCustomCallingConv())
10857 unsigned Opcode = AArch64ISD::CALL;
10859 Ops.push_back(Chain);
10860 Ops.push_back(FuncTLVGet);
10864 Opcode = AArch64ISD::AUTH_CALL;
10986SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(
SDValue SymAddr,
10991 auto *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
10995 SDVTList NodeTys = DAG.
getVTList(MVT::Other, MVT::Glue);
10998 bool RequiresSMChange = TLSCallAttrs.requiresSMChange();
11000 auto ChainAndGlue = [](
SDValue Chain) -> std::pair<SDValue, SDValue> {
11001 return {Chain, Chain.
getValue(1)};
11004 if (RequiresSMChange)
11005 std::tie(Chain, Glue) =
11011 ? AArch64ISD::TLSDESC_AUTH_CALLSEQ
11012 : AArch64ISD::TLSDESC_CALLSEQ;
11014 std::tie(Chain, Glue) = ChainAndGlue(DAG.
getNode(
11017 if (TLSCallAttrs.requiresLazySave())
11018 std::tie(Chain, Glue) = ChainAndGlue(DAG.
getNode(
11019 AArch64ISD::REQUIRES_ZA_SAVE,
DL, NodeTys, {Chain, Chain.getValue(1)}));
11021 if (RequiresSMChange)
11022 std::tie(Chain, Glue) =
11030AArch64TargetLowering::LowerELFGlobalTLSAddress(
SDValue Op,
11032 assert(Subtarget->isTargetELF() &&
"This function expects an ELF target");
11035 AArch64FunctionInfo *MFI =
11050 "in local exec TLS model");
11061 const GlobalValue *GV = GA->
getGlobal();
11066 return LowerELFTLSLocalExec(GV, ThreadBase,
DL, DAG);
11069 TPOff = DAG.
getNode(AArch64ISD::LOADgot,
DL, PtrVT, TPOff);
11087 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
11094 GV,
DL, MVT::i64, 0,
11111 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
11119AArch64TargetLowering::LowerWindowsGlobalTLSAddress(
SDValue Op,
11121 assert(Subtarget->isTargetWindows() &&
"Windows specific TLS lowering");
11133 TLSArray = DAG.
getLoad(PtrVT,
DL, Chain, TLSArray, MachinePointerInfo());
11146 DAG.
getNode(AArch64ISD::ADDlow,
DL, PtrVT, ADRP, TLSIndexLo);
11147 TLSIndex = DAG.
getLoad(MVT::i32,
DL, Chain, TLSIndex, MachinePointerInfo());
11157 MachinePointerInfo());
11158 Chain =
TLS.getValue(1);
11161 const GlobalValue *GV = GA->
getGlobal();
11173 Addr = DAG.
getNode(AArch64ISD::ADDlow,
DL, PtrVT, Addr, TGALo);
11183 if (Subtarget->isTargetDarwin())
11184 return LowerDarwinGlobalTLSAddress(
Op, DAG);
11185 if (Subtarget->isTargetELF())
11186 return LowerELFGlobalTLSAddress(
Op, DAG);
11187 if (Subtarget->isTargetWindows())
11188 return LowerWindowsGlobalTLSAddress(
Op, DAG);
11226 assert(TGN->getGlobal()->hasExternalWeakLinkage());
11232 if (TGN->getOffset() != 0)
11234 "unsupported non-zero offset in weak ptrauth global reference");
11241 {TGA, Key, Discriminator}),
11246AArch64TargetLowering::LowerPtrAuthGlobalAddress(
SDValue Op,
11249 uint64_t KeyC =
Op.getConstantOperandVal(1);
11250 SDValue AddrDiscriminator =
Op.getOperand(2);
11251 uint64_t DiscriminatorC =
Op.getConstantOperandVal(3);
11252 EVT VT =
Op.getValueType();
11262 "constant discriminator in ptrauth global out of range [0, 0xffff]");
11265 if (!Subtarget->isTargetELF() && !Subtarget->isTargetMachO())
11268 int64_t PtrOffsetC = 0;
11274 const GlobalValue *PtrGV = PtrN->getGlobal();
11277 const unsigned OpFlags =
11281 "unsupported non-GOT op flags on ptrauth global reference");
11284 PtrOffsetC += PtrN->getOffset();
11287 assert(PtrN->getTargetFlags() == 0 &&
11288 "unsupported target flags on ptrauth global");
11293 ? AddrDiscriminator
11297 if (!NeedsGOTLoad) {
11301 {TPtr, Key, TAddrDiscriminator, Discriminator}),
11310 {TPtr, Key, TAddrDiscriminator, Discriminator}),
11338 SDValue Dest,
unsigned Opcode,
11350 uint64_t Mask =
Op.getConstantOperandVal(1);
11355 if (
Op.getOperand(0).getOpcode() ==
ISD::SHL) {
11356 auto Op00 =
Op.getOperand(0).getOperand(0);
11359 Op.getOperand(1),
Op.getOperand(0).getOperand(1));
11360 return DAG.
getNode(Opcode,
DL, MVT::Other, Chain, Shr,
11380 bool ProduceNonFlagSettingCondBr =
11386 if (
LHS.getValueType() == MVT::f128) {
11391 if (!
RHS.getNode()) {
11411 OFCC = getInvertedCondCode(OFCC);
11414 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
11418 if (
LHS.getValueType().isInteger()) {
11420 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
    if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
      if (CC == ISD::SETEQ)
        return DAG.getNode(AArch64ISD::CBZ, DL, MVT::Other, Chain, LHS, Dest);
      if (CC == ISD::SETNE)
        return DAG.getNode(AArch64ISD::CBNZ, DL, MVT::Other, Chain, LHS, Dest);
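      // Equality tests against zero fold directly into CBZ/CBNZ, avoiding a
      // separate flag-setting compare (when such branches are permitted).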
11442 uint64_t SignBitPos;
11444 return DAG.
getNode(AArch64ISD::TBNZ,
DL, MVT::Other, Chain,
LHS,
11449 LHS.getOpcode() !=
ISD::AND && ProduceNonFlagSettingCondBr) {
11453 uint64_t SignBitPos;
11455 return DAG.
getNode(AArch64ISD::TBZ,
DL, MVT::Other, Chain,
LHS,
11461 if (Subtarget->hasCMPBR() &&
11463 ProduceNonFlagSettingCondBr) {
11472 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
11476 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::bf16 ||
11477 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
11486 DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CC1Val, Cmp);
11489 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, BR1, Dest, CC2Val,
11498 if (!Subtarget->isNeonAvailable() &&
11499 !Subtarget->useSVEForFixedLengthVectors())
11502 EVT VT =
Op.getValueType();
11530 if (!VT.
isVector() && !Subtarget->isNeonAvailable() &&
11531 Subtarget->isSVEorStreamingSVEAvailable()) {
11532 if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64 && VT != MVT::bf16)
11547 auto BitCast = [
this](EVT VT,
SDValue Op, SelectionDAG &DAG) {
11549 return getSVESafeBitCast(VT,
Op, DAG);
11556 auto SetVecVal = [&](
int Idx = -1) {
11563 VecVal1 = BitCast(VecVT, In1, DAG);
11564 VecVal2 = BitCast(VecVT, In2, DAG);
11570 }
else if (VT == MVT::f64) {
11571 VecVT = MVT::v2i64;
11572 SetVecVal(AArch64::dsub);
11573 }
else if (VT == MVT::f32) {
11574 VecVT = MVT::v4i32;
11575 SetVecVal(AArch64::ssub);
11576 }
else if (VT == MVT::f16 || VT == MVT::bf16) {
11577 VecVT = MVT::v8i16;
11578 SetVecVal(AArch64::hsub);
11589 if (VT == MVT::f64 || VT == MVT::v2f64) {
11597 DAG.
getNode(AArch64ISD::BSP,
DL, VecVT, SignMaskV, VecVal1, VecVal2);
11598 if (VT == MVT::f16 || VT == MVT::bf16)
11600 if (VT == MVT::f32)
11602 if (VT == MVT::f64)
11605 return BitCast(VT, BSP, DAG);
11611 Attribute::NoImplicitFloat))
11614 EVT VT =
Op.getValueType();
11617 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
11625 if (VT == MVT::i32 && IsParity)
11628 if (Subtarget->isSVEorStreamingSVEAvailable()) {
11629 if (VT == MVT::i32 || VT == MVT::i64) {
11630 EVT ContainerVT = VT == MVT::i32 ? MVT::nxv4i32 : MVT::nxv2i64;
11642 if (VT == MVT::i128) {
11655 if (!Subtarget->isNeonAvailable())
11666 if (VT == MVT::i32 || VT == MVT::i64) {
11667 if (VT == MVT::i32)
11673 AddV = DAG.
getNode(AArch64ISD::NVCAST,
DL,
11674 VT == MVT::i32 ? MVT::v2i32 : MVT::v1i64, AddV);
11680 }
else if (VT == MVT::i128) {
11686 DAG.
getNode(AArch64ISD::NVCAST,
DL, MVT::v2i64, AddV),
11694 assert(!IsParity &&
"ISD::PARITY of vector types not supported");
11696 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
11697 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
11698 "Unexpected type for custom ctpop lowering");
11706 EVT DT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
11710 if (VT == MVT::v2i64) {
11711 Val = DAG.
getNode(AArch64ISD::UDOT,
DL, DT, Zeros, Ones, Val);
11712 Val = DAG.
getNode(AArch64ISD::UADDLP,
DL, VT, Val);
11713 }
else if (VT == MVT::v2i32) {
11714 Val = DAG.
getNode(AArch64ISD::UDOT,
DL, DT, Zeros, Ones, Val);
11715 }
else if (VT == MVT::v4i32) {
11716 Val = DAG.
getNode(AArch64ISD::UDOT,
DL, DT, Zeros, Ones, Val);
  unsigned EltSize = 8;

  Val = DAG.getNode(AArch64ISD::UADDLP, DL, WidenVT, Val);
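  // CNT only produces per-byte population counts, so wider element types are
  // built up by repeatedly pairwise-widening the byte counts with UADDLP
  // until the original element size is reached.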
11738 EVT VT =
Op.getValueType();
11741 VT, Subtarget->useSVEForFixedLengthVectors()));
11751 EVT VT =
Op.getValueType();
11753 unsigned Opcode =
Op.getOpcode();
11780 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SMAX_PRED);
11782 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SMIN_PRED);
11784 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::UMAX_PRED);
11786 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::UMIN_PRED);
11798 EVT VT =
Op.getValueType();
11802 VT, Subtarget->useSVEForFixedLengthVectors()))
11803 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU);
11815 REVB = DAG.
getNode(AArch64ISD::REV32,
DL, VST,
Op.getOperand(0));
11822 REVB = DAG.
getNode(AArch64ISD::REV32,
DL, VST,
Op.getOperand(0));
11829 REVB = DAG.
getNode(AArch64ISD::REV64,
DL, VST,
Op.getOperand(0));
11836 REVB = DAG.
getNode(AArch64ISD::REV64,
DL, VST,
Op.getOperand(0));
11842 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT,
11849 SmallVector<std::pair<SDValue, SDValue>, 16> &WorkList) {
11855 N =
N->getOperand(0);
11859 WorkList.push_back(std::make_pair(
N->getOperand(0),
N->getOperand(1)));
11865 if (
N->getOpcode() !=
ISD::OR || !
N->hasOneUse())
11879 EVT VT =
N->getValueType(0);
11889 unsigned NumXors = 0;
11894 std::tie(XOR0, XOR1) = WorkList[0];
11897 for (
unsigned I = 1;
I < WorkList.
size();
I++) {
11898 std::tie(XOR0, XOR1) = WorkList[
I];
11900 Cmp = DAG.
getNode(LogicOp,
DL, VT, Cmp, CmpChain);
11912 if (
Op.getValueType().isVector())
11913 return LowerVSETCC(
Op, DAG);
11915 bool IsStrict =
Op->isStrictFPOpcode();
11917 unsigned OpNo = IsStrict ? 1 : 0;
11920 Chain =
Op.getOperand(0);
11927 EVT VT =
Op.getValueType();
11933 if (
LHS.getValueType() == MVT::f128) {
11938 if (!
RHS.getNode()) {
11939 assert(
LHS.getValueType() ==
Op.getValueType() &&
11940 "Unexpected setcc expansion!");
11945 if (
LHS.getValueType().isInteger()) {
11961 SDValue Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, FVal, TVal, CCVal, Cmp);
11966 assert(
LHS.getValueType() == MVT::bf16 ||
LHS.getValueType() == MVT::f16 ||
11967 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
11988 Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, FVal, TVal, CC1Val, Cmp);
11998 DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, FVal, CC1Val, Cmp);
12001 Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, CS1, CC2Val, Cmp);
12011 EVT VT =
LHS.getValueType();
12012 if (VT != MVT::i32 && VT != MVT::i64)
12022 EVT OpVT =
Op.getValueType();
12031 return DAG.
getNode(AArch64ISD::CSEL,
DL, OpVT, FVal, TVal, CCVal,
12040 "function only supposed to emit natural comparisons");
12049 if (!
LHS.getValueType().isVector()) {
12054 DAG.
getUNDEF(VecVT), Fcmeq, Zero);
12088 assert(!
LHS.getValueType().isVector());
12089 assert(!
RHS.getValueType().isVector());
12093 if (!CTVal || !CFVal)
12107 bool OneNaN =
false;
12123 bool ShouldInvert =
false;
12132 if (!Cmp2 && !ShouldInvert)
12150SDValue AArch64TargetLowering::LowerSELECT_CC(
12156 if (
LHS.getValueType() == MVT::f128) {
12161 if (!
RHS.getNode()) {
12168 if ((
LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
12169 LHS.getValueType() == MVT::bf16) {
12175 if (
LHS.getValueType().isInteger()) {
12177 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
12189 LHS.getValueType() ==
RHS.getValueType()) {
12190 EVT VT =
LHS.getValueType();
12196 Shift = DAG.
getNOT(
DL, Shift, VT);
12210 uint64_t SignBitPos;
12212 EVT TestVT =
LHS.getValueType();
12216 LHS, SignBitConst);
12244 unsigned Opcode = AArch64ISD::CSEL;
12252 }
else if (CTVal && CFVal && CTVal->
isOne() && CFVal->
isZero()) {
12272 }
else if (CTVal && CFVal) {
12280 if (TrueVal == ~FalseVal) {
12281 Opcode = AArch64ISD::CSINV;
12282 }
else if (FalseVal > std::numeric_limits<int64_t>::min() &&
12283 TrueVal == -FalseVal) {
12284 Opcode = AArch64ISD::CSNEG;
12294 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
12295 Opcode = AArch64ISD::CSINC;
12297 if (TrueVal32 > FalseVal32) {
        const uint64_t TrueVal64 = TrueVal;
        const uint64_t FalseVal64 = FalseVal;
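        // Constants that differ by exactly one can use CSINC: materialize the
        // smaller value once and let the conditional increment produce the
        // other, e.g. (cond ? 5 : 4) needs only the 4 in a register.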
12306 if ((TrueVal64 == FalseVal64 + 1) || (TrueVal64 + 1 == FalseVal64)) {
12307 Opcode = AArch64ISD::CSINC;
12309 if (TrueVal > FalseVal) {
12322 if (Opcode != AArch64ISD::CSEL) {
12335 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->
isOne() &&
12340 if (CTVal && CTVal == RHSVal && AArch64CC ==
AArch64CC::EQ)
12342 else if (CFVal && CFVal == RHSVal && AArch64CC ==
AArch64CC::NE)
12344 }
else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->
isOne()) {
12345 assert (CTVal && CFVal &&
"Expected constant operands for CSNEG.");
12350 Opcode = AArch64ISD::CSINV;
12359 return DAG.
getNode(Opcode,
DL, VT, TVal, FVal, CCVal, Cmp);
12363 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::f32 ||
12364 LHS.getValueType() == MVT::f64);
12371 if (Subtarget->isNeonAvailable() &&
all_of(
Users, [](
const SDNode *U) {
12372 switch (
U->getOpcode()) {
12377 case AArch64ISD::DUP:
12395 if (
Flags.hasNoSignedZeros()) {
12399 if (RHSVal && RHSVal->
isZero()) {
12407 CFVal && CFVal->
isZero() &&
12415 SDValue CS1 = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, FVal, CC1Val, Cmp);
12421 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, CS1, CC2Val, Cmp);
12430 EVT Ty =
Op.getValueType();
12433 auto Idx =
Op.getConstantOperandAPInt(2);
12434 int64_t IdxVal = Idx.getSExtValue();
12436 "Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
12445 std::optional<unsigned> PredPattern;
12457 return DAG.
getNode(AArch64ISD::SPLICE,
DL, Ty, Pred,
Op.getOperand(0),
12477 SDNodeFlags
Flags =
Op->getFlags();
12479 return LowerSELECT_CC(CC,
LHS,
RHS, TVal, FVal,
Op->users(), Flags,
DL, DAG);
12489 EVT Ty =
Op.getValueType();
12490 if (Ty == MVT::aarch64svcount) {
12527 return DAG.
getNode(AArch64ISD::CSEL,
DL,
Op.getValueType(), TVal, FVal,
12546 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
12554 Op->getFlags(),
DL, DAG);
12556 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
12571 !Subtarget->isTargetMachO())
12572 return getAddrLarge(JT, DAG);
12574 return getAddrTiny(JT, DAG);
12575 return getAddr(JT, DAG);
12588 AFI->setJumpTableEntryInfo(JTI, 4,
nullptr);
12593 "aarch64-jump-table-hardening")) {
12595 if (Subtarget->isTargetMachO()) {
12600 assert(Subtarget->isTargetELF() &&
12601 "jump table hardening only supported on MachO/ELF");
12632 std::optional<uint16_t> BADisc =
12633 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(MF.
getFunction());
12644 {Dest,
Key, Disc, AddrDisc, Chain});
12654 if (Subtarget->isTargetMachO()) {
12655 return getGOT(CP, DAG);
12658 return getAddrLarge(CP, DAG);
12660 return getAddrTiny(CP, DAG);
12662 return getAddr(CP, DAG);
12670 if (std::optional<uint16_t> BADisc =
12671 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(
12685 {TargetBA,
Key, AddrDisc, Disc});
12693 return getAddrLarge(BAN, DAG);
12695 return getAddrTiny(BAN, DAG);
12697 return getAddr(BAN, DAG);
12702 AArch64FunctionInfo *FuncInfo =
12711 MachinePointerInfo(SV));
12717 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
12721 if (Subtarget->isWindowsArm64EC()) {
12727 uint64_t StackOffset;
12742 MachinePointerInfo(SV));
12750 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
12751 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
12766 MachinePointerInfo(SV), Align(PtrSize)));
12783 MachinePointerInfo(SV, Offset),
12801 MachinePointerInfo(SV, Offset),
12811 GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
12819 VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
12829 if (Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()))
12830   return LowerWin64_VASTART(Op, DAG);
12831 else if (Subtarget->isTargetDarwin())
12832   return LowerDarwin_VASTART(Op, DAG);
12834 return LowerAAPCS_VASTART(Op, DAG);
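// Note: VACOPY below copies either a single pointer (Darwin/Windows va_list)
// or the full AAPCS va_list struct (32 bytes, 20 on ILP32), as the size
// computation in the following lines shows.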
12842 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
12843 unsigned VaListSize =
12844 (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
12846 : Subtarget->isTargetILP32() ? 20 : 32;
12852 Align(PtrSize), false, false, nullptr,
12853 std::nullopt, MachinePointerInfo(DestSV),
12854 MachinePointerInfo(SrcSV));
12858 assert(Subtarget->isTargetDarwin() &&
12859 "automatic va_arg instruction only works on Darwin");
12862 EVT VT = Op.getValueType();
12866 MaybeAlign Align(Op.getConstantOperandVal(3));
12867 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
12871 DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
12877 "currently not supported");
12879 if (Align && *Align > MinSlotSize) {
12895 ArgSize = std::max(ArgSize, MinSlotSize);
12896 bool NeedFPTrunc = false;
12899 NeedFPTrunc = true;
12909 DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
12915 DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
12925 return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
12933 EVT VT = Op.getValueType();
12935 unsigned Depth = Op.getConstantOperandVal(0);
12940 MachinePointerInfo());
12942 if (Subtarget->isTargetILP32())
12958#define GET_REGISTER_MATCHER
12959#include "AArch64GenAsmMatcher.inc"
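// Note: the following range check restricts named-register access (e.g. from
// llvm.read_register) to X1-X28; judging by the reserved-register checks, the
// request is rejected when the register has not been reserved.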
12966 if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
12968 unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
12969 if (!Subtarget->isXRegisterReserved(DwarfRegNum) &&
12970     !MRI->isReservedReg(MF, Reg))
12980 EVT VT = Op.getValueType();
12996 EVT VT = Op.getValueType();
12998 unsigned Depth = Op.getConstantOperandVal(0);
13001 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
13018 if (Subtarget->hasPAuth()) {
13046 bool OptForSize) const {
13047 bool IsLegal = false;
13056 const APInt ImmInt = Imm.bitcastToAPInt();
13057 if (VT == MVT::f64)
13059 else if (VT == MVT::f32)
13061 else if (VT == MVT::f16 || VT == MVT::bf16)
13071 if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
13080 "Should be able to build any value with at most 4 moves");
13081 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 4 : 2));
13082 IsLegal = Insn.size() <= Limit;
13086 << " imm value: "; Imm.dump(););
13098 if ((ST->hasNEON() &&
13099 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
13100 VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
13101 VT == MVT::v4f32)) ||
13103 (VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
13110 constexpr unsigned AccurateBits = 8;
13112 ExtraSteps = DesiredBits <= AccurateBits
13117 return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
13127 EVT VT = Op.getValueType();
13134 AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
13143 bool Reciprocal) const {
13147 DAG, ExtraSteps)) {
13152 SDNodeFlags Flags =
13157 for (int i = ExtraSteps; i > 0; --i) {
13160 Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
13175 int &ExtraSteps) const {
13178 DAG, ExtraSteps)) {
13186 for (int i = ExtraSteps; i > 0; --i) {
13226 const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
13234 if (!Subtarget->hasFPARMv8())
13259 static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
13262 (Constraint[1] != 'p' && Constraint[1] != 'z'))
13263 return std::nullopt;
13265 bool IsPredicate = Constraint[1] == 'p';
13266 Constraint = Constraint.substr(2, Constraint.size() - 3);
13267 bool IsPredicateAsCount = IsPredicate && Constraint.starts_with("n");
13268 if (IsPredicateAsCount)
13273 return std::nullopt;
13275 if (IsPredicateAsCount)
13276 return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
13278 return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
13279 return std::make_pair(AArch64::Z0 + V, &AArch64::ZPRRegClass);
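// Note: the switch below maps the SVE predicate inline-asm constraints to
// register classes, using the PNR variants when the value is an
// MVT::aarch64svcount and the PPR variants otherwise.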
13282static std::optional<PredicateConstraint>
13293 if (VT != MVT::aarch64svcount &&
13297 switch (Constraint) {
13299 return VT == MVT::aarch64svcount ? &AArch64::PNR_p8to15RegClass
13300 : &AArch64::PPR_p8to15RegClass;
13302 return VT == MVT::aarch64svcount ? &AArch64::PNR_3bRegClass
13303 : &AArch64::PPR_3bRegClass;
13305 return VT == MVT::aarch64svcount ? &AArch64::PNRRegClass
13306 : &AArch64::PPRRegClass;
13314static std::optional<ReducedGprConstraint>
13327 switch (Constraint) {
13329 return &AArch64::MatrixIndexGPR32_8_11RegClass;
13331 return &AArch64::MatrixIndexGPR32_12_15RegClass;
13365 return DAG.getNode(AArch64ISD::CSINC, DL, MVT::i32,
13368 getCondCode(DAG, getInvertedCondCode(CC)), NZCV);
13372 SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
13374 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
13379 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
13380 OpInfo.ConstraintVT.getSizeInBits() < 8)
13395 if (OpInfo.ConstraintVT.getSizeInBits() <= 32)
13406 AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
13407 if (Constraint.size() == 1) {
13408 switch (Constraint[0]) {
13445 AArch64TargetLowering::getSingleConstraintMatchWeight(
13446 AsmOperandInfo &info, const char *constraint) const {
13448 Value *CallOperandVal = info.CallOperandVal;
13451 if (!CallOperandVal)
13455 switch (*constraint) {
13477 std::pair<unsigned, const TargetRegisterClass *>
13478 AArch64TargetLowering::getRegForInlineAsmConstraint(
13480 if (Constraint.size() == 1) {
13481 switch (Constraint[0]) {
13484 return std::make_pair(0U, nullptr);
13486 return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
13488 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
13489 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
13491 if (!Subtarget->hasFPARMv8())
13495 return std::make_pair(0U, &AArch64::ZPRRegClass);
13496 return std::make_pair(0U, nullptr);
13498 if (VT == MVT::Other)
13502 return std::make_pair(0U, &AArch64::FPR16RegClass);
13504 return std::make_pair(0U, &AArch64::FPR32RegClass);
13506 return std::make_pair(0U, &AArch64::FPR64RegClass);
13508 return std::make_pair(0U, &AArch64::FPR128RegClass);
13514 if (!Subtarget->hasFPARMv8())
13517 return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
13519 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
13522 if (!Subtarget->hasFPARMv8())
13525 return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
13533 if (AArch64::ZPRRegClass.hasSubClassEq(P->second) &&
13534     !Subtarget->isSVEorStreamingSVEAvailable())
13535 return std::make_pair(TRI->getSubReg(P->first, AArch64::zsub),
13536                       &AArch64::FPR128RegClass);
13541 return std::make_pair(0U, RegClass);
13545 return std::make_pair(0U, RegClass);
13547 if (StringRef("{cc}").equals_insensitive(Constraint) ||
13549 return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
13551 if (Constraint == "{za}") {
13552 return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass);
13555 if (Constraint == "{zt0}") {
13556 return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
13561 std::pair<unsigned, const TargetRegisterClass *> Res;
13566 unsigned Size = Constraint.size();
13567 if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
13568     tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
13571 if (!Failed && RegNo >= 0 && RegNo <= 31) {
13576 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
13577 Res.second = &AArch64::FPR64RegClass;
13579 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
13580 Res.second = &AArch64::FPR128RegClass;
13586 if (Res.second && !Subtarget->hasFPARMv8() &&
13587     !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
13588     !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
13589 return std::make_pair(0U, nullptr);
13596 bool AllowUnknown) const {
13597 if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
13598 return EVT(MVT::i64x8);
13605 void AArch64TargetLowering::LowerAsmOperandForConstraint(
13611 if (Constraint.size() != 1)
13614 char ConstraintLetter = Constraint[0];
13615 switch (ConstraintLetter) {
13626 if (Op.getValueType() == MVT::i64)
13627 Result = DAG.getRegister(AArch64::XZR, MVT::i64);
13629 Result = DAG.getRegister(AArch64::WZR, MVT::i32);
13651 switch (ConstraintLetter) {
13665 CVal = C->getSExtValue();
13696 if ((CVal & 0xFFFF) == CVal)
13698 if ((CVal & 0xFFFF0000ULL) == CVal)
13700 uint64_t NCVal = ~(uint32_t)CVal;
13701 if ((NCVal & 0xFFFFULL) == NCVal)
13703 if ((NCVal & 0xFFFF0000ULL) == NCVal)
13710 if ((CVal & 0xFFFFULL) == CVal)
13712 if ((CVal & 0xFFFF0000ULL) == CVal)
13714 if ((CVal & 0xFFFF00000000ULL) == CVal)
13716 if ((CVal & 0xFFFF000000000000ULL) == CVal)
13718 uint64_t NCVal = ~CVal;
13719 if ((NCVal & 0xFFFFULL) == NCVal)
13721 if ((NCVal & 0xFFFF0000ULL) == NCVal)
13723 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
13725 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
13739 Ops.push_back(Result);
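// Note: the helper below matches a v8i8/v16i8 BUILD_VECTOR whose lanes are
// extracted at indices taken from another vector (optionally masked by
// constant ANDs) and rebuilds it as a single TBL using that index vector.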
13776 EVT VT = Op.getValueType();
13778 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
13782 if (VT != MVT::v16i8 && VT != MVT::v8i8)
13786 assert((NumElts == 8 || NumElts == 16) &&
13787        "Need to have exactly 8 or 16 elements in vector.");
13793 for (unsigned i = 0; i < NumElts; ++i) {
13800 SourceVec = OperandSourceVec;
13801 else if (SourceVec != OperandSourceVec)
13814 } else if (!AndMaskConstants.empty()) {
13834 if (!MaskSourceVec) {
13838 } else if (MaskSourceVec != MaskSource->getOperand(0)) {
13852 if (!AndMaskConstants.empty())
13859 SourceVec, MaskSourceVec);
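// Note: ReconstructShuffle collects the fixed-width source vectors feeding a
// BUILD_VECTOR of extracts; three or four sources become a TBL3/TBL4, while at
// most two sources are narrowed and EXT-shifted into a regular shuffle mask.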
13867 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
13869 EVT VT = Op.getValueType();
13871 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
13874 struct ShuffleSourceInfo {
13889 ShuffleSourceInfo(SDValue Vec)
13890     : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
13891       ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
13899 for (unsigned i = 0; i < NumElts; ++i) {
13905 V.getOperand(0).getValueType().isScalableVector()) {
13907 dbgs() << "Reshuffle failed: "
13908            "a shuffle can only come from building a vector from "
13909            "various elements of other fixed-width vectors, provided "
13910            "their indices are constant\n");
13916 auto Source = find(Sources, SourceVec);
13917 if (Source == Sources.end())
13918 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
13921 unsigned EltNo = V.getConstantOperandVal(1);
13922 Source->MinElt = std::min(Source->MinElt, EltNo);
13923 Source->MaxElt = std::max(Source->MaxElt, EltNo);
13928 if ((Sources.size() == 3 || Sources.size() == 4) && NumElts > 4) {
13933 for (unsigned I = 0; I < NumElts; ++I) {
13936 for (unsigned OF = 0; OF < OutputFactor; OF++)
13937 Mask.push_back(-1);
13943 unsigned Lane = V.getConstantOperandVal(1);
13944 for (unsigned S = 0; S < Sources.size(); S++) {
13945 if (V.getOperand(0) == Sources[S].Vec) {
13946 unsigned InputSize = Sources[S].Vec.getScalarValueSizeInBits();
13947 unsigned InputBase = 16 * S + Lane * InputSize / 8;
13948 for (unsigned OF = 0; OF < OutputFactor; OF++)
13949 Mask.push_back(InputBase + OF);
13959 ? Intrinsic::aarch64_neon_tbl3
13960 : Intrinsic::aarch64_neon_tbl4,
13962 for (unsigned i = 0; i < Sources.size(); i++) {
13963 SDValue Src = Sources[i].Vec;
13964 EVT SrcVT = Src.getValueType();
13967 "Expected a legally typed vector");
13975 for (unsigned i = 0; i < Mask.size(); i++)
13977 assert((Mask.size() == 8 || Mask.size() == 16) &&
13978        "Expected a v8i8 or v16i8 Mask");
13980 Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, DL, TBLMask));
13984 Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
13988 if (Sources.size() > 2) {
13989 LLVM_DEBUG(dbgs() << "Reshuffle failed: currently only do something "
13990                   << "sensible when at most two source vectors are "
13998 for (auto &Source : Sources) {
13999 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
14000 if (SrcEltTy.bitsLT(SmallestEltTy)) {
14001 SmallestEltTy = SrcEltTy;
14004 unsigned ResMultiplier =
14013 for (auto &Src : Sources) {
14014 EVT SrcVT = Src.ShuffleVec.getValueType();
14027 assert(2 * SrcVTSize == VTSize);
14032 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
14038 dbgs() << "Reshuffle failed: result vector too small to extract\n");
14042 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
14044 dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
14048 if (Src.MinElt >= NumSrcElts) {
14053 Src.WindowBase = -NumSrcElts;
14054 } else if (Src.MaxElt < NumSrcElts) {
14071 dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
14072            "for SVE vectors.");
14077 DAG.getNode(AArch64ISD::EXT, DL, DestVT, VEXTSrc1, VEXTSrc2,
14079 Src.WindowBase = -Src.MinElt;
14086 for (auto &Src : Sources) {
14088 if (SrcEltTy == SmallestEltTy)
14093 DAG.getNode(AArch64ISD::NVCAST, DL, ShuffleVT, Src.ShuffleVec);
14099 Src.WindowBase *= Src.WindowScale;
14104 for (auto Src : Sources)
14105 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
14113 if (Entry.isUndef())
14116 auto Src = find(Sources, Entry.getOperand(0));
14125 int LanesDefined = BitsDefined / BitsPerShuffleLane;
14129 int *LaneMask = &Mask[i * ResMultiplier];
14131 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
14132 ExtractBase += NumElts * (Src - Sources.begin());
14133 for (int j = 0; j < LanesDefined; ++j)
14134 LaneMask[j] = ExtractBase + j;
14139 LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
14144 for (unsigned i = 0; i < Sources.size(); ++i)
14151 V = DAG.getNode(AArch64ISD::NVCAST, DL, VT, Shuffle);
14157 dbgs() << "Reshuffle, creating node: "; V.dump(););
14176 unsigned ExpectedElt = Imm;
14177 for (unsigned i = 1; i < NumElts; ++i) {
14181 if (ExpectedElt == NumElts)
14186 if (ExpectedElt != static_cast<unsigned>(M[i]))
14197 if (V.getValueType() != MVT::v16i8)
14199 assert(V.getNumOperands() == 16 && "Expected 16 operands on the BUILDVECTOR");
14201 for (unsigned X = 0; X < 4; X++) {
14213 for (unsigned Y = 1; Y < 4; Y++) {
14229 V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
14230 V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
14232 if (V.getValueType() == MVT::v4i32)
14248 unsigned &DupLaneOp) {
14250 "Only possible block sizes for wide DUP are: 16, 32, 64");
14269 for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
14270 for (size_t I = 0; I < NumEltsPerBlock; I++) {
14271 int Elt = M[BlockIndex * NumEltsPerBlock + I];
14275 if ((unsigned)Elt >= SingleVecNumElements)
14277 if (BlockElts[I] < 0)
14278 BlockElts[I] = Elt;
14279 else if (BlockElts[I] != Elt)
14288 auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
14289 assert(FirstRealEltIter != BlockElts.end() &&
14290        "Shuffle with all-undefs must have been caught by previous cases, "
14292 if (FirstRealEltIter == BlockElts.end()) {
14298 size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
14300 if ((unsigned)*FirstRealEltIter < FirstRealIndex)
14303 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
14306 if (Elt0 % NumEltsPerBlock != 0)
14310 for (size_t I = 0; I < NumEltsPerBlock; I++)
14311 if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
14314 DupLaneOp = Elt0 / NumEltsPerBlock;
14323 const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
14328 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, false,
14332 bool FoundWrongElt = std::any_of(FirstRealElt + 1, M.end(), [&](int Elt) {
14333 return Elt != ExpectedElt++ && Elt >= 0;
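// Note: the mask checks below recognise ZIP/UZP/TRN-style permutes where both
// shuffle inputs are the same vector; WhichResult distinguishes the "1" form
// (even/low half) from the "2" form (odd/high half).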
14365 if (NumElts % 2 != 0)
14367 WhichResult = (M[0] == 0 ? 0 : 1);
14368 unsigned Idx = WhichResult * NumElts / 2;
14369 for (unsigned i = 0; i != NumElts; i += 2) {
14370 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
14371     (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
14384 WhichResult = (M[0] == 0 ? 0 : 1);
14385 for (unsigned j = 0; j != 2; ++j) {
14386 unsigned Idx = WhichResult;
14387 for (unsigned i = 0; i != Half; ++i) {
14388 int MIdx = M[i + j * Half];
14389 if (MIdx >= 0 && (unsigned)MIdx != Idx)
14403 if (NumElts % 2 != 0)
14405 WhichResult = (M[0] == 0 ? 0 : 1);
14406 for (unsigned i = 0; i < NumElts; i += 2) {
14407 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
14408     (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
14415 bool &DstIsLeft, int &Anomaly) {
14416 if (M.size() != static_cast<size_t>(NumInputElements))
14419 int NumLHSMatch = 0, NumRHSMatch = 0;
14420 int LastLHSMismatch = -1, LastRHSMismatch = -1;
14422 for (int i = 0; i < NumInputElements; ++i) {
14432 LastLHSMismatch = i;
14434 if (M[i] == i + NumInputElements)
14437 LastRHSMismatch = i;
14440 if (NumLHSMatch == NumInputElements - 1) {
14442 Anomaly = LastLHSMismatch;
14444 } else if (NumRHSMatch == NumInputElements - 1) {
14446 Anomaly = LastRHSMismatch;
14459 for (int I = 0, E = NumElts / 2; I != E; I++) {
14464 int Offset = NumElts / 2;
14465 for (int I = NumElts / 2, E = NumElts; I != E; I++) {
14466 if (Mask[I] != I + SplitLHS * Offset)
14475 EVT VT = Op.getValueType();
14510 unsigned OpNum = (PFEntry >> 26) & 0x0F;
14511 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
14512 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
14534 if (LHSID == (1 * 9 + 2) * 9 + 3)
14536 assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 &&
"Illegal OP_COPY!");
14540 if (OpNum == OP_MOVLANE) {
14542 auto getPFIDLane = [](
unsigned ID,
int Elt) ->
int {
14543 assert(Elt < 4 &&
"Expected Perfect Lanes to be less than 4");
14549 return (
ID % 9 == 8) ? -1 :
ID % 9;
14558 assert(RHSID < 8 &&
"Expected a lane index for RHSID!");
14559 unsigned ExtLane = 0;
14565 int MaskElt = getPFIDLane(
ID, (RHSID & 0x01) << 1) >> 1;
14567 MaskElt = (getPFIDLane(
ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
14568 assert(MaskElt >= 0 &&
"Didn't expect an undef movlane index!");
14569 ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
14570 Input = MaskElt < 2 ? V1 : V2;
14576 "Expected 16 or 32 bit shuffle elements");
14581 int MaskElt = getPFIDLane(
ID, RHSID);
14582 assert(MaskElt >= 0 &&
"Didn't expect an undef movlane index!");
14583 ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
14584 Input = MaskElt < 4 ? V1 : V2;
14586 if (VT == MVT::v4i16) {
14592 Input.getValueType().getVectorElementType(),
14614 return DAG.
getNode(AArch64ISD::REV64,
DL, VT, OpLHS);
14619 return DAG.
getNode(AArch64ISD::REV32,
DL, VT, OpLHS);
14622 return DAG.
getNode(AArch64ISD::REV16,
DL, VT, OpLHS);
14629 if (EltTy == MVT::i8)
14630 Opcode = AArch64ISD::DUPLANE8;
14631 else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
14632 Opcode = AArch64ISD::DUPLANE16;
14633 else if (EltTy == MVT::i32 || EltTy == MVT::f32)
14634 Opcode = AArch64ISD::DUPLANE32;
14635 else if (EltTy == MVT::i64 || EltTy == MVT::f64)
14636 Opcode = AArch64ISD::DUPLANE64;
14643 return DAG.
getNode(Opcode,
DL, VT, OpLHS, Lane);
14649 return DAG.
getNode(AArch64ISD::EXT,
DL, VT, OpLHS, OpRHS,
14653 return DAG.
getNode(AArch64ISD::UZP1,
DL, VT, OpLHS, OpRHS);
14655 return DAG.
getNode(AArch64ISD::UZP2,
DL, VT, OpLHS, OpRHS);
14657 return DAG.
getNode(AArch64ISD::ZIP1,
DL, VT, OpLHS, OpRHS);
14659 return DAG.
getNode(AArch64ISD::ZIP2,
DL, VT, OpLHS, OpRHS);
14661 return DAG.
getNode(AArch64ISD::TRN1,
DL, VT, OpLHS, OpRHS);
14663 return DAG.
getNode(AArch64ISD::TRN2,
DL, VT, OpLHS, OpRHS);
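// Note: the TBL fallback below expands the shuffle mask to byte indices
// (IndexLen is 8 for 64-bit and 16 for 128-bit vectors) and emits a one- or
// two-register TBL, treating out-of-range bytes as zero when the second
// operand is undef or zero.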
14674 EVT EltVT = Op.getValueType().getVectorElementType();
14687 MVT IndexVT = MVT::v8i8;
14688 unsigned IndexLen = 8;
14689 if (Op.getValueSizeInBits() == 128) {
14690 IndexVT = MVT::v16i8;
14695 for (int Val : ShuffleMask) {
14696 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
14697 unsigned Offset = Byte + Val * BytesPerElt;
14700 if (IsUndefOrZero && Offset >= IndexLen)
14710 if (IsUndefOrZero) {
14719 if (IndexLen == 8) {
14744 if (EltType == MVT::i8)
14745 return AArch64ISD::DUPLANE8;
14746 if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
14747 return AArch64ISD::DUPLANE16;
14748 if (EltType == MVT::i32 || EltType == MVT::f32)
14749 return AArch64ISD::DUPLANE32;
14750 if (EltType == MVT::i64 || EltType == MVT::f64)
14751 return AArch64ISD::DUPLANE64;
14759 auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
14770 unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
14772 if (ExtIdxInBits % CastedEltBitWidth != 0)
14780 LaneC += ExtIdxInBits / CastedEltBitWidth;
14787 unsigned SrcVecNumElts =
14794 if (getScaledOffsetDup(V, Lane, CastVT)) {
14795 V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
14797 V.getOperand(0).getValueType().is128BitVector()) {
14800 Lane += V.getConstantOperandVal(1);
14801 V = V.getOperand(0);
14827 EVT VT = Op.getValueType();
14837 if (ElementSize > 32 || ElementSize == 1)
14867 EVT VT = Op.getValueType();
14884 for (unsigned I = 0; I < 16; I++) {
14885 if (ShuffleMask[I] < 16)
14891 TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, DL, MVT::i32);
14905 AArch64TargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
14908 EVT VT = Op.getValueType();
14912 unsigned UnpackOpcode = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
14920 Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv8i16, Val);
14921 if (VT == MVT::nxv8i16)
14925 Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv4i32, Val);
14926 if (VT == MVT::nxv4i32)
14930 Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv2i64, Val);
14931 assert(VT == MVT::nxv2i64 && "Unexpected result type!");
14942 AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(SDValue Op,
14945 EVT VT = Op.getValueType();
14948 return LowerEXTEND_VECTOR_INREG(Op, DAG);
14953 "Unexpected extension factor.");
14960 DAG.getNode(AArch64ISD::ZIP1, DL, SrcVT, SrcOp, Zeros));
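// Note: LowerVECTOR_SHUFFLE below tries the cheap patterns in order --
// DUPLANE, REV64/32/16, EXT, ZIP/UZP/TRN, INS -- then the perfect-shuffle
// table for 4-element vectors, and finally a TBL-based expansion.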
14966 EVT VT = Op.getValueType();
14971 return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
14977 ArrayRef<int> ShuffleMask = SVN->getMask();
14984 "Unexpected VECTOR_SHUFFLE mask size!");
15010 for (unsigned LaneSize : {64U, 32U, 16U}) {
15013 unsigned Opcode = LaneSize == 64   ? AArch64ISD::DUPLANE64
15014                   : LaneSize == 32 ? AArch64ISD::DUPLANE32
15015                                    : AArch64ISD::DUPLANE16;
15030 if (isREVMask(ShuffleMask, EltSize, NumElts, 64))
15032 if (isREVMask(ShuffleMask, EltSize, NumElts, 32))
15034 if (isREVMask(ShuffleMask, EltSize, NumElts, 16))
15037 if (((NumElts == 8 && EltSize == 16) || (NumElts == 16 && EltSize == 8)) &&
15040 return DAG.getNode(AArch64ISD::EXT, DL, VT, Rev, Rev,
15044 bool ReverseEXT = false;
15046 if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
15058 unsigned WhichResult;
15059 unsigned OperandOrder;
15060 if (isZIPMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
15061 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
15063 OperandOrder == 0 ? V2 : V1);
15065 if (isUZPMask(ShuffleMask, NumElts, WhichResult)) {
15066 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
15069 if (isTRNMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
15070 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
15072 OperandOrder == 0 ? V2 : V1);
15076 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
15080 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
15084 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
15094 if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
15095 SDValue DstVec = DstIsLeft ? V1 : V2;
15099 int SrcLane = ShuffleMask[Anomaly];
15100 if (SrcLane >= NumInputElements) {
15102 SrcLane -= NumElts;
15109 ScalarVT = MVT::i32;
15122 if (NumElts == 4) {
15123 unsigned PFIndexes[4];
15124 for (unsigned i = 0; i != 4; ++i) {
15125 if (ShuffleMask[i] < 0)
15128 PFIndexes[i] = ShuffleMask[i];
15132 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
15133                         PFIndexes[2] * 9 + PFIndexes[3];
15143 "Expected larger vector element sizes to be handled already");
15145 for (int M : ShuffleMask)
15147 M >= static_cast<int>(NumElts) ? 0 : 0xffffffff, DL, MVT::i32));
15161 EVT VT = Op.getValueType();
15164 return LowerToScalableOp(Op, DAG);
15167 "Unexpected vector type!");
15182 if (VT == MVT::nxv1i1)
15194 EVT VT = Op.getValueType();
15207 if (CIdx && (CIdx->getZExtValue() <= 3)) {
15209 return DAG.getNode(AArch64ISD::DUPLANE128, DL, VT, Op.getOperand(1), CI);
15231 SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
15237 APInt &UndefBits) {
15239 APInt SplatBits, SplatUndef;
15240 unsigned SplatBitSize;
15242 if (BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
15245 for (
unsigned i = 0; i < NumSplats; ++i) {
15246 CnstBits <<= SplatBitSize;
15247 UndefBits <<= SplatBitSize;
15249 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.
getSizeInBits());
15260 const APInt &Bits) {
15261 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15263 EVT VT =
Op.getValueType();
15272 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15283 EVT VT =
Op.getValueType();
15288 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15291 bool isAdvSIMDModImm =
false;
15311 if (isAdvSIMDModImm) {
15325 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15336 EVT VT =
Op.getValueType();
15341 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15344 bool isAdvSIMDModImm =
false;
15356 if (isAdvSIMDModImm) {
15370 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15380 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15382 EVT VT =
Op.getValueType();
15384 bool isAdvSIMDModImm =
false;
15396 if (isAdvSIMDModImm) {
15401 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15410 const APInt &Bits) {
15411 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15413 EVT VT =
Op.getValueType();
15422 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15431 const APInt &Bits) {
15432 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15434 EVT VT =
Op.getValueType();
15437 bool isAdvSIMDModImm =
false;
15441 MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
15446 MovTy = MVT::v2f64;
15449 if (isAdvSIMDModImm) {
15453 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15473 for (
unsigned i = 1; i < NumElts; ++i)
15482 while (
N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
15483 N =
N.getOperand(0);
15489 unsigned NumElts =
N.getValueType().getVectorMinNumElements();
15492 while (
N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
15493 N =
N.getOperand(0);
15496 if (
N.getValueType().getVectorMinNumElements() < NumElts)
15506 if (
N.getOpcode() == AArch64ISD::PTRUE &&
15507 N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
15508 return N.getValueType().getVectorMinNumElements() >= NumElts;
15520 EVT VT =
N->getValueType(0);
15530 SDValue FirstOp =
N->getOperand(0);
15531 unsigned FirstOpc = FirstOp.
getOpcode();
15532 SDValue SecondOp =
N->getOperand(1);
15533 unsigned SecondOpc = SecondOp.
getOpcode();
15540 if ((FirstOpc ==
ISD::AND || FirstOpc == AArch64ISD::BICi) &&
15541 (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR ||
15542 SecondOpc == AArch64ISD::SHL_PRED ||
15543 SecondOpc == AArch64ISD::SRL_PRED)) {
15547 }
else if ((SecondOpc ==
ISD::AND || SecondOpc == AArch64ISD::BICi) &&
15548 (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR ||
15549 FirstOpc == AArch64ISD::SHL_PRED ||
15550 FirstOpc == AArch64ISD::SRL_PRED)) {
15557 bool IsShiftRight = Shift.
getOpcode() == AArch64ISD::VLSHR ||
15558 Shift.
getOpcode() == AArch64ISD::SRL_PRED;
15559 bool ShiftHasPredOp = Shift.
getOpcode() == AArch64ISD::SHL_PRED ||
15560 Shift.
getOpcode() == AArch64ISD::SRL_PRED;
15564 if (ShiftHasPredOp) {
15570 C2 =
C.getZExtValue();
15573 C2 = C2node->getZExtValue();
15587 assert(C1nodeImm && C1nodeShift);
15589 C1AsAPInt = C1AsAPInt.
zextOrTrunc(ElemSizeInBits);
15595 if (C2 > ElemSizeInBits)
15600 if (C1AsAPInt != RequiredC1)
15608 unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
15613 EVT VT =
N->getValueType(0);
15614 assert(VT.
isVector() &&
"Expected vector type in tryLowerToBSL\n");
15632 for (
int i = 1; i >= 0; --i) {
15633 for (
int j = 1; j >= 0; --j) {
15659 if (
Sub.getOperand(1) !=
Add.getOperand(0))
15662 return DAG.
getNode(AArch64ISD::BSP,
DL, VT,
Sub, SubSibling, AddSibling);
15670 for (
int i = 1; i >= 0; --i)
15671 for (
int j = 1; j >= 0; --j) {
15682 if (!BVN0 || !BVN1)
15685 bool FoundMatch =
true;
15689 if (!CN0 || !CN1 ||
15692 FoundMatch =
false;
15707 !Subtarget->isNeonAvailable()))
15708 return LowerToScalableOp(
Op, DAG);
15717 EVT VT = Op.getValueType();
15722 BuildVectorSDNode *BVN =
15726 LHS = Op.getOperand(1);
15744 UndefBits, &LHS)) ||
15760 EVT VT = Op.getValueType();
15774 CstLane->getAPIntValue().trunc(EltTy.getSizeInBits()).getZExtValue(),
15776 } else if (Lane.getNode()->isUndef()) {
15779 assert(Lane.getValueType() == MVT::i32 &&
15780        "Unexpected BUILD_VECTOR operand type");
15782 Ops.push_back(Lane);
15789 EVT VT = Op.getValueType();
15797 int32_t ImmVal, ShiftVal;
15807 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Res);
15812 EVT VT = Op.getValueType();
15814 "Expected a legal NEON vector");
15820 auto TryMOVIWithBits = [&](APInt DefBits) {
15834 APInt NotDefBits = ~DefBits;
15844 if (SDValue R = TryMOVIWithBits(DefBits))
15846 if (SDValue R = TryMOVIWithBits(UndefBits))
15854 auto TryWithFNeg = [&](APInt DefBits, MVT FVT) {
15860 unsigned NumElts = VT.getSizeInBits() / FVT.getScalarSizeInBits();
15861 for (unsigned i = 0; i < NumElts; i++)
15862 NegBits |= Neg << (FVT.getScalarSizeInBits() * i);
15863 NegBits = DefBits ^ NegBits;
15867 if (SDValue NewOp = TryMOVIWithBits(NegBits)) {
15871 AArch64ISD::NVCAST, DL, VT,
15873 DAG.getNode(AArch64ISD::NVCAST, DL, VFVT, NewOp)));
15878 if ((R = TryWithFNeg(DefBits, MVT::f32)) ||
15879     (R = TryWithFNeg(DefBits, MVT::f64)) ||
15880     (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
15887 SDValue AArch64TargetLowering::LowerFixedLengthBuildVectorToSVE(
15889 EVT VT = Op.getValueType();
15913 NumElems - count_if(Op->op_values(), IsExtractElt) > 4)
15920 return Op.isUndef() ? Undef
15921                     : DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
15922                                   ContainerVT, Undef, Op, ZeroI64);
15926 while (Intermediates.size() > 1) {
15929 for (unsigned I = 0; I < Intermediates.size(); I += 2) {
15932 Intermediates[I / 2] =
15934     : DAG.getNode(AArch64ISD::ZIP1, DL, ZipVT, Op0, Op1);
15937 Intermediates.resize(Intermediates.size() / 2);
15948 EVT VT = Op.getValueType();
15950 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
15953 return LowerFixedLengthBuildVectorToSVE(Op, DAG);
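// Note: LowerBUILD_VECTOR below first classifies the operands (undef lanes,
// constant lanes, number of distinct values) and then chooses between DUP,
// constant materialisation, DUP plus lane inserts, concatenating two halves,
// or a plain chain of INSERT_VECTOR_ELTs.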
15971 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
15972 if (Val.isZero() || (VT.isInteger() && Val.isAllOnes()))
15976 if (Const->isZero() && !Const->isNegative())
15997 bool isOnlyLowElement = true;
15998 bool usesOnlyOneValue = true;
15999 bool usesOnlyOneConstantValue = true;
16001 bool AllLanesExtractElt = true;
16002 unsigned NumConstantLanes = 0;
16003 unsigned NumDifferentLanes = 0;
16004 unsigned NumUndefLanes = 0;
16007 SmallMapVector<SDValue, unsigned, 16> DifferentValueMap;
16008 unsigned ConsecutiveValCount = 0;
16010 for (unsigned i = 0; i < NumElts; ++i) {
16013 AllLanesExtractElt = false;
16019 isOnlyLowElement = false;
16024 ++NumConstantLanes;
16025 if (!ConstantValue.getNode())
16027 else if (ConstantValue != V)
16028 usesOnlyOneConstantValue = false;
16031 if (!Value.getNode())
16033 else if (V != Value) {
16034 usesOnlyOneValue = false;
16035 ++NumDifferentLanes;
16038 if (PrevVal != V) {
16039 ConsecutiveValCount = 0;
16054 DifferentValueMap[V] = ++ConsecutiveValCount;
16057 if (!
Value.getNode()) {
16059 dbgs() <<
"LowerBUILD_VECTOR: value undefined, creating undef node\n");
16067 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: only low element used, creating 1 "
16068 "SCALAR_TO_VECTOR node\n");
16072 if (AllLanesExtractElt) {
16073 SDNode *
Vector =
nullptr;
16078 for (
unsigned i = 0; i < NumElts; ++i) {
16080 const SDNode *
N =
V.getNode();
16105 if (Val == 2 * i) {
16109 if (Val - 1 == 2 * i) {
16136 if (usesOnlyOneValue) {
16139 Value.getValueType() != VT) {
16141 dbgs() <<
"LowerBUILD_VECTOR: use DUP for non-constant splats\n");
16149 if (
Value.getValueSizeInBits() == 64) {
16151 dbgs() <<
"LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
16163 assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
16164 EltTy == MVT::f64) &&
"Unsupported floating-point vector type");
16166 dbgs() <<
"LowerBUILD_VECTOR: float constant splats, creating int "
16167 "BITCASTS, and try again\n");
16169 for (
unsigned i = 0; i < NumElts; ++i)
16173 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: trying to lower new vector: ";
16175 Val = LowerBUILD_VECTOR(Val, DAG);
16185 bool PreferDUPAndInsert =
16187 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
16188 NumDifferentLanes >= NumConstantLanes;
16194 if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
16198 APInt ConstantValueAPInt(1, 0);
16200 ConstantValueAPInt =
C->getAPIntValue().zextOrTrunc(BitSize);
16202 !ConstantValueAPInt.isAllOnes()) {
16206 Val = DAG.
getNode(AArch64ISD::DUP,
DL, VT, ConstantValue);
16210 for (
unsigned i = 0; i < NumElts; ++i) {
16224 dbgs() <<
"LowerBUILD_VECTOR: all elements are constant, use default "
16236 if (NumElts >= 4) {
16244 if (PreferDUPAndInsert) {
16249 for (
unsigned I = 0;
I < NumElts; ++
I)
16260 if (DifferentValueMap.
size() == 2 && NumUndefLanes == 0) {
16272 bool canUseVECTOR_CONCAT =
true;
16273 for (
auto Pair : DifferentValueMap) {
16275 if (Pair.second != NumElts / 2)
16276 canUseVECTOR_CONCAT =
false;
16289 if (canUseVECTOR_CONCAT) {
16312 if (NumElts >= 8) {
16313 SmallVector<int, 16> MaskVec;
16315 SDValue FirstLaneVal =
Op.getOperand(0);
16316 for (
unsigned i = 0; i < NumElts; ++i) {
16318 if (FirstLaneVal == Val)
16342 dbgs() <<
"LowerBUILD_VECTOR: alternatives failed, creating sequence "
16343 "of INSERT_VECTOR_ELT\n");
16360 LLVM_DEBUG(
dbgs() <<
"Creating node for op0, it is not undefined:\n");
16366 dbgs() <<
"Creating nodes for the other vector elements:\n";
16368 for (; i < NumElts; ++i) {
16379 dbgs() <<
"LowerBUILD_VECTOR: use default expansion, failed to find "
16380 "better alternative\n");
16387 !Subtarget->isNeonAvailable()))
16388 return LowerFixedLengthConcatVectorsToSVE(
Op, DAG);
16390 assert(
Op.getValueType().isScalableVector() &&
16392 "Expected legal scalable vector type!");
16397 "Unexpected number of operands in CONCAT_VECTORS");
16399 if (NumOperands == 2)
16404 while (ConcatOps.size() > 1) {
16405 for (
unsigned I = 0,
E = ConcatOps.size();
I !=
E;
I += 2) {
16413 ConcatOps.resize(ConcatOps.size() / 2);
16415 return ConcatOps[0];
16426 !Subtarget->isNeonAvailable()))
16427 return LowerFixedLengthInsertVectorElt(Op, DAG);
16429 EVT VT = Op.getOperand(0).getValueType();
16443 ExtendedValue, Op.getOperand(2));
16456 AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
16459 EVT VT = Op.getOperand(0).getValueType();
16465 if (VT == MVT::nxv1i1) {
16469 WidenedPred, Op.getOperand(1));
16476 MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
16478 Extend, Op.getOperand(1));
16483 return LowerFixedLengthExtractVectorElt(Op, DAG);
16491 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16492 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
16493 VT == MVT::v8f16 || VT == MVT::v8bf16)
16496 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
16497 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
16508 if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
16518 EVT VT =
Op.getValueType();
16520 "Only cases that extract a fixed length vector are supported!");
16521 EVT InVT =
Op.getOperand(0).getValueType();
16529 unsigned Idx =
Op.getConstantOperandVal(1);
16548 if (PackedVT != InVT) {
16572 assert(
Op.getValueType().isScalableVector() &&
16573 "Only expect to lower inserts into scalable vectors!");
16575 EVT InVT =
Op.getOperand(1).getValueType();
16576 unsigned Idx =
Op.getConstantOperandVal(2);
16581 EVT VT =
Op.getValueType();
16597 if (Idx < (NumElts / 2))
16623 Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
16624 Vec1 = getSVESafeBitCast(NarrowVT, Vec1, DAG);
16628 Vec1 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, Vec1);
16637 HiVec0 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, HiVec0);
16638 Narrow = DAG.
getNode(AArch64ISD::UZP1,
DL, NarrowVT, Vec1, HiVec0);
16641 "Invalid subvector index!");
16643 LoVec0 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, LoVec0);
16644 Narrow = DAG.
getNode(AArch64ISD::UZP1,
DL, NarrowVT, LoVec0, Vec1);
16647 return getSVESafeBitCast(VT, Narrow, DAG);
16655 std::optional<unsigned> PredPattern =
16667 if (
Op.getOpcode() != AArch64ISD::DUP &&
16680 SplatVal =
Op->getConstantOperandVal(0);
16681 if (
Op.getValueType().getVectorElementType() != MVT::i64)
16682 SplatVal = (int32_t)SplatVal;
16690 SplatVal = -SplatVal;
16698 EVT VT = Op.getValueType();
16702 return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
16707 unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
16716 DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0),
16724 if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
16725 return LowerToPredicatedOp(Op, DAG, PredOpcode);
16730 if (VT == MVT::nxv16i8)
16731 WidenedVT = MVT::nxv8i16;
16732 else if (VT == MVT::nxv8i16)
16733 WidenedVT = MVT::nxv4i32;
16737 unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
16738 unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
16747 return DAG.getNode(AArch64ISD::UZP1, DL, VT, ResultLoCast, ResultHiCast);
16750bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
16751 EVT VT,
unsigned DefinedValues)
const {
16752 if (!Subtarget->isNeonAvailable())
16771 unsigned DummyUnsigned;
16779 isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
16781 isTRNMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
16782 isUZPMask(M, NumElts, DummyUnsigned) ||
16783 isZIPMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
16787 isINSMask(M, NumElts, DummyBool, DummyInt) ||
16803 Op =
Op.getOperand(0);
16805 APInt SplatBits, SplatUndef;
16806 unsigned SplatBitSize;
16808 if (!BVN || !BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
16809 HasAnyUndefs, ElementBits) ||
16810 SplatBitSize > ElementBits)
16821 assert(VT.
isVector() &&
"vector shift count is not a vector type");
16825 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
16832 assert(VT.
isVector() &&
"vector shift count is not a vector type");
16836 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
16841 EVT VT =
Op.getValueType();
16846 EVT OpVT =
Op.getOperand(0).getValueType();
16857 !Subtarget->isNeonAvailable()))
16858 return LowerFixedLengthVectorTruncateToSVE(
Op, DAG);
16868 unsigned &ShiftValue,
16881 ShiftValue = ShiftOp1->getZExtValue();
16890 "ResVT must be truncated or same type as the shift.");
16893 if (ShiftValue > ExtraBits && !
Add->getFlags().hasNoUnsignedWrap())
16900 uint64_t AddValue = AddOp1->getZExtValue();
16901 if (AddValue != 1ULL << (ShiftValue - 1))
16904 RShOperand =
Add->getOperand(0);
16910 EVT VT = Op.getValueType();
16914 if (!Op.getOperand(1).getValueType().isVector())
16918 switch (Op.getOpcode()) {
16922 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
16924 if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
16925 return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
16930 Op.getOperand(0), Op.getOperand(1));
16934 (Subtarget->hasSVE2() ||
16935 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
16937 unsigned ShiftValue;
16939 return DAG.getNode(AArch64ISD::URSHR_I_PRED, DL, VT,
16946 unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
16947                                           : AArch64ISD::SRL_PRED;
16948 return LowerToPredicatedOp(Op, DAG, Opc);
16952 if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
16954 (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
16963 unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
16964                                             : Intrinsic::aarch64_neon_ushl;
16972 return NegShiftLeft;
16980 if (Op.getValueType().isScalableVector())
16981 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
16984 !Subtarget->isNeonAvailable()))
16985 return LowerFixedLengthVectorSetccToSVE(Op, DAG);
16990 EVT CmpVT =
LHS.getValueType().changeVectorElementTypeToInteger();
16993 if (
LHS.getValueType().getVectorElementType().isInteger())
16996 assert(((!Subtarget->hasFullFP16() &&
16997 LHS.getValueType().getVectorElementType() != MVT::f16) ||
16998 LHS.getValueType().getVectorElementType() != MVT::bf16 ||
16999 LHS.getValueType().getVectorElementType() != MVT::f128) &&
17000 "Unexpected type!");
17005 bool OneNaN =
false;
17029 if (!
Cmp.getNode())
17058 unsigned ScalarOpcode;
17076 "Expected power-of-2 length vector");
17084 if (ElemVT == MVT::i1) {
17086 if (NumElems > 16) {
17089 EVT HalfVT =
Lo.getValueType();
17100 unsigned ExtendedWidth = 64;
17103 ExtendedWidth = 128;
17108 unsigned ExtendOp =
17117 NumElems == 2 && ExtendedWidth == 128) {
17118 Extended = DAG.
getBitcast(MVT::v4i32, Extended);
17119 ExtendedVT = MVT::i32;
17121 switch (ScalarOpcode) {
17142 VecVT =
Lo.getValueType();
17158 for (
unsigned Shift = NumElems / 2; Shift > 0; Shift /= 2) {
17163 Scalar = DAG.
getNode(ScalarOpcode,
DL, ScalarVT, Scalar, Shifted);
17175 EVT SrcVT = Src.getValueType();
17180 SrcVT == MVT::v2f16) {
17188 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
17197 SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
17200 return LowerPredReductionToSVE(Op, DAG);
17202 switch (Op.getOpcode()) {
17204 return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
17206 return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
17208 return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
17210 return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
17212 return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
17214 return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
17216 return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
17218 return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
17220 return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
17222 return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
17224 return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
17226 return LowerReductionToSVE(AArch64ISD::FMAXV_PRED, Op, DAG);
17228 return LowerReductionToSVE(AArch64ISD::FMINV_PRED, Op, DAG);
17236 switch (Op.getOpcode()) {
17241 Op.getValueType(), DL, DAG);
17261 EVT SrcVT = Src.getValueType();
17264 SDVTList SrcVTs = DAG.
getVTList(SrcVT, SrcVT);
17276 for (
unsigned I = 0;
I < Stages; ++
I) {
17278 Src = DAG.
getNode(BaseOpc,
DL, SrcVT, Src.getValue(0), Src.getValue(1));
17286 auto &Subtarget = DAG.
getSubtarget<AArch64Subtarget>();
17288 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
17293 MVT VT =
Op.getSimpleValueType();
17294 assert(VT != MVT::i128 &&
"Handled elsewhere, code replicated.");
17299 Op.getOperand(0),
Op.getOperand(1),
RHS,
17304 AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
17309 SDNode *Node = Op.getNode();
17314 EVT VT = Node->getValueType(0);
17317 "no-stack-arg-probe")) {
17319 Chain = SP.getValue(1);
17329 RTLIB::LibcallImpl ChkStkImpl = getLibcallImpl(RTLIB::STACK_PROBE);
17330 if (ChkStkImpl == RTLIB::Unsupported)
17339 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
17340 const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
17341 if (Subtarget->hasCustomCallingConv())
17349 Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
17360 Chain = SP.getValue(1);
17374 AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(SDValue Op,
17377 SDNode *Node = Op.getNode();
17384 EVT VT = Node->getValueType(0);
17388 Chain = SP.getValue(1);
17395 Chain = DAG.getNode(AArch64ISD::PROBED_ALLOCA, DL, MVT::Other, Chain, SP);
17401 AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
17405 if (Subtarget->isTargetWindows())
17406 return LowerWindowsDYNAMIC_STACKALLOC(Op, DAG);
17408 return LowerInlineDYNAMIC_STACKALLOC(Op, DAG);
17414 unsigned NewOp)
const {
17415 if (Subtarget->hasSVE2())
17416 return LowerToPredicatedOp(
Op, DAG, NewOp);
17424 EVT VT =
Op.getValueType();
17425 assert(VT != MVT::i64 &&
"Expected illegal VSCALE node");
17428 APInt MulImm =
Op.getConstantOperandAPInt(0);
17434template <
unsigned NumVecs>
17444 for (
unsigned I = 0;
I < NumVecs; ++
I)
17453 Info.align.reset();
17465 auto &
DL =
I.getDataLayout();
17467 case Intrinsic::aarch64_sve_st2:
17469 case Intrinsic::aarch64_sve_st3:
17471 case Intrinsic::aarch64_sve_st4:
17473 case Intrinsic::aarch64_neon_ld2:
17474 case Intrinsic::aarch64_neon_ld3:
17475 case Intrinsic::aarch64_neon_ld4:
17476 case Intrinsic::aarch64_neon_ld1x2:
17477 case Intrinsic::aarch64_neon_ld1x3:
17478 case Intrinsic::aarch64_neon_ld1x4: {
17480 uint64_t NumElts =
DL.getTypeSizeInBits(
I.getType()) / 64;
17482 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17484 Info.align.reset();
17489 case Intrinsic::aarch64_neon_ld2lane:
17490 case Intrinsic::aarch64_neon_ld3lane:
17491 case Intrinsic::aarch64_neon_ld4lane:
17492 case Intrinsic::aarch64_neon_ld2r:
17493 case Intrinsic::aarch64_neon_ld3r:
17494 case Intrinsic::aarch64_neon_ld4r: {
17497 Type *RetTy =
I.getType();
17499 unsigned NumElts = StructTy->getNumElements();
17500 Type *VecTy = StructTy->getElementType(0);
17503 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17505 Info.align.reset();
17510 case Intrinsic::aarch64_neon_st2:
17511 case Intrinsic::aarch64_neon_st3:
17512 case Intrinsic::aarch64_neon_st4:
17513 case Intrinsic::aarch64_neon_st1x2:
17514 case Intrinsic::aarch64_neon_st1x3:
17515 case Intrinsic::aarch64_neon_st1x4: {
17517 unsigned NumElts = 0;
17518 for (
const Value *Arg :
I.args()) {
17519 Type *ArgTy = Arg->getType();
17522 NumElts +=
DL.getTypeSizeInBits(ArgTy) / 64;
17525 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17527 Info.align.reset();
17532 case Intrinsic::aarch64_neon_st2lane:
17533 case Intrinsic::aarch64_neon_st3lane:
17534 case Intrinsic::aarch64_neon_st4lane: {
17536 unsigned NumElts = 0;
17538 Type *VecTy =
I.getArgOperand(0)->getType();
17541 for (
const Value *Arg :
I.args()) {
17542 Type *ArgTy = Arg->getType();
17549 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17551 Info.align.reset();
17556 case Intrinsic::aarch64_ldaxr:
17557 case Intrinsic::aarch64_ldxr: {
17558 Type *ValTy =
I.getParamElementType(0);
17561 Info.ptrVal =
I.getArgOperand(0);
17563 Info.align =
DL.getABITypeAlign(ValTy);
17567 case Intrinsic::aarch64_stlxr:
17568 case Intrinsic::aarch64_stxr: {
17569 Type *ValTy =
I.getParamElementType(1);
17572 Info.ptrVal =
I.getArgOperand(1);
17574 Info.align =
DL.getABITypeAlign(ValTy);
17578 case Intrinsic::aarch64_ldaxp:
17579 case Intrinsic::aarch64_ldxp:
17581 Info.memVT = MVT::i128;
17582 Info.ptrVal =
I.getArgOperand(0);
17584 Info.align =
Align(16);
17587 case Intrinsic::aarch64_stlxp:
17588 case Intrinsic::aarch64_stxp:
17590 Info.memVT = MVT::i128;
17591 Info.ptrVal =
I.getArgOperand(2);
17593 Info.align =
Align(16);
17596 case Intrinsic::aarch64_sve_ldnt1: {
17600 Info.ptrVal =
I.getArgOperand(1);
17602 Info.align =
DL.getABITypeAlign(ElTy);
17606 case Intrinsic::aarch64_sve_stnt1: {
17610 Info.memVT =
MVT::getVT(
I.getOperand(0)->getType());
17611 Info.ptrVal =
I.getArgOperand(2);
17613 Info.align =
DL.getABITypeAlign(ElTy);
17617 case Intrinsic::aarch64_mops_memset_tag: {
17618 Value *Dst =
I.getArgOperand(0);
17619 Value *Val =
I.getArgOperand(1);
17624 Info.align =
I.getParamAlign(0).valueOrOne();
17639 std::optional<unsigned> ByteOffset)
const {
17656 Base.getOperand(1).hasOneUse() &&
17663 uint64_t ShiftAmount =
Base.getOperand(1).getConstantOperandVal(1);
17665 if (ShiftAmount ==
Log2_32(LoadBytes))
17675 if ((VT == MVT::i64 || VT == MVT::i32) && Extend->
use_size()) {
17694 return NumBits1 > NumBits2;
17701 return NumBits1 > NumBits2;
17708 if (
I->getOpcode() != Instruction::FMul)
17711 if (!
I->hasOneUse())
17716 if (!(
User->getOpcode() == Instruction::FSub ||
17717 User->getOpcode() == Instruction::FAdd))
17728 I->getFastMathFlags().allowContract()));
17738 return NumBits1 == 32 && NumBits2 == 64;
17745 return NumBits1 == 32 && NumBits2 == 64;
17763bool AArch64TargetLowering::isExtFreeImpl(
const Instruction *Ext)
const {
17771 for (
const Use &U : Ext->
uses()) {
17779 switch (Instr->getOpcode()) {
17780 case Instruction::Shl:
17784 case Instruction::GetElementPtr: {
17787 std::advance(GTI, U.getOperandNo()-1);
17800 if (ShiftAmt == 0 || ShiftAmt > 4)
17804 case Instruction::Trunc:
17821 unsigned NumElts,
bool IsLittleEndian,
17823 if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth > 64)
17826 assert(DstWidth % SrcWidth == 0 &&
17827 "TBL lowering is not supported for a conversion instruction with this "
17828 "source and destination element type.");
17830 unsigned Factor = DstWidth / SrcWidth;
17831 unsigned MaskLen = NumElts * Factor;
17834 Mask.resize(MaskLen, NumElts);
17836 unsigned SrcIndex = 0;
17837 for (
unsigned I = IsLittleEndian ? 0 : Factor - 1;
I < MaskLen;
I += Factor)
17838 Mask[
I] = SrcIndex++;
17846 bool IsLittleEndian) {
17848 unsigned NumElts = SrcTy->getNumElements();
17856 auto *FirstEltZero = Builder.CreateInsertElement(
17858 Value *Result = Builder.CreateShuffleVector(
Op, FirstEltZero, Mask);
17859 Result = Builder.CreateBitCast(Result, DstTy);
17860 if (DstTy != ZExtTy)
17861 Result = Builder.CreateZExt(Result, ZExtTy);
17867 bool IsLittleEndian) {
17874 !IsLittleEndian, Mask))
17877 auto *FirstEltZero = Builder.CreateInsertElement(
17880 return Builder.CreateShuffleVector(
Op, FirstEltZero, Mask);
17889 assert(SrcTy->getElementType()->isIntegerTy() &&
17890 "Non-integer type source vector element is not supported");
17891 assert(DstTy->getElementType()->isIntegerTy(8) &&
17892 "Unsupported destination vector element type");
17893 unsigned SrcElemTySz =
17895 unsigned DstElemTySz =
17897 assert((SrcElemTySz % DstElemTySz == 0) &&
17898 "Cannot lower truncate to tbl instructions for a source element size "
17899 "that is not divisible by the destination element size");
17900 unsigned TruncFactor = SrcElemTySz / DstElemTySz;
17901 assert((SrcElemTySz == 16 || SrcElemTySz == 32 || SrcElemTySz == 64) &&
17902 "Unsupported source vector element type size");
17910 for (int Itr = 0; Itr < 16; Itr++) {
17911 if (Itr < NumElements)
17913 IsLittleEndian ? Itr * TruncFactor
17914                : Itr * TruncFactor + (TruncFactor - 1)));
17916 MaskConst.push_back(Builder.getInt8(255));
17919 int MaxTblSz = 128 * 4;
17920 int MaxSrcSz = SrcElemTySz * NumElements;
17922 (MaxTblSz > MaxSrcSz) ? NumElements : (MaxTblSz / SrcElemTySz);
17923 assert(ElemsPerTbl <= 16 &&
17924        "Maximum elements selected using TBL instruction cannot exceed 16!");
17926 int ShuffleCount = 128 / SrcElemTySz;
17928 for (int i = 0; i < ShuffleCount; ++i)
17935 while (ShuffleLanes.back() < NumElements) {
17937 Builder.CreateShuffleVector(TI->getOperand(0), ShuffleLanes), VecTy));
17939 if (Parts.size() == 4) {
17942 Builder.CreateIntrinsic(Intrinsic::aarch64_neon_tbl4, VecTy, Parts));
17946 for (int i = 0; i < ShuffleCount; ++i)
17947 ShuffleLanes[i] += ShuffleCount;
17951 "Lowering trunc for vectors requiring different TBL instructions is "
17955 if (!Parts.empty()) {
17957 switch (Parts.size()) {
17959 TblID = Intrinsic::aarch64_neon_tbl1;
17962 TblID = Intrinsic::aarch64_neon_tbl2;
17965 TblID = Intrinsic::aarch64_neon_tbl3;
17970 Results.push_back(Builder.CreateIntrinsic(TblID, VecTy, Parts));
17975 assert(Results.size() <= 2 && "Trunc lowering does not support generation of "
17976                               "more than 2 tbl instructions!");
17979 if (ElemsPerTbl < 16) {
17981 std::iota(FinalMask.begin(), FinalMask.end(), 0);
17982 FinalResult = Builder.CreateShuffleVector(Results[0], FinalMask);
17986 if (ElemsPerTbl < 16) {
17987 std::iota(FinalMask.begin(), FinalMask.begin() + ElemsPerTbl, 0);
17988 std::iota(FinalMask.begin() + ElemsPerTbl, FinalMask.end(), 16);
17990 std::iota(FinalMask.begin(), FinalMask.end(), 0);
18004 if (!
EnableExtToTBL || Subtarget->useSVEForFixedLengthVectors())
18012 if (!L || L->getHeader() !=
I->getParent() ||
F->hasOptSize())
18017 if (!SrcTy || !DstTy)
18024 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
18025 auto DstWidth = DstTy->getElementType()->getScalarSizeInBits();
18026 if (DstWidth % 8 != 0)
18029 auto *TruncDstType =
18033 auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits();
18034 if (
TTI.getCastInstrCost(
I->getOpcode(), DstTy, TruncDstType,
18037 if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits())
18040 DstTy = TruncDstType;
18048 if (SrcWidth * 4 <= DstWidth) {
18049 if (
all_of(
I->users(), [&](
auto *U) {
18050 using namespace llvm::PatternMatch;
18051 auto *SingleUser = cast<Instruction>(&*U);
18052 if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
18054 if (match(SingleUser,
18055 m_Intrinsic<Intrinsic::vector_partial_reduce_add>(
18056 m_Value(), m_Specific(I))))
18063 if (DstTy->getScalarSizeInBits() >= 64)
18069 DstTy, Subtarget->isLittleEndian());
18072 ZExt->replaceAllUsesWith(Result);
18073 ZExt->eraseFromParent();
18078 if (UIToFP && ((SrcTy->getElementType()->isIntegerTy(8) &&
18079 DstTy->getElementType()->isFloatTy()) ||
18080 (SrcTy->getElementType()->isIntegerTy(16) &&
18081 DstTy->getElementType()->isDoubleTy()))) {
18086 assert(ZExt &&
"Cannot fail for the i8 to float conversion");
18087 auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
18088 I->replaceAllUsesWith(UI);
18089 I->eraseFromParent();
18094 if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
18095 DstTy->getElementType()->isFloatTy()) {
18099 Subtarget->isLittleEndian());
18100 assert(Shuffle &&
"Cannot fail for the i8 to float conversion");
18102 auto *AShr = Builder.CreateAShr(Cast, 24,
"",
true);
18103 auto *
SI = Builder.CreateSIToFP(AShr, DstTy);
18104 I->replaceAllUsesWith(
SI);
18105 I->eraseFromParent();
18113 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
18114 SrcTy->getElementType()->isFloatTy() &&
18115 DstTy->getElementType()->isIntegerTy(8)) {
18117 auto *WideConv = Builder.CreateFPToUI(FPToUI->getOperand(0),
18119 auto *TruncI = Builder.CreateTrunc(WideConv, DstTy);
18120 I->replaceAllUsesWith(TruncI);
18121 I->eraseFromParent();
18131 if (TI && DstTy->getElementType()->isIntegerTy(8) &&
18132 ((SrcTy->getElementType()->isIntegerTy(32) ||
18133 SrcTy->getElementType()->isIntegerTy(64)) &&
18134 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
18143 Align &RequiredAlignment)
const {
18148 RequiredAlignment =
Align(1);
18150 return NumBits == 32 || NumBits == 64;
18157 unsigned VecSize = 128;
18161 VecSize = std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
18162 return std::max<unsigned>(1, (MinElts * ElSize + 127) / VecSize);
18167 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
18177 unsigned MinElts = EC.getKnownMinValue();
18179 UseScalable = false;
18182 (!Subtarget->useSVEForFixedLengthVectors() ||
18187 !Subtarget->isSVEorStreamingSVEAvailable())
18195 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
18198 if (EC.isScalable()) {
18199 UseScalable = true;
18200 return isPowerOf2_32(MinElts) && (MinElts * ElSize) % 128 == 0;
18203 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
18204 if (Subtarget->useSVEForFixedLengthVectors()) {
18205 unsigned MinSVEVectorSize =
18206     std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
18207 if (VecSize % MinSVEVectorSize == 0 ||
18209     (!Subtarget->isNeonAvailable() || VecSize > 128))) {
18210 UseScalable = true;
18217 return Subtarget->isNeonAvailable() && (VecSize == 64 || VecSize % 128 == 0);
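// Note: the interleaved-access lowering below selects the SVE ld2/ld3/ld4
// (st2/st3/st4) or NEON equivalents for factors 2-4 and splits wide fixed
// vectors into several ldN/stN calls, each predicated by an all-true PTRUE
// when the scalable forms are used.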
18249 bool Scalable,
Type *LDVTy,
18251 assert(Factor >= 2 && Factor <= 4 &&
"Invalid interleave factor");
18252 static const Intrinsic::ID SVELoads[3] = {Intrinsic::aarch64_sve_ld2_sret,
18253 Intrinsic::aarch64_sve_ld3_sret,
18254 Intrinsic::aarch64_sve_ld4_sret};
18255 static const Intrinsic::ID NEONLoads[3] = {Intrinsic::aarch64_neon_ld2,
18256 Intrinsic::aarch64_neon_ld3,
18257 Intrinsic::aarch64_neon_ld4};
18266 bool Scalable,
Type *STVTy,
18268 assert(Factor >= 2 && Factor <= 4 &&
"Invalid interleave factor");
18269 static const Intrinsic::ID SVEStores[3] = {Intrinsic::aarch64_sve_st2,
18270 Intrinsic::aarch64_sve_st3,
18271 Intrinsic::aarch64_sve_st4};
18272 static const Intrinsic::ID NEONStores[3] = {Intrinsic::aarch64_neon_st2,
18273 Intrinsic::aarch64_neon_st3,
18274 Intrinsic::aarch64_neon_st4};
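// Interleaved-load lowering: split the fixed vector into legal chunks, emit
// one ldN call per chunk (predicated with ptrue in the SVE case), and rebuild
// each original shufflevector result from the extracted sub-vectors.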
18297 "Invalid interleave factor");
18298 assert(!Shuffles.empty() && "Empty shufflevector input");
18300 "Unmatched number of shufflevectors and indices");
18305 assert(!Mask && GapMask.popcount() == Factor && "Unexpected mask on a load");
18324 SI->getType()->getScalarSizeInBits() * 4 ==
18325 SI->user_back()->getType()->getScalarSizeInBits();
18335 Type *EltTy = FVTy->getElementType();
18343 FVTy->getNumElements() / NumLoads);
18351 Value *BaseAddr = LI->getPointerOperand();
18353 Type *PtrTy = LI->getPointerOperandType();
18355 LDVTy->getElementCount());
18358 UseScalable, LDVTy, PtrTy);
18365 Value *PTrue = nullptr;
18367 std::optional<unsigned> PgPattern =
18369 if (Subtarget->getMinSVEVectorSizeInBits() ==
18370 Subtarget->getMaxSVEVectorSizeInBits() &&
18371 Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
18372 PgPattern = AArch64SVEPredPattern::all;
18376 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
18380 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
18385 BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
18386 FVTy->getNumElements() * Factor);
18390 LdN = Builder.CreateCall(LdNFunc, {PTrue, BaseAddr}, "ldN");
18392 LdN = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
18395 for (unsigned i = 0; i < Shuffles.size(); i++) {
18397 unsigned Index = Indices[i];
18399 Value *SubVec = Builder.CreateExtractValue(LdN, Index);
18402 SubVec = Builder.CreateExtractVector(FVTy, SubVec, uint64_t(0));
18406 SubVec = Builder.CreateIntToPtr(
18408 FVTy->getNumElements()));
18410 SubVecs[SVI].push_back(SubVec);
18419 auto &SubVec = SubVecs[SVI];
18422 SVI->replaceAllUsesWith(WideVec);
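// Helper: scan forward up to 20 instructions (skipping debug/pseudo
// instructions) for a store whose pointer operand shares the same stripped
// base as PtrA, comparing the accumulated in-bounds constant offsets.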
18428template <typename Iter>
18430 int MaxLookupDist = 20;
18431 unsigned IdxWidth = DL.getIndexSizeInBits(0);
18432 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
18433 const Value *PtrA1 =
18436 while (++It != End) {
18437 if (It->isDebugOrPseudoInst())
18439 if (MaxLookupDist-- == 0)
18442 const Value *PtrB1 =
18443 SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(
18445 if (PtrA1 == PtrB1 &&
18446 (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.sextOrTrunc(IdxWidth))
18485 const APInt &GapMask) const {
18488 "Invalid interleave factor");
18493 "Unexpected mask on store");
18496 assert(VecTy->getNumElements() % Factor == 0 &&
"Invalid interleaved store");
18498 unsigned LaneLen = VecTy->getNumElements() / Factor;
18499 Type *EltTy = VecTy->getElementType();
18520 Type *IntTy = DL.getIntPtrType(EltTy);
18521 unsigned NumOpElts =
18526 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
18527 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
18534 LaneLen /= NumStores;
18541 Value *BaseAddr = SI->getPointerOperand();
18555 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
18563 Type *PtrTy = SI->getPointerOperandType();
18565 STVTy->getElementCount());
18568 UseScalable, STVTy, PtrTy);
18570 Value *PTrue = nullptr;
18572 std::optional<unsigned> PgPattern =
18574 if (Subtarget->getMinSVEVectorSizeInBits() ==
18575 Subtarget->getMaxSVEVectorSizeInBits() &&
18576 Subtarget->getMinSVEVectorSizeInBits() ==
18577 DL.getTypeSizeInBits(SubVecTy))
18578 PgPattern = AArch64SVEPredPattern::all;
18582 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
18586 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
18591 for (unsigned i = 0; i < Factor; i++) {
18593 unsigned IdxI = StoreCount * LaneLen * Factor + i;
18594 if (Mask[IdxI] >= 0) {
18595 Shuffle = Builder.CreateShuffleVector(
18598 unsigned StartMask = 0;
18599 for (unsigned j = 1; j < LaneLen; j++) {
18600 unsigned IdxJ = StoreCount * LaneLen * Factor + j * Factor + i;
18601 if (Mask[IdxJ] >= 0) {
18602 StartMask = Mask[IdxJ] - j;
18611 Shuffle = Builder.CreateShuffleVector(
18619 Ops.push_back(Shuffle);
18623 Ops.push_back(PTrue);
18627 if (StoreCount > 0)
18628 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
18629 BaseAddr, LaneLen * Factor);
18631 Ops.push_back(BaseAddr);
18632 Builder.CreateCall(StNFunc, Ops);
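// vector.deinterleaveN lowering: only factors 2-4 map onto ldN; when several
// loads are required, each factor's result is assembled chunk-by-chunk with
// insertvector before being packed into the final aggregate.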
18640 if (Factor != 2 && Factor != 3 && Factor != 4) {
18641 LLVM_DEBUG(dbgs() << "Matching ld2, ld3 and ld4 patterns failed\n");
18647 assert(!Mask && "Unexpected mask on a load\n");
18651 const DataLayout &DL = LI->getModule()->getDataLayout();
18666 Type *PtrTy = LI->getPointerOperandType();
18668 UseScalable, LdTy, PtrTy);
18671 Value *Pred = nullptr;
18674 Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue());
18676 Value *BaseAddr = LI->getPointerOperand();
18677 Value *Result = nullptr;
18678 if (NumLoads > 1) {
18681 for (unsigned I = 0; I < NumLoads; ++I) {
18685 Value *LdN = nullptr;
18687 LdN = Builder.CreateCall(LdNFunc, {Pred, Address}, "ldN");
18689 LdN = Builder.CreateCall(LdNFunc, Address, "ldN");
18692 for (unsigned J = 0; J < Factor; ++J) {
18693 ExtractedLdValues[J] = Builder.CreateInsertVector(
18694 VTy, ExtractedLdValues[J], Builder.CreateExtractValue(LdN, J), Idx);
18701 for (unsigned J = 0; J < Factor; ++J)
18702 Result = Builder.CreateInsertValue(Result, ExtractedLdValues[J], J);
18705 Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
18707 Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
18718 unsigned Factor = InterleavedValues.size();
18719 if (Factor != 2 && Factor != 3 && Factor != 4) {
18720 LLVM_DEBUG(dbgs() << "Matching st2, st3 and st4 patterns failed\n");
18726 assert(!Mask && "Unexpected mask on plain store");
18746 Type *PtrTy = SI->getPointerOperandType();
18748 UseScalable, StTy, PtrTy);
18752 Value *BaseAddr = SI->getPointerOperand();
18753 Value *Pred = nullptr;
18757 Builder.CreateVectorSplat(StTy->getElementCount(), Builder.getTrue());
18759 auto ExtractedValues = InterleavedValues;
18764 for (unsigned I = 0; I < NumStores; ++I) {
18766 if (NumStores > 1) {
18771 for (unsigned J = 0; J < Factor; J++) {
18773 Builder.CreateExtractVector(StTy, ExtractedValues[J], Idx);
18776 StoreOperands[StoreOperands.size() - 1] = Address;
18778 Builder.CreateCall(StNFunc, StoreOperands);
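// Memory-op type selection for memcpy/memmove/memset expansion: prefer a
// 128-bit vector when NEON is usable, then f128, then i64, then i32, subject
// to alignment; memsets under 32 bytes skip the vector and f128 paths.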
18785 const AttributeList &FuncAttributes) const {
18786 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
18787 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
18788 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
18792 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
18793 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
18794 if (Op.isAligned(AlignCheck))
18802 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
18803 AlignmentIsAcceptable(MVT::v16i8, Align(16)))
18805 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
18807 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
18809 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
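// A second overload applies the same selection ladder, using v2i64 as the
// NEON vector type.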
18815 const MemOp &Op, const AttributeList &FuncAttributes) const {
18816 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
18817 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
18818 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
18822 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
18823 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
18824 if (Op.isAligned(AlignCheck))
18832 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
18833 AlignmentIsAcceptable(MVT::v2i64, Align(16)))
18835 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
18837 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
18839 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
18846 if (Immed == std::numeric_limits<int64_t>::min()) {
18855 if (!Subtarget->hasSVE2())
18874 return std::abs(Imm / 8) <= 16;
18877 return std::abs(Imm / 4) <= 16;
18880 return std::abs(Imm / 2) <= 16;
18907 if (Insn.size() > 1)
18944 if (AM.Scale == 1) {
18947 } else if (AM.Scale == 2) {
18959 if (Ty->isScalableTy()) {
18965 uint64_t VecNumBytes = DL.getTypeSizeInBits(Ty).getKnownMinValue() / 8;
18987 if (Ty->isSized()) {
18988 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
18989 NumBytes = NumBits / 8;
18994 return Subtarget->getInstrInfo()->isLegalAddressingMode(NumBytes, AM.BaseOffs,
19002 int64_t MaxOffset) const {
19003 int64_t HighPart = MinOffset & ~0xfffULL;
19026 return Subtarget->hasFullFP16();
19032 Subtarget->isNonStreamingSVEorSME2Available();
19042 switch (Ty->getScalarType()->getTypeID()) {
19062 static const MCPhysReg ScratchRegs[] = {
19063 AArch64::X16, AArch64::X17, AArch64::LR, 0
19065 return ScratchRegs;
19069 static const MCPhysReg RCRegs[] = {AArch64::FPCR};
19078 "Expected shift op");
19080 SDValue ShiftLHS = N->getOperand(0);
19081 EVT VT = N->getValueType(0);
19102 return SRLC->getZExtValue() == SHLC->getZExtValue();
19114 (N->getOperand(0).getOpcode() == ISD::SHL ||
19115 N->getOperand(0).getOpcode() == ISD::SRL) &&
19116 "Expected XOR(SHIFT) pattern");
19121 if (XorC && ShiftC) {
19122 unsigned MaskIdx, MaskLen;
19123 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
19124 unsigned ShiftAmt = ShiftC->getZExtValue();
19125 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
19126 if (N->getOperand(0).getOpcode() == ISD::SHL)
19127 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
19128 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
19138 N->getOperand(0).getOpcode() == ISD::SRL) ||
19140 N->getOperand(0).getOpcode() == ISD::SHL)) &&
19141 "Expected shift-shift mask");
19143 if (!N->getOperand(0)->hasOneUse())
19147 EVT VT = N->getValueType(0);
19148 if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
19151 return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
19156 if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
19158 unsigned ShlAmt = C2->getZExtValue();
19159 if (auto ShouldADD = *N->user_begin();
19160 ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
19162 EVT MemVT = Load->getMemoryVT();
19164 if (Load->getValueType(0).isScalableVector())
19178 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
19186 assert(Ty->isIntegerTy());
19188 unsigned BitSize = Ty->getPrimitiveSizeInBits();
19192 int64_t Val = Imm.getSExtValue();
19199 Val &= (1LL << 32) - 1;
19207 unsigned Index) const {
19229 EVT VT = N->getValueType(0);
19230 if (!Subtarget->hasNEON() || !VT.isVector())
19244 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
19279 if (N->getValueType(0) != MVT::i32)
19282 SDValue VecReduceOp0 = N->getOperand(0);
19283 bool SawTrailingZext = false;
19289 SawTrailingZext = true;
19294 MVT AbsInputVT = SawTrailingZext ? MVT::v16i16 : MVT::v16i32;
19296 unsigned Opcode = VecReduceOp0.getOpcode();
19302 if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
19303 ABS->getOperand(0)->getValueType(0) != AbsInputVT)
19306 SDValue SUB = ABS->getOperand(0);
19307 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
19308 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
19310 if (SUB->getOperand(0)->getValueType(0) != AbsInputVT ||
19311 SUB->getOperand(1)->getValueType(0) != AbsInputVT)
19315 bool IsZExt = false;
19323 SDValue EXT0 = SUB->getOperand(0);
19324 SDValue EXT1 = SUB->getOperand(1);
19341 UABDHigh8Op0, UABDHigh8Op1);
19352 UABDLo8Op0, UABDLo8Op1);
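// The combine below fires only for scalable results on SVE2p1 (or SME2 in
// streaming mode) when every user is an EXTRACT_SUBVECTOR; the matching
// extracts are then rewritten in pairs via DCI.CombineTo.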
19373 if (!N->getValueType(0).isScalableVector() ||
19374 (!ST->hasSVE2p1() && !(ST->hasSME2() && ST->isStreaming())))
19379 return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR;
19382 auto MaskEC = N->getValueType(0).getVectorElementCount();
19383 if (!MaskEC.isKnownMultipleOf(NumExts))
19397 if (Use->getValueType(0).getVectorElementCount() != ExtMinEC)
19401 unsigned Offset = Use->getConstantOperandVal(1);
19403 if (Extracts[Part] != nullptr)
19406 Extracts[Part] = Use;
19422 EVT ExtVT = Extracts[0]->getValueType(0);
19426 DCI.CombineTo(Extracts[0], R.getValue(0));
19427 DCI.CombineTo(Extracts[1], R.getValue(1));
19431 if (NumExts == 2) {
19432 assert(N->getValueType(0) == DoubleExtVT);
19438 for (unsigned I = 2; I < NumExts; I += 2) {
19443 DCI.CombineTo(Extracts[I + 1], R.getValue(1));
19445 R.getValue(0), R.getValue(1)));
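// vecreduce.add of widened multiplies: lowered to UDOT/SDOT when NEON dot
// product is available, or USDOT (requiring +i8mm) when the two extends have
// different signedness; the accumulator is v4i32 when the element count is a
// multiple of 16, otherwise v2i32.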
19459 if (!ST->isNeonAvailable())
19462 if (!ST->hasDotProd())
19473 unsigned DotOpcode;
19477 if (A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
19479 auto OpCodeA = A.getOpcode();
19483 auto OpCodeB = B.getOpcode();
19487 if (OpCodeA == OpCodeB) {
19492 if (!ST->hasMatMulInt8())
19494 DotOpcode = AArch64ISD::USDOT;
19499 DotOpcode = AArch64ISD::UDOT;
19501 DotOpcode = AArch64ISD::SDOT;
19506 EVT Op0VT = A.getOperand(0).getValueType();
19509 if (!IsValidElementCount || !IsValidSize)
19518 B = B.getOperand(0);
19521 unsigned NumOfVecReduce;
19523 if (IsMultipleOf16) {
19525 TargetType = MVT::v4i32;
19528 TargetType = MVT::v2i32;
19531 if (NumOfVecReduce == 1) {
19534 A.getOperand(0), B);
19541 for (; I < VecReduce16Num; I += 1) {
19560 if (VecReduce8Num == 0)
19561 return VecReduceAdd16;
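// DetectAddExtract below recognizes an add that can be replaced by the
// pairwise widening add (UADDLP/SADDLP); the caller also recurses into add
// operands that have a single use.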
19583 auto DetectAddExtract = [&](SDValue A) {
19587 EVT VT = A.getValueType();
19612 : AArch64ISD::SADDLP;
19616 if (SDValue R = DetectAddExtract(A))
19619 if (A.getOperand(0).getOpcode() == ISD::ADD && A.getOperand(0).hasOneUse())
19623 if (A.getOperand(1).getOpcode() == ISD::ADD && A.getOperand(1).hasOneUse())
19636 EVT VT = A.getValueType();
19637 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
19648 if (ExtVT0 != ExtVT1 ||
19663 return DAG.getNode(AArch64ISD::NVCAST, SDLoc(A), MVT::v8i16, Uaddlv);
19680 MVT OpVT = A.getSimpleValueType();
19681 assert(N->getSimpleValueType(0) == OpVT &&
19682 "The operand type should be consistent with the result type of UADDV");
19686 if (KnownLeadingLanes.isZero())
19696 APInt DemandedElts =
19715AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
19722 EVT VT = N->getValueType(0);
19727 if (VT.isVector() && Subtarget->isSVEorStreamingSVEAvailable())
19731 if ((VT != MVT::i32 && VT != MVT::i64) ||
19737 if (Divisor == 2 ||
19738 Divisor == APInt(Divisor.getBitWidth(), -2, true))
19745AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
19752 EVT VT = N->getValueType(0);
19760 if ((VT != MVT::i32 && VT != MVT::i64) ||
19776 CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, And, And, CCVal, Cmp);
19787 CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, AndPos, AndNeg, CCVal,
19802 case Intrinsic::aarch64_sve_cntb:
19803 case Intrinsic::aarch64_sve_cnth:
19804 case Intrinsic::aarch64_sve_cntw:
19805 case Intrinsic::aarch64_sve_cntd:
19815 if (IID == Intrinsic::aarch64_sve_cntp)
19816 return Op.getOperand(1).getValueType().getVectorElementCount();
19818 case Intrinsic::aarch64_sve_cntd:
19820 case Intrinsic::aarch64_sve_cntw:
19822 case Intrinsic::aarch64_sve_cnth:
19824 case Intrinsic::aarch64_sve_cntb:
19827 return std::nullopt;
19854 return TypeNode->getVT();
19864 if (Mask == UCHAR_MAX)
19866 else if (Mask == USHRT_MAX)
19868 else if (Mask == UINT_MAX)
19890 unsigned ExtendOpcode = Extend.getOpcode();
19906 if (PreExtendType == MVT::Other ||
19911 bool SeenZExtOrSExt = !IsAnyExt;
19919 unsigned Opc = Op.getOpcode();
19930 if (SeenZExtOrSExt && OpcIsSExt != IsSExt)
19933 IsSExt = OpcIsSExt;
19934 SeenZExtOrSExt = true;
19942 EVT PreExtendLegalType =
19948 PreExtendLegalType));
19959 unsigned ExtOpc = !SeenZExtOrSExt
19962 return DAG.getNode(ExtOpc, DL, VT, NBV);
19969 EVT VT = Mul->getValueType(0);
19970 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
19981 return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0),
19982 Op1 ? Op1 : Mul->getOperand(1));
19997 EVT VT = Mul->getValueType(0);
19999 int ConstMultiplier =
20005 unsigned AbsConstValue = abs(ConstMultiplier);
20006 unsigned OperandShift =
20015 unsigned B = ConstMultiplier < 0 ? 32 : 31;
20016 unsigned CeilAxOverB = (AbsConstValue + (B - 1)) / B;
20020 if (LowerBound > UpperBound)
20025 int Shift = std::min(std::max(0, LowerBound), UpperBound);
20028 int32_t RdsvlMul = (AbsConstValue >> (OperandShift + Shift)) *
20029 (ConstMultiplier < 0 ? -1 : 1);
20030 auto Rdsvl = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
20043 EVT VT = N->getValueType(0);
20044 if (VT != MVT::v2i64 && VT != MVT::v1i64 && VT != MVT::v2i32 &&
20045 VT != MVT::v4i32 && VT != MVT::v4i16 && VT != MVT::v8i16)
20047 if (N->getOperand(0).getOpcode() != ISD::AND ||
20048 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
20061 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
20062 V3 != (HalfSize - 1))
20073 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, CM);
20081 EVT VT = N->getValueType(0);
20087 N->getOperand(0).getOperand(0).getValueType() !=
20088 N->getOperand(1).getOperand(0).getValueType())
20092 N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
20095 SDValue N0 = N->getOperand(0).getOperand(0);
20096 SDValue N1 = N->getOperand(1).getOperand(0);
20101 if ((S2 == MVT::i32 && S1 == MVT::i8) ||
20102 (S2 == MVT::i64 && (S1 == MVT::i8 || S1 == MVT::i16))) {
20135 EVT VT = N->getValueType(0);
20139 unsigned AddSubOpc;
20141 auto IsAddSubWith1 = [&](SDValue V) -> bool {
20142 AddSubOpc = V->getOpcode();
20154 if (IsAddSubWith1(N0)) {
20156 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
20159 if (IsAddSubWith1(N1)) {
20161 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
20172 const APInt &ConstValue = C->getAPIntValue();
20179 if (ConstValue.sge(1) && ConstValue.sle(16))
20194 unsigned TrailingZeroes = ConstValue.countr_zero();
20195 if (TrailingZeroes) {
20203 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ADD ||
20204 N->user_begin()->getOpcode() == ISD::SUB))
20209 APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
20212 auto Shl = [&](SDValue N0, unsigned N1) {
20243 for (unsigned i = 1; i < BitWidth / 2; i++) {
20263 unsigned TrailingZeroes = CVMinus1.countr_zero();
20264 APInt SCVMinus1 = CVMinus1.ashr(TrailingZeroes) - 1;
20280 unsigned TrailingZeroes = CVMinus1.countr_zero();
20281 APInt CVPlus1 = CVMinus1.ashr(TrailingZeroes) + 1;
20301 APInt SCVMinus1 = ShiftedConstValue - 1;
20302 APInt SCVPlus1 = ShiftedConstValue + 1;
20303 APInt CVPlus1 = ConstValue + 1;
20307 return Shl(Add(Shl(N0, ShiftAmt), N0), TrailingZeroes);
20310 return Sub(Shl(N0, ShiftAmt), N0);
20312 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
20313 return Sub(Shl(N0, ShiftAmt), Shl(N0, TrailingZeroes));
20315 if (Subtarget->hasALULSLFast() &&
20316 isPowPlusPlusConst(ConstValue, CVM, CVN)) {
20317 APInt CVMMinus1 = CVM - 1;
20318 APInt CVNMinus1 = CVN - 1;
20319 unsigned ShiftM1 = CVMMinus1.logBase2();
20320 unsigned ShiftN1 = CVNMinus1.logBase2();
20322 if (ShiftM1 <= 4 && ShiftN1 <= 4) {
20324 return Add(Shl(MVal, ShiftN1), MVal);
20327 if (Subtarget->hasALULSLFast() &&
20328 isPowPlusPlusOneConst(ConstValue, CVM, CVN)) {
20332 if (ShiftM <= 4 && ShiftN <= 4) {
20338 if (Subtarget->hasALULSLFast() &&
20339 isPowMinusMinusOneConst(ConstValue, CVM, CVN)) {
20343 if (ShiftM <= 4 && ShiftN <= 4) {
20352 APInt SCVPlus1 = -ShiftedConstValue + 1;
20353 APInt CVNegPlus1 = -ConstValue + 1;
20354 APInt CVNegMinus1 = -ConstValue - 1;
20357 return Sub(N0, Shl(N0, ShiftAmt));
20359 ShiftAmt = CVNegMinus1.logBase2();
20360 return Negate(Add(Shl(N0, ShiftAmt), N0));
20362 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
20363 return Sub(Shl(N0, TrailingZeroes), Shl(N0, ShiftAmt));
20383 EVT VT = N->getValueType(0);
20385 N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
20386 VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
20396 if (!BV->isConstant())
20401 EVT IntVT = BV->getValueType(0);
20408 N->getOperand(0)->getOperand(0), MaskConst);
20422 if (N->isStrictFPOpcode())
20433 return !VT.isVector() && VT != MVT::bf16 && VT != MVT::f128;
20436 SDValue SrcVal = N->getOperand(0);
20438 EVT DestTy = N->getValueType(0);
20445 if (DestTy.bitsGT(SrcTy)) {
20454 if (SrcVecTy == MVT::nxv2i32 || DestVecTy == MVT::nxv2i32)
20460 DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
20477 EVT VT = N->getValueType(0);
20478 if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64)
20480 if (VT == MVT::f16 && !Subtarget->hasFullFP16())
20484 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
20505 (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
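// fptosi/fptoui of an fmul on 64/128-bit vectors: when the multiplier's log2
// (C in the checks below, required to be in 1..Bits) fits the lane width, the
// pair is folded into the fixed-point converts vcvtfp2fxs / vcvtfp2fxu.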
20524 if (!N->getValueType(0).isSimple())
20528 if (!Op.getValueType().isSimple() || Op.getOpcode() != ISD::FMUL)
20531 if (!Op.getValueType().is64BitVector() && !Op.getValueType().is128BitVector())
20538 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
20540 if (FloatBits != 32 && FloatBits != 64 &&
20541 (FloatBits != 16 || !Subtarget->hasFullFP16()))
20544 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
20545 uint32_t IntBits = IntTy.getSizeInBits();
20546 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
20550 if (IntBits > FloatBits)
20555 int32_t Bits = IntBits == 64 ? 64 : 32;
20557 if (C == -1 || C == 0 || C > Bits)
20560 EVT ResTy = Op.getValueType().changeVectorElementTypeToInteger();
20574 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
20575 : Intrinsic::aarch64_neon_vcvtfp2fxu;
20581 if (IntBits < FloatBits)
20598 EVT VT = N->getValueType(0);
20602 if (CSel0.getOpcode() != AArch64ISD::CSEL ||