#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-lower"

STATISTIC(NumOptimizedImms,
          "Number of times immediates were optimized");

    cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
    cl::desc("Enable AArch64 logical imm instruction "
    cl::desc("Combine extends of AArch64 masked "
             "gather intrinsics"),
    cl::desc("Combine ext and trunc to TBL"),
    cl::desc("Enable / disable SVE scalable vectors in Global ISel"),
    cl::desc("Generate ISD::PTRADD nodes for pointer arithmetic in "
             "SelectionDAG for FEAT_CPA"),

                                         AArch64::X3, AArch64::X4, AArch64::X5,
                                         AArch64::X6, AArch64::X7};
                                         AArch64::Q3, AArch64::Q4, AArch64::Q5,
                                         AArch64::Q6, AArch64::Q7};
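// Hedged note (added, not in the original source): these look like the tails
// of the AAPCS64 argument-register tables. Integer arguments are passed in
// X0-X7 and FP/SIMD arguments in Q0-Q7, so the full arrays presumably begin
// at X0 and Q0; only the last entries survived here.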
  return MVT::nxv8bf16;
  switch (EC.getKnownMinValue()) {
         "Expected scalable predicate vector type!");
         "Expected legal vector type!");
         "Expected legal type!");
  return VT == MVT::nxv16i1;
         "Unexpected fixed-size unpacked type.");
  case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
  case AArch64ISD::BSWAP_MERGE_PASSTHRU:
  case AArch64ISD::REVH_MERGE_PASSTHRU:
  case AArch64ISD::REVW_MERGE_PASSTHRU:
  case AArch64ISD::REVD_MERGE_PASSTHRU:
  case AArch64ISD::CTLZ_MERGE_PASSTHRU:
  case AArch64ISD::CTPOP_MERGE_PASSTHRU:
  case AArch64ISD::DUP_MERGE_PASSTHRU:
  case AArch64ISD::ABS_MERGE_PASSTHRU:
  case AArch64ISD::NEG_MERGE_PASSTHRU:
  case AArch64ISD::FNEG_MERGE_PASSTHRU:
  case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
  case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
  case AArch64ISD::FCEIL_MERGE_PASSTHRU:
  case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
  case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
  case AArch64ISD::FRINT_MERGE_PASSTHRU:
  case AArch64ISD::FRINT32_MERGE_PASSTHRU:
  case AArch64ISD::FRINT64_MERGE_PASSTHRU:
  case AArch64ISD::FROUND_MERGE_PASSTHRU:
  case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
  case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
  case AArch64ISD::FTRUNC32_MERGE_PASSTHRU:
  case AArch64ISD::FTRUNC64_MERGE_PASSTHRU:
  case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
  case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
  case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
  case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
  case AArch64ISD::FCVTX_MERGE_PASSTHRU:
  case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
  case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
  case AArch64ISD::FSQRT_MERGE_PASSTHRU:
  case AArch64ISD::FRECPX_MERGE_PASSTHRU:
  case AArch64ISD::FABS_MERGE_PASSTHRU:
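    // Hedged note (added): all of the *_MERGE_PASSTHRU opcodes above share
    // the SVE merging-predicated pattern, conceptually
    //   result = select(predicate, op(source), passthru)
    // so whatever property this switch is classifying applies uniformly to
    // the whole family.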
  switch (Op.getOpcode()) {
  case AArch64ISD::PTRUE:
  case AArch64ISD::SETCC_MERGE_ZERO:
    switch (Op.getConstantOperandVal(0)) {
    case Intrinsic::aarch64_sve_ptrue:
    case Intrinsic::aarch64_sve_pnext:
    case Intrinsic::aarch64_sve_cmpeq:
    case Intrinsic::aarch64_sve_cmpne:
    case Intrinsic::aarch64_sve_cmpge:
    case Intrinsic::aarch64_sve_cmpgt:
    case Intrinsic::aarch64_sve_cmphs:
    case Intrinsic::aarch64_sve_cmphi:
    case Intrinsic::aarch64_sve_cmpeq_wide:
    case Intrinsic::aarch64_sve_cmpne_wide:
    case Intrinsic::aarch64_sve_cmpge_wide:
    case Intrinsic::aarch64_sve_cmpgt_wide:
    case Intrinsic::aarch64_sve_cmplt_wide:
    case Intrinsic::aarch64_sve_cmple_wide:
    case Intrinsic::aarch64_sve_cmphs_wide:
    case Intrinsic::aarch64_sve_cmphi_wide:
    case Intrinsic::aarch64_sve_cmplo_wide:
    case Intrinsic::aarch64_sve_cmpls_wide:
    case Intrinsic::aarch64_sve_fcmpeq:
    case Intrinsic::aarch64_sve_fcmpne:
    case Intrinsic::aarch64_sve_fcmpge:
    case Intrinsic::aarch64_sve_fcmpgt:
    case Intrinsic::aarch64_sve_fcmpuo:
    case Intrinsic::aarch64_sve_facgt:
    case Intrinsic::aarch64_sve_facge:
    case Intrinsic::aarch64_sve_whilege:
    case Intrinsic::aarch64_sve_whilegt:
    case Intrinsic::aarch64_sve_whilehi:
    case Intrinsic::aarch64_sve_whilehs:
    case Intrinsic::aarch64_sve_whilele:
    case Intrinsic::aarch64_sve_whilelo:
    case Intrinsic::aarch64_sve_whilels:
    case Intrinsic::aarch64_sve_whilelt:
    case Intrinsic::aarch64_sve_match:
    case Intrinsic::aarch64_sve_nmatch:
    case Intrinsic::aarch64_sve_whilege_x2:
    case Intrinsic::aarch64_sve_whilegt_x2:
    case Intrinsic::aarch64_sve_whilehi_x2:
    case Intrinsic::aarch64_sve_whilehs_x2:
    case Intrinsic::aarch64_sve_whilele_x2:
    case Intrinsic::aarch64_sve_whilelo_x2:
    case Intrinsic::aarch64_sve_whilels_x2:
    case Intrinsic::aarch64_sve_whilelt_x2:
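// Hedged note (added): the fragment below appears to come from a helper that
// splits a pointer-authentication discriminator into (constant, address)
// components. The isUInt<16> guard matches the 16-bit immediate field
// available for the constant part; when there is no address part it falls
// back to AArch64::NoRegister.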
static std::tuple<SDValue, SDValue>
  if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
    AddrDisc = DAG->getRegister(AArch64::NoRegister, MVT::i64);
  return std::make_tuple(
  if (Subtarget->hasLS64()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasNEON()) {
    addDRType(MVT::v2f32);
    addDRType(MVT::v8i8);
    addDRType(MVT::v4i16);
    addDRType(MVT::v2i32);
    addDRType(MVT::v1i64);
    addDRType(MVT::v1f64);
    addDRType(MVT::v4f16);
    addDRType(MVT::v4bf16);

    addQRType(MVT::v4f32);
    addQRType(MVT::v2f64);
    addQRType(MVT::v16i8);
    addQRType(MVT::v8i16);
    addQRType(MVT::v4i32);
    addQRType(MVT::v2i64);
    addQRType(MVT::v8f16);
    addQRType(MVT::v8bf16);
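    // Hedged note (added): addDRType / addQRType register these vector types
    // with the 64-bit D and 128-bit Q NEON register classes respectively; as
    // their definitions later in this excerpt show, they only do so when NEON
    // is actually available.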
  if (Subtarget->isSVEorStreamingSVEAvailable()) {
  if (Subtarget->useSVEForFixedLengthVectors()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasCSSC()) {
  if (Subtarget->hasFullFP16()) {
  if (Subtarget->hasFullFP16()) {
  auto LegalizeNarrowFP = [this](MVT ScalarVT) {
  if (!Subtarget->hasFullFP16()) {
    LegalizeNarrowFP(MVT::f16);
  LegalizeNarrowFP(MVT::bf16);
  for (MVT Ty : {MVT::f32, MVT::f64})
  if (Subtarget->hasFullFP16())
  for (MVT Ty : {MVT::f32, MVT::f64})
  if (Subtarget->hasFullFP16())
  if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
  if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
  if (Subtarget->outlineAtomics() && !Subtarget->hasLSFE()) {
  if (Subtarget->hasLSE128()) {
  if (Subtarget->hasLSE2()) {
  if (WideVT.getScalarSizeInBits() > NarrowVT.getScalarSizeInBits()) {
  if (Subtarget->hasFPARMv8()) {
  if (!Subtarget->isTargetWindows())
  if (Subtarget->hasSME())
  if (Subtarget->isNeonAvailable()) {
    for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
    if (Subtarget->hasFullFP16()) {
    for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
    for (auto VT : {MVT::v1i64, MVT::v2i64}) {
    for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64,
                    MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
    for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
    for (MVT VT : { MVT::v4f16, MVT::v2f32,
                    MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
      if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
    if (Subtarget->hasFullFP16())
    for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
                    MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
    if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
    for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
    if (Subtarget->hasFullFP16())
      for (MVT Ty : {MVT::v4f16, MVT::v8f16})
    for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
    if (Subtarget->hasFullFP16())
      for (MVT Ty : {MVT::v4f16, MVT::v8f16})
    for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
    for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
    if (Subtarget->hasDotProd()) {
    if (Subtarget->hasMatMulInt8()) {
      if (VT.is128BitVector() || VT.is64BitVector()) {
    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
  if (Subtarget->hasSME()) {
  if (Subtarget->isSVEorStreamingSVEAvailable()) {
         {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
    for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
    if (Subtarget->isSVEorStreamingSVEAvailable() &&
        (Subtarget->hasSVE2p1() || Subtarget->hasSME2()))
    for (auto VT : {MVT::v16i8, MVT::v8i8, MVT::v4i16, MVT::v2i32})
    for (auto VT : {MVT::v8f16, MVT::v4f32, MVT::v2f64})
  if (Subtarget->isSVEorStreamingSVEAvailable()) {
    for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
    if (!Subtarget->isLittleEndian())
    if (Subtarget->hasSVE2() ||
        (Subtarget->hasSME() && Subtarget->isStreaming()))
    for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
    for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32})
         { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
           MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
        {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
      if (VT != MVT::nxv16i1) {
         {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
          MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
          MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
    for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                    MVT::nxv4f32, MVT::nxv2f64}) {
    for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
    if (Subtarget->hasSVEB16B16() &&
        Subtarget->isNonStreamingSVEorSME2Available()) {
      for (auto VT : {MVT::v4bf16, MVT::v8bf16, MVT::nxv2bf16, MVT::nxv4bf16,
    if (!Subtarget->hasSVEB16B16() ||
        !Subtarget->isNonStreamingSVEorSME2Available()) {
      for (MVT VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
        if (VT != MVT::nxv2bf16 && Subtarget->hasBF16())
    if (Subtarget->hasBF16() && Subtarget->isNeonAvailable())
    for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
                    MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
  if (Subtarget->useSVEForFixedLengthVectors()) {
                                       VT, !Subtarget->isNeonAvailable()))
        addTypeForFixedLengthSVE(VT);
                                       VT, !Subtarget->isNeonAvailable()))
        addTypeForFixedLengthSVE(VT);
    for (auto VT : {MVT::v8i8, MVT::v4i16})
    for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
    for (auto VT : {MVT::v8f16, MVT::v4f32, MVT::v8bf16})
    for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
                    MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
    for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
    for (auto VT : {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1})
  if (Subtarget->isSVEorStreamingSVEAvailable()) {
    if (Subtarget->hasMatMulInt8()) {
                         MVT::nxv16i8, Legal);
    if (Subtarget->hasSVE2() || Subtarget->hasSME()) {
    if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
                         MVT::nxv8f16, Legal);
    if (Subtarget->hasSVE2() ||
        (Subtarget->hasSME() && Subtarget->isStreaming())) {
      for (auto VT : {MVT::v2i32, MVT::v4i16, MVT::v8i8, MVT::v16i8}) {
      for (auto VT : {MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, MVT::nxv16i1}) {
  if (Subtarget->isSVEAvailable()) {
    for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64,
                    MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                    MVT::nxv4f32, MVT::nxv2f64, MVT::nxv2bf16, MVT::nxv4bf16,
                    MVT::nxv8bf16, MVT::v4f16, MVT::v8f16, MVT::v2f32,
                    MVT::v4f32, MVT::v1f64, MVT::v2f64, MVT::v8i8,
                    MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
                    MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
    for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                    MVT::nxv4f32, MVT::nxv2f64, MVT::v4f16, MVT::v8f16,
                    MVT::v2f32, MVT::v4f32, MVT::v2f64})
        {MVT::nxv4i32, MVT::nxv2i64, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv2f64})
    for (auto VT : {MVT::v2i32, MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32,
    for (auto VT : {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64,
                    MVT::nxv2f32, MVT::nxv2f64, MVT::nxv4i8, MVT::nxv4i16,
                    MVT::nxv4i32, MVT::nxv4f32}) {
    if (Subtarget->hasSVE2()) {
  if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
  if (Subtarget->hasSVE()) {
  if (Subtarget->isTargetWindows()) {
void AArch64TargetLowering::addTypeForNEON(MVT VT) {
  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
      ((VT == MVT::v4bf16 || VT == MVT::v8bf16 || VT == MVT::v4f16 ||
        VT == MVT::v8f16) &&
       Subtarget->hasFullFP16()))
  if (VT != MVT::v8i8 && VT != MVT::v16i8)
  for (unsigned Opcode :
  for (unsigned Opcode :
  if (Subtarget->isLittleEndian()) {
  if (Subtarget->hasD128()) {
  if (!Subtarget->isSVEorStreamingSVEAvailable() ||
        (OpVT != MVT::i32 && OpVT != MVT::i64))))
  if (!Subtarget->isSVEorStreamingSVEAvailable())
  return VT != MVT::nxv16i1 && VT != MVT::nxv8i1 && VT != MVT::nxv4i1 &&
         VT != MVT::nxv2i1 && VT != MVT::v16i1 && VT != MVT::v8i1 &&
         VT != MVT::v4i1 && VT != MVT::v2i1;

                                                  unsigned SearchSize) const {
  if (!Subtarget->hasSVE2() || !Subtarget->isSVEAvailable())
  if (VT == MVT::nxv8i16 || VT == MVT::v8i16)
    return SearchSize != 8;
  if (VT == MVT::nxv16i8 || VT == MVT::v16i8 || VT == MVT::v8i8)
    return SearchSize != 8 && SearchSize != 16;
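  // Hedged note (added): this is consistent with the SVE2 MATCH instruction,
  // which compares elements against a small fixed-size segment of search
  // values (8 halfwords, or 8/16 bytes). Search sizes other than those
  // presumably have to be expanded, hence the inverted SearchSize checks
  // above.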
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
  while (InnerVT != VT) {
  while (InnerVT != VT) {
  bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();
  if (Subtarget->hasMatMulInt8()) {

void AArch64TargetLowering::addDRType(MVT VT) {
  if (Subtarget->isNeonAvailable())

void AArch64TargetLowering::addQRType(MVT VT) {
  if (Subtarget->isNeonAvailable())
    Imm = C->getZExtValue();
  case AArch64ISD::SQDMULH:
  return N->getOpcode() == Opc &&
                                      const APInt &Demanded,
  uint64_t OldImm = Imm, NewImm, Enc;
  if (Imm == 0 || Imm == Mask ||
  unsigned EltSize = Size;
      ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
  uint64_t Sum = RotatedImm + NonDemandedBits;
  bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
  uint64_t Ones = (Sum + Carry) & NonDemandedBits;
  NewImm = (Imm | Ones) & Mask;
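  // Hedged note (added): the surrounding computation appears to exploit the
  // freedom in the non-demanded bits. It tries to set some of them so that
  // the immediate becomes a (possibly rotated and replicated) contiguous run
  // of ones, i.e. something encodable as an AArch64 logical immediate, while
  // every demanded bit keeps its original value, which is what the asserts
  // below check.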
  while (EltSize < Size) {
    NewImm |= NewImm << EltSize;
         "demanded bits should never be altered");
  assert(OldImm != NewImm &&
         "the new imm shouldn't be equal to the old imm");

  EVT VT = Op.getValueType();
  if (NewImm == 0 || NewImm == OrigMask) {
  EVT VT = Op.getValueType();
  switch (Op.getOpcode()) {
    NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
    NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
    NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;

  switch (Op.getOpcode()) {
  case AArch64ISD::DUP: {
    if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
      assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
             "Expected DUP implicit truncation");
      Known = Known.trunc(Op.getScalarValueSizeInBits());
  case AArch64ISD::CSEL: {
  case AArch64ISD::CSNEG:
  case AArch64ISD::CSINC:
  case AArch64ISD::CSINV: {
    if (Op.getOpcode() == AArch64ISD::CSINC)
    else if (Op.getOpcode() == AArch64ISD::CSINV)
    else if (Op.getOpcode() == AArch64ISD::CSNEG)
                                  Op.getScalarValueSizeInBits())));
  case AArch64ISD::BICi: {
        ~(Op->getConstantOperandAPInt(1) << Op->getConstantOperandAPInt(2))
  case AArch64ISD::VLSHR: {
  case AArch64ISD::VASHR: {
  case AArch64ISD::VSHL: {
  case AArch64ISD::MOVI: {
  case AArch64ISD::MOVIshift: {
                                    << Op->getConstantOperandVal(1)));
  case AArch64ISD::MOVImsl: {
        Known.getBitWidth(), ~(~Op->getConstantOperandVal(0) << ShiftAmt)));
  case AArch64ISD::MOVIedit: {
  case AArch64ISD::MVNIshift: {
        ~(Op->getConstantOperandVal(0) << Op->getConstantOperandVal(1)),
  case AArch64ISD::MVNImsl: {
  case AArch64ISD::LOADgot:
  case AArch64ISD::ADDlow: {
    if (!Subtarget->isTargetILP32())
  case AArch64ISD::ASSERT_ZEXT_BOOL: {
    case Intrinsic::aarch64_ldaxr:
    case Intrinsic::aarch64_ldxr: {
    unsigned IntNo = Op.getConstantOperandVal(0);
    case Intrinsic::aarch64_neon_uaddlv: {
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
        unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
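        // Hedged note (added): UADDLV of v8i8/v16i8 sums at most 8 or 16
        // byte-sized lanes, so the result is bounded by 8 * 255 = 2040 or
        // 16 * 255 = 4080, which fit in 11 and 12 bits respectively; the
        // bound is presumably used to mark the remaining high bits as known
        // zero.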
    case Intrinsic::aarch64_neon_umaxv:
    case Intrinsic::aarch64_neon_uminv: {
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
      } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {

                                                  unsigned Depth) const {
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  case AArch64ISD::FCMEQ:
  case AArch64ISD::FCMGE:
  case AArch64ISD::FCMGT:
  case AArch64ISD::VASHR: {
    return std::min<uint64_t>(Tmp + Op.getConstantOperandVal(1), VTBits);

    unsigned *Fast) const {
  if (ElementSizeBits % 8 == 0 && Alignment >= Align(ElementSizeBits / 8))
  if (Subtarget->requiresStrictAlign())
    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||

    unsigned *Fast) const {
  if (Subtarget->requiresStrictAlign())
    *Fast = !Subtarget->isMisaligned128StoreSlow() ||
            Ty.getSizeInBytes() != 16 ||
  Register DestReg = MI.getOperand(0).getReg();
  Register IfTrueReg = MI.getOperand(1).getReg();
  Register IfFalseReg = MI.getOperand(2).getReg();
  unsigned CondCode = MI.getOperand(3).getImm();
  bool NZCVKilled = MI.getOperand(4).isKill();

  MBB->addSuccessor(TrueBB);
  MBB->addSuccessor(EndBB);

  MI.eraseFromParent();

         "SEH does not use catchret!");

  Register TargetReg = MI.getOperand(0).getReg();
  TII.probedStackAlloc(MBBI, TargetReg, false);

  MI.eraseFromParent();
  return NextInst->getParent();

  Register RegVL_GPR = MRI.createVirtualRegister(RC_GPR);
  Register RegVL_GPRsp = MRI.createVirtualRegister(RC_GPRsp);
  Register RegSVL_GPR = MRI.createVirtualRegister(RC_GPR);
  Register RegSVL_GPRsp = MRI.createVirtualRegister(RC_GPRsp);

  MBB->addSuccessor(TrapBB);
  MBB->addSuccessor(PassBB);

  MI.eraseFromParent();

  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(3));
  MIB.add(MI.getOperand(4));
  MIB.add(MI.getOperand(5));

  MI.eraseFromParent();

  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(1));

  MI.eraseFromParent();

                                                       bool Op0IsDef) const {
  for (unsigned I = 1; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));

  MI.eraseFromParent();
  unsigned StartIdx = 0;

  bool HasTile = BaseReg != AArch64::ZA;
  bool HasZPROut = HasTile && MI.getOperand(0).isReg();
    MIB.add(MI.getOperand(StartIdx));
    MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm(),
    MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm());
    if (MI.getOperand(0).isReg() && !MI.getOperand(1).isImm()) {
      MIB.add(MI.getOperand(StartIdx));
  for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));

  MI.eraseFromParent();

  MIB.add(MI.getOperand(0));
  unsigned Mask = MI.getOperand(0).getImm();
  for (unsigned I = 0; I < 8; I++) {
    if (Mask & (1 << I))

  MI.eraseFromParent();
  if (TPIDR2.Uses > 0) {
    if (!Subtarget->isLittleEndian())
          "TPIDR2 block initialization is not supported on big-endian targets");

         "Lazy ZA save is not yet supported on Windows");

  if (TPIDR2.Uses > 0) {
    Register SP = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), SP)

    auto Size = MI.getOperand(1).getReg();
    auto Dest = MI.getOperand(0).getReg();
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::MSUBXrrr), Dest)

         "Lazy ZA save is not yet supported on Windows");

    auto Size = MI.getOperand(1).getReg();
    auto Dest = MI.getOperand(0).getReg();
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::SUBXrx64), AArch64::SP)
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), Dest)

    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
            MI.getOperand(0).getReg());

  RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
            MI.getOperand(0).getReg())
            MI.getOperand(0).getReg())

  Register ResultReg = MI.getOperand(0).getReg();
  } else if (Subtarget->hasSME()) {
        .addImm(AArch64SysReg::SVCR)
    RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;

  MI.eraseFromParent();
  while (Reg.isVirtual()) {
    assert(DefMI && "Virtual register definition not found");
    unsigned Opcode = DefMI->getOpcode();

    if (Opcode == AArch64::COPY) {
      Reg = DefMI->getOperand(1).getReg();
      if (Reg.isPhysical())

    if (Opcode == AArch64::SUBREG_TO_REG) {
      Reg = DefMI->getOperand(2).getReg();

  int64_t IntDisc = IntDiscOp.getImm();
  assert(IntDisc == 0 && "Blend components are already expanded");

  case AArch64::MOVKXi:
  case AArch64::MOVi32imm:
  case AArch64::MOVi64imm:
    AddrDisc = AArch64::NoRegister;

  if (AddrDisc == AArch64::XZR)
    AddrDisc = AArch64::NoRegister;

  if (AddrDisc && MRI.getRegClass(AddrDisc) != AddrDiscRC) {
    Register TmpReg = MRI.createVirtualRegister(AddrDiscRC);

  AddrDiscOp.setReg(AddrDisc);
  IntDiscOp.setImm(IntDisc);
  if (SMEOrigInstr != -1) {
    switch (SMEMatrixType) {

  switch (MI.getOpcode()) {
  case AArch64::InitTPIDR2Obj:
  case AArch64::AllocateZABuffer:
  case AArch64::AllocateSMESaveBuffer:
  case AArch64::GetSMESaveSize:
  case AArch64::EntryPStateSM:
  case AArch64::F128CSEL:
  case TargetOpcode::STATEPOINT:
    MI.addOperand(*MI.getMF(),
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
  case TargetOpcode::PATCHABLE_EVENT_CALL:
  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
  case AArch64::CATCHRET:
  case AArch64::PROBED_STACKALLOC_DYN:
  case AArch64::CHECK_MATCHING_VL_PSEUDO:
  case AArch64::LD1_MXIPXX_H_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LDR_ZA_PSEUDO:
  case AArch64::LDR_TX_PSEUDO:
  case AArch64::STR_TX_PSEUDO:
  case AArch64::ZERO_M_PSEUDO:
  case AArch64::ZERO_T_PSEUDO:
  case AArch64::MOVT_TIZ_PSEUDO:
                                 &AArch64::GPR64noipRegClass);
    N = N->getOperand(0).getNode();
  if (N->getOpcode() != AArch64ISD::DUP)
  auto Opnd0 = N->getOperand(0);
                                          CondCode, CondCode2);
  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
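  // Hedged note (added): this mirrors the AArch64 ADD/SUB immediate encoding,
  // a 12-bit unsigned value optionally shifted left by 12. For example, 4095
  // (0xFFF) and 4096 (0x1000, i.e. 1 << 12) are legal add immediates, while
  // 4097 (0x1001) is not, since it needs bits in both halves.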
                    << " legal: " << (IsLegal ? "yes\n" : "no\n"));
  if (Op->getFlags().hasNoSignedWrap())
      (isIntEqualitySetCC(CC) ||

  EVT VT = LHS.getValueType();
  if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {
    Chain = RHS.getValue(1);
      IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;

  EVT VT = LHS.getValueType();
  if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {
  unsigned Opcode = AArch64ISD::SUBS;
    Opcode = AArch64ISD::ADDS;
             isIntEqualitySetCC(CC)) {
    Opcode = AArch64ISD::ADDS;
                        LHS.getOperand(0), LHS.getOperand(1));
  } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
    return LHS.getValue(1);
  unsigned Opcode = 0;
  if (LHS.getValueType().isFloatingPoint()) {
    assert(LHS.getValueType() != MVT::f128);
    if ((LHS.getValueType() == MVT::f16 && !FullFP16) ||
        LHS.getValueType() == MVT::bf16) {
    Opcode = AArch64ISD::FCCMP;
    APInt Imm = Const->getAPIntValue();
    if (Imm.isNegative() && Imm.sgt(-32)) {
      Opcode = AArch64ISD::CCMN;
    Opcode = AArch64ISD::CCMN;
             isIntEqualitySetCC(CC)) {
    Opcode = AArch64ISD::CCMN;
    Opcode = AArch64ISD::CCMP;

                                bool &CanNegate, bool &MustBeFirst,
                                bool &PreferFirst, bool WillNegate,
                                unsigned Depth = 0) {
  if (VT == MVT::f128)
    MustBeFirst = false;
                       {Val->getOperand(0), Val->getOperand(1)});
    bool IsOR = Opcode == ISD::OR;
    if (MustBeFirstL && MustBeFirstR)
      if (!CanNegateL && !CanNegateR)
      CanNegate = WillNegate && CanNegateL && CanNegateR;
      MustBeFirst = !CanNegate;
      MustBeFirst = MustBeFirstL || MustBeFirstR;
    PreferFirst = PreferFirstL || PreferFirstR;

    bool isInteger = LHS.getValueType().isInteger();
      CC = getSetCCInverse(CC, LHS.getValueType());
      assert(LHS.getValueType().isFloatingPoint());
  bool IsOR = Opcode == ISD::OR;
                                   PreferFirstL, IsOR);
  assert(ValidL && "Valid conjunction/disjunction tree");
                                   PreferFirstR, IsOR);
  assert(ValidR && "Valid conjunction/disjunction tree");

  bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR;
  if (MustBeFirstL || ShouldFirstL) {
    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");

  bool NegateAfterAll;
    assert(CanNegateR && "at least one side must be negatable");
    assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
      NegateAfterR = true;
      NegateR = CanNegateR;
      NegateAfterR = !CanNegateR;
    NegateAfterAll = !Negate;
    assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
    assert(!Negate && "Valid conjunction/disjunction tree");
    NegateAfterR = false;
    NegateAfterAll = false;

  bool DummyCanNegate;
  bool DummyMustBeFirst;
  bool DummyPreferFirst;
                             DummyPreferFirst, false))
  auto isSupportedExtend = [&](SDValue V) {
      uint64_t Mask = MaskCst->getZExtValue();
      return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
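      // Hedged note (added): these masks correspond to zero-extending a byte,
      // halfword, or word, the widths that the UXTB/UXTH/UXTW
      // extended-register operand forms can fold for free, which is
      // presumably why only they count as "supported" extends here.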
  if (!Op.hasOneUse())
  if (isSupportedExtend(Op))

  unsigned Opc = Op.getOpcode();
    uint64_t Shift = ShiftCst->getZExtValue();
    if (isSupportedExtend(Op.getOperand(0)))
      return (Shift <= 4) ? 2 : 1;
    EVT VT = Op.getValueType();
    if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
  if (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != AArch64ISD::ANDS)

  EVT VT = RHS.getValueType();
  APInt C = RHSC->getAPIntValue();
    if (!C.isMinSignedValue()) {
      assert(!C.isZero() && "C should not be zero here");
    if (!C.isMaxSignedValue()) {
    if (!C.isAllOnes()) {

  bool LHSIsCMN = isCMN(LHS, CC, DAG);
  bool RHSIsCMN = isCMN(RHS, CC, DAG);
      LHS.getNode()->hasNUsesOfValue(1, 0)) {
    int16_t ValueofRHS = RHS->getAsZExtVal();

static std::pair<SDValue, SDValue>
  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
         "Unsupported value type");
  switch (Op.getOpcode()) {
    Opc = AArch64ISD::ADDS;
    Opc = AArch64ISD::ADDS;
    Opc = AArch64ISD::SUBS;
    Opc = AArch64ISD::SUBS;
  if (Op.getValueType() == MVT::i32) {
    assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
    Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
  Overflow = Value.getValue(1);
  return std::make_pair(Value, Overflow);

       !Subtarget->isNeonAvailable()))
    return LowerToScalableOp(Op, DAG);
  return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,

  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
  if (!CFVal || !CTVal)
    return Cmp.getValue(1);
  return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Glue);
  return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Glue);

                               unsigned Opcode, bool IsSigned) {
  EVT VT0 = Op.getValue(0).getValueType();
  EVT VT1 = Op.getValue(1).getValueType();
  if (VT0 != MVT::i32 && VT0 != MVT::i64)
  bool InvertCarry = Opcode == AArch64ISD::SBCS;

                                  bool LastOperandIsImm = false) {
  if (Op.getValueType().isVector())

  const unsigned NumOperands = Op.getNumOperands();
  auto getFloatVT = [](EVT VT) {
    assert((VT == MVT::i32 || VT == MVT::i64) && "Unexpected VT");
    return VT == MVT::i32 ? MVT::f32 : MVT::f64;
  auto bitcastToFloat = [&](SDValue Val) {
    return DAG.getBitcast(getFloatVT(Val.getValueType()), Val);
  for (unsigned I = 1; I < NumOperands; ++I) {
    const bool KeepInt = LastOperandIsImm && (I == NumOperands - 1);
    NewOps.push_back(KeepInt ? Val : bitcastToFloat(Val));
  EVT OrigVT = Op.getValueType();

      DAG.getNode(AArch64ISD::CSEL, DL, MVT::i32, FVal, TVal, CCVal, Overflow);
  unsigned IsWrite = Op.getConstantOperandVal(2);
  unsigned Locality = Op.getConstantOperandVal(3);
  unsigned IsData = Op.getConstantOperandVal(4);

  bool IsStream = !Locality;
  assert(Locality <= 3 && "Prefetch locality out-of-range");
    Locality = 3 - Locality;
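  // Hedged note (added): IR prefetch locality runs from 0 (no locality) to 3
  // (keep in all cache levels), while the PRFM target-level field counts the
  // other way (L1 being the smallest value), so the value is inverted before
  // being packed into the immediate below.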
  unsigned PrfOp = (IsWrite << 4) |
  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
4719 if (LHSConstOp && RHSConst) {
4723 uint64_t NewMaskValue = LHSConstValue & ~(RHSConstant - 1);
4736 EVT VT =
Op.getValueType();
4740 if (VT == MVT::nxv2f64 && SrcVal.
getValueType() == MVT::nxv2bf16) {
4748 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
4752 return LowerFixedLengthFPExtendToSVE(
Op, DAG);
4754 bool IsStrict =
Op->isStrictFPOpcode();
4755 SDValue Op0 =
Op.getOperand(IsStrict ? 1 : 0);
4757 if (VT == MVT::f64) {
4759 if (Op0VT == MVT::f32 || Op0VT == MVT::f16)
4762 if (Op0VT == MVT::bf16 && IsStrict) {
4765 {Op0,
Op.getOperand(0)});
4769 if (Op0VT == MVT::bf16)
4775 assert(
Op.getValueType() == MVT::f128 &&
"Unexpected lowering");
4781 EVT VT =
Op.getValueType();
4782 bool IsStrict =
Op->isStrictFPOpcode();
4783 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
4785 bool Trunc =
Op.getConstantOperandVal(IsStrict ? 2 : 1) == 1;
4789 if (SrcVT == MVT::nxv8f32)
4793 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
4796 constexpr EVT
I32 = MVT::nxv4i32;
4802 if (SrcVT == MVT::nxv2f32 || SrcVT == MVT::nxv4f32) {
4803 if (Subtarget->hasBF16())
4804 return LowerToPredicatedOp(
Op, DAG,
4805 AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
4807 Narrow = getSVESafeBitCast(I32, SrcVal, DAG);
4812 }
else if (SrcVT == MVT::nxv2f64 &&
4813 (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())) {
4816 Narrow = DAG.
getNode(AArch64ISD::FCVTX_MERGE_PASSTHRU,
DL, MVT::nxv2f32,
4817 Pg, SrcVal, DAG.
getPOISON(MVT::nxv2f32));
4823 NewOps.
push_back(
Op.getOperand(IsStrict ? 2 : 1));
4824 return DAG.
getNode(
Op.getOpcode(),
DL, VT, NewOps,
Op->getFlags());
4841 IsNaN = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, I1, IsNaN);
4842 Narrow = DAG.
getSelect(
DL, I32, IsNaN, NaN, Narrow);
4847 return getSVESafeBitCast(VT, Narrow, DAG);
4851 return LowerFixedLengthFPRoundToSVE(
Op, DAG);
4856 !((Subtarget->hasNEON() || Subtarget->hasSME()) &&
4857 Subtarget->hasBF16())) {
4872 Narrow = DAG.
getNode(AArch64ISD::FCVTXN,
DL,
F32, Narrow);
4893 Narrow = DAG.
getSelect(
DL, I32, IsNaN, NaN, Narrow);
4910 if (SrcVT != MVT::f128) {
4927 bool IsStrict =
Op->isStrictFPOpcode();
4928 EVT InVT =
Op.getOperand(IsStrict ? 1 : 0).getValueType();
4929 EVT VT =
Op.getValueType();
4932 "Unimplemented SVE support for STRICT_FP_to_INT!");
4941 {
Op.getOperand(0),
Op.getOperand(1)});
4942 return DAG.
getNode(
Op.getOpcode(),
DL, {VT, MVT::Other},
4943 {Ext.getValue(1), Ext.getValue(0)});
4946 Op.getOpcode(),
DL,
Op.getValueType(),
4960 if (InVT == MVT::nxv8f32)
4964 ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
4965 : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
4966 return LowerToPredicatedOp(
Op, DAG, Opcode);
4971 return LowerFixedLengthFPToIntToSVE(
Op, DAG);
4975 if (VTSize < InVTSize) {
4980 {Op.getOperand(0), Op.getOperand(1)});
4990 if (VTSize > InVTSize) {
4997 {
Op.getOperand(0),
Op.getOperand(1)});
4998 return DAG.
getNode(
Op.getOpcode(),
DL, {VT, MVT::Other},
4999 {Ext.getValue(1), Ext.getValue(0)});
5014 return DAG.
getNode(
Op.getOpcode(),
DL, {ScalarVT, MVT::Other},
5015 {Op.getOperand(0), Extract});
5016 return DAG.
getNode(
Op.getOpcode(),
DL, ScalarVT, Extract);
5025 bool IsStrict =
Op->isStrictFPOpcode();
5026 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
5029 return LowerVectorFP_TO_INT(
Op, DAG);
5032 if ((SrcVal.
getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
5038 {
Op.getOperand(0), SrcVal});
5039 return DAG.
getNode(
Op.getOpcode(),
DL, {Op.getValueType(), MVT::Other},
5040 {Ext.getValue(1), Ext.getValue(0)});
5055AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(
SDValue Op,
5061 EVT DstVT =
Op.getValueType();
5067 assert(SatWidth <= DstElementWidth &&
5068 "Saturation width cannot exceed result width");
5081 if ((SrcElementVT == MVT::f16 &&
5082 (!Subtarget->hasFullFP16() || DstElementWidth > 16)) ||
5083 SrcElementVT == MVT::bf16) {
5093 SrcElementVT = MVT::f32;
5094 SrcElementWidth = 32;
5095 }
else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
5096 SrcElementVT != MVT::f16 && SrcElementVT != MVT::bf16)
5101 if (SatWidth == 64 && SrcElementWidth < 64) {
5105 SrcElementVT = MVT::f64;
5106 SrcElementWidth = 64;
5109 if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth) {
5124 if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
5131 SrcVal2 ? DAG.
getNode(
Op.getOpcode(),
DL, IntVT, SrcVal2,
5167 return LowerVectorFP_TO_INT_SAT(
Op, DAG);
5169 EVT DstVT =
Op.getValueType();
5173 assert(SatWidth <= DstWidth &&
"Saturation width cannot exceed result width");
5176 if ((SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) || SrcVT == MVT::bf16) {
5179 }
else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16 &&
5185 if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
5186 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
5187 DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
5188 return DAG.
getNode(
Op.getOpcode(),
DL, DstVT, SrcVal,
5194 if (DstWidth < SatWidth)
5197 if (SrcVT == MVT::f16 && SatVT == MVT::i16 && DstVT == MVT::i32) {
5200 DAG.
getNode(AArch64ISD::FCVTZS_HALF,
DL, MVT::f32, SrcVal);
5205 SDValue CVTf32 = DAG.
getNode(AArch64ISD::FCVTZU_HALF,
DL, MVT::f32, SrcVal);
5230 EVT VT =
Op.getValueType();
5237 *DAG.
getContext(), Src.getValueType().getVectorElementType());
5253 bool IsStrict =
Op->isStrictFPOpcode();
5254 EVT VT =
Op.getValueType();
5257 EVT InVT =
In.getValueType();
5258 unsigned Opc =
Op.getOpcode();
5262 "Unimplemented SVE support for ISD:::STRICT_INT_TO_FP!");
5277 {Op.getOperand(0), In});
5279 {
Op.getValueType(), MVT::Other},
5290 if (VT == MVT::nxv8f32)
5293 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
5294 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
5295 return LowerToPredicatedOp(
Op, DAG, Opcode);
5300 return LowerFixedLengthIntToFPToSVE(
Op, DAG);
5304 if (VTSize < InVTSize) {
5310 bool IsTargetf16 =
false;
5311 if (
Op.hasOneUse() &&
5316 SDNode *
U = *
Op->user_begin();
5317 if (
U->hasOneUse() &&
U->user_begin()->getOpcode() ==
ISD::FP_ROUND) {
5318 EVT TmpVT =
U->user_begin()->getValueType(0);
5324 if (IsTargetf32 && !IsTargetf16) {
5334 {
In.getValue(1),
In.getValue(0),
5342 if (VTSize > InVTSize) {
5359 return DAG.
getNode(
Op.getOpcode(),
DL, {ScalarVT, MVT::Other},
5360 {Op.getOperand(0), Extract});
5361 return DAG.
getNode(
Op.getOpcode(),
DL, ScalarVT, Extract);
5369 if (
Op.getValueType().isVector())
5370 return LowerVectorINT_TO_FP(
Op, DAG);
5372 bool IsStrict =
Op->isStrictFPOpcode();
5373 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
5378 auto IntToFpViaPromotion = [&](EVT PromoteVT) {
5382 {Op.getOperand(0), SrcVal});
5384 {
Op.getValueType(), MVT::Other},
5389 DAG.
getNode(
Op.getOpcode(),
DL, PromoteVT, SrcVal),
5393 if (
Op.getValueType() == MVT::bf16) {
5394 unsigned MaxWidth = IsSigned
5398 if (MaxWidth <= 24) {
5399 return IntToFpViaPromotion(MVT::f32);
5403 if (MaxWidth <= 53) {
5404 return IntToFpViaPromotion(MVT::f64);
5455 IsStrict ? DAG.
getNode(
Op.getOpcode(),
DL, {MVT::f64, MVT::Other},
5456 {Op.getOperand(0), ToRound})
5457 : DAG.
getNode(
Op.getOpcode(),
DL, MVT::f64, ToRound);
5484 {
Op.getValueType(), MVT::Other},
5488 DAG.getIntPtrConstant(0,
DL,
true));
5493 if (
Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
5494 return IntToFpViaPromotion(MVT::f32);
5503 if (
Op.getValueType() != MVT::f128)
5511AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(
SDValue Op,
5513 assert((Subtarget->hasSVE2() ||
5514 (Subtarget->hasSME() && Subtarget->isStreaming())) &&
5515 "Lowering loop_dependence_raw_mask or loop_dependence_war_mask "
5516 "requires SVE or SME");
5519 EVT VT =
Op.getValueType();
5520 unsigned LaneOffset =
Op.getConstantOperandVal(3);
5522 uint64_t EltSizeInBytes =
Op.getConstantOperandVal(2);
5525 if (LaneOffset != 0 || !
is_contained({1u, 2u, 4u, 8u}, EltSizeInBytes))
5556 EVT OpVT =
Op.getValueType();
5557 EVT ArgVT =
Op.getOperand(0).getValueType();
5560 return LowerFixedLengthBitcastToSVE(
Op, DAG);
5568 "Expected int->fp bitcast!");
5581 return getSVESafeBitCast(OpVT, ExtResult, DAG);
5592 return getSVESafeBitCast(OpVT,
Op.getOperand(0), DAG);
5595 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
5599 if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
5602 assert(ArgVT == MVT::i16);
5612static std::optional<uint64_t>
5616 return std::nullopt;
5621 return std::nullopt;
5623 return C->getZExtValue();
  EVT VT = N.getValueType();
  for (const SDValue &Elt : N->op_values()) {
      unsigned HalfSize = EltSize / 2;
        if (!isIntN(HalfSize, C->getSExtValue()))
        if (!isUIntN(HalfSize, C->getZExtValue()))

  EVT VT = N.getValueType();
  unsigned Opcode = N.getOpcode();
  unsigned Opcode = N.getOpcode();

  if (IsN0SExt && IsN1SExt)
    return AArch64ISD::SMULL;
  if (IsN0ZExt && IsN1ZExt)
    return AArch64ISD::UMULL;

  if (IsN0ZExt || IsN1ZExt) {
      return AArch64ISD::UMULL;
      return AArch64ISD::UMULL;
  if (IsN0SExt || IsN1SExt) {
      return AArch64ISD::SMULL;
      return AArch64ISD::SMULL;

  if (!IsN1SExt && !IsN1ZExt)
    return AArch64ISD::SMULL;
    return AArch64ISD::UMULL;
    return AArch64ISD::UMULL;
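// Hedged note (added): this selection logic appears to pick a widening
// multiply opcode from how the operands are extended: SMULL when both inputs
// are effectively sign-extended, UMULL when both are zero-extended, with the
// mixed cases resolved by whichever side's extension can be proven or
// materialized cheaply.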
5881 EVT VT =
Op.getValueType();
5883 bool OverrideNEON = !Subtarget->isNeonAvailable();
5885 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MUL_PRED);
5890 "unexpected type for custom-lowering ISD::MUL");
5906 if (VT == MVT::v1i64) {
5907 if (Subtarget->hasSVE())
5908 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MUL_PRED);
5924 if (Subtarget->hasSVE())
5925 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MUL_PRED);
5940 "unexpected types for extended operands to VMULL");
5963 if (
Pattern == AArch64SVEPredPattern::all)
5972 if (PatNumElts == (NumElts * VScale))
5976 return DAG.
getNode(AArch64ISD::PTRUE,
DL, VT,
5981 bool IsSigned,
bool IsEqual) {
5985 if (!
N->getValueType(0).isScalableVector() ||
5990 APInt Y =
N->getConstantOperandAPInt(Op1);
5995 if (IsSigned ?
Y.isMaxSignedValue() :
Y.isMaxValue())
6001 APInt X =
N->getConstantOperandAPInt(Op0);
6004 APInt NumActiveElems =
6005 IsSigned ?
Y.ssub_ov(
X, Overflow) :
Y.usub_ov(
X, Overflow);
6012 NumActiveElems = IsSigned ? NumActiveElems.
sadd_ov(One, Overflow)
6013 : NumActiveElems.
uadd_ov(One, Overflow);
6018 std::optional<unsigned> PredPattern =
6020 unsigned MinSVEVectorSize = std::max(
6022 unsigned ElementSize = 128 /
N->getValueType(0).getVectorMinNumElements();
6023 if (PredPattern != std::nullopt &&
6024 NumActiveElems.
getZExtValue() <= (MinSVEVectorSize / ElementSize))
6025 return getPTrue(DAG,
DL,
N->getValueType(0), *PredPattern);
6034 EVT InVT =
Op.getValueType();
6038 "Expected a predicate-to-predicate bitcast");
6042 "Only expect to cast between legal scalable predicate types!");
6052 Op.getConstantOperandVal(0) == Intrinsic::aarch64_sve_convert_to_svbool &&
6053 Op.getOperand(1).getValueType().bitsGT(VT))
6054 Op =
Op.getOperand(1);
6072 Mask = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, VT, Mask);
6079 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;
6085 TargetLowering::CallLoweringInfo CLI(DAG);
6087 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
6090 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
6136 SDValue TileSlice =
N->getOperand(2);
6139 int32_t ConstAddend = 0;
6148 ConstAddend = ImmNode->getSExtValue();
6152 int32_t ImmAddend = ConstAddend % 16;
6153 if (int32_t
C = (ConstAddend - ImmAddend)) {
6155 VarAddend = VarAddend
6162 auto SVL = DAG.
getNode(AArch64ISD::RDSVL,
DL, MVT::i64,
6174 return DAG.
getNode(IsLoad ? AArch64ISD::SME_ZA_LDR : AArch64ISD::SME_ZA_STR,
6176 {
N.getOperand(0), TileSlice,
Base,
6185 auto Op1 =
Op.getOperand(1);
6186 auto Op2 =
Op.getOperand(2);
6187 auto Mask =
Op.getOperand(3);
6190 EVT Op2VT = Op2.getValueType();
6191 EVT ResVT =
Op.getValueType();
6195 "Expected 8-bit or 16-bit characters.");
6209 Op2 = DAG.
getNode(AArch64ISD::DUPLANE128,
DL, OpContainerVT, Op2,
6237 ID, Mask, Op1, Op2);
6248 unsigned IntNo =
Op.getConstantOperandVal(1);
6253 case Intrinsic::aarch64_prefetch: {
6257 unsigned IsWrite =
Op.getConstantOperandVal(3);
6258 unsigned Locality =
Op.getConstantOperandVal(4);
6259 unsigned IsStream =
Op.getConstantOperandVal(5);
6260 unsigned IsData =
Op.getConstantOperandVal(6);
6261 unsigned PrfOp = (IsWrite << 4) |
6266 return DAG.
getNode(AArch64ISD::PREFETCH,
DL, MVT::Other, Chain,
6269 case Intrinsic::aarch64_range_prefetch: {
6273 unsigned IsWrite =
Op.getConstantOperandVal(3);
6274 unsigned IsStream =
Op.getConstantOperandVal(4);
6275 unsigned PrfOp = (IsStream << 2) | IsWrite;
6278 return DAG.
getNode(AArch64ISD::RANGE_PREFETCH,
DL, MVT::Other, Chain,
6282 case Intrinsic::aarch64_sme_str:
6283 case Intrinsic::aarch64_sme_ldr: {
6286 case Intrinsic::aarch64_sme_za_enable:
6288 AArch64ISD::SMSTART,
DL, DAG.
getVTList(MVT::Other, MVT::Glue),
6291 case Intrinsic::aarch64_sme_za_disable:
6293 AArch64ISD::SMSTOP,
DL, DAG.
getVTList(MVT::Other, MVT::Glue),
6301 unsigned IntNo =
Op.getConstantOperandVal(1);
6306 case Intrinsic::aarch64_mops_memset_tag: {
6313 auto Alignment =
Node->getMemOperand()->getAlign();
6314 bool IsVol =
Node->isVolatile();
6315 auto DstPtrInfo =
Node->getPointerInfo();
6319 SDValue MS = SDI.EmitMOPS(AArch64::MOPSMemorySetTaggingPseudo, DAG,
DL,
6320 Chain, Dst, Val,
Size, Alignment, IsVol,
6321 DstPtrInfo, MachinePointerInfo{});
6334 unsigned IntNo =
Op.getConstantOperandVal(0);
6338 case Intrinsic::thread_pointer: {
6340 return DAG.
getNode(AArch64ISD::THREAD_POINTER,
DL, PtrVT);
6342 case Intrinsic::aarch64_sve_whilewr_b:
6344 Op.getOperand(1),
Op.getOperand(2),
6347 case Intrinsic::aarch64_sve_whilewr_h:
6349 Op.getOperand(1),
Op.getOperand(2),
6352 case Intrinsic::aarch64_sve_whilewr_s:
6354 Op.getOperand(1),
Op.getOperand(2),
6357 case Intrinsic::aarch64_sve_whilewr_d:
6359 Op.getOperand(1),
Op.getOperand(2),
6362 case Intrinsic::aarch64_sve_whilerw_b:
6364 Op.getOperand(1),
Op.getOperand(2),
6367 case Intrinsic::aarch64_sve_whilerw_h:
6369 Op.getOperand(1),
Op.getOperand(2),
6372 case Intrinsic::aarch64_sve_whilerw_s:
6374 Op.getOperand(1),
Op.getOperand(2),
6377 case Intrinsic::aarch64_sve_whilerw_d:
6379 Op.getOperand(1),
Op.getOperand(2),
6382 case Intrinsic::aarch64_neon_abs: {
6383 EVT Ty =
Op.getValueType();
6384 if (Ty == MVT::i64) {
6395 case Intrinsic::aarch64_neon_pmull64: {
6399 std::optional<uint64_t> LHSLane =
6401 std::optional<uint64_t> RHSLane =
6404 assert((!LHSLane || *LHSLane < 2) &&
"Expect lane to be None or 0 or 1");
6405 assert((!RHSLane || *RHSLane < 2) &&
"Expect lane to be None or 0 or 1");
6411 auto TryVectorizeOperand = [](
SDValue N, std::optional<uint64_t> NLane,
6412 std::optional<uint64_t> OtherLane,
6414 SelectionDAG &DAG) ->
SDValue {
6423 if (OtherLane == 1) {
6432 DAG.
getNode(AArch64ISD::DUPLANE64,
DL, MVT::v2i64,
6438 return DAG.
getNode(AArch64ISD::DUP,
DL, MVT::v1i64,
N);
6443 assert(
N.getValueType() == MVT::i64 &&
6444 "Intrinsic aarch64_neon_pmull64 requires i64 parameters");
6448 LHS = TryVectorizeOperand(
LHS, LHSLane, RHSLane,
DL, DAG);
6449 RHS = TryVectorizeOperand(
RHS, RHSLane, LHSLane,
DL, DAG);
6453 case Intrinsic::aarch64_neon_smax:
6456 case Intrinsic::aarch64_neon_umax:
6459 case Intrinsic::aarch64_neon_smin:
6462 case Intrinsic::aarch64_neon_umin:
6465 case Intrinsic::aarch64_neon_scalar_sqxtn:
6466 case Intrinsic::aarch64_neon_scalar_sqxtun:
6467 case Intrinsic::aarch64_neon_scalar_uqxtn: {
6468 assert(
Op.getValueType() == MVT::i32 ||
Op.getValueType() == MVT::f32);
6469 if (
Op.getValueType() == MVT::i32)
6474 Op.getOperand(1))));
6477 case Intrinsic::aarch64_neon_sqxtn:
6480 case Intrinsic::aarch64_neon_sqxtun:
6483 case Intrinsic::aarch64_neon_uqxtn:
6486 case Intrinsic::aarch64_neon_sqshrn:
6487 if (
Op.getValueType().isVector())
6490 Op.getOperand(1).getValueType(),
6491 Op.getOperand(1),
Op.getOperand(2)));
6494 case Intrinsic::aarch64_neon_sqshrun:
6495 if (
Op.getValueType().isVector())
6498 Op.getOperand(1).getValueType(),
6499 Op.getOperand(1),
Op.getOperand(2)));
6502 case Intrinsic::aarch64_neon_uqshrn:
6503 if (
Op.getValueType().isVector())
6506 Op.getOperand(1).getValueType(),
6507 Op.getOperand(1),
Op.getOperand(2)));
6510 case Intrinsic::aarch64_neon_sqrshrn:
6511 if (
Op.getValueType().isVector())
6514 Op.getOperand(1).getValueType(),
6515 Op.getOperand(1),
Op.getOperand(2)));
6518 case Intrinsic::aarch64_neon_sqrshrun:
6519 if (
Op.getValueType().isVector())
6522 Op.getOperand(1).getValueType(),
6523 Op.getOperand(1),
Op.getOperand(2)));
6526 case Intrinsic::aarch64_neon_uqrshrn:
6527 if (
Op.getValueType().isVector())
6530 Op.getOperand(1).getValueType(),
6531 Op.getOperand(1),
Op.getOperand(2)));
6534 case Intrinsic::aarch64_neon_sqdmulh:
6536 case Intrinsic::aarch64_neon_sqrdmulh:
6538 case Intrinsic::aarch64_neon_sqrdmlah:
6540 case Intrinsic::aarch64_neon_sqrdmlsh:
6542 case Intrinsic::aarch64_neon_sqrshl:
6544 case Intrinsic::aarch64_neon_sqshl:
6546 case Intrinsic::aarch64_neon_uqrshl:
6548 case Intrinsic::aarch64_neon_uqshl:
6550 case Intrinsic::aarch64_neon_sqadd:
6551 if (
Op.getValueType().isVector())
6556 case Intrinsic::aarch64_neon_sqsub:
6557 if (
Op.getValueType().isVector())
6562 case Intrinsic::aarch64_neon_uqadd:
6563 if (
Op.getValueType().isVector())
6567 case Intrinsic::aarch64_neon_uqsub:
6568 if (
Op.getValueType().isVector())
6572 case Intrinsic::aarch64_neon_sqdmulls_scalar:
6574 case Intrinsic::aarch64_sve_whilelt:
6577 case Intrinsic::aarch64_sve_whilels:
6580 case Intrinsic::aarch64_sve_whilele:
6583 case Intrinsic::aarch64_sve_sunpkhi:
6584 return DAG.
getNode(AArch64ISD::SUNPKHI,
DL,
Op.getValueType(),
6586 case Intrinsic::aarch64_sve_sunpklo:
6587 return DAG.
getNode(AArch64ISD::SUNPKLO,
DL,
Op.getValueType(),
6589 case Intrinsic::aarch64_sve_uunpkhi:
6590 return DAG.
getNode(AArch64ISD::UUNPKHI,
DL,
Op.getValueType(),
6592 case Intrinsic::aarch64_sve_uunpklo:
6593 return DAG.
getNode(AArch64ISD::UUNPKLO,
DL,
Op.getValueType(),
6595 case Intrinsic::aarch64_sve_clasta_n:
6596 return DAG.
getNode(AArch64ISD::CLASTA_N,
DL,
Op.getValueType(),
6597 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6598 case Intrinsic::aarch64_sve_clastb_n:
6599 return DAG.
getNode(AArch64ISD::CLASTB_N,
DL,
Op.getValueType(),
6600 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6601 case Intrinsic::aarch64_sve_lasta:
6602 return DAG.
getNode(AArch64ISD::LASTA,
DL,
Op.getValueType(),
6603 Op.getOperand(1),
Op.getOperand(2));
6604 case Intrinsic::aarch64_sve_lastb:
6605 return DAG.
getNode(AArch64ISD::LASTB,
DL,
Op.getValueType(),
6606 Op.getOperand(1),
Op.getOperand(2));
6607 case Intrinsic::aarch64_sve_tbl:
6608 return DAG.
getNode(AArch64ISD::TBL,
DL,
Op.getValueType(),
Op.getOperand(1),
6610 case Intrinsic::aarch64_sve_trn1:
6611 return DAG.
getNode(AArch64ISD::TRN1,
DL,
Op.getValueType(),
6612 Op.getOperand(1),
Op.getOperand(2));
6613 case Intrinsic::aarch64_sve_trn2:
6614 return DAG.
getNode(AArch64ISD::TRN2,
DL,
Op.getValueType(),
6615 Op.getOperand(1),
Op.getOperand(2));
6616 case Intrinsic::aarch64_sve_uzp1:
6617 return DAG.
getNode(AArch64ISD::UZP1,
DL,
Op.getValueType(),
6618 Op.getOperand(1),
Op.getOperand(2));
6619 case Intrinsic::aarch64_sve_uzp2:
6620 return DAG.
getNode(AArch64ISD::UZP2,
DL,
Op.getValueType(),
6621 Op.getOperand(1),
Op.getOperand(2));
6622 case Intrinsic::aarch64_sve_zip1:
6623 return DAG.
getNode(AArch64ISD::ZIP1,
DL,
Op.getValueType(),
6624 Op.getOperand(1),
Op.getOperand(2));
6625 case Intrinsic::aarch64_sve_zip2:
6626 return DAG.
getNode(AArch64ISD::ZIP2,
DL,
Op.getValueType(),
6627 Op.getOperand(1),
Op.getOperand(2));
6628 case Intrinsic::aarch64_sve_splice:
6629 return DAG.
getNode(AArch64ISD::SPLICE,
DL,
Op.getValueType(),
6630 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6631 case Intrinsic::aarch64_sve_ptrue:
6632 return getPTrue(DAG,
DL,
Op.getValueType(),
Op.getConstantOperandVal(1));
6633 case Intrinsic::aarch64_sve_clz:
6634 return DAG.
getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6635 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6636 case Intrinsic::aarch64_sme_cntsd: {
6642 case Intrinsic::aarch64_sve_cnt: {
6645 if (
Data.getValueType().isFloatingPoint())
6647 return DAG.
getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6648 Op.getOperand(2),
Data,
Op.getOperand(1));
6650 case Intrinsic::aarch64_sve_dupq_lane:
6651 return LowerDUPQLane(
Op, DAG);
6652 case Intrinsic::aarch64_sve_convert_from_svbool:
6653 if (
Op.getValueType() == MVT::aarch64svcount)
6656 case Intrinsic::aarch64_sve_convert_to_svbool:
6657 if (
Op.getOperand(1).getValueType() == MVT::aarch64svcount)
6660 case Intrinsic::aarch64_sve_fneg:
6661 return DAG.
getNode(AArch64ISD::FNEG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6662 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6663 case Intrinsic::aarch64_sve_frintp:
6664 return DAG.
getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6665 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6666 case Intrinsic::aarch64_sve_frintm:
6667 return DAG.
getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6668 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6669 case Intrinsic::aarch64_sve_frinti:
6670 return DAG.
getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU,
DL,
6671 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6673 case Intrinsic::aarch64_sve_frintx:
6674 return DAG.
getNode(AArch64ISD::FRINT_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6675 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6676 case Intrinsic::aarch64_sve_frint32x:
6677 return DAG.
getNode(AArch64ISD::FRINT32_MERGE_PASSTHRU,
DL,
6678 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6680 case Intrinsic::aarch64_sve_frint64x:
6681 return DAG.
getNode(AArch64ISD::FRINT64_MERGE_PASSTHRU,
DL,
6682 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6684 case Intrinsic::aarch64_sve_frinta:
6685 return DAG.
getNode(AArch64ISD::FROUND_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6686 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6687 case Intrinsic::aarch64_sve_frintn:
6688 return DAG.
getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU,
DL,
6689 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6691 case Intrinsic::aarch64_sve_frintz:
6692 return DAG.
getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6693 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6694 case Intrinsic::aarch64_sve_frint32z:
6695 return DAG.
getNode(AArch64ISD::FTRUNC32_MERGE_PASSTHRU,
DL,
6696 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6698 case Intrinsic::aarch64_sve_frint64z:
6699 return DAG.
getNode(AArch64ISD::FTRUNC64_MERGE_PASSTHRU,
DL,
6700 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6702 case Intrinsic::aarch64_sve_ucvtf:
6703 return DAG.
getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU,
DL,
6704 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6706 case Intrinsic::aarch64_sve_scvtf:
6707 return DAG.
getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU,
DL,
6708 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6710 case Intrinsic::aarch64_sve_fcvtzu:
6711 return DAG.
getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6712 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6713 case Intrinsic::aarch64_sve_fcvtzs:
6714 return DAG.
getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6715 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6716 case Intrinsic::aarch64_sve_fsqrt:
6717 return DAG.
getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6718 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6719 case Intrinsic::aarch64_sve_frecpx:
6720 return DAG.
getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6721 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6722 case Intrinsic::aarch64_sve_frecpe_x:
6723 return DAG.
getNode(AArch64ISD::FRECPE,
DL,
Op.getValueType(),
6725 case Intrinsic::aarch64_sve_frecps_x:
6726 return DAG.
getNode(AArch64ISD::FRECPS,
DL,
Op.getValueType(),
6727 Op.getOperand(1),
Op.getOperand(2));
6728 case Intrinsic::aarch64_sve_frsqrte_x:
6729 return DAG.
getNode(AArch64ISD::FRSQRTE,
DL,
Op.getValueType(),
6731 case Intrinsic::aarch64_sve_frsqrts_x:
6732 return DAG.
getNode(AArch64ISD::FRSQRTS,
DL,
Op.getValueType(),
6733 Op.getOperand(1),
Op.getOperand(2));
6734 case Intrinsic::aarch64_sve_fabs:
6735 return DAG.
getNode(AArch64ISD::FABS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6736 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6737 case Intrinsic::aarch64_sve_abs:
6738 return DAG.
getNode(AArch64ISD::ABS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6739 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6740 case Intrinsic::aarch64_sve_neg:
6741 return DAG.
getNode(AArch64ISD::NEG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6742 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6743 case Intrinsic::aarch64_sve_insr: {
6745 EVT ScalarTy =
Scalar.getValueType();
6746 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
6749 return DAG.
getNode(AArch64ISD::INSR,
DL,
Op.getValueType(),
6750 Op.getOperand(1), Scalar);
6752 case Intrinsic::aarch64_sve_rbit:
6753 return DAG.
getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
DL,
6754 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6756 case Intrinsic::aarch64_sve_revb:
6757 return DAG.
getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6758 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6759 case Intrinsic::aarch64_sve_revh:
6760 return DAG.
getNode(AArch64ISD::REVH_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6761 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6762 case Intrinsic::aarch64_sve_revw:
6763 return DAG.
getNode(AArch64ISD::REVW_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6764 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6765 case Intrinsic::aarch64_sve_revd:
6766 return DAG.
getNode(AArch64ISD::REVD_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6767 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6768 case Intrinsic::aarch64_sve_sxtb:
6770 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6771 Op.getOperand(2),
Op.getOperand(3),
6775 case Intrinsic::aarch64_sve_sxth:
6777 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6778 Op.getOperand(2),
Op.getOperand(3),
6782 case Intrinsic::aarch64_sve_sxtw:
6784 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6785 Op.getOperand(2),
Op.getOperand(3),
6789 case Intrinsic::aarch64_sve_uxtb:
6791 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6792 Op.getOperand(2),
Op.getOperand(3),
6796 case Intrinsic::aarch64_sve_uxth:
6798 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6799 Op.getOperand(2),
Op.getOperand(3),
6803 case Intrinsic::aarch64_sve_uxtw:
6805 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6806 Op.getOperand(2),
Op.getOperand(3),
6810 case Intrinsic::localaddress: {
6812 const auto *RegInfo = Subtarget->getRegisterInfo();
6813 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
6815 Op.getSimpleValueType());
6818 case Intrinsic::eh_recoverfp: {
6823 SDValue IncomingFPOp =
Op.getOperand(2);
6828 "llvm.eh.recoverfp must take a function as the first argument");
6829 return IncomingFPOp;
6831 case Intrinsic::aarch64_neon_vsri:
6832 case Intrinsic::aarch64_neon_vsli:
6833 case Intrinsic::aarch64_sve_sri:
6834 case Intrinsic::aarch64_sve_sli: {
6835    EVT Ty = Op.getValueType();
6842    bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri ||
6843                        IntNo == Intrinsic::aarch64_sve_sri;
6844    unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
6845    return DAG.getNode(Opcode, DL, Ty, Op.getOperand(1), Op.getOperand(2),
                          Op.getOperand(3));
6849  case Intrinsic::aarch64_neon_srhadd:
6850  case Intrinsic::aarch64_neon_urhadd:
6851  case Intrinsic::aarch64_neon_shadd:
6852  case Intrinsic::aarch64_neon_uhadd: {
6853    bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
6854                        IntNo == Intrinsic::aarch64_neon_shadd);
6855    bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
6856                          IntNo == Intrinsic::aarch64_neon_urhadd);
6857    unsigned Opcode = IsSignedAdd
6860    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1),
                          Op.getOperand(2));
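  // --- standalone reference sketch (not part of this lowering) ---------------
  // The lane-wise arithmetic behind these opcodes: SHADD/UHADD compute a
  // truncating halving add, SRHADD/URHADD a rounding one. The helper names
  // below are made up for illustration; the sums are widened first so the
  // carry out of the top bit is not lost.
  #include <cstdint>
  #include <cstdio>

  static int8_t shadd(int8_t a, int8_t b) { return int8_t((int16_t(a) + b) >> 1); }
  static int8_t srhadd(int8_t a, int8_t b) { return int8_t((int16_t(a) + b + 1) >> 1); }
  static uint8_t uhadd(uint8_t a, uint8_t b) { return uint8_t((uint16_t(a) + b) >> 1); }
  static uint8_t urhadd(uint8_t a, uint8_t b) { return uint8_t((uint16_t(a) + b + 1) >> 1); }

  int main() {
    std::printf("%d %d\n", shadd(7, 4), srhadd(7, 4));   // 5 6: rounding vs. truncation
    std::printf("%u %u\n", unsigned(uhadd(255, 255)),
                unsigned(urhadd(255, 254)));             // 255 255: no overflow
    return 0;
  }
  // ----------------------------------------------------------------------------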
6863  case Intrinsic::aarch64_neon_saddlp:
6864  case Intrinsic::aarch64_neon_uaddlp: {
6865    unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
6866                          ? AArch64ISD::UADDLP
6867                          : AArch64ISD::SADDLP;
6868    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1));
6870  case Intrinsic::aarch64_neon_sdot:
6871  case Intrinsic::aarch64_neon_udot:
6872  case Intrinsic::aarch64_sve_sdot:
6873  case Intrinsic::aarch64_sve_udot: {
6874    unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
6875                       IntNo == Intrinsic::aarch64_sve_udot)
6878    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1),
6879                       Op.getOperand(2), Op.getOperand(3));
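  // The dot-product nodes take (accumulator, a, b): each wide result lane is the
  // accumulator lane plus the sum of the products of the corresponding group of
  // narrow sub-lanes of a and b (four i8 pairs per i32 lane in the common case).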
6881  case Intrinsic::aarch64_neon_usdot:
6882  case Intrinsic::aarch64_sve_usdot: {
6883    return DAG.getNode(AArch64ISD::USDOT, DL, Op.getValueType(),
6884                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
6886  case Intrinsic::aarch64_neon_saddlv:
6887  case Intrinsic::aarch64_neon_uaddlv: {
6888    EVT OpVT = Op.getOperand(1).getValueType();
6889    EVT ResVT = Op.getValueType();
6891        ((ResVT == MVT::i32 && (OpVT == MVT::v8i8 || OpVT == MVT::v16i8 ||
6892                                OpVT == MVT::v8i16 || OpVT == MVT::v4i16)) ||
6893         (ResVT == MVT::i64 && (OpVT == MVT::v4i32 || OpVT == MVT::v2i32))) &&
6894            "Unexpected aarch64_neon_u/saddlv type");
6898        IntNo == Intrinsic::aarch64_neon_uaddlv ? AArch64ISD::UADDLV
6899                                                : AArch64ISD::SADDLV,
6900        DL, ResVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64, Op.getOperand(1));
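  // SADDLV/UADDLV deliver the across-vector sum in lane 0 of a vector register
  // (v4i32 or v2i64 here), so the intrinsic's scalar result is obtained by
  // extracting element 0 of that node afterwards.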
6904 return EXTRACT_VEC_ELT;
6906 case Intrinsic::experimental_cttz_elts: {
6920 DAG.
getNode(AArch64ISD::CTTZ_ELTS,
DL, MVT::i64, CttzOp);
6923 case Intrinsic::experimental_vector_match: {
6926 case Intrinsic::aarch64_cls:
6927 case Intrinsic::aarch64_cls64: {
6932 case Intrinsic::aarch64_neon_cls: {
6936 case Intrinsic::aarch64_sve_pmul:
6937 case Intrinsic::aarch64_neon_pmul:
6943bool AArch64TargetLowering::shouldExtendGSIndex(
EVT VT,
EVT &EltTy)
const {
6952bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(
SDValue Extend,
6973 if (LD->isVolatile())
6976 EVT MemVT = LD->getMemoryVT();
6977 if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8 && MemVT != MVT::v2i16)
6980 Align Alignment = LD->getAlign();
6982 if (Subtarget.requiresStrictAlign() && Alignment < RequiredAlignment)
6988bool AArch64TargetLowering::isVectorLoadExtDesirable(
SDValue ExtVal)
const {
6996 if (!ExtVT.
isScalableVector() && !Subtarget->useSVEForFixedLengthVectors())
7011 unsigned NumExtMaskedLoads = 0;
7012 for (
auto *U : Ld->getMask()->users())
7014 NumExtMaskedLoads++;
7016 if (NumExtMaskedLoads <= 1)
7022 return PreExtScalarVT == MVT::i8 || PreExtScalarVT == MVT::i16 ||
7023 PreExtScalarVT == MVT::i32 || PreExtScalarVT == MVT::i64;
7027    std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
7028        {std::make_tuple(false, false, false),
7029         AArch64ISD::GLD1_MERGE_ZERO},
7030        {std::make_tuple(false, false, true),
7031         AArch64ISD::GLD1_UXTW_MERGE_ZERO},
7032        {std::make_tuple(false, true, false),
7033         AArch64ISD::GLD1_MERGE_ZERO},
7034        {std::make_tuple(false, true, true),
7035         AArch64ISD::GLD1_SXTW_MERGE_ZERO},
7036        {std::make_tuple(true, false, false),
7037         AArch64ISD::GLD1_SCALED_MERGE_ZERO},
7038        {std::make_tuple(true, false, true),
7039         AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
7040        {std::make_tuple(true, true, false),
7041         AArch64ISD::GLD1_SCALED_MERGE_ZERO},
7042        {std::make_tuple(true, true, true),
7043         AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
7045    auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
7046    return AddrModes.find(Key)->second;
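  // The key above is (IsScaled, IsSigned, NeedsExtend). When the index needs no
  // extension its signedness is irrelevant, which is why the signed and unsigned
  // rows with NeedsExtend == false map to the same plain (or scaled) GLD1 opcode;
  // only 32-bit indices that must be widened select the UXTW/SXTW forms.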
7054 case AArch64ISD::GLD1_MERGE_ZERO:
7055 return AArch64ISD::GLD1S_MERGE_ZERO;
7056 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
7057 return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
7058 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
7059 return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
7060 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
7061 return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
7062 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
7063 return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
7064 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
7065 return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
7066 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
7067 return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
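  // The cases above map each zero-extending gather-load opcode onto its
  // sign-extending GLD1S* counterpart, for gathers whose loaded value is
  // subsequently sign-extended.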
7082 EVT VT =
Op.getValueType();
7106 EVT IndexVT =
Index.getValueType();
7118 assert(Subtarget->useSVEForFixedLengthVectors() &&
7119 "Cannot lower when not using SVE for fixed vectors!");
7128 Index.getValueType().getVectorElementType() == MVT::i64 ||
7129 Mask.getValueType().getVectorElementType() == MVT::i64)
7195 EVT IndexVT =
Index.getValueType();
7207 assert(Subtarget->useSVEForFixedLengthVectors() &&
7208 "Cannot lower when not using SVE for fixed vectors!");
7220 Index.getValueType().getVectorElementType() == MVT::i64 ||
7221 Mask.getValueType().getVectorElementType() == MVT::i64)
7231 if (PromotedVT != VT)
7256 assert(LoadNode &&
"Expected custom lowering of a masked load node");
7257 EVT VT =
Op->getValueType(0);
7260 return LowerFixedLengthVectorMLoadToSVE(
Op, DAG);
7284 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
7307    return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
7308                        ST->getBasePtr(), ST->getMemOperand());
7314 MVT DestVT =
Op.getSimpleValueType();
7318 unsigned SrcAS =
N->getSrcAddressSpace();
7319 unsigned DestAS =
N->getDestAddressSpace();
7320 assert(SrcAS != DestAS &&
7321 "addrspacecast must be between different address spaces");
7324 "addrspacecast must be between different ptr sizes");
7350 assert(StoreNode &&
"Expected a store operation");
7383 {StoreNode->getChain(), DAG.getBitcast(MVT::v2i64, Lo),
7384 DAG.getBitcast(MVT::v2i64, Hi), StoreNode->getBasePtr()},
7399 assert (StoreNode &&
"Can only custom lower store nodes");
7403 EVT VT =
Value.getValueType();
7407 if (
auto MaybeSTNP =
LowerNTStore(StoreNode, VT, MemVT, Dl, DAG))
7414 Subtarget->useSVEForFixedLengthVectors()))
7415 return LowerFixedLengthVectorStoreToSVE(
Op, DAG);
7427 MemVT == MVT::v4i8) {
7430 }
else if (MemVT == MVT::i128 && StoreNode->
isVolatile()) {
7431 return LowerStore128(
Op, DAG);
7432 }
else if (MemVT == MVT::i64x8) {
7437 EVT PtrVT =
Base.getValueType();
7438 for (
unsigned i = 0; i < 8; i++) {
7459 bool IsStoreRelease =
7462 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
7463 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
7473 unsigned Opcode = IsStoreRelease ? AArch64ISD::STILP : AArch64ISD::STP;
7475 std::swap(StoreValue.first, StoreValue.second);
7478 {StoreNode->getChain(), StoreValue.first, StoreValue.second,
7479 StoreNode->getBasePtr()},
7491 EVT MemVT = Load->getMemoryVT();
7492 EVT ResVT = Load->getValueType(0);
7498 switch (Load->getExtensionType()) {
7511 SDValue Chain = Load->getChain();
7512 SDValue BasePtr = Load->getBasePtr();
7514 Align Alignment = Load->getAlign();
7520 DAG.
getLoad(ScalarLoadType,
DL, Chain, BasePtr, PtrInfo, Alignment);
7532 while (CurrentEltBits < DstEltBits) {
7534 CurrentNumElts = CurrentNumElts / 2;
7540 CurrentEltBits = CurrentEltBits * 2;
7543 Res = DAG.
getNode(ExtOpcode,
DL, ExtVT, Res);
7546 if (CurrentNumElts != NumElts) {
7559 assert(LoadNode &&
"Expected custom lowering of a load node");
7568 EVT PtrVT =
Base.getValueType();
7569 for (
unsigned i = 0; i < 8; i++) {
7575 Ops.push_back(Part);
7585SDValue AArch64TargetLowering::LowerFixedLengthVectorCompressToSVE(
7588 EVT VT =
Op.getValueType();
7603 EVT VT =
Op.getValueType();
7604 if (!Subtarget->isSVEAvailable())
7608 return LowerFixedLengthVectorCompressToSVE(
Op, DAG);
7614 EVT MaskVT =
Mask.getValueType();
7641 MVT VT =
Op.getSimpleValueType();
7644 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
7652 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT,
Op.getOperand(0), Neg,
7665 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
7680 MVT VT =
Op.getSimpleValueType();
7684 if (NewShiftNo == 0)
7685 return Op.getOperand(0);
7694 if (NewShiftNo == 0)
7695 return Op.getOperand(1);
7697 if (ShiftNo->getZExtValue() == NewShiftNo)
7712 EVT XScalarTy =
X.getValueType();
7717 switch (
Op.getSimpleValueType().SimpleTy) {
7726 ExpVT = MVT::nxv4i32;
7730 ExpVT = MVT::nxv2i64;
7748 if (
X.getValueType() != XScalarTy)
7756 return Op.getOperand(0);
7791    const char FptrReg = 0x11;
7797        Chain, DL, DAG.getConstant(0x58000080u | NestReg, DL, MVT::i32), Addr,
7798        MachinePointerInfo(TrmpAddr));
7803        Chain, DL, DAG.getConstant(0x580000b0u | FptrReg, DL, MVT::i32), Addr,
7804        MachinePointerInfo(TrmpAddr, 4));
7810        MachinePointerInfo(TrmpAddr, 8));
7815    DAG.getStore(Chain, DL, Nest, Addr, MachinePointerInfo(TrmpAddr, 16));
7820    DAG.getStore(Chain, DL, FPtr, Addr, MachinePointerInfo(TrmpAddr, 24));
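  // Trampoline layout: the 32-bit constants stored at offsets 0 and 4 appear to
  // be AArch64 LDR (literal) instruction encodings, with the destination register
  // numbers OR'ed into their low bits; when the trampoline is executed they pick
  // up the nest value and the real function pointer stored as data at offsets 16
  // and 24 below.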
7834 EVT VT =
Op.getValueType();
7836 (Subtarget->hasSVEB16B16() &&
7837 Subtarget->isNonStreamingSVEorSME2Available()))
7838 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMUL_PRED);
7840 assert(Subtarget->hasBF16() &&
"Expected +bf16 for custom FMUL lowering");
7841 assert((VT == MVT::nxv4bf16 || VT == MVT::nxv8bf16 || VT == MVT::v8bf16) &&
7842 "Unexpected FMUL VT");
7845    return [&, IID](EVT VT, auto... Ops) {
7852    EVT SrcVT = Value.getValueType();
7863    auto FCVT = MakeGetIntrinsic(Intrinsic::aarch64_sve_fcvt_bf16f32_v2);
7864    auto FCVTNT = MakeGetIntrinsic(Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2);
7869        MakeGetIntrinsic(UseSVEBFMLAL ? Intrinsic::aarch64_sve_bfmlalb
7870                                      : Intrinsic::aarch64_neon_bfmlalb);
7872        MakeGetIntrinsic(UseSVEBFMLAL ? Intrinsic::aarch64_sve_bfmlalt
7873                                      : Intrinsic::aarch64_neon_bfmlalt);
7875    EVT AccVT = UseSVEBFMLAL ? MVT::nxv4f32 : MVT::v4f32;
7888    LHS = Reinterpret(LHS, MVT::nxv8bf16);
7889    RHS = Reinterpret(RHS, MVT::nxv8bf16);
7892    SDValue BottomF32 = Reinterpret(BFMLALB(AccVT, Zero, LHS, RHS), MVT::nxv4f32);
7894        FCVT(MVT::nxv8bf16, DAG.getPOISON(MVT::nxv8bf16), Pg, BottomF32);
7896    if (VT == MVT::nxv4bf16)
7897      return Reinterpret(BottomBF16, VT);
7899    SDValue TopF32 = Reinterpret(BFMLALT(AccVT, Zero, LHS, RHS), MVT::nxv4f32);
7900    SDValue TopBF16 = FCVTNT(MVT::nxv8bf16, BottomBF16, Pg, TopF32);
7901    return Reinterpret(TopBF16, VT);
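  // Strategy for bf16 FMUL without a native bf16 multiply: BFMLALB/BFMLALT with a
  // zero accumulator form the even- and odd-lane products in f32, then FCVT
  // narrows the bottom half back to bf16 and FCVTNT interleaves the top half into
  // the same register.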
7908 EVT VT =
Op.getValueType();
7911 assert(VT.
isVector() &&
"Scalar fma lowering should be handled by patterns");
7914 if (VT != MVT::v8f16 && VT != MVT::v4f32 && VT != MVT::v2f64)
7915 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMA_PRED);
7919 ? LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMA_PRED)
7929 auto ConvertToScalableFnegMt = [&](
SDValue Op) {
7931 Op = LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
7935 OpA = ConvertToScalableFnegMt(OpA);
7936 OpB = ConvertToScalableFnegMt(OpB);
7937 OpC = ConvertToScalableFnegMt(OpC);
7940 DAG.
getNode(AArch64ISD::FMA_PRED,
DL, ContainerVT, Pg, OpA, OpB, OpC);
7949 switch (
Op.getOpcode()) {
7955 return LowerLOOP_DEPENDENCE_MASK(
Op, DAG);
7957 return LowerBITCAST(
Op, DAG);
7959 return LowerGlobalAddress(
Op, DAG);
7961 return LowerGlobalTLSAddress(
Op, DAG);
7963 return LowerPtrAuthGlobalAddress(
Op, DAG);
7965 return LowerADJUST_TRAMPOLINE(
Op, DAG);
7967 return LowerINIT_TRAMPOLINE(
Op, DAG);
7971 return LowerSETCC(
Op, DAG);
7973 return LowerSETCCCARRY(
Op, DAG);
7977 return LowerBR_CC(
Op, DAG);
7979 return LowerSELECT(
Op, DAG);
7981 return LowerSELECT_CC(
Op, DAG);
7983 return LowerJumpTable(
Op, DAG);
7985 return LowerBR_JT(
Op, DAG);
7987 return LowerBRIND(
Op, DAG);
7989 return LowerConstantPool(
Op, DAG);
7991 return LowerBlockAddress(
Op, DAG);
7993 return LowerVASTART(
Op, DAG);
7995 return LowerVACOPY(
Op, DAG);
7997 return LowerVAARG(
Op, DAG);
8014 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FADD_PRED);
8016 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FSUB_PRED);
8018 return LowerFMUL(
Op, DAG);
8020 return LowerFMA(
Op, DAG);
8022 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FDIV_PRED);
8024 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
8026 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
8028 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
8030 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
8032 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
8034 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
8036 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
8038 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
8040 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
8042 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
8045 return LowerFP_ROUND(
Op, DAG);
8048 return LowerFP_EXTEND(
Op, DAG);
8050 return LowerFRAMEADDR(
Op, DAG);
8052 return LowerSPONENTRY(
Op, DAG);
8054 return LowerRETURNADDR(
Op, DAG);
8056 return LowerADDROFRETURNADDR(
Op, DAG);
8058 return LowerCONCAT_VECTORS(
Op, DAG);
8060 return LowerINSERT_VECTOR_ELT(
Op, DAG);
8062 return LowerEXTRACT_VECTOR_ELT(
Op, DAG);
8064 return LowerBUILD_VECTOR(
Op, DAG);
8067 return LowerEXTEND_VECTOR_INREG(
Op, DAG);
8069 return LowerZERO_EXTEND_VECTOR_INREG(
Op, DAG);
8071 return LowerVECTOR_SHUFFLE(
Op, DAG);
8073 return LowerSPLAT_VECTOR(
Op, DAG);
8075 return LowerEXTRACT_SUBVECTOR(
Op, DAG);
8077 return LowerINSERT_SUBVECTOR(
Op, DAG);
8080 return LowerDIV(
Op, DAG);
8085 return LowerMinMax(
Op, DAG);
8089 return LowerVectorSRA_SRL_SHL(
Op, DAG);
8093 return LowerShiftParts(
Op, DAG);
8096 return LowerCTPOP_PARITY(
Op, DAG);
8098 return LowerFCOPYSIGN(
Op, DAG);
8100 return LowerVectorOR(
Op, DAG);
8102 return LowerXOR(
Op, DAG);
8109 return LowerINT_TO_FP(
Op, DAG);
8114 return LowerFP_TO_INT(
Op, DAG);
8117 return LowerFP_TO_INT_SAT(
Op, DAG);
8119 return LowerGET_ROUNDING(
Op, DAG);
8121 return LowerSET_ROUNDING(
Op, DAG);
8123 return LowerGET_FPMODE(
Op, DAG);
8125 return LowerSET_FPMODE(
Op, DAG);
8127 return LowerRESET_FPMODE(
Op, DAG);
8129 return LowerMUL(
Op, DAG);
8131 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MULHS_PRED);
8133 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MULHU_PRED);
8135 return LowerINTRINSIC_W_CHAIN(
Op, DAG);
8137 return LowerINTRINSIC_WO_CHAIN(
Op, DAG);
8139 return LowerINTRINSIC_VOID(
Op, DAG);
8142 assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
8143 return LowerStore128(
Op, DAG);
8147 return LowerSTORE(
Op, DAG);
8149 return LowerMSTORE(
Op, DAG);
8151 return LowerMGATHER(
Op, DAG);
8153 return LowerMSCATTER(
Op, DAG);
8155 return LowerVECREDUCE_SEQ_FADD(
Op, DAG);
8169 return LowerVECREDUCE(
Op, DAG);
8172 return LowerVECREDUCE_MUL(
Op, DAG);
8174 return LowerATOMIC_LOAD_AND(
Op, DAG);
8176 return LowerDYNAMIC_STACKALLOC(
Op, DAG);
8178 return LowerVSCALE(
Op, DAG);
8180 return LowerVECTOR_COMPRESS(
Op, DAG);
8184 return LowerFixedLengthVectorIntExtendToSVE(
Op, DAG);
8191 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
8192 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
8195 return LowerToPredicatedOp(
Op, DAG,
8196 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
8199 return LowerTRUNCATE(
Op, DAG);
8201 return LowerMLOAD(
Op, DAG);
8204 !Subtarget->isNeonAvailable()))
8205 return LowerFixedLengthVectorLoadToSVE(
Op, DAG);
8206 return LowerLOAD(
Op, DAG);
8210 return LowerToScalableOp(
Op, DAG);
8212 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMAX_PRED);
8214 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMAXNM_PRED);
8216 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMIN_PRED);
8218 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMINNM_PRED);
8220 return LowerFixedLengthVectorSelectToSVE(
Op, DAG);
8222 return LowerABS(
Op, DAG);
8224 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABDS_PRED);
8226 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABDU_PRED);
8228 return LowerAVG(
Op, DAG, AArch64ISD::HADDS_PRED);
8230 return LowerAVG(
Op, DAG, AArch64ISD::HADDU_PRED);
8232 return LowerAVG(
Op, DAG, AArch64ISD::RHADDS_PRED);
8234 return LowerAVG(
Op, DAG, AArch64ISD::RHADDU_PRED);
8236 return LowerBitreverse(
Op, DAG);
8238 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
8240 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU);
8242 return LowerCTTZ(
Op, DAG);
8245 return LowerVECTOR_SPLICE(
Op, DAG);
8247 return LowerVECTOR_DEINTERLEAVE(
Op, DAG);
8249 return LowerVECTOR_INTERLEAVE(
Op, DAG);
8251 return LowerGET_ACTIVE_LANE_MASK(
Op, DAG);
8254 if (
Op.getValueType().isVector())
8255 return LowerVectorXRINT(
Op, DAG);
8259 assert((
Op.getOperand(0).getValueType() == MVT::f16 ||
8260 Op.getOperand(0).getValueType() == MVT::bf16) &&
8261 "Expected custom lowering of rounding operations only for f16");
8264 return DAG.
getNode(
Op.getOpcode(),
DL,
Op.getValueType(), Ext);
8270 assert((
Op.getOperand(1).getValueType() == MVT::f16 ||
8271 Op.getOperand(1).getValueType() == MVT::bf16) &&
8272 "Expected custom lowering of rounding operations only for f16");
8275 {
Op.getOperand(0),
Op.getOperand(1)});
8276 return DAG.
getNode(
Op.getOpcode(),
DL, {Op.getValueType(), MVT::Other},
8277 {Ext.getValue(1), Ext.getValue(0)});
8280 assert(
Op.getOperand(2).getValueType() == MVT::i128 &&
8281 "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
8286 std::pair<SDValue, SDValue> Pair =
8291 SysRegName, Pair.first, Pair.second);
8301 return LowerVECTOR_HISTOGRAM(
Op, DAG);
8306 return LowerPARTIAL_REDUCE_MLA(
Op, DAG);
8311 return !Subtarget->useSVEForFixedLengthVectors();
8315 EVT VT,
bool OverrideNEON)
const {
8338 return Subtarget->isSVEorStreamingSVEAvailable();
8345 if (!Subtarget->useSVEForFixedLengthVectors())
8365 unsigned Opcode =
N->getOpcode();
8370 unsigned IID =
N->getConstantOperandVal(0);
8371 if (IID < Intrinsic::num_intrinsics)
8385 if (IID == Intrinsic::aarch64_neon_umull ||
8387 IID == Intrinsic::aarch64_neon_smull ||
8396 bool IsVarArg)
const {
8419 if (Subtarget->isTargetWindows()) {
8421 if (Subtarget->isWindowsArm64EC())
8427 if (!Subtarget->isTargetDarwin())
8435 if (Subtarget->isWindowsArm64EC())
8441 if (Subtarget->isWindowsArm64EC())
8465 if (Subtarget->isWindowsArm64EC())
8501 IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE;
8519 RTLIB::Libcall LC = RTLIB::SMEABI_TPIDR2_RESTORE;
8537 Chain = DAG.
getCopyToReg(Chain,
DL, AArch64::X0, TPIDR2Block, Glue);
8539 DAG.
getNode(AArch64ISD::RESTORE_ZA,
DL, MVT::Other,
8540 {Chain, TPIDR2_EL0, DAG.
getRegister(AArch64::X0, MVT::i64),
8541 RestoreRoutine, RegMask, Chain.
getValue(1)});
8557 auto &FuncInfo = *MF.
getInfo<AArch64FunctionInfo>();
8558 auto &Subtarget = DAG.
getSubtarget<AArch64Subtarget>();
8559 const AArch64RegisterInfo &
TRI = *Subtarget.getRegisterInfo();
8561 SMEAttrs SMEFnAttrs = FuncInfo.getSMEFnAttrs();
8587 if (
getTM().useNewSMEABILowering())
8597 AArch64ISD::SMSTART,
DL, DAG.
getVTList(MVT::Other, MVT::Glue), Chain,
8606 {Chain, DAG.getConstant(0, DL, MVT::i32), ZT0FrameIndex});
8617SDValue AArch64TargetLowering::LowerFormalArguments(
8625 Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
8627 (isVarArg && Subtarget->isWindowsArm64EC());
8628 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
8638 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.
getContext());
8646 unsigned NumArgs = Ins.
size();
8648 unsigned CurArgIdx = 0;
8649 bool UseVarArgCC =
false;
8651 UseVarArgCC = isVarArg;
8655 for (
unsigned i = 0; i != NumArgs; ++i) {
8656 MVT ValVT = Ins[i].VT;
8657 if (Ins[i].isOrigArg()) {
8658 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
8659 CurArgIdx = Ins[i].getOrigArgIndex();
8666 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
8668 else if (ActualMVT == MVT::i16)
8672 Ins[i].OrigTy, CCInfo);
8673 assert(!Res &&
"Call operand has unhandled type");
8678 bool IsLocallyStreaming =
8679 !
Attrs.hasStreamingInterface() &&
Attrs.hasStreamingBody();
8683 unsigned ExtraArgLocs = 0;
8684 for (
unsigned i = 0, e = Ins.
size(); i != e; ++i) {
8685 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
8687 if (Ins[i].
Flags.isByVal()) {
8691 int Size = Ins[i].Flags.getByValSize();
8692 unsigned NumRegs = (
Size + 7) / 8;
8704 if (Ins[i].
Flags.isSwiftAsync())
8705 MF.
getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(
true);
8711 const TargetRegisterClass *RC;
8713 if (RegVT == MVT::i32)
8714 RC = &AArch64::GPR32RegClass;
8715 else if (RegVT == MVT::i64)
8716 RC = &AArch64::GPR64RegClass;
8717 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
8718 RC = &AArch64::FPR16RegClass;
8719 else if (RegVT == MVT::f32)
8720 RC = &AArch64::FPR32RegClass;
8722 RC = &AArch64::FPR64RegClass;
8724 RC = &AArch64::FPR128RegClass;
8728 RC = &AArch64::PPRRegClass;
8729 }
else if (RegVT == MVT::aarch64svcount) {
8731 RC = &AArch64::PPRRegClass;
8734 RC = &AArch64::ZPRRegClass;
8741 if (IsLocallyStreaming) {
8756 DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
8775 "Indirect arguments should be scalable on most subtargets");
8797 uint32_t BEAlign = 0;
8798 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
8799 !Ins[i].Flags.isInConsecutiveRegs())
8800 BEAlign = 8 - ArgSize;
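  // On big-endian targets an argument smaller than 8 bytes lives in the
  // most-significant end of its stack slot, hence the 8 - ArgSize adjustment
  // above (skipped when the value is part of a consecutive-register block).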
8803 MachinePointerInfo PtrInfo;
8809 unsigned ObjOffset = ArgOffset + BEAlign;
8839 "Indirect arguments should be scalable on most subtargets");
8859 Subtarget->isWindowsArm64EC()) &&
8860 "Indirect arguments should be scalable on most subtargets");
8863 unsigned NumParts = 1;
8864 if (Ins[i].
Flags.isInConsecutiveRegs()) {
8865 while (!Ins[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
8874 while (NumParts > 0) {
8875 ArgValue = DAG.
getLoad(PartLoad,
DL, Chain, Ptr, MachinePointerInfo());
8888 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
8894 if (Ins[i].isOrigArg()) {
8895 Argument *OrigArg =
F.getArg(Ins[i].getOrigArgIndex());
8897 if (!Ins[i].
Flags.isZExt()) {
8898 ArgValue = DAG.
getNode(AArch64ISD::ASSERT_ZEXT_BOOL,
DL,
8909 if (
Attrs.hasStreamingCompatibleInterface()) {
8911 DAG.
getNode(AArch64ISD::ENTRY_PSTATE_SM,
DL,
8912 DAG.
getVTList(MVT::i64, MVT::Other), {Chain});
8924 if (IsLocallyStreaming) {
8925 if (
Attrs.hasStreamingCompatibleInterface())
8934 for (
unsigned I=0;
I<InVals.
size(); ++
I) {
8946 if (!Subtarget->isTargetDarwin() || IsWin64) {
8952 saveVarArgRegisters(CCInfo, DAG,
DL, Chain);
8956 unsigned VarArgsOffset = CCInfo.getStackSize();
8959 alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
8970 SmallVectorImpl<ForwardedRegister> &Forwards =
8972 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
8976 if (!CCInfo.isAllocated(AArch64::X8)) {
8978 Forwards.
push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
8987 for (
unsigned I = 0,
E = Ins.
size();
I !=
E; ++
I) {
8989 Ins[
I].Flags.isInReg()) &&
8990 Ins[
I].Flags.isSRet()) {
9005 unsigned StackArgSize = CCInfo.getStackSize();
9007 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
9011 StackArgSize =
alignTo(StackArgSize, 16);
9025 if (Subtarget->hasCustomCallingConv())
9026 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
9028 if (
getTM().useNewSMEABILowering()) {
9031 if (
Attrs.hasZAState()) {
9035 }
else if (
Attrs.hasAgnosticZAInterface()) {
9036 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
9041 auto *RetTy = EVT(MVT::i64).getTypeForEVT(*DAG.
getContext());
9042 TargetLowering::CallLoweringInfo CLI(DAG);
9043 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
9051 {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
9057 Chain = DAG.
getNode(AArch64ISD::SME_STATE_ALLOC,
DL,
9068 if (
Attrs.hasZAState()) {
9075 Buffer = DAG.
getNode(AArch64ISD::ALLOCATE_ZA_BUFFER,
DL,
9076 DAG.
getVTList(MVT::i64, MVT::Other), {Chain, SVL});
9081 {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
9087 AArch64ISD::INIT_TPIDR2OBJ,
DL, DAG.
getVTList(MVT::Other),
9088 { Buffer.getValue(1), Buffer.getValue(0),
9090 }
else if (
Attrs.hasAgnosticZAInterface()) {
9093 DAG.
getNode(AArch64ISD::GET_SME_SAVE_SIZE,
DL,
9094 DAG.
getVTList(MVT::i64, MVT::Other), Chain);
9098 Buffer = DAG.
getNode(AArch64ISD::ALLOC_SME_SAVE_BUFFER,
DL,
9100 {Chain, BufferSize});
9105 {Chain, BufferSize, DAG.getConstant(1, DL, MVT::i64)});
9117 for (
const ISD::InputArg &
I : Ins) {
9118 if (
I.Flags.isSwiftSelf() ||
I.Flags.isSwiftError() ||
9119 I.Flags.isSwiftAsync()) {
9123 "Swift attributes can't be used with preserve_none",
9133void AArch64TargetLowering::saveVarArgRegisters(
CCState &CCInfo,
9139 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9143 Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
9149 if (Subtarget->isWindowsArm64EC()) {
9156 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
9158 if (GPRSaveSize != 0) {
9161 if (GPRSaveSize & 15)
9168 if (Subtarget->isWindowsArm64EC()) {
9181 for (
unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
9187 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
9197 if (Subtarget->hasFPARMv8() && !IsWin64) {
9199 const unsigned NumFPRArgRegs =
FPRArgRegs.size();
9202 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
9204 if (FPRSaveSize != 0) {
9209 for (
unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
9224 if (!MemOps.
empty()) {
9231SDValue AArch64TargetLowering::LowerCallResult(
9235 SDValue ThisVal,
bool RequiresSMChange)
const {
9236 DenseMap<unsigned, SDValue> CopiedRegs;
9238 for (
unsigned i = 0; i != RVLocs.
size(); ++i) {
9239 CCValAssign VA = RVLocs[i];
9243 if (i == 0 && isThisReturn) {
9245 "unexpected return calling convention register assignment");
9281 Val = DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
9348 unsigned NumArgs = Outs.
size();
9349 for (
unsigned i = 0; i != NumArgs; ++i) {
9350 MVT ArgVT = Outs[i].VT;
9353 bool UseVarArgCC =
false;
9357 if (IsCalleeWin64) {
9371 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
9373 else if (ActualMVT == MVT::i16)
9381 Outs[i].OrigTy, CCInfo);
9382 assert(!Res &&
"Call operand has unhandled type");
9397bool AArch64TargetLowering::isEligibleForTailCallOptimization(
9398 const CallLoweringInfo &CLI)
const {
9404 bool IsVarArg = CLI.IsVarArg;
9408 const SelectionDAG &DAG = CLI.DAG;
9415 SMECallAttrs CallAttrs =
9429 MF.
getInfo<AArch64FunctionInfo>()->isSVECC())
9432 bool CCMatch = CallerCC == CalleeCC;
9447 if (i->hasByValAttr())
9456 if (i->hasInRegAttr()) {
9457 unsigned ArgIdx = i - CallerF.
arg_begin();
9458 if (!CLI.CB || CLI.CB->arg_size() <= ArgIdx)
9460 AttributeSet
Attrs = CLI.CB->getParamAttributes(ArgIdx);
9461 if (!
Attrs.hasAttribute(Attribute::InReg) ||
9462 !
Attrs.hasAttribute(Attribute::StructRet) || !i->hasStructRetAttr() ||
9463 CLI.CB->getArgOperand(ArgIdx) != i) {
9480 const GlobalValue *GV =
G->getGlobal();
9483 (!
TT.isOSWindows() ||
TT.isOSBinFormatELF() ||
TT.isOSBinFormatMachO()))
9503 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
9504 const uint32_t *CallerPreserved =
TRI->getCallPreservedMask(MF, CallerCC);
9506 const uint32_t *CalleePreserved =
TRI->getCallPreservedMask(MF, CalleeCC);
9507 if (Subtarget->hasCustomCallingConv()) {
9508 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
9509 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
9511 if (!
TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9520 CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs,
C);
9524 if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
9532 for (
const CCValAssign &ArgLoc : ArgLocs)
9533 if (!ArgLoc.isRegLoc())
9537 const AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9545 A.getValVT().isScalableVector() ||
9546 Subtarget->isWindowsArm64EC()) &&
9547 "Expected value to be scalable");
9567 int ClobberedFI)
const {
9570 int64_t LastByte = FirstByte + MFI.
getObjectSize(ClobberedFI) - 1;
9581 if (FI->getIndex() < 0) {
9583 int64_t InLastByte = InFirstByte;
9586 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
9587 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
9595bool AArch64TargetLowering::DoesCalleeRestoreStack(
CallingConv::ID CallCC,
9596 bool TailCallOpt)
const {
9607 APInt RequiredZero(SizeInBits, 0xFE);
9609 bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero;
9613void AArch64TargetLowering::AdjustInstrPostInstrSelection(
MachineInstr &
MI,
9619 if (
MI.getOpcode() == AArch64::MSRpstatesvcrImm1 ||
9620 MI.getOpcode() == AArch64::MSRpstatePseudo) {
9621 for (
unsigned I =
MI.getNumOperands() - 1;
I > 0; --
I)
9622 if (MachineOperand &MO =
MI.getOperand(
I);
9623 MO.isReg() && MO.isImplicit() && MO.isDef() &&
9624 (AArch64::GPR32RegClass.contains(MO.getReg()) ||
9625 AArch64::GPR64RegClass.contains(MO.getReg())))
9626 MI.removeOperand(
I);
9630 if (
MI.getOperand(0).getImm() == AArch64SVCR::SVCRSM ||
9631 MI.getOperand(0).getImm() == AArch64SVCR::SVCRSMZA) {
9646 const MachineFunction &MF = *
MI.getMF();
9647 if (MF.
getInfo<AArch64FunctionInfo>()->hasStreamingModeChanges() &&
9648 (
MI.getOpcode() == AArch64::ADDXri ||
9649 MI.getOpcode() == AArch64::SUBXri)) {
9650 const MachineOperand &MO =
MI.getOperand(1);
9659 unsigned Condition,
bool InsertVectorLengthCheck)
const {
9667 Ops.push_back(InGlue);
9668 return DAG.
getNode(AArch64ISD::CHECK_MATCHING_VL,
DL,
9672 if (InsertVectorLengthCheck &&
Enable) {
9675 SDValue CheckVL = GetCheckVL(Chain, InGlue);
9688 assert(PStateReg.
isValid() &&
"PStateSM Register is invalid");
9695 Opcode =
Enable ? AArch64ISD::COND_SMSTART : AArch64ISD::COND_SMSTOP;
9696 Ops.push_back(ConditionOp);
9697 Ops.push_back(PStateSM);
9699 Opcode =
Enable ? AArch64ISD::SMSTART : AArch64ISD::SMSTOP;
9701 Ops.push_back(RegMask);
9704 Ops.push_back(InGlue);
9709 if (!InsertVectorLengthCheck ||
Enable)
9736 if (Flags.isZExt() || Flags.isSExt())
9743 Arg->
isAssert() ||
Op == AArch64ISD::ASSERT_ZEXT_BOOL) {
9755 int FI = FINode->getIndex();
9773AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
9775 SelectionDAG &DAG = CLI.DAG;
9782 bool &IsTailCall = CLI.IsTailCall;
9784 bool IsVarArg = CLI.IsVarArg;
9785 const CallBase *CB = CLI.CB;
9788 MachineFunction::CallSiteInfo CSInfo;
9789 bool IsThisReturn =
false;
9791 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9793 bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
9794 bool IsSibCall =
false;
9795 bool GuardWithBTI =
false;
9797 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
9798 !Subtarget->noBTIAtReturnTwice()) {
9804 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.
getContext());
9807 unsigned NumArgs = Outs.
size();
9809 for (
unsigned i = 0; i != NumArgs; ++i) {
9810 if (Outs[i].
Flags.isVarArg() && Outs[i].VT.isScalableVector())
9812 "currently not supported");
9823 RetCCInfo.AnalyzeCallResult(Ins, RetCC);
9827 CSInfo = MachineFunction::CallSiteInfo(*CB);
9832 auto HasSVERegLoc = [](CCValAssign &Loc) {
9833 if (!Loc.isRegLoc())
9835 return AArch64::ZPRRegClass.contains(Loc.getLocReg()) ||
9836 AArch64::PPRRegClass.contains(Loc.getLocReg());
9838 if (
any_of(RVLocs, HasSVERegLoc) ||
any_of(ArgLocs, HasSVERegLoc))
9843 SMECallAttrs CallAttrs =
9846 std::optional<unsigned> ZAMarkerNode;
9849 if (UseNewSMEABILowering) {
9852 ZAMarkerNode = AArch64ISD::REQUIRES_ZA_SAVE;
9854 ZAMarkerNode = AArch64ISD::REQUIRES_ZT0_SAVE;
9857 ZAMarkerNode = AArch64ISD::INOUT_ZA_USE;
9862 IsTailCall = isEligibleForTailCallOptimization(CLI);
9866 if (!ZAMarkerNode && !TailCallOpt && IsTailCall &&
9874 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
9876 "site marked musttail");
9894 if (IsTailCall && !IsSibCall) {
9899 NumBytes =
alignTo(NumBytes, 16);
9904 FPDiff = NumReusableBytes - NumBytes;
9908 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (
unsigned)-FPDiff)
9916 assert(FPDiff % 16 == 0 &&
"unaligned stack on tail call");
9919 auto DescribeCallsite =
9920 [&](OptimizationRemarkAnalysis &
R) -> OptimizationRemarkAnalysis & {
9923 R <<
ore::NV(
"Callee", ES->getSymbol());
9924 else if (CLI.CB && CLI.CB->getCalledFunction())
9925 R <<
ore::NV(
"Callee", CLI.CB->getCalledFunction()->getName());
9927 R <<
"unknown callee";
9932 bool RequiresLazySave = !UseNewSMEABILowering && CallAttrs.
requiresLazySave();
9933 bool RequiresSaveAllZA =
9935 if (RequiresLazySave) {
9946 auto R = CLI.CB ? OptimizationRemarkAnalysis(
"sme",
"SMELazySaveZA",
9948 : OptimizationRemarkAnalysis(
"sme",
"SMELazySaveZA",
9950 return DescribeCallsite(R) <<
" sets up a lazy save for ZA";
9952 }
else if (RequiresSaveAllZA) {
9954 "Cannot share state that may not exist");
9960 if (RequiresSMChange) {
9963 auto R = CLI.CB ? OptimizationRemarkAnalysis(
"sme",
"SMETransition",
9965 : OptimizationRemarkAnalysis(
"sme",
"SMETransition",
9967 DescribeCallsite(R) <<
" requires a streaming mode transition";
9974 bool ShouldPreserveZT0 =
9979 if (ShouldPreserveZT0) {
9983 {Chain, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
9990 assert((!DisableZA || !RequiresLazySave) &&
9991 "Lazy-save should have PSTATE.SM=1 on entry to the function");
9995 AArch64ISD::SMSTOP,
DL, DAG.
getVTList(MVT::Other, MVT::Glue), Chain,
10000 assert((!IsSibCall || !ZAMarkerNode) &&
"ZA markers require CALLSEQ_START");
10003 if (ZAMarkerNode) {
10010 {Chain, Chain.getValue(1)});
10018 SmallSet<unsigned, 8> RegsUsed;
10022 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
10024 for (
const auto &
F : Forwards) {
10031 unsigned ExtraArgLocs = 0;
10032 for (
unsigned i = 0, e = Outs.
size(); i != e; ++i) {
10033 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
10035 ISD::ArgFlagsTy
Flags = Outs[i].Flags;
10050 if (Outs[i].ArgVT == MVT::i1) {
10072 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
10088 assert((isScalable || Subtarget->isWindowsArm64EC()) &&
10089 "Indirect arguments should be scalable on most subtargets");
10092 TypeSize PartSize = StoreSize;
10093 unsigned NumParts = 1;
10094 if (Outs[i].
Flags.isInConsecutiveRegs()) {
10095 while (!Outs[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
10097 StoreSize *= NumParts;
10106 bool IsPred = VA.
getValVT() == MVT::aarch64svcount ||
10124 if (NumParts > 0) {
10140 if (i == 0 &&
Flags.isReturned() && !
Flags.isSwiftSelf() &&
10141 Outs[0].VT == MVT::i64) {
10143 "unexpected calling convention register assignment");
10145 "unexpected use of 'returned'");
10146 IsThisReturn =
true;
10155 [=](
const std::pair<unsigned, SDValue> &Elt) {
10164 [&VA](MachineFunction::ArgRegPair ArgReg) {
10165 return ArgReg.Reg == VA.getLocReg();
10172 Arg = DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
10177 if (
Options.EmitCallSiteInfo)
10184 MachinePointerInfo DstInfo;
10188 uint32_t BEAlign = 0;
10194 OpSize =
Flags.isByVal() ?
Flags.getByValSize() * 8
10196 OpSize = (OpSize + 7) / 8;
10197 if (!Subtarget->isLittleEndian() && !
Flags.isByVal() &&
10198 !
Flags.isInConsecutiveRegs()) {
10200 BEAlign = 8 - OpSize;
10203 int32_t
Offset = LocMemOffset + BEAlign;
10220 Chain = addTokenForArgument(Chain, DAG, MF.
getFrameInfo(), FI);
10228 if (Outs[i].
Flags.isByVal()) {
10232 Chain,
DL, DstAddr, Arg, SizeNode,
10233 Outs[i].
Flags.getNonZeroByValAlign(),
10235 nullptr, std::nullopt, DstInfo, MachinePointerInfo());
10252 if (IsVarArg && Subtarget->isWindowsArm64EC() &&
10253 !(CLI.CB && CLI.CB->isMustTailCall())) {
10271 if (!MemOpChains.
empty())
10275 if (RequiresSMChange) {
10276 bool InsertVectorLengthCheck =
10286 for (
auto &RegToPass : RegsToPass) {
10288 RegToPass.second, InGlue);
10295 const GlobalValue *CalledGlobal =
nullptr;
10296 unsigned OpFlags = 0;
10298 CalledGlobal =
G->getGlobal();
10299 OpFlags = Subtarget->classifyGlobalFunctionReference(CalledGlobal,
10305 const GlobalValue *GV =
G->getGlobal();
10310 Subtarget->isTargetMachO()) ||
10312 const char *Sym = S->getSymbol();
10325 if (IsTailCall && !IsSibCall) {
10330 unsigned Opc = IsTailCall ? AArch64ISD::TC_RETURN : AArch64ISD::CALL;
10332 std::vector<SDValue>
Ops;
10333 Ops.push_back(Chain);
10334 Ops.push_back(Callee);
10341 "tail calls cannot be marked with clang.arc.attachedcall");
10342 Opc = AArch64ISD::CALL_RVMARKER;
10348 Ops.insert(
Ops.begin() + 1, GA);
10355 Ops.insert(
Ops.begin() + 2, DoEmitMarker);
10357 Opc = AArch64ISD::CALL_ARM64EC_TO_X64;
10358 }
else if (GuardWithBTI) {
10359 Opc = AArch64ISD::CALL_BTI;
10370 const uint64_t
Key = CLI.PAI->Key;
10372 "Invalid auth call key");
10376 std::tie(IntDisc, AddrDisc) =
10379 if (
Opc == AArch64ISD::CALL_RVMARKER)
10380 Opc = AArch64ISD::AUTH_CALL_RVMARKER;
10382 Opc = IsTailCall ? AArch64ISD::AUTH_TC_RETURN : AArch64ISD::AUTH_CALL;
10384 Ops.push_back(IntDisc);
10385 Ops.push_back(AddrDisc);
10390 for (
auto &RegToPass : RegsToPass)
10392 RegToPass.second.getValueType()));
10395 const uint32_t *
Mask;
10396 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10397 if (IsThisReturn) {
10399 Mask =
TRI->getThisReturnPreservedMask(MF, CallConv);
10401 IsThisReturn =
false;
10402 Mask =
TRI->getCallPreservedMask(MF, CallConv);
10405 Mask =
TRI->getCallPreservedMask(MF, CallConv);
10407 if (Subtarget->hasCustomCallingConv())
10408 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
10410 if (
TRI->isAnyArgRegReserved(MF))
10411 TRI->emitReservedArgRegCallError(MF);
10413 assert(Mask &&
"Missing call preserved mask for calling convention");
10417 Ops.push_back(InGlue);
10419 if (CLI.DeactivationSymbol)
10432 if (CalledGlobal &&
10446 if (CalledGlobal &&
10450 uint64_t CalleePopBytes =
10451 DoesCalleeRestoreStack(CallConv, TailCallOpt) ?
alignTo(NumBytes, 16) : 0;
10459 Chain, InGlue, CallConv, IsVarArg, RVLocs,
DL, DAG, InVals, IsThisReturn,
10460 IsThisReturn ? OutVals[0] :
SDValue(), RequiresSMChange);
10465 if (RequiresSMChange) {
10471 if (!UseNewSMEABILowering &&
10475 AArch64ISD::SMSTART,
DL, DAG.
getVTList(MVT::Other, MVT::Glue), Result,
10478 if (ShouldPreserveZT0)
10481 {Result, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
10483 if (RequiresLazySave) {
10485 }
else if (RequiresSaveAllZA) {
10490 if (RequiresSMChange || RequiresLazySave || ShouldPreserveZT0 ||
10491 RequiresSaveAllZA) {
10492 for (
unsigned I = 0;
I < InVals.
size(); ++
I) {
10507 for (
const ISD::OutputArg &O : Outs) {
10508 if (
O.Flags.isSwiftSelf() ||
O.Flags.isSwiftError() ||
10509 O.Flags.isSwiftAsync()) {
10513 "Swift attributes can't be used with preserve_none",
10514 DL.getDebugLoc()));
10523bool AArch64TargetLowering::CanLowerReturn(
10526 const Type *RetTy)
const {
10529 CCState CCInfo(CallConv, isVarArg, MF, RVLocs,
Context);
10540 auto *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
10544 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.
getContext());
10550 SmallSet<unsigned, 4> RegsUsed;
10551 for (
unsigned i = 0, realRVLocIdx = 0; i != RVLocs.
size();
10552 ++i, ++realRVLocIdx) {
10553 CCValAssign &VA = RVLocs[i];
10555 SDValue Arg = OutVals[realRVLocIdx];
10561 if (Outs[i].ArgVT == MVT::i1) {
10577 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
10586 llvm::find_if(RetVals, [=](
const std::pair<unsigned, SDValue> &Elt) {
10596 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10612 for (
auto &RetVal : RetVals) {
10616 DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
10617 DAG.
getVTList(RetVal.second.getValueType(), MVT::Glue),
10619 Chain = DAG.
getCopyToReg(Chain,
DL, RetVal.first, RetVal.second, Glue);
10622 DAG.
getRegister(RetVal.first, RetVal.second.getValueType()));
10633 unsigned RetValReg = AArch64::X0;
10635 RetValReg = AArch64::X8;
10646 if (AArch64::GPR64RegClass.
contains(*
I))
10648 else if (AArch64::FPR64RegClass.
contains(*
I))
10659 RetOps.push_back(Glue);
10670 MachinePointerInfo());
10671 RetOps.insert(RetOps.begin() + 1, Arm64ECRetDest);
10673 return DAG.
getNode(AArch64ISD::TC_RETURN,
DL, MVT::Other, RetOps);
10676 return DAG.
getNode(AArch64ISD::RET_GLUE,
DL, MVT::Other, RetOps);
10685 unsigned Flag)
const {
10687 N->getOffset(), Flag);
10692 unsigned Flag)
const {
10698 unsigned Flag)
const {
10700 N->getOffset(), Flag);
10705 unsigned Flag)
const {
10711 unsigned Flag)
const {
10716template <
class NodeTy>
10718 unsigned Flags)
const {
10726 .
getInfo<AArch64FunctionInfo>()
10727 ->hasELFSignedGOT())
10730 return DAG.
getNode(AArch64ISD::LOADgot,
DL, Ty, GotAddr);
10734template <
class NodeTy>
10736 unsigned Flags)
const {
10742 AArch64ISD::WrapperLarge,
DL, Ty,
10750template <
class NodeTy>
10752 unsigned Flags)
const {
10760 return DAG.
getNode(AArch64ISD::ADDlow,
DL, Ty, ADRP,
Lo);
10764template <
class NodeTy>
10766 unsigned Flags)
const {
10770 SDValue Sym = getTargetNode(
N, Ty, DAG, Flags);
10771 return DAG.
getNode(AArch64ISD::ADR,
DL, Ty, Sym);
10777 const GlobalValue *GV = GN->
getGlobal();
10778 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV,
getTargetMachine());
10782 "unexpected offset in global node");
10787 return getGOT(GN, DAG, OpFlags);
10793 Result = getAddrLarge(GN, DAG, OpFlags);
10795 Result = getAddrTiny(GN, DAG, OpFlags);
10797 Result = getAddr(GN, DAG, OpFlags);
10836AArch64TargetLowering::LowerDarwinGlobalTLSAddress(
SDValue Op,
10838 assert(Subtarget->isTargetDarwin() &&
10839 "This function expects a Darwin target");
10854 PtrMemVT,
DL, Chain, DescAddr,
10869 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10870 const uint32_t *
Mask =
TRI->getTLSCallPreservedMask();
10871 if (Subtarget->hasCustomCallingConv())
10879 unsigned Opcode = AArch64ISD::CALL;
10881 Ops.push_back(Chain);
10882 Ops.push_back(FuncTLVGet);
10886 Opcode = AArch64ISD::AUTH_CALL;
11008SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(
SDValue SymAddr,
11013 auto *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
11017 SDVTList NodeTys = DAG.
getVTList(MVT::Other, MVT::Glue);
11020 bool RequiresSMChange = TLSCallAttrs.requiresSMChange();
11022 auto ChainAndGlue = [](
SDValue Chain) -> std::pair<SDValue, SDValue> {
11023 return {Chain, Chain.
getValue(1)};
11026 if (RequiresSMChange)
11027 std::tie(Chain, Glue) =
11033 ? AArch64ISD::TLSDESC_AUTH_CALLSEQ
11034 : AArch64ISD::TLSDESC_CALLSEQ;
11036 std::tie(Chain, Glue) = ChainAndGlue(DAG.
getNode(
11039 if (TLSCallAttrs.requiresLazySave())
11040 std::tie(Chain, Glue) = ChainAndGlue(DAG.
getNode(
11041 AArch64ISD::REQUIRES_ZA_SAVE,
DL, NodeTys, {Chain, Chain.getValue(1)}));
11043 if (RequiresSMChange)
11044 std::tie(Chain, Glue) =
11052AArch64TargetLowering::LowerELFGlobalTLSAddress(
SDValue Op,
11054 assert(Subtarget->isTargetELF() &&
"This function expects an ELF target");
11057 AArch64FunctionInfo *MFI =
11072 "in local exec TLS model");
11083 const GlobalValue *GV = GA->
getGlobal();
11088 return LowerELFTLSLocalExec(GV, ThreadBase,
DL, DAG);
11091 TPOff = DAG.
getNode(AArch64ISD::LOADgot,
DL, PtrVT, TPOff);
11109 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
11116 GV,
DL, MVT::i64, 0,
11133 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
11141AArch64TargetLowering::LowerWindowsGlobalTLSAddress(
SDValue Op,
11143 assert(Subtarget->isTargetWindows() &&
"Windows specific TLS lowering");
11155 TLSArray = DAG.
getLoad(PtrVT,
DL, Chain, TLSArray, MachinePointerInfo());
11168 DAG.
getNode(AArch64ISD::ADDlow,
DL, PtrVT, ADRP, TLSIndexLo);
11169 TLSIndex = DAG.
getLoad(MVT::i32,
DL, Chain, TLSIndex, MachinePointerInfo());
11179 MachinePointerInfo());
11180 Chain =
TLS.getValue(1);
11183 const GlobalValue *GV = GA->
getGlobal();
11195 Addr = DAG.
getNode(AArch64ISD::ADDlow,
DL, PtrVT, Addr, TGALo);
11205 if (Subtarget->isTargetDarwin())
11206 return LowerDarwinGlobalTLSAddress(
Op, DAG);
11207 if (Subtarget->isTargetELF())
11208 return LowerELFGlobalTLSAddress(
Op, DAG);
11209 if (Subtarget->isTargetWindows())
11210 return LowerWindowsGlobalTLSAddress(
Op, DAG);
11248 assert(TGN->getGlobal()->hasExternalWeakLinkage());
11254 if (TGN->getOffset() != 0)
11256 "unsupported non-zero offset in weak ptrauth global reference");
11263 {TGA, Key, Discriminator}),
11268AArch64TargetLowering::LowerPtrAuthGlobalAddress(
SDValue Op,
11271 uint64_t KeyC =
Op.getConstantOperandVal(1);
11272 SDValue AddrDiscriminator =
Op.getOperand(2);
11273 uint64_t DiscriminatorC =
Op.getConstantOperandVal(3);
11274 EVT VT =
Op.getValueType();
11284 "constant discriminator in ptrauth global out of range [0, 0xffff]");
11287 if (!Subtarget->isTargetELF() && !Subtarget->isTargetMachO())
11290 int64_t PtrOffsetC = 0;
11296 const GlobalValue *PtrGV = PtrN->getGlobal();
11299 const unsigned OpFlags =
11303 "unsupported non-GOT op flags on ptrauth global reference");
11306 PtrOffsetC += PtrN->getOffset();
11309 assert(PtrN->getTargetFlags() == 0 &&
11310 "unsupported target flags on ptrauth global");
11315 ? AddrDiscriminator
11319 if (!NeedsGOTLoad) {
11323 {TPtr, Key, TAddrDiscriminator, Discriminator}),
11332 {TPtr, Key, TAddrDiscriminator, Discriminator}),
11360 SDValue Dest,
unsigned Opcode,
11372 uint64_t Mask =
Op.getConstantOperandVal(1);
11377 if (
Op.getOperand(0).getOpcode() ==
ISD::SHL) {
11378 auto Op00 =
Op.getOperand(0).getOperand(0);
11381 Op.getOperand(1),
Op.getOperand(0).getOperand(1));
11382 return DAG.
getNode(Opcode,
DL, MVT::Other, Chain, Shr,
11402 bool ProduceNonFlagSettingCondBr =
11408 if (
LHS.getValueType() == MVT::f128) {
11413 if (!
RHS.getNode()) {
11433 OFCC = getInvertedCondCode(OFCC);
11436 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
11440 if (
LHS.getValueType().isInteger()) {
11442 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
11447 if (RHSC && RHSC->
getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
11453 return DAG.
getNode(AArch64ISD::CBZ,
DL, MVT::Other, Chain,
LHS, Dest);
11459 return DAG.
getNode(AArch64ISD::CBNZ,
DL, MVT::Other, Chain,
LHS, Dest);
11464 uint64_t SignBitPos;
11466 return DAG.
getNode(AArch64ISD::TBNZ,
DL, MVT::Other, Chain,
LHS,
11471 LHS.getOpcode() !=
ISD::AND && ProduceNonFlagSettingCondBr) {
11475 uint64_t SignBitPos;
11477 return DAG.
getNode(AArch64ISD::TBZ,
DL, MVT::Other, Chain,
LHS,
11483 if (Subtarget->hasCMPBR() &&
11485 ProduceNonFlagSettingCondBr) {
11494 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
11498 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::bf16 ||
11499 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
11508 DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CC1Val, Cmp);
11511 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, BR1, Dest, CC2Val,
11520 if (!Subtarget->isNeonAvailable() &&
11521 !Subtarget->useSVEForFixedLengthVectors())
11524 EVT VT =
Op.getValueType();
11552 if (!VT.
isVector() && !Subtarget->isNeonAvailable() &&
11553 Subtarget->isSVEorStreamingSVEAvailable()) {
11554 if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64 && VT != MVT::bf16)
11568    auto BitCast = [this](EVT VT, SDValue Op, SelectionDAG &DAG) {
11570      return getSVESafeBitCast(VT, Op, DAG);
11577    auto SetVecVal = [&](int Idx = -1) {
11583      VecVal1 = BitCast(VecVT, In1, DAG);
11584      VecVal2 = BitCast(VecVT, In2, DAG);
11590    } else if (VT == MVT::f64) {
11591      VecVT = MVT::v2i64;
11592      SetVecVal(AArch64::dsub);
11593    } else if (VT == MVT::f32) {
11594      VecVT = MVT::v4i32;
11595      SetVecVal(AArch64::ssub);
11596    } else if (VT == MVT::f16 || VT == MVT::bf16) {
11597      VecVT = MVT::v8i16;
11598      SetVecVal(AArch64::hsub);
11609    if (VT == MVT::f64 || VT == MVT::v2f64) {
11617        DAG.getNode(AArch64ISD::BSP, DL, VecVT, SignMaskV, VecVal1, VecVal2);
11618    if (VT == MVT::f16 || VT == MVT::bf16)
11620    if (VT == MVT::f32)
11622    if (VT == MVT::f64)
11625    return BitCast(VT, BSP, DAG);
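  // --- standalone reference sketch (not part of this lowering) ---------------
  // The BSP node above implements copysign as a bitwise select against a mask
  // that covers only the sign bit. The scalar equivalent of that identity is
  // shown below with illustrative names; which BSP operand supplies which half
  // depends on the node's operand order, so the sketch only shows the identity.
  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  static double copysign_via_bitselect(double Mag, double Sign) {
    uint64_t M, S;
    std::memcpy(&M, &Mag, sizeof M);
    std::memcpy(&S, &Sign, sizeof S);
    const uint64_t SignMask = uint64_t(1) << 63;
    // Take the sign bit from Sign and every other bit from Mag.
    uint64_t R = (S & SignMask) | (M & ~SignMask);
    double Out;
    std::memcpy(&Out, &R, sizeof Out);
    return Out;
  }

  int main() {
    std::printf("%g %g\n", copysign_via_bitselect(3.5, -0.0),
                copysign_via_bitselect(-2.0, 1.0)); // -3.5 2
    return 0;
  }
  // ----------------------------------------------------------------------------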
11631 Attribute::NoImplicitFloat))
11634 EVT VT =
Op.getValueType();
11637 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
11645 if (VT == MVT::i32 && IsParity)
11648 if (Subtarget->isSVEorStreamingSVEAvailable()) {
11649 if (VT == MVT::i32 || VT == MVT::i64) {
11650 EVT ContainerVT = VT == MVT::i32 ? MVT::nxv4i32 : MVT::nxv2i64;
11662 if (VT == MVT::i128) {
11675 if (!Subtarget->isNeonAvailable())
11686 if (VT == MVT::i32 || VT == MVT::i64) {
11687 if (VT == MVT::i32)
11693 AddV = DAG.
getNode(AArch64ISD::NVCAST,
DL,
11694 VT == MVT::i32 ? MVT::v2i32 : MVT::v1i64, AddV);
11700 }
else if (VT == MVT::i128) {
11706 DAG.
getNode(AArch64ISD::NVCAST,
DL, MVT::v2i64, AddV),
11714 assert(!IsParity &&
"ISD::PARITY of vector types not supported");
11716 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
11717 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
11718 "Unexpected type for custom ctpop lowering");
11726    EVT DT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
11730    if (VT == MVT::v2i64) {
11731      Val = DAG.getNode(AArch64ISD::UDOT, DL, DT, Zeros, Ones, Val);
11732      Val = DAG.getNode(AArch64ISD::UADDLP, DL, VT, Val);
11733    } else if (VT == MVT::v2i32) {
11734      Val = DAG.getNode(AArch64ISD::UDOT, DL, DT, Zeros, Ones, Val);
11735    } else if (VT == MVT::v4i32) {
11736      Val = DAG.getNode(AArch64ISD::UDOT, DL, DT, Zeros, Ones, Val);
11745    unsigned EltSize = 8;
11751    Val = DAG.getNode(AArch64ISD::UADDLP, DL, WidenVT, Val);
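  // NEON only has CNT on byte vectors, so wider-element popcounts are built from
  // per-byte counts: either a UDOT of the counts against an all-ones vector
  // (which folds four bytes into each i32 lane at once) or repeated pairwise
  // widening adds (UADDLP) until the original element width is reached.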
11758 EVT VT =
Op.getValueType();
11761 VT, Subtarget->useSVEForFixedLengthVectors()));
11771 EVT VT =
Op.getValueType();
11773 unsigned Opcode =
Op.getOpcode();
11800 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SMAX_PRED);
11802 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SMIN_PRED);
11804 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::UMAX_PRED);
11806 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::UMIN_PRED);
11818 EVT VT =
Op.getValueType();
11822 VT, Subtarget->useSVEForFixedLengthVectors()))
11823 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU);
11835 REVB = DAG.
getNode(AArch64ISD::REV32,
DL, VST,
Op.getOperand(0));
11842 REVB = DAG.
getNode(AArch64ISD::REV32,
DL, VST,
Op.getOperand(0));
11849 REVB = DAG.
getNode(AArch64ISD::REV64,
DL, VST,
Op.getOperand(0));
11856 REVB = DAG.
getNode(AArch64ISD::REV64,
DL, VST,
Op.getOperand(0));
11862 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT,
11869 SmallVector<std::pair<SDValue, SDValue>, 16> &WorkList) {
11875 N =
N->getOperand(0);
11879 WorkList.push_back(std::make_pair(
N->getOperand(0),
N->getOperand(1)));
11885 if (
N->getOpcode() !=
ISD::OR || !
N->hasOneUse())
11899 EVT VT =
N->getValueType(0);
11909 unsigned NumXors = 0;
11914 std::tie(XOR0, XOR1) = WorkList[0];
11917 for (
unsigned I = 1;
I < WorkList.
size();
I++) {
11918 std::tie(XOR0, XOR1) = WorkList[
I];
11920 Cmp = DAG.
getNode(LogicOp,
DL, VT, Cmp, CmpChain);
11932 if (
Op.getValueType().isVector())
11933 return LowerVSETCC(
Op, DAG);
11935 bool IsStrict =
Op->isStrictFPOpcode();
11937 unsigned OpNo = IsStrict ? 1 : 0;
11940 Chain =
Op.getOperand(0);
11947 EVT VT =
Op.getValueType();
11953 if (
LHS.getValueType() == MVT::f128) {
11958 if (!
RHS.getNode()) {
11959 assert(
LHS.getValueType() ==
Op.getValueType() &&
11960 "Unexpected setcc expansion!");
11965 if (
LHS.getValueType().isInteger()) {
11981 SDValue Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, FVal, TVal, CCVal, Cmp);
11986 assert(
LHS.getValueType() == MVT::bf16 ||
LHS.getValueType() == MVT::f16 ||
11987 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
12008 Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, FVal, TVal, CC1Val, Cmp);
12018 DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, FVal, CC1Val, Cmp);
12021 Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, CS1, CC2Val, Cmp);
12031 EVT VT =
LHS.getValueType();
12032 if (VT != MVT::i32 && VT != MVT::i64)
12042 EVT OpVT =
Op.getValueType();
12051 return DAG.
getNode(AArch64ISD::CSEL,
DL, OpVT, FVal, TVal, CCVal,
12060 "function only supposed to emit natural comparisons");
12069 if (!
LHS.getValueType().isVector()) {
12108 assert(!
LHS.getValueType().isVector());
12109 assert(!
RHS.getValueType().isVector());
12113 if (!CTVal || !CFVal)
12127 bool OneNaN =
false;
12143 bool ShouldInvert =
false;
12152 if (!Cmp2 && !ShouldInvert)
12169SDValue AArch64TargetLowering::LowerSELECT_CC(
12175 if (
LHS.getValueType() == MVT::f128) {
12180 if (!
RHS.getNode()) {
12187 if ((
LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
12188 LHS.getValueType() == MVT::bf16) {
12194 if (
LHS.getValueType().isInteger()) {
12196 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
12208 LHS.getValueType() ==
RHS.getValueType()) {
12209 EVT VT =
LHS.getValueType();
12215 Shift = DAG.
getNOT(
DL, Shift, VT);
12229 uint64_t SignBitPos;
12231 EVT TestVT =
LHS.getValueType();
12235 LHS, SignBitConst);
12263    unsigned Opcode = AArch64ISD::CSEL;
12271    } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) {
12291    } else if (CTVal && CFVal) {
12299      if (TrueVal == ~FalseVal) {
12300        Opcode = AArch64ISD::CSINV;
12301      } else if (FalseVal > std::numeric_limits<int64_t>::min() &&
12302                 TrueVal == -FalseVal) {
12303        Opcode = AArch64ISD::CSNEG;
12313        if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
12314          Opcode = AArch64ISD::CSINC;
12316          if (TrueVal32 > FalseVal32) {
12322        const uint64_t TrueVal64 = TrueVal;
12323        const uint64_t FalseVal64 = FalseVal;
12325        if ((TrueVal64 == FalseVal64 + 1) || (TrueVal64 + 1 == FalseVal64)) {
12326          Opcode = AArch64ISD::CSINC;
12328          if (TrueVal > FalseVal) {
12341    if (Opcode != AArch64ISD::CSEL) {
12354    if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
12359      if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
12361      else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
12363    } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
12364      assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
12369      Opcode = AArch64ISD::CSINV;
12378    return DAG.getNode(Opcode, DL, VT, TVal, FVal, CCVal, Cmp);
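  // --- standalone reference sketch (not part of this lowering) ---------------
  // The opcode choice above relies on the identities of the conditional-select
  // family: CSINC yields c ? a : b + 1, CSINV yields c ? a : ~b, and CSNEG
  // yields c ? a : -b. When the two constants of a select are related by +1, ~
  // or unary minus, a single instruction with both source operands tied to one
  // register (and possibly an inverted condition) suffices. Names below are
  // illustrative; the swap/invert bookkeeping of the real lowering is omitted.
  #include <cstdint>
  #include <cstdio>

  static int64_t csinc(bool C, int64_t A, int64_t B) { return C ? A : B + 1; }
  static int64_t csinv(bool C, int64_t A, int64_t B) { return C ? A : ~B; }
  static int64_t csneg(bool C, int64_t A, int64_t B) { return C ? A : -B; }

  // select(C, F + 1, F), select(C, ~F, F) and select(C, -F, F), each using a
  // single source value F and an inverted condition.
  static int64_t selInc(bool C, int64_t F) { return csinc(!C, F, F); }
  static int64_t selInv(bool C, int64_t F) { return csinv(!C, F, F); }
  static int64_t selNeg(bool C, int64_t F) { return csneg(!C, F, F); }

  int main() {
    std::printf("%lld %lld\n", (long long)selInc(true, 6), (long long)selInc(false, 6)); // 7 6
    std::printf("%lld %lld\n", (long long)selInv(true, 6), (long long)selInv(false, 6)); // -7 6
    std::printf("%lld %lld\n", (long long)selNeg(true, 6), (long long)selNeg(false, 6)); // -6 6
    return 0;
  }
  // ----------------------------------------------------------------------------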
12382 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::f32 ||
12383 LHS.getValueType() == MVT::f64);
12390 if (Subtarget->isNeonAvailable() &&
all_of(
Users, [](
const SDNode *U) {
12391 switch (
U->getOpcode()) {
12396 case AArch64ISD::DUP:
12414 if (
Flags.hasNoSignedZeros()) {
12418 if (RHSVal && RHSVal->
isZero()) {
12426 CFVal && CFVal->
isZero() &&
12434 SDValue CS1 = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, FVal, CC1Val, Cmp);
12440 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, CS1, CC2Val, Cmp);
12449 EVT Ty =
Op.getValueType();
12452 auto Idx =
Op.getConstantOperandAPInt(2);
12453 int64_t IdxVal = Idx.getSExtValue();
12455 "Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
12464 std::optional<unsigned> PredPattern;
12476 return DAG.
getNode(AArch64ISD::SPLICE,
DL, Ty, Pred,
Op.getOperand(0),
12496 SDNodeFlags
Flags =
Op->getFlags();
12498 return LowerSELECT_CC(CC,
LHS,
RHS, TVal, FVal,
Op->users(), Flags,
DL, DAG);
12508 EVT Ty =
Op.getValueType();
12509 if (Ty == MVT::aarch64svcount) {
12546 return DAG.
getNode(AArch64ISD::CSEL,
DL,
Op.getValueType(), TVal, FVal,
12565 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
12572 Op->getFlags(),
DL, DAG);
12574 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
12589 !Subtarget->isTargetMachO())
12590 return getAddrLarge(JT, DAG);
12592 return getAddrTiny(JT, DAG);
12593 return getAddr(JT, DAG);
12606 AFI->setJumpTableEntryInfo(JTI, 4,
nullptr);
12611 "aarch64-jump-table-hardening")) {
12613 if (Subtarget->isTargetMachO()) {
12618 assert(Subtarget->isTargetELF() &&
12619 "jump table hardening only supported on MachO/ELF");
12650 std::optional<uint16_t> BADisc =
12651 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(MF.
getFunction());
12662 {Dest,
Key, Disc, AddrDisc, Chain});
12672 if (Subtarget->isTargetMachO()) {
12673 return getGOT(CP, DAG);
12676 return getAddrLarge(CP, DAG);
12678 return getAddrTiny(CP, DAG);
12680 return getAddr(CP, DAG);
12688 if (std::optional<uint16_t> BADisc =
12689 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(
12703 {TargetBA,
Key, AddrDisc, Disc});
12711 return getAddrLarge(BAN, DAG);
12713 return getAddrTiny(BAN, DAG);
12715 return getAddr(BAN, DAG);
12720 AArch64FunctionInfo *FuncInfo =
12729 MachinePointerInfo(SV));
12735 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
12739 if (Subtarget->isWindowsArm64EC()) {
12745 uint64_t StackOffset;
12760 MachinePointerInfo(SV));
12768 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
12769 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
12784 MachinePointerInfo(SV), Align(PtrSize)));
12801 MachinePointerInfo(SV, Offset),
12819 MachinePointerInfo(SV, Offset),
12829 GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
12837 VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
12847 if (Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()))
12848 return LowerWin64_VASTART(Op, DAG);
12849 else if (Subtarget->isTargetDarwin())
12850 return LowerDarwin_VASTART(Op, DAG);
12852 return LowerAAPCS_VASTART(Op, DAG);
12860 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
12861 unsigned VaListSize =
12862 (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
12864 : Subtarget->isTargetILP32() ? 20 : 32;
12870 Align(PtrSize), false, false, nullptr,
12871 std::nullopt, MachinePointerInfo(DestSV),
12872 MachinePointerInfo(SrcSV));
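// VASTART dispatches on the ABI: the Win64 variadic convention and Darwin
// use a single-pointer va_list, while the generic AAPCS path fills the full
// structure; VACOPY therefore copies either one pointer or the 32-byte
// (20-byte on ILP32) AAPCS va_list.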
12876 assert(Subtarget->isTargetDarwin() &&
12877 "automatic va_arg instruction only works on Darwin");
12880 EVT VT = Op.getValueType();
12884 MaybeAlign Align(Op.getConstantOperandVal(3));
12885 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
12889 DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
12895 "currently not supported");
12897 if (Align && *Align > MinSlotSize) {
12913 ArgSize = std::max(ArgSize, MinSlotSize);
12914 bool NeedFPTrunc = false;
12917 NeedFPTrunc = true;
12927 DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
12933 DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
12943 return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
12951 EVT VT = Op.getValueType();
12953 unsigned Depth = Op.getConstantOperandVal(0);
12958 MachinePointerInfo());
12960 if (Subtarget->isTargetILP32())
12976 #define GET_REGISTER_MATCHER
12977 #include "AArch64GenAsmMatcher.inc"
12984 if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
12986 unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
12987 if (!Subtarget->isXRegisterReserved(DwarfRegNum) &&
12988 !MRI->isReservedReg(MF, Reg))
12998 EVT VT = Op.getValueType();
13014 EVT VT = Op.getValueType();
13016 unsigned Depth = Op.getConstantOperandVal(0);
13019 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
13036 if (Subtarget->hasPAuth()) {
13064 bool OptForSize) const {
13065 bool IsLegal = false;
13074 const APInt ImmInt = Imm.bitcastToAPInt();
13075 if (VT == MVT::f64)
13077 else if (VT == MVT::f32)
13079 else if (VT == MVT::f16 || VT == MVT::bf16)
13089 if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
13098 "Should be able to build any value with at most 4 moves");
13099 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 4 : 2));
13100 IsLegal = Insn.size() <= Limit;
13104 << " imm value: "; Imm.dump(););
13116 if ((ST->hasNEON() &&
13117 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
13118 VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
13119 VT == MVT::v4f32)) ||
13121 (VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
13128 constexpr unsigned AccurateBits = 8;
13130 ExtraSteps = DesiredBits <= AccurateBits
13135 return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
13145 EVT VT = Op.getValueType();
13152 AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
13161 bool Reciprocal) const {
13165 DAG, ExtraSteps)) {
13170 SDNodeFlags Flags =
13175 for (int i = ExtraSteps; i > 0; --i) {
13178 Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
13193 int &ExtraSteps) const {
13196 DAG, ExtraSteps)) {
13204 for (int i = ExtraSteps; i > 0; --i) {
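// The FRSQRTE/FRECPE estimates are accurate to roughly 8 bits, so the number
// of Newton-Raphson refinement steps is derived from the precision the
// caller requests; each extra step issues an FRSQRTS (or FRECPS) iteration.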
13244 const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
13252 if (!Subtarget->hasFPARMv8())
13277 static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
13280 (Constraint[1] != 'p' && Constraint[1] != 'z'))
13281 return std::nullopt;
13283 bool IsPredicate = Constraint[1] == 'p';
13284 Constraint = Constraint.substr(2, Constraint.size() - 3);
13285 bool IsPredicateAsCount = IsPredicate && Constraint.starts_with("n");
13286 if (IsPredicateAsCount)
13291 return std::nullopt;
13293 if (IsPredicateAsCount)
13294 return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
13296 return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
13297 return std::make_pair(AArch64::Z0 + V, &AArch64::ZPRRegClass);
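// Explicit SVE register constraints are parsed here: a 'p' prefix selects a
// predicate register (Pn, or PNn when the "n" predicate-as-counter form is
// used) and a 'z' prefix selects a scalable vector register Zn.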
13300 static std::optional<PredicateConstraint>
13311 if (VT != MVT::aarch64svcount &&
13315 switch (Constraint) {
13317 return VT == MVT::aarch64svcount ? &AArch64::PNR_p8to15RegClass
13318 : &AArch64::PPR_p8to15RegClass;
13320 return VT == MVT::aarch64svcount ? &AArch64::PNR_3bRegClass
13321 : &AArch64::PPR_3bRegClass;
13323 return VT == MVT::aarch64svcount ? &AArch64::PNRRegClass
13324 : &AArch64::PPRRegClass;
13332 static std::optional<ReducedGprConstraint>
13345 switch (Constraint) {
13347 return &AArch64::MatrixIndexGPR32_8_11RegClass;
13349 return &AArch64::MatrixIndexGPR32_12_15RegClass;
13383 return DAG.getNode(AArch64ISD::CSINC, DL, MVT::i32,
13386 getCondCode(DAG, getInvertedCondCode(CC)), NZCV);
13390 SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
13392 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
13397 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
13398 OpInfo.ConstraintVT.getSizeInBits() < 8)
13413 if (OpInfo.ConstraintVT.getSizeInBits() <= 32)
13424 AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
13425 if (Constraint.size() == 1) {
13426 switch (Constraint[0]) {
13463 AArch64TargetLowering::getSingleConstraintMatchWeight(
13464 AsmOperandInfo &info, const char *constraint) const {
13466 Value *CallOperandVal = info.CallOperandVal;
13469 if (!CallOperandVal)
13473 switch (*constraint) {
13495std::pair<unsigned, const TargetRegisterClass *>
13496AArch64TargetLowering::getRegForInlineAsmConstraint(
13498 if (Constraint.
size() == 1) {
13499 switch (Constraint[0]) {
13502 return std::make_pair(0U,
nullptr);
13504 return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
13506 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
13507 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
13509 if (!Subtarget->hasFPARMv8())
13513 return std::make_pair(0U, &AArch64::ZPRRegClass);
13514 return std::make_pair(0U,
nullptr);
13516 if (VT == MVT::Other)
13520 return std::make_pair(0U, &AArch64::FPR16RegClass);
13522 return std::make_pair(0U, &AArch64::FPR32RegClass);
13524 return std::make_pair(0U, &AArch64::FPR64RegClass);
13526 return std::make_pair(0U, &AArch64::FPR128RegClass);
13532 if (!Subtarget->hasFPARMv8())
13535 return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
13537 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
13540 if (!Subtarget->hasFPARMv8())
13543 return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
13551 if (AArch64::ZPRRegClass.hasSubClassEq(
P->second) &&
13552 !Subtarget->isSVEorStreamingSVEAvailable())
13553 return std::make_pair(
TRI->getSubReg(
P->first, AArch64::zsub),
13554 &AArch64::FPR128RegClass);
13559 return std::make_pair(0U, RegClass);
13563 return std::make_pair(0U, RegClass);
13565 if (StringRef(
"{cc}").equals_insensitive(Constraint) ||
13567 return std::make_pair(
unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
13569 if (Constraint ==
"{za}") {
13570 return std::make_pair(
unsigned(AArch64::ZA), &AArch64::MPRRegClass);
13573 if (Constraint ==
"{zt0}") {
13574 return std::make_pair(
unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
13579 std::pair<unsigned, const TargetRegisterClass *> Res;
13584 unsigned Size = Constraint.
size();
13585 if ((
Size == 4 ||
Size == 5) && Constraint[0] ==
'{' &&
13586 tolower(Constraint[1]) ==
'v' && Constraint[
Size - 1] ==
'}') {
13589 if (!
Failed && RegNo >= 0 && RegNo <= 31) {
13594 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
13595 Res.second = &AArch64::FPR64RegClass;
13597 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
13598 Res.second = &AArch64::FPR128RegClass;
13604 if (Res.second && !Subtarget->hasFPARMv8() &&
13605 !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
13606 !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
13607 return std::make_pair(0U,
nullptr);
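// Besides the single-letter classes, getRegForInlineAsmConstraint recognises
// the named registers "{cc}" (NZCV), "{za}" and "{zt0}", plus GCC-style
// "{vN}" aliases that resolve to the FPR64 or FPR128 class depending on the
// value type; FP/SIMD classes are rejected when FPARMv8 is unavailable.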
13614 bool AllowUnknown)
const {
13615 if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
13616 return EVT(MVT::i64x8);
13623void AArch64TargetLowering::LowerAsmOperandForConstraint(
13629 if (Constraint.
size() != 1)
13632 char ConstraintLetter = Constraint[0];
13633 switch (ConstraintLetter) {
13644 if (
Op.getValueType() == MVT::i64)
13645 Result = DAG.
getRegister(AArch64::XZR, MVT::i64);
13647 Result = DAG.
getRegister(AArch64::WZR, MVT::i32);
13669 switch (ConstraintLetter) {
13683 CVal =
C->getSExtValue();
13714 if ((CVal & 0xFFFF) == CVal)
13716 if ((CVal & 0xFFFF0000ULL) == CVal)
13718 uint64_t NCVal = ~(uint32_t)CVal;
13719 if ((NCVal & 0xFFFFULL) == NCVal)
13721 if ((NCVal & 0xFFFF0000ULL) == NCVal)
13728 if ((CVal & 0xFFFFULL) == CVal)
13730 if ((CVal & 0xFFFF0000ULL) == CVal)
13732 if ((CVal & 0xFFFF00000000ULL) == CVal)
13734 if ((CVal & 0xFFFF000000000000ULL) == CVal)
13736 uint64_t NCVal = ~CVal;
13737 if ((NCVal & 0xFFFFULL) == NCVal)
13739 if ((NCVal & 0xFFFF0000ULL) == NCVal)
13741 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
13743 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
13757 Ops.push_back(Result);
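// The immediate checks above accept a constant only if it (or its bitwise
// complement NCVal) fits a single 16-bit chunk at shift position 0/16 for
// 32-bit operands, or 0/16/32/48 for 64-bit operands, i.e. values that a
// single MOVZ or MOVN can materialise.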
13794 EVT VT =
Op.getValueType();
13796 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
13800 if (VT != MVT::v16i8 && VT != MVT::v8i8)
13804 assert((NumElts == 8 || NumElts == 16) &&
13805 "Need to have exactly 8 or 16 elements in vector.");
13811 for (
unsigned i = 0; i < NumElts; ++i) {
13818 SourceVec = OperandSourceVec;
13819 else if (SourceVec != OperandSourceVec)
13832 }
else if (!AndMaskConstants.
empty()) {
13852 if (!MaskSourceVec) {
13856 }
else if (MaskSourceVec != MaskSource->
getOperand(0)) {
13870 if (!AndMaskConstants.
empty())
13877 SourceVec, MaskSourceVec);
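// ReconstructShuffle (below) turns a BUILD_VECTOR of extract_element nodes
// into a vector_shuffle: each distinct source vector is tracked with the
// window of lanes it contributes, trimmed with AArch64ISD::EXT where the
// span allows, while the three/four-source case falls back to a NEON
// tbl3/tbl4 lookup.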
13885 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
13887 EVT VT = Op.getValueType();
13889 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
13892 struct ShuffleSourceInfo {
13907 ShuffleSourceInfo(SDValue Vec)
13908 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
13909 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
13917 for (unsigned i = 0; i < NumElts; ++i) {
13923 V.getOperand(0).getValueType().isScalableVector()) {
13925 dbgs() << "Reshuffle failed: "
13926 "a shuffle can only come from building a vector from "
13927 "various elements of other fixed-width vectors, provided "
13928 "their indices are constant\n");
13934 auto Source = find(Sources, SourceVec);
13935 if (Source == Sources.end())
13936 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
13939 unsigned EltNo = V.getConstantOperandVal(1);
13940 Source->MinElt = std::min(Source->MinElt, EltNo);
13941 Source->MaxElt = std::max(Source->MaxElt, EltNo);
13946 if ((Sources.size() == 3 || Sources.size() == 4) && NumElts > 4) {
13951 for (
unsigned I = 0;
I < NumElts; ++
I) {
13954 for (
unsigned OF = 0; OF < OutputFactor; OF++)
13955 Mask.push_back(-1);
13961 unsigned Lane = V.getConstantOperandVal(1);
13962 for (
unsigned S = 0; S < Sources.
size(); S++) {
13963 if (V.getOperand(0) == Sources[S].Vec) {
13964 unsigned InputSize = Sources[S].Vec.getScalarValueSizeInBits();
13965 unsigned InputBase = 16 * S + Lane * InputSize / 8;
13966 for (
unsigned OF = 0; OF < OutputFactor; OF++)
13967 Mask.push_back(InputBase + OF);
13977 ? Intrinsic::aarch64_neon_tbl3
13978 : Intrinsic::aarch64_neon_tbl4,
13980 for (
unsigned i = 0; i < Sources.
size(); i++) {
13981 SDValue Src = Sources[i].Vec;
13982 EVT SrcVT = Src.getValueType();
13985 "Expected a legally typed vector");
13993 for (
unsigned i = 0; i < Mask.size(); i++)
13995 assert((Mask.size() == 8 || Mask.size() == 16) &&
13996 "Expected a v8i8 or v16i8 Mask");
13998 Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8,
DL, TBLMask));
14002 Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
14006 if (Sources.size() > 2) {
14007 LLVM_DEBUG(dbgs() << "Reshuffle failed: currently only do something "
14008 << "sensible when at most two source vectors are "
14016 for (auto &Source : Sources) {
14017 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
14018 if (SrcEltTy.bitsLT(SmallestEltTy)) {
14019 SmallestEltTy = SrcEltTy;
14022 unsigned ResMultiplier =
14031 for (auto &Src : Sources) {
14032 EVT SrcVT = Src.ShuffleVec.getValueType();
14045 assert(2 * SrcVTSize == VTSize);
14050 DAG.getPOISON(Src.ShuffleVec.getValueType()));
14056 dbgs() << "Reshuffle failed: result vector too small to extract\n");
14060 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
14062 dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
14066 if (Src.MinElt >= NumSrcElts) {
14071 Src.WindowBase = -NumSrcElts;
14072 } else if (Src.MaxElt < NumSrcElts) {
14089 dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
14090 "for SVE vectors.");
14095 DAG.getNode(AArch64ISD::EXT, DL, DestVT, VEXTSrc1, VEXTSrc2,
14097 Src.WindowBase = -Src.MinElt;
14104 for (auto &Src : Sources) {
14106 if (SrcEltTy == SmallestEltTy)
14111 DAG.getNode(AArch64ISD::NVCAST, DL, ShuffleVT, Src.ShuffleVec);
14117 Src.WindowBase *= Src.WindowScale;
14122 for (auto Src : Sources)
14123 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
14131 if (Entry.isUndef())
14134 auto Src = find(Sources, Entry.getOperand(0));
14143 int LanesDefined = BitsDefined / BitsPerShuffleLane;
14147 int *LaneMask = &Mask[i * ResMultiplier];
14149 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
14150 ExtractBase += NumElts * (Src - Sources.begin());
14151 for (int j = 0; j < LanesDefined; ++j)
14152 LaneMask[j] = ExtractBase + j;
14157 LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
14163 for (unsigned i = 0; i < Sources.size(); ++i)
14170 V = DAG.getNode(AArch64ISD::NVCAST, DL, VT, Shuffle);
14176 dbgs() << "Reshuffle, creating node: "; V.dump(););
14195 unsigned ExpectedElt = Imm;
14196 for (unsigned i = 1; i < NumElts; ++i) {
14200 if (ExpectedElt == NumElts)
14205 if (ExpectedElt != static_cast<unsigned>(M[i]))
14216 if (V.getValueType() != MVT::v16i8)
14218 assert(V.getNumOperands() == 16 && "Expected 16 operands on the BUILDVECTOR");
14220 for (unsigned X = 0; X < 4; X++) {
14232 for (unsigned Y = 1; Y < 4; Y++) {
14248 V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
14249 V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
14251 if (V.getValueType() == MVT::v4i32)
14267 unsigned &DupLaneOp) {
14269 "Only possible block sizes for wide DUP are: 16, 32, 64");
14288 for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
14289 for (size_t I = 0; I < NumEltsPerBlock; I++) {
14290 int Elt = M[BlockIndex * NumEltsPerBlock + I];
14294 if ((unsigned)Elt >= SingleVecNumElements)
14296 if (BlockElts[I] < 0)
14297 BlockElts[I] = Elt;
14298 else if (BlockElts[I] != Elt)
14307 auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
14308 assert(FirstRealEltIter != BlockElts.end() &&
14309 "Shuffle with all-undefs must have been caught by previous cases, "
14311 if (FirstRealEltIter == BlockElts.end()) {
14317 size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
14319 if ((unsigned)*FirstRealEltIter < FirstRealIndex)
14322 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
14325 if (Elt0 % NumEltsPerBlock != 0)
14329 for (size_t I = 0; I < NumEltsPerBlock; I++)
14330 if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
14333 DupLaneOp = Elt0 / NumEltsPerBlock;
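// A "wide DUP" mask repeats the same 16/32/64-bit block of the first input
// across the whole result; every block is compared against the first defined
// one and, on success, DupLaneOp reports which block to duplicate so the
// shuffle can become a single wider-element DUPLANE.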
14342 const int *FirstRealElt =
find_if(M, [](
int Elt) {
return Elt >= 0; });
14347 APInt ExpectedElt =
APInt(MaskBits, *FirstRealElt + 1,
false,
14351 bool FoundWrongElt = std::any_of(FirstRealElt + 1, M.end(), [&](
int Elt) {
14352 return Elt != ExpectedElt++ && Elt >= 0;
14384 if (NumElts % 2 != 0)
14386 WhichResult = (M[0] == 0 ? 0 : 1);
14387 unsigned Idx = WhichResult * NumElts / 2;
14388 for (
unsigned i = 0; i != NumElts; i += 2) {
14389 if ((M[i] >= 0 && (
unsigned)M[i] != Idx) ||
14390 (M[i + 1] >= 0 && (
unsigned)M[i + 1] != Idx))
14403 WhichResult = (M[0] == 0 ? 0 : 1);
14404 for (
unsigned j = 0; j != 2; ++j) {
14405 unsigned Idx = WhichResult;
14406 for (
unsigned i = 0; i != Half; ++i) {
14407 int MIdx = M[i + j * Half];
14408 if (MIdx >= 0 && (
unsigned)MIdx != Idx)
14422 if (NumElts % 2 != 0)
14424 WhichResult = (M[0] == 0 ? 0 : 1);
14425 for (
unsigned i = 0; i < NumElts; i += 2) {
14426 if ((M[i] >= 0 && (
unsigned)M[i] != i + WhichResult) ||
14427 (M[i + 1] >= 0 && (
unsigned)M[i + 1] != i + WhichResult))
14434 bool &DstIsLeft, int &Anomaly) {
14435 if (M.size() != static_cast<size_t>(NumInputElements))
14438 int NumLHSMatch = 0, NumRHSMatch = 0;
14439 int LastLHSMismatch = -1, LastRHSMismatch = -1;
14441 for (int i = 0; i < NumInputElements; ++i) {
14451 LastLHSMismatch = i;
14453 if (M[i] == i + NumInputElements)
14456 LastRHSMismatch = i;
14459 if (NumLHSMatch == NumInputElements - 1) {
14461 Anomaly = LastLHSMismatch;
14463 } else if (NumRHSMatch == NumInputElements - 1) {
14465 Anomaly = LastRHSMismatch;
14478 for (int I = 0, E = NumElts / 2; I != E; I++) {
14483 int Offset = NumElts / 2;
14484 for (int I = NumElts / 2, E = NumElts; I != E; I++) {
14485 if (Mask[I] != I + SplitLHS * Offset)
14494 EVT VT = Op.getValueType();
14529 unsigned OpNum = (PFEntry >> 26) & 0x0F;
14530 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
14531 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
14553 if (LHSID == (1 * 9 + 2) * 9 + 3)
14555 assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
14559 if (OpNum == OP_MOVLANE) {
14561 auto getPFIDLane = [](unsigned ID, int Elt) -> int {
14562 assert(Elt < 4 && "Expected Perfect Lanes to be less than 4");
14568 return (ID % 9 == 8) ? -1 : ID % 9;
14577 assert(RHSID < 8 && "Expected a lane index for RHSID!");
14578 unsigned ExtLane = 0;
14584 int MaskElt = getPFIDLane(ID, (RHSID & 0x01) << 1) >> 1;
14586 MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
14587 assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
14588 ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
14589 Input = MaskElt < 2 ? V1 : V2;
14595 "Expected 16 or 32 bit shuffle elements");
14600 int MaskElt = getPFIDLane(ID, RHSID);
14601 assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
14602 ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
14603 Input = MaskElt < 4 ? V1 : V2;
14605 if (VT == MVT::v4i16) {
14611 Input.getValueType().getVectorElementType(),
14633 return DAG.getNode(AArch64ISD::REV64, DL, VT, OpLHS);
14638 return DAG.getNode(AArch64ISD::REV32, DL, VT, OpLHS);
14641 return DAG.getNode(AArch64ISD::REV16, DL, VT, OpLHS);
14648 if (EltTy == MVT::i8)
14649 Opcode = AArch64ISD::DUPLANE8;
14650 else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
14651 Opcode = AArch64ISD::DUPLANE16;
14652 else if (EltTy == MVT::i32 || EltTy == MVT::f32)
14653 Opcode = AArch64ISD::DUPLANE32;
14654 else if (EltTy == MVT::i64 || EltTy == MVT::f64)
14655 Opcode = AArch64ISD::DUPLANE64;
14662 return DAG.getNode(Opcode, DL, VT, OpLHS, Lane);
14668 return DAG.getNode(AArch64ISD::EXT, DL, VT, OpLHS, OpRHS,
14672 return DAG.getNode(AArch64ISD::UZP1, DL, VT, OpLHS, OpRHS);
14674 return DAG.getNode(AArch64ISD::UZP2, DL, VT, OpLHS, OpRHS);
14676 return DAG.getNode(AArch64ISD::ZIP1, DL, VT, OpLHS, OpRHS);
14678 return DAG.getNode(AArch64ISD::ZIP2, DL, VT, OpLHS, OpRHS);
14680 return DAG.getNode(AArch64ISD::TRN1, DL, VT, OpLHS, OpRHS);
14682 return DAG.getNode(AArch64ISD::TRN2, DL, VT, OpLHS, OpRHS);
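// Each perfect-shuffle table entry packs an operation in bits [29:26] and
// two 13-bit operand IDs; an ID is a base-9 encoding of four lanes (8 means
// undef), and the recursive expansion above emits REV/DUPLANE/EXT/UZP/ZIP/
// TRN nodes for the decoded operation.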
14693 EVT EltVT =
Op.getValueType().getVectorElementType();
14706 MVT IndexVT = MVT::v8i8;
14707 unsigned IndexLen = 8;
14708 if (
Op.getValueSizeInBits() == 128) {
14709 IndexVT = MVT::v16i8;
14714 for (
int Val : ShuffleMask) {
14715 for (
unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
14716 unsigned Offset = Byte + Val * BytesPerElt;
14719 if (IsUndefOrZero &&
Offset >= IndexLen)
14729 if (IsUndefOrZero) {
14738 if (IndexLen == 8) {
14763 if (EltType == MVT::i8)
14764 return AArch64ISD::DUPLANE8;
14765 if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
14766 return AArch64ISD::DUPLANE16;
14767 if (EltType == MVT::i32 || EltType == MVT::f32)
14768 return AArch64ISD::DUPLANE32;
14769 if (EltType == MVT::i64 || EltType == MVT::f64)
14770 return AArch64ISD::DUPLANE64;
14778 auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
14789 unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
14791 if (ExtIdxInBits % CastedEltBitWidth != 0)
14799 LaneC += ExtIdxInBits / CastedEltBitWidth;
14806 unsigned SrcVecNumElts =
14813 if (getScaledOffsetDup(V, Lane, CastVT)) {
14814 V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
14816 V.getOperand(0).getValueType().is128BitVector()) {
14819 Lane += V.getConstantOperandVal(1);
14820 V = V.getOperand(0);
14846 EVT VT = Op.getValueType();
14856 if (ElementSize > 32 || ElementSize == 1)
14886 EVT VT = Op.getValueType();
14903 for (unsigned I = 0; I < 16; I++) {
14904 if (ShuffleMask[I] < 16)
14910 TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, DL, MVT::i32);
14924 AArch64TargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
14927 EVT VT = Op.getValueType();
14931 unsigned UnpackOpcode = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
14939 Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv8i16, Val);
14940 if (VT == MVT::nxv8i16)
14944 Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv4i32, Val);
14945 if (VT == MVT::nxv4i32)
14949 Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv2i64, Val);
14950 assert(VT == MVT::nxv2i64 && "Unexpected result type!");
14961 AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(SDValue Op,
14964 EVT VT = Op.getValueType();
14967 return LowerEXTEND_VECTOR_INREG(Op, DAG);
14972 "Unexpected extension factor.");
14979 DAG.getNode(AArch64ISD::ZIP1, DL, SrcVT, SrcOp, Zeros));
14985 EVT VT = Op.getValueType();
14990 return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
14996 ArrayRef<int> ShuffleMask = SVN->getMask();
15003 "Unexpected VECTOR_SHUFFLE mask size!");
15029 for (unsigned LaneSize : {64U, 32U, 16U}) {
15032 unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
15033 : LaneSize == 32 ? AArch64ISD::DUPLANE32
15034 : AArch64ISD::DUPLANE16;
15049 if (isREVMask(ShuffleMask, EltSize, NumElts, 64))
15051 if (isREVMask(ShuffleMask, EltSize, NumElts, 32))
15053 if (isREVMask(ShuffleMask, EltSize, NumElts, 16))
15056 if (((NumElts == 8 && EltSize == 16) || (NumElts == 16 && EltSize == 8)) &&
15059 return DAG.getNode(AArch64ISD::EXT, DL, VT, Rev, Rev,
15063 bool ReverseEXT = false;
15065 if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
15077 unsigned WhichResult;
15078 unsigned OperandOrder;
15079 if (isZIPMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
15080 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
15082 OperandOrder == 0 ? V2 : V1);
15084 if (isUZPMask(ShuffleMask, NumElts, WhichResult)) {
15085 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
15088 if (isTRNMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
15089 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
15091 OperandOrder == 0 ? V2 : V1);
15095 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
15099 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
15103 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
15113 if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
15114 SDValue DstVec = DstIsLeft ? V1 : V2;
15118 int SrcLane = ShuffleMask[Anomaly];
15119 if (SrcLane >= NumInputElements) {
15121 SrcLane -= NumElts;
15128 ScalarVT = MVT::i32;
15141 if (NumElts == 4) {
15142 unsigned PFIndexes[4];
15143 for (unsigned i = 0; i != 4; ++i) {
15144 if (ShuffleMask[i] < 0)
15147 PFIndexes[i] = ShuffleMask[i];
15151 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
15152 PFIndexes[2] * 9 + PFIndexes[3];
15162 "Expected larger vector element sizes to be handled already");
15164 for (int M : ShuffleMask)
15166 M >= static_cast<int>(NumElts) ? 0 : 0xffffffff, DL, MVT::i32));
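// VECTOR_SHUFFLE lowering tries progressively more general patterns: splats
// and wide DUPLANEs, REV64/32/16, a single EXT, ZIP/UZP/TRN (including the
// single-operand forms), an INS of one out-of-place lane, the perfect
// shuffle table for 4-element vectors, and finally a generic TBL lookup.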
15180 EVT VT = Op.getValueType();
15183 return LowerToScalableOp(Op, DAG);
15186 "Unexpected vector type!");
15201 if (VT == MVT::nxv1i1)
15213 EVT VT = Op.getValueType();
15226 if (CIdx && (CIdx->getZExtValue() <= 3)) {
15228 return DAG.getNode(AArch64ISD::DUPLANE128, DL, VT, Op.getOperand(1), CI);
15250 SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
15256 APInt &UndefBits) {
15258 APInt SplatBits, SplatUndef;
15259 unsigned SplatBitSize;
15261 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
15264 for (unsigned i = 0; i < NumSplats; ++i) {
15265 CnstBits <<= SplatBitSize;
15266 UndefBits <<= SplatBitSize;
15268 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
15279 const APInt &Bits) {
15280 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15282 EVT VT = Op.getValueType();
15291 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
15302 EVT VT = Op.getValueType();
15307 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15310 bool isAdvSIMDModImm = false;
15330 if (isAdvSIMDModImm) {
15344 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
15355 EVT VT = Op.getValueType();
15360 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15363 bool isAdvSIMDModImm = false;
15375 if (isAdvSIMDModImm) {
15389 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
15399 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15401 EVT VT = Op.getValueType();
15403 bool isAdvSIMDModImm = false;
15415 if (isAdvSIMDModImm) {
15420 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
15429 const APInt &Bits) {
15430 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15432 EVT VT = Op.getValueType();
15441 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
15450 const APInt &Bits) {
15451 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15453 EVT VT = Op.getValueType();
15456 bool isAdvSIMDModImm = false;
15460 MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
15465 MovTy = MVT::v2f64;
15468 if (isAdvSIMDModImm) {
15472 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
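// These helpers try the AdvSIMD "modified immediate" encodings in turn: the
// splat constant must repeat across both 64-bit halves, and each helper
// checks one MOVI/MVNI/FMOV immediate form before wrapping the materialised
// value in an NVCAST back to the requested vector type.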
15492 for (unsigned i = 1; i < NumElts; ++i)
15501 while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
15502 N = N.getOperand(0);
15508 unsigned NumElts = N.getValueType().getVectorMinNumElements();
15511 while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
15512 N = N.getOperand(0);
15515 if (N.getValueType().getVectorMinNumElements() < NumElts)
15525 if (N.getOpcode() == AArch64ISD::PTRUE &&
15526 N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
15527 return N.getValueType().getVectorMinNumElements() >= NumElts;
15539 EVT VT = N->getValueType(0);
15549 SDValue FirstOp = N->getOperand(0);
15550 unsigned FirstOpc = FirstOp.getOpcode();
15551 SDValue SecondOp = N->getOperand(1);
15552 unsigned SecondOpc = SecondOp.getOpcode();
15559 if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) &&
15560 (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR ||
15561 SecondOpc == AArch64ISD::SHL_PRED ||
15562 SecondOpc == AArch64ISD::SRL_PRED)) {
15566 } else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) &&
15567 (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR ||
15568 FirstOpc == AArch64ISD::SHL_PRED ||
15569 FirstOpc == AArch64ISD::SRL_PRED)) {
15576 bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR ||
15577 Shift.getOpcode() == AArch64ISD::SRL_PRED;
15578 bool ShiftHasPredOp = Shift.getOpcode() == AArch64ISD::SHL_PRED ||
15579 Shift.getOpcode() == AArch64ISD::SRL_PRED;
15583 if (ShiftHasPredOp) {
15589 C2 = C.getZExtValue();
15592 C2 = C2node->getZExtValue();
15606 assert(C1nodeImm && C1nodeShift);
15608 C1AsAPInt = C1AsAPInt.zextOrTrunc(ElemSizeInBits);
15614 if (C2 > ElemSizeInBits)
15619 if (C1AsAPInt != RequiredC1)
15627 unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
15632 EVT VT = N->getValueType(0);
15633 assert(VT.isVector() && "Expected vector type in tryLowerToBSL\n");
15651 for (int i = 1; i >= 0; --i) {
15652 for (int j = 1; j >= 0; --j) {
15678 if (Sub.getOperand(1) != Add.getOperand(0))
15681 return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
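// An OR of a masked value and a shifted value becomes a single SRI/SLI when
// the AND mask is exactly the complement of the lanes the shift writes and
// the shift amount C2 fits the element size; tryLowerToBSL instead folds
// suitable OR patterns into a BSP (bitwise select) node.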
15689 for (
int i = 1; i >= 0; --i)
15690 for (
int j = 1; j >= 0; --j) {
15701 if (!BVN0 || !BVN1)
15704 bool FoundMatch =
true;
15708 if (!CN0 || !CN1 ||
15711 FoundMatch =
false;
15726 !Subtarget->isNeonAvailable()))
15727 return LowerToScalableOp(
Op, DAG);
15736 EVT VT =
Op.getValueType();
15741 BuildVectorSDNode *BVN =
15745 LHS =
Op.getOperand(1);
15763 UndefBits, &
LHS)) ||
15779 EVT VT =
Op.getValueType();
15793 CstLane->getAPIntValue().trunc(EltTy.
getSizeInBits()).getZExtValue(),
15797 }
else if (Lane.getOpcode() ==
ISD::UNDEF) {
15800 assert(Lane.getValueType() == MVT::i32 &&
15801 "Unexpected BUILD_VECTOR operand type");
15803 Ops.push_back(Lane);
15810 EVT VT =
Op.getValueType();
15818 int32_t ImmVal, ShiftVal;
15828 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Res);
15833 EVT VT = Op.getValueType();
15835 "Expected a legal NEON vector");
15841 auto TryMOVIWithBits = [&](APInt DefBits) {
15855 APInt NotDefBits = ~DefBits;
15865 if (SDValue R = TryMOVIWithBits(DefBits))
15867 if (SDValue R = TryMOVIWithBits(UndefBits))
15875 auto TryWithFNeg = [&](APInt DefBits, MVT FVT) {
15881 unsigned NumElts = VT.getSizeInBits() / FVT.getScalarSizeInBits();
15882 for (unsigned i = 0; i < NumElts; i++)
15883 NegBits |= Neg << (FVT.getScalarSizeInBits() * i);
15884 NegBits = DefBits ^ NegBits;
15888 if (SDValue NewOp = TryMOVIWithBits(NegBits)) {
15892 AArch64ISD::NVCAST, DL, VT,
15894 DAG.getNode(AArch64ISD::NVCAST, DL, VFVT, NewOp)));
15899 if ((R = TryWithFNeg(DefBits, MVT::f32)) ||
15900 (R = TryWithFNeg(DefBits, MVT::f64)) ||
15901 (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
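// If the constant itself has no modified-immediate encoding, the same
// encodings are retried on its bitwise complement and on the value with the
// per-element sign bits flipped; a hit on the latter is materialised as a
// MOVI followed by an FNEG of the corresponding floating-point width.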
15908 SDValue AArch64TargetLowering::LowerFixedLengthBuildVectorToSVE(
15910 EVT VT = Op.getValueType();
15934 NumElems - count_if(Op->op_values(), IsExtractElt) > 4)
15941 return Op.isUndef() ? Poison
15942 : DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
15943 ContainerVT, Poison, Op, ZeroI64);
15947 while (Intermediates.size() > 1) {
15950 for (unsigned I = 0; I < Intermediates.size(); I += 2) {
15953 Intermediates[I / 2] =
15955 : DAG.getNode(AArch64ISD::ZIP1, DL, ZipVT, Op0, Op1);
15958 Intermediates.resize(Intermediates.size() / 2);
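// The SVE build-vector path scalarises each element into its own container
// vector and then zips pairs of intermediates together, halving the working
// list each round until a single interleaved result remains.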
15969 EVT VT =
Op.getValueType();
15971 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
15974 return LowerFixedLengthBuildVectorToSVE(
Op, DAG);
15992 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
15993 if (Val.isZero() || (VT.
isInteger() && Val.isAllOnes()))
15997 if (
Const->isZero() && !
Const->isNegative())
16018 bool isOnlyLowElement =
true;
16019 bool usesOnlyOneValue =
true;
16020 bool usesOnlyOneConstantValue =
true;
16022 bool AllLanesExtractElt =
true;
16023 unsigned NumConstantLanes = 0;
16024 unsigned NumDifferentLanes = 0;
16025 unsigned NumUndefLanes = 0;
16028 SmallMapVector<SDValue, unsigned, 16> DifferentValueMap;
16029 unsigned ConsecutiveValCount = 0;
16031 for (
unsigned i = 0; i < NumElts; ++i) {
16034 AllLanesExtractElt =
false;
16040 isOnlyLowElement =
false;
16045 ++NumConstantLanes;
16046 if (!ConstantValue.
getNode())
16048 else if (ConstantValue != V)
16049 usesOnlyOneConstantValue =
false;
16052 if (!
Value.getNode())
16054 else if (V !=
Value) {
16055 usesOnlyOneValue =
false;
16056 ++NumDifferentLanes;
16059 if (PrevVal != V) {
16060 ConsecutiveValCount = 0;
16075 DifferentValueMap[
V] = ++ConsecutiveValCount;
16078 if (!
Value.getNode()) {
16080 dbgs() <<
"LowerBUILD_VECTOR: value undefined, creating undef node\n");
16088 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: only low element used, creating 1 "
16089 "SCALAR_TO_VECTOR node\n");
16093 if (AllLanesExtractElt) {
16094 SDNode *
Vector =
nullptr;
16099 for (
unsigned i = 0; i < NumElts; ++i) {
16101 const SDNode *
N =
V.getNode();
16126 if (Val == 2 * i) {
16130 if (Val - 1 == 2 * i) {
16157 if (usesOnlyOneValue) {
16160 Value.getValueType() != VT) {
16162 dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
16170 if (Value.getValueSizeInBits() == 64) {
16172 dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
16184 assert((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
16185 EltTy == MVT::f64) && "Unsupported floating-point vector type");
16187 dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
16188 "BITCASTS, and try again\n");
16190 for (unsigned i = 0; i < NumElts; ++i)
16194 LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
16196 Val = LowerBUILD_VECTOR(Val, DAG);
16206 bool PreferDUPAndInsert =
16208 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
16209 NumDifferentLanes >= NumConstantLanes;
16215 if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
16219 APInt ConstantValueAPInt(1, 0);
16221 ConstantValueAPInt = C->getAPIntValue().zextOrTrunc(BitSize);
16223 !ConstantValueAPInt.isAllOnes()) {
16227 Val = DAG.getNode(AArch64ISD::DUP, DL, VT, ConstantValue);
16231 for (unsigned i = 0; i < NumElts; ++i) {
16245 dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
16257 if (NumElts >= 4) {
16265 if (PreferDUPAndInsert) {
16270 for (unsigned I = 0; I < NumElts; ++I)
16281 if (DifferentValueMap.size() == 2 && NumUndefLanes == 0) {
16293 bool canUseVECTOR_CONCAT = true;
16294 for (auto Pair : DifferentValueMap) {
16296 if (Pair.second != NumElts / 2)
16297 canUseVECTOR_CONCAT = false;
16310 if (canUseVECTOR_CONCAT) {
16333 if (NumElts >= 8) {
16334 SmallVector<int, 16> MaskVec;
16336 SDValue FirstLaneVal = Op.getOperand(0);
16337 for (unsigned i = 0; i < NumElts; ++i) {
16339 if (FirstLaneVal == Val)
16363 dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
16364 "of INSERT_VECTOR_ELT\n");
16381 LLVM_DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
16387 dbgs() << "Creating nodes for the other vector elements:\n";
16389 for (; i < NumElts; ++i) {
16400 dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
16401 "better alternative\n");
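// LowerBUILD_VECTOR first profiles the lanes (splat value, constant lanes,
// undef lanes, all-extract lanes) and then prefers, in order: a single DUP
// for a non-constant splat, DUP of the common constant plus lane inserts, a
// concatenation of two repeated halves, and only as a last resort a chain of
// INSERT_VECTOR_ELT nodes or the generic expansion.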
16408 !Subtarget->isNeonAvailable()))
16409 return LowerFixedLengthConcatVectorsToSVE(
Op, DAG);
16411 assert(
Op.getValueType().isScalableVector() &&
16413 "Expected legal scalable vector type!");
16418 "Unexpected number of operands in CONCAT_VECTORS");
16420 if (NumOperands == 2)
16425 while (ConcatOps.size() > 1) {
16426 for (
unsigned I = 0,
E = ConcatOps.size();
I !=
E;
I += 2) {
16434 ConcatOps.resize(ConcatOps.size() / 2);
16436 return ConcatOps[0];
16447 !Subtarget->isNeonAvailable()))
16448 return LowerFixedLengthInsertVectorElt(
Op, DAG);
16450 EVT VT =
Op.getOperand(0).getValueType();
16464 ExtendedValue,
Op.getOperand(2));
16477AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(
SDValue Op,
16480 EVT VT =
Op.getOperand(0).getValueType();
16486 if (VT == MVT::nxv1i1) {
16490 WidenedPred,
Op.getOperand(1));
16497 MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
16499 Extend,
Op.getOperand(1));
16504 return LowerFixedLengthExtractVectorElt(
Op, DAG);
16512 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16513 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
16514 VT == MVT::v8f16 || VT == MVT::v8bf16)
16517 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
16518 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
16529 if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
16539 EVT VT =
Op.getValueType();
16541 "Only cases that extract a fixed length vector are supported!");
16542 EVT InVT =
Op.getOperand(0).getValueType();
16550 unsigned Idx =
Op.getConstantOperandVal(1);
16569 if (PackedVT != InVT) {
16593 assert(
Op.getValueType().isScalableVector() &&
16594 "Only expect to lower inserts into scalable vectors!");
16596 EVT InVT =
Op.getOperand(1).getValueType();
16597 unsigned Idx =
Op.getConstantOperandVal(2);
16602 EVT VT =
Op.getValueType();
16618 if (Idx < (NumElts / 2))
16644 Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
16645 Vec1 = getSVESafeBitCast(NarrowVT, Vec1, DAG);
16649 Vec1 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, Vec1);
16658 HiVec0 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, HiVec0);
16659 Narrow = DAG.
getNode(AArch64ISD::UZP1,
DL, NarrowVT, Vec1, HiVec0);
16662 "Invalid subvector index!");
16664 LoVec0 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, LoVec0);
16665 Narrow = DAG.
getNode(AArch64ISD::UZP1,
DL, NarrowVT, LoVec0, Vec1);
16668 return getSVESafeBitCast(VT, Narrow, DAG);
16676 std::optional<unsigned> PredPattern =
16688 if (Op.getOpcode() != AArch64ISD::DUP &&
16701 SplatVal = Op->getConstantOperandVal(0);
16702 if (Op.getValueType().getVectorElementType() != MVT::i64)
16703 SplatVal = (int32_t)SplatVal;
16711 SplatVal = -SplatVal;
16719 EVT VT = Op.getValueType();
16723 return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
16728 unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
16737 DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0),
16745 if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
16746 return LowerToPredicatedOp(Op, DAG, PredOpcode);
16751 if (VT == MVT::nxv16i8)
16752 WidenedVT = MVT::nxv8i16;
16753 else if (VT == MVT::nxv8i16)
16754 WidenedVT = MVT::nxv4i32;
16758 unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
16759 unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
16768 return DAG.getNode(AArch64ISD::UZP1, DL, VT, ResultLoCast, ResultHiCast);
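// SVE only has integer divide for 32- and 64-bit elements, so nxv16i8 and
// nxv8i16 divides are widened: both operands are unpacked into low and high
// halves, divided at the wider width, and the results narrowed back together
// with UZP1.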
16771bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
16772 EVT VT,
unsigned DefinedValues)
const {
16773 if (!Subtarget->isNeonAvailable())
16792 unsigned DummyUnsigned;
16800 isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
16802 isTRNMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
16803 isUZPMask(M, NumElts, DummyUnsigned) ||
16804 isZIPMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
16808 isINSMask(M, NumElts, DummyBool, DummyInt) ||
16824 Op =
Op.getOperand(0);
16826 APInt SplatBits, SplatUndef;
16827 unsigned SplatBitSize;
16829 if (!BVN || !BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
16830 HasAnyUndefs, ElementBits) ||
16831 SplatBitSize > ElementBits)
16842 assert(VT.
isVector() &&
"vector shift count is not a vector type");
16846 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
16853 assert(VT.
isVector() &&
"vector shift count is not a vector type");
16857 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
16862 EVT VT =
Op.getValueType();
16867 EVT OpVT =
Op.getOperand(0).getValueType();
16878 !Subtarget->isNeonAvailable()))
16879 return LowerFixedLengthVectorTruncateToSVE(
Op, DAG);
16889 unsigned &ShiftValue,
16902 ShiftValue = ShiftOp1->getZExtValue();
16911 "ResVT must be truncated or same type as the shift.");
16914 if (ShiftValue > ExtraBits && !
Add->getFlags().hasNoUnsignedWrap())
16921 uint64_t AddValue = AddOp1->getZExtValue();
16922 if (AddValue != 1ULL << (ShiftValue - 1))
16925 RShOperand =
Add->getOperand(0);
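// A logical shift right of (x + (1 << (Shift - 1))) by Shift is a rounding
// shift, so it can be selected as an SVE2 rounding-shift instruction as long
// as the add cannot wrap; the checks above verify the constant really is the
// half-way rounding bias for the given shift amount.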
16931 EVT VT = Op.getValueType();
16935 if (!Op.getOperand(1).getValueType().isVector())
16939 switch (Op.getOpcode()) {
16943 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
16945 if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
16946 return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
16951 Op.getOperand(0), Op.getOperand(1));
16955 (Subtarget->hasSVE2() ||
16956 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
16958 unsigned ShiftValue;
16960 return DAG.getNode(AArch64ISD::URSHR_I_PRED, DL, VT,
16967 unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
16968 : AArch64ISD::SRL_PRED;
16969 return LowerToPredicatedOp(Op, DAG, Opc);
16973 if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
16975 (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
16984 unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
16985 : Intrinsic::aarch64_neon_ushl;
16993 return NegShiftLeft;
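// Right shifts by a non-constant amount have no direct NEON encoding, so
// they are emitted as a signed/unsigned shift-left intrinsic with a negated
// shift operand (NegShiftLeft); constant amounts use VASHR/VLSHR directly.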
17001 if (
Op.getValueType().isScalableVector())
17002 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
17005 !Subtarget->isNeonAvailable()))
17006 return LowerFixedLengthVectorSetccToSVE(
Op, DAG);
17011 EVT CmpVT =
LHS.getValueType().changeVectorElementTypeToInteger();
17014 if (
LHS.getValueType().getVectorElementType().isInteger())
17017 assert(((!Subtarget->hasFullFP16() &&
17018 LHS.getValueType().getVectorElementType() != MVT::f16) ||
17019 LHS.getValueType().getVectorElementType() != MVT::bf16 ||
17020 LHS.getValueType().getVectorElementType() != MVT::f128) &&
17021 "Unexpected type!");
17026 bool OneNaN =
false;
17050 if (!
Cmp.getNode())
17079 unsigned ScalarOpcode;
17097 "Expected power-of-2 length vector");
17105 if (ElemVT == MVT::i1) {
17107 if (NumElems > 16) {
17110 EVT HalfVT =
Lo.getValueType();
17121 unsigned ExtendedWidth = 64;
17124 ExtendedWidth = 128;
17129 unsigned ExtendOp =
17138 NumElems == 2 && ExtendedWidth == 128) {
17139 Extended = DAG.
getBitcast(MVT::v4i32, Extended);
17140 ExtendedVT = MVT::i32;
17142 switch (ScalarOpcode) {
17163 VecVT =
Lo.getValueType();
17179 for (
unsigned Shift = NumElems / 2; Shift > 0; Shift /= 2) {
17184 Scalar = DAG.
getNode(ScalarOpcode,
DL, ScalarVT, Scalar, Shifted);
17196 EVT SrcVT = Src.getValueType();
17201 SrcVT == MVT::v2f16) {
17209 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
17218 SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
17221 return LowerPredReductionToSVE(Op, DAG);
17223 switch (Op.getOpcode()) {
17225 return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
17227 return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
17229 return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
17231 return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
17233 return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
17235 return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
17237 return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
17239 return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
17241 return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
17243 return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
17245 return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
17247 return LowerReductionToSVE(AArch64ISD::FMAXV_PRED, Op, DAG);
17249 return LowerReductionToSVE(AArch64ISD::FMINV_PRED, Op, DAG);
17257 switch (Op.getOpcode()) {
17262 Op.getValueType(), DL, DAG);
17282 EVT SrcVT = Src.getValueType();
17285 SDVTList SrcVTs = DAG.getVTList(SrcVT, SrcVT);
17297 for (unsigned I = 0; I < Stages; ++I) {
17299 Src = DAG.getNode(BaseOpc, DL, SrcVT, Src.getValue(0), Src.getValue(1));
17307 auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
17309 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
17314 MVT VT = Op.getSimpleValueType();
17315 assert(VT != MVT::i128 && "Handled elsewhere, code replicated.");
17320 Op.getOperand(0), Op.getOperand(1), RHS,
17325 AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
17330 SDNode *Node = Op.getNode();
17335 EVT VT = Node->getValueType(0);
17338 "no-stack-arg-probe")) {
17340 Chain = SP.getValue(1);
17350 RTLIB::LibcallImpl ChkStkImpl = getLibcallImpl(RTLIB::STACK_PROBE);
17351 if (ChkStkImpl == RTLIB::Unsupported)
17360 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
17361 const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
17362 if (Subtarget->hasCustomCallingConv())
17370 Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
17381 Chain = SP.getValue(1);
17395 AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(SDValue Op,
17398 SDNode *Node = Op.getNode();
17405 EVT VT = Node->getValueType(0);
17409 Chain = SP.getValue(1);
17416 Chain = DAG.getNode(AArch64ISD::PROBED_ALLOCA, DL, MVT::Other, Chain, SP);
17422 AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
17426 if (Subtarget->isTargetWindows())
17427 return LowerWindowsDYNAMIC_STACKALLOC(Op, DAG);
17429 return LowerInlineDYNAMIC_STACKALLOC(Op, DAG);
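// Windows dynamic allocas call the stack-probe helper with the size in X15
// and the Windows probe-preserved register mask, unless the function opts
// out via "no-stack-arg-probe"; other targets adjust SP inline or emit a
// PROBED_ALLOCA node when stack probing is requested.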
17435 unsigned NewOp) const {
17436 if (Subtarget->hasSVE2())
17437 return LowerToPredicatedOp(Op, DAG, NewOp);
17445 EVT VT = Op.getValueType();
17446 assert(VT != MVT::i64 && "Expected illegal VSCALE node");
17449 APInt MulImm = Op.getConstantOperandAPInt(0);
17455 template <unsigned NumVecs>
17465 for (unsigned I = 0; I < NumVecs; ++I)
17474 Info.align.reset();
17486 auto &
DL =
I.getDataLayout();
17488 case Intrinsic::aarch64_sve_st2:
17490 case Intrinsic::aarch64_sve_st3:
17492 case Intrinsic::aarch64_sve_st4:
17494 case Intrinsic::aarch64_neon_ld2:
17495 case Intrinsic::aarch64_neon_ld3:
17496 case Intrinsic::aarch64_neon_ld4:
17497 case Intrinsic::aarch64_neon_ld1x2:
17498 case Intrinsic::aarch64_neon_ld1x3:
17499 case Intrinsic::aarch64_neon_ld1x4: {
17501 uint64_t NumElts =
DL.getTypeSizeInBits(
I.getType()) / 64;
17503 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17505 Info.align.reset();
17510 case Intrinsic::aarch64_neon_ld2lane:
17511 case Intrinsic::aarch64_neon_ld3lane:
17512 case Intrinsic::aarch64_neon_ld4lane:
17513 case Intrinsic::aarch64_neon_ld2r:
17514 case Intrinsic::aarch64_neon_ld3r:
17515 case Intrinsic::aarch64_neon_ld4r: {
17518 Type *RetTy =
I.getType();
17520 unsigned NumElts = StructTy->getNumElements();
17521 Type *VecTy = StructTy->getElementType(0);
17524 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17526 Info.align.reset();
17531 case Intrinsic::aarch64_neon_st2:
17532 case Intrinsic::aarch64_neon_st3:
17533 case Intrinsic::aarch64_neon_st4:
17534 case Intrinsic::aarch64_neon_st1x2:
17535 case Intrinsic::aarch64_neon_st1x3:
17536 case Intrinsic::aarch64_neon_st1x4: {
17538 unsigned NumElts = 0;
17539 for (
const Value *Arg :
I.args()) {
17540 Type *ArgTy = Arg->getType();
17543 NumElts +=
DL.getTypeSizeInBits(ArgTy) / 64;
17546 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17548 Info.align.reset();
17553 case Intrinsic::aarch64_neon_st2lane:
17554 case Intrinsic::aarch64_neon_st3lane:
17555 case Intrinsic::aarch64_neon_st4lane: {
17557 unsigned NumElts = 0;
17559 Type *VecTy =
I.getArgOperand(0)->getType();
17562 for (
const Value *Arg :
I.args()) {
17563 Type *ArgTy = Arg->getType();
17570 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17572 Info.align.reset();
17577 case Intrinsic::aarch64_ldaxr:
17578 case Intrinsic::aarch64_ldxr: {
17579 Type *ValTy = I.getParamElementType(0);
17582 Info.ptrVal = I.getArgOperand(0);
17584 Info.align = DL.getABITypeAlign(ValTy);
17588 case Intrinsic::aarch64_stlxr:
17589 case Intrinsic::aarch64_stxr: {
17590 Type *ValTy = I.getParamElementType(1);
17593 Info.ptrVal = I.getArgOperand(1);
17595 Info.align = DL.getABITypeAlign(ValTy);
17599 case Intrinsic::aarch64_ldaxp:
17600 case Intrinsic::aarch64_ldxp:
17602 Info.memVT = MVT::i128;
17603 Info.ptrVal = I.getArgOperand(0);
17605 Info.align = Align(16);
17608 case Intrinsic::aarch64_stlxp:
17609 case Intrinsic::aarch64_stxp:
17611 Info.memVT = MVT::i128;
17612 Info.ptrVal = I.getArgOperand(2);
17614 Info.align = Align(16);
17617 case Intrinsic::aarch64_sve_ldnt1: {
17621 Info.ptrVal = I.getArgOperand(1);
17623 Info.align = DL.getABITypeAlign(ElTy);
17627 case Intrinsic::aarch64_sve_stnt1: {
17631 Info.memVT = MVT::getVT(I.getOperand(0)->getType());
17632 Info.ptrVal = I.getArgOperand(2);
17634 Info.align = DL.getABITypeAlign(ElTy);
17638 case Intrinsic::aarch64_mops_memset_tag: {
17639 Value *Dst = I.getArgOperand(0);
17640 Value *Val = I.getArgOperand(1);
17645 Info.align = I.getParamAlign(0).valueOrOne();
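// getTgtMemIntrinsic describes the memory behaviour of target intrinsics for
// the optimisers: structured NEON/SVE loads and stores report the pointer
// operand (the last argument) and total width, the exclusive-monitor
// ld*xr/st*xr forms report the accessed element type, and the 128-bit
// ldxp/stxp pairs are modelled as i128 accesses with 16-byte alignment.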
17660 std::optional<unsigned> ByteOffset)
const {
17677 Base.getOperand(1).hasOneUse() &&
17684 uint64_t ShiftAmount =
Base.getOperand(1).getConstantOperandVal(1);
17686 if (ShiftAmount ==
Log2_32(LoadBytes))
17696 if ((VT == MVT::i64 || VT == MVT::i32) && Extend->
use_size()) {
17715 return NumBits1 > NumBits2;
17722 return NumBits1 > NumBits2;
17729 if (
I->getOpcode() != Instruction::FMul)
17732 if (!
I->hasOneUse())
17737 if (!(
User->getOpcode() == Instruction::FSub ||
17738 User->getOpcode() == Instruction::FAdd))
17749 I->getFastMathFlags().allowContract()));
17759 return NumBits1 == 32 && NumBits2 == 64;
17766 return NumBits1 == 32 && NumBits2 == 64;
17784bool AArch64TargetLowering::isExtFreeImpl(
const Instruction *Ext)
const {
17792 for (
const Use &U : Ext->
uses()) {
17800 switch (Instr->getOpcode()) {
17801 case Instruction::Shl:
17805 case Instruction::GetElementPtr: {
17808 std::advance(GTI, U.getOperandNo()-1);
17821 if (ShiftAmt == 0 || ShiftAmt > 4)
17825 case Instruction::Trunc:
17842 unsigned NumElts, bool IsLittleEndian,
17844 if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth > 64)
17847 assert(DstWidth % SrcWidth == 0 &&
17848 "TBL lowering is not supported for a conversion instruction with this "
17849 "source and destination element type.");
17851 unsigned Factor = DstWidth / SrcWidth;
17852 unsigned MaskLen = NumElts * Factor;
17855 Mask.resize(MaskLen, NumElts);
17857 unsigned SrcIndex = 0;
17858 for (unsigned I = IsLittleEndian ? 0 : Factor - 1; I < MaskLen; I += Factor)
17859 Mask[I] = SrcIndex++;
17867 bool IsLittleEndian) {
17869 unsigned NumElts = SrcTy->getNumElements();
17877 auto *FirstEltZero = Builder.CreateInsertElement(
17879 Value *Result = Builder.CreateShuffleVector(Op, FirstEltZero, Mask);
17880 Result = Builder.CreateBitCast(Result, DstTy);
17881 if (DstTy != ZExtTy)
17882 Result = Builder.CreateZExt(Result, ZExtTy);
17888 bool IsLittleEndian) {
17895 !IsLittleEndian, Mask))
17898 auto *FirstEltZero = Builder.CreateInsertElement(
17901 return Builder.CreateShuffleVector(Op, FirstEltZero, Mask);
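// The zext-via-TBL trick builds a byte shuffle mask in which every source
// byte lands at its position within the widened element and all remaining
// byte lanes point at a known-zero element, so a single shuffle (a TBL after
// selection) performs the whole i8-to-wider-integer zero extension.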
17910 assert(SrcTy->getElementType()->isIntegerTy() &&
17911 "Non-integer type source vector element is not supported");
17912 assert(DstTy->getElementType()->isIntegerTy(8) &&
17913 "Unsupported destination vector element type");
17914 unsigned SrcElemTySz =
17916 unsigned DstElemTySz =
17918 assert((SrcElemTySz % DstElemTySz == 0) &&
17919 "Cannot lower truncate to tbl instructions for a source element size "
17920 "that is not divisible by the destination element size");
17921 unsigned TruncFactor = SrcElemTySz / DstElemTySz;
17922 assert((SrcElemTySz == 16 || SrcElemTySz == 32 || SrcElemTySz == 64) &&
17923 "Unsupported source vector element type size");
17931 for (
int Itr = 0; Itr < 16; Itr++) {
17932 if (Itr < NumElements)
17934 IsLittleEndian ? Itr * TruncFactor
17935 : Itr * TruncFactor + (TruncFactor - 1)));
17937 MaskConst.
push_back(Builder.getInt8(255));
17940 int MaxTblSz = 128 * 4;
17941 int MaxSrcSz = SrcElemTySz * NumElements;
17943 (MaxTblSz > MaxSrcSz) ? NumElements : (MaxTblSz / SrcElemTySz);
17944 assert(ElemsPerTbl <= 16 &&
17945 "Maximum elements selected using TBL instruction cannot exceed 16!");
17947 int ShuffleCount = 128 / SrcElemTySz;
17949 for (
int i = 0; i < ShuffleCount; ++i)
17956 while (ShuffleLanes.
back() < NumElements) {
17958 Builder.CreateShuffleVector(TI->
getOperand(0), ShuffleLanes), VecTy));
17960 if (Parts.
size() == 4) {
17963 Builder.CreateIntrinsic(Intrinsic::aarch64_neon_tbl4, VecTy, Parts));
17967 for (
int i = 0; i < ShuffleCount; ++i)
17968 ShuffleLanes[i] += ShuffleCount;
17972 "Lowering trunc for vectors requiring different TBL instructions is "
17976 if (!Parts.
empty()) {
17978 switch (Parts.
size()) {
17980 TblID = Intrinsic::aarch64_neon_tbl1;
17983 TblID = Intrinsic::aarch64_neon_tbl2;
17986 TblID = Intrinsic::aarch64_neon_tbl3;
17991 Results.push_back(Builder.CreateIntrinsic(TblID, VecTy, Parts));
17996 assert(
Results.size() <= 2 &&
"Trunc lowering does not support generation of "
17997 "more than 2 tbl instructions!");
18000 if (ElemsPerTbl < 16) {
18002 std::iota(FinalMask.
begin(), FinalMask.
end(), 0);
18003 FinalResult = Builder.CreateShuffleVector(
Results[0], FinalMask);
18007 if (ElemsPerTbl < 16) {
18008 std::iota(FinalMask.
begin(), FinalMask.
begin() + ElemsPerTbl, 0);
18009 std::iota(FinalMask.
begin() + ElemsPerTbl, FinalMask.
end(), 16);
18011 std::iota(FinalMask.
begin(), FinalMask.
end(), 0);
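// createTblForTrunc narrows i16/i32/i64 elements to i8 by picking, for each
// result lane, the low byte (or the high byte on big-endian) of its source
// element: the source is reinterpreted as bytes, split into 128-bit chunks,
// fed to tbl1..tbl4 with the byte-picking mask (255 marks unused lanes), and
// the partial results are recombined with a final shuffle.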
18025 if (!
EnableExtToTBL || Subtarget->useSVEForFixedLengthVectors())
18033 if (!L || L->getHeader() !=
I->getParent() ||
F->hasOptSize())
18038 if (!SrcTy || !DstTy)
18045 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
18046 auto DstWidth = DstTy->getElementType()->getScalarSizeInBits();
18047 if (DstWidth % 8 != 0)
18050 auto *TruncDstType =
18054 auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits();
18055 if (
TTI.getCastInstrCost(
I->getOpcode(), DstTy, TruncDstType,
18058 if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits())
18061 DstTy = TruncDstType;
18069 if (SrcWidth * 4 <= DstWidth) {
18070 if (
all_of(
I->users(), [&](
auto *U) {
18071 using namespace llvm::PatternMatch;
18072 auto *SingleUser = cast<Instruction>(&*U);
18073 if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
18075 if (match(SingleUser,
18076 m_Intrinsic<Intrinsic::vector_partial_reduce_add>(
18077 m_Value(), m_Specific(I))))
18084 if (DstTy->getScalarSizeInBits() >= 64)
18090 DstTy, Subtarget->isLittleEndian());
18093 ZExt->replaceAllUsesWith(Result);
18094 ZExt->eraseFromParent();
18099 if (UIToFP && ((SrcTy->getElementType()->isIntegerTy(8) &&
18100 DstTy->getElementType()->isFloatTy()) ||
18101 (SrcTy->getElementType()->isIntegerTy(16) &&
18102 DstTy->getElementType()->isDoubleTy()))) {
18107 assert(ZExt &&
"Cannot fail for the i8 to float conversion");
18108 auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
18109 I->replaceAllUsesWith(UI);
18110 I->eraseFromParent();
18115 if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
18116 DstTy->getElementType()->isFloatTy()) {
18120 Subtarget->isLittleEndian());
18121 assert(Shuffle &&
"Cannot fail for the i8 to float conversion");
18123 auto *AShr = Builder.CreateAShr(Cast, 24,
"",
true);
18124 auto *
SI = Builder.CreateSIToFP(AShr, DstTy);
18125 I->replaceAllUsesWith(
SI);
18126 I->eraseFromParent();
18134 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
18135 SrcTy->getElementType()->isFloatTy() &&
18136 DstTy->getElementType()->isIntegerTy(8)) {
18138 auto *WideConv = Builder.CreateFPToUI(FPToUI->getOperand(0),
18140 auto *TruncI = Builder.CreateTrunc(WideConv, DstTy);
18141 I->replaceAllUsesWith(TruncI);
18142 I->eraseFromParent();
18152 if (TI && DstTy->getElementType()->isIntegerTy(8) &&
18153 ((SrcTy->getElementType()->isIntegerTy(32) ||
18154 SrcTy->getElementType()->isIntegerTy(64)) &&
18155 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
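// This IR-level hook rewrites conversions in hot loop headers into
// TBL-friendly forms: vector zext from i8 becomes a byte shuffle, uitofp and
// sitofp from narrow integers go through that widened form (an arithmetic
// shift restores the sign for sitofp), fptoui to i8 converts at i32 and
// truncates, and wide-to-i8 truncates use the TBL-based truncation.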
18164 Align &RequiredAlignment)
const {
18169 RequiredAlignment =
Align(1);
18171 return NumBits == 32 || NumBits == 64;
18178 unsigned VecSize = 128;
18182 VecSize = std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
18183 return std::max<unsigned>(1, (MinElts * ElSize + 127) / VecSize);
18188 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
18198 unsigned MinElts = EC.getKnownMinValue();
18200 UseScalable =
false;
18203 (!Subtarget->useSVEForFixedLengthVectors() ||
18208 !Subtarget->isSVEorStreamingSVEAvailable())
18216 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
18219 if (EC.isScalable()) {
18220 UseScalable =
true;
18221 return isPowerOf2_32(MinElts) && (MinElts * ElSize) % 128 == 0;
18224 unsigned VecSize =
DL.getTypeSizeInBits(VecTy);
18225 if (Subtarget->useSVEForFixedLengthVectors()) {
18226 unsigned MinSVEVectorSize =
18227 std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
18228 if (VecSize % MinSVEVectorSize == 0 ||
18230 (!Subtarget->isNeonAvailable() || VecSize > 128))) {
18231 UseScalable =
true;
18238 return Subtarget->isNeonAvailable() && (VecSize == 64 || VecSize % 128 == 0);
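// An interleaved access type is legal when its element size is 8/16/32/64
// bits and the vector fills 64 bits or a multiple of 128 bits; scalable (or
// SVE-mapped fixed-length) vectors additionally need a power-of-two element
// count, and UseScalable reports which instruction family to target.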
18270 bool Scalable,
Type *LDVTy,
18272 assert(Factor >= 2 && Factor <= 4 &&
"Invalid interleave factor");
18273 static const Intrinsic::ID SVELoads[3] = {Intrinsic::aarch64_sve_ld2_sret,
18274 Intrinsic::aarch64_sve_ld3_sret,
18275 Intrinsic::aarch64_sve_ld4_sret};
18276 static const Intrinsic::ID NEONLoads[3] = {Intrinsic::aarch64_neon_ld2,
18277 Intrinsic::aarch64_neon_ld3,
18278 Intrinsic::aarch64_neon_ld4};
18287 bool Scalable,
Type *STVTy,
18289 assert(Factor >= 2 && Factor <= 4 &&
"Invalid interleave factor");
18290 static const Intrinsic::ID SVEStores[3] = {Intrinsic::aarch64_sve_st2,
18291 Intrinsic::aarch64_sve_st3,
18292 Intrinsic::aarch64_sve_st4};
18293 static const Intrinsic::ID NEONStores[3] = {Intrinsic::aarch64_neon_st2,
18294 Intrinsic::aarch64_neon_st3,
18295 Intrinsic::aarch64_neon_st4};
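// Editorial note: the two tables above map an interleave factor of 2, 3 or 4
// onto the matching SVE (ld2/ld3/ld4, st2/st3/st4) or NEON structured
// load/store intrinsic.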
18318 "Invalid interleave factor");
18319 assert(!Shuffles.empty() && "Empty shufflevector input");
18321 "Unmatched number of shufflevectors and indices");
18326 assert(!Mask && GapMask.popcount() == Factor && "Unexpected mask on a load");
18345 SI->getType()->getScalarSizeInBits() * 4 ==
18346 SI->user_back()->getType()->getScalarSizeInBits();
18356 Type *EltTy = FVTy->getElementType();
18364 FVTy->getNumElements() / NumLoads);
18372 Value *BaseAddr = LI->getPointerOperand();
18374 Type *PtrTy = LI->getPointerOperandType();
18376 LDVTy->getElementCount());
18379 UseScalable, LDVTy, PtrTy);
18386 Value *PTrue = nullptr;
18388 std::optional<unsigned> PgPattern =
18390 if (Subtarget->getMinSVEVectorSizeInBits() ==
18391 Subtarget->getMaxSVEVectorSizeInBits() &&
18392 Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
18393 PgPattern = AArch64SVEPredPattern::all;
18397 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
18401 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
18406 BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
18407 FVTy->getNumElements() * Factor);
18411 LdN = Builder.CreateCall(LdNFunc, {PTrue, BaseAddr}, "ldN");
18413 LdN = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
18416 for (unsigned i = 0; i < Shuffles.size(); i++) {
18418 unsigned Index = Indices[i];
18420 Value *SubVec = Builder.CreateExtractValue(LdN, Index);
18423 SubVec = Builder.CreateExtractVector(FVTy, SubVec, uint64_t(0));
18427 SubVec = Builder.CreateIntToPtr(
18429 FVTy->getNumElements()));
18431 SubVecs[SVI].push_back(SubVec);
18440 auto &SubVec = SubVecs[SVI];
18443 SVI->replaceAllUsesWith(WideVec);
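// Editorial note: the interleaved load is rewritten above as one ldN call per
// chunk (predicated with an all-true ptrue when the SVE form is used); each
// lane group is extracted from the returned struct and the original
// deinterleaving shufflevectors are rewired onto those sub-vectors.
// Illustrative sketch only (not taken from this listing): a factor-2 NEON case
// such as
//   %v  = load <8 x i32>, ptr %p
//   %ev = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
//   %od = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// becomes, roughly,
//   %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %p)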
18449template <typename Iter>
18451 int MaxLookupDist = 20;
18452 unsigned IdxWidth = DL.getIndexSizeInBits(0);
18453 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
18454 const Value *PtrA1 =
18457 while (++It != End) {
18458 if (It->isDebugOrPseudoInst())
18460 if (MaxLookupDist-- == 0)
18463 const Value *PtrB1 =
18464 SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(
18466 if (PtrA1 == PtrB1 &&
18467 (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.sextOrTrunc(IdxWidth))
18506 const APInt &GapMask) const {
18509 "Invalid interleave factor");
18514 "Unexpected mask on store");
18517 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
18519 unsigned LaneLen = VecTy->getNumElements() / Factor;
18520 Type *EltTy = VecTy->getElementType();
18541 Type *IntTy = DL.getIntPtrType(EltTy);
18542 unsigned NumOpElts =
18547 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
18548 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
18555 LaneLen /= NumStores;
18562 Value *BaseAddr = SI->getPointerOperand();
18576 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
18584 Type *PtrTy = SI->getPointerOperandType();
18586 STVTy->getElementCount());
18589 UseScalable, STVTy, PtrTy);
18591 Value *PTrue = nullptr;
18593 std::optional<unsigned> PgPattern =
18595 if (Subtarget->getMinSVEVectorSizeInBits() ==
18596 Subtarget->getMaxSVEVectorSizeInBits() &&
18597 Subtarget->getMinSVEVectorSizeInBits() ==
18598 DL.getTypeSizeInBits(SubVecTy))
18599 PgPattern = AArch64SVEPredPattern::all;
18603 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
18607 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
18612 for (unsigned i = 0; i < Factor; i++) {
18614 unsigned IdxI = StoreCount * LaneLen * Factor + i;
18615 if (Mask[IdxI] >= 0) {
18616 Shuffle = Builder.CreateShuffleVector(
18619 unsigned StartMask = 0;
18620 for (unsigned j = 1; j < LaneLen; j++) {
18621 unsigned IdxJ = StoreCount * LaneLen * Factor + j * Factor + i;
18622 if (Mask[IdxJ] >= 0) {
18623 StartMask = Mask[IdxJ] - j;
18632 Shuffle = Builder.CreateShuffleVector(
18640 Ops.push_back(Shuffle);
18644 Ops.push_back(PTrue);
18648 if (StoreCount > 0)
18649 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
18650 BaseAddr, LaneLen * Factor);
18652 Ops.push_back(BaseAddr);
18653 Builder.CreateCall(StNFunc, Ops);
18661 if (Factor != 2 && Factor != 3 && Factor != 4) {
18662 LLVM_DEBUG(dbgs() << "Matching ld2, ld3 and ld4 patterns failed\n");
18668 assert(!Mask && "Unexpected mask on a load\n");
18672 const DataLayout &DL = LI->getModule()->getDataLayout();
18687 Type *PtrTy = LI->getPointerOperandType();
18689 UseScalable, LdTy, PtrTy);
18692 Value *Pred = nullptr;
18695 Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue());
18697 Value *BaseAddr = LI->getPointerOperand();
18698 Value *Result = nullptr;
18699 if (NumLoads > 1) {
18702 for (unsigned I = 0; I < NumLoads; ++I) {
18706 Value *LdN = nullptr;
18708 LdN = Builder.CreateCall(LdNFunc, {Pred, Address}, "ldN");
18710 LdN = Builder.CreateCall(LdNFunc, Address, "ldN");
18713 for (unsigned J = 0; J < Factor; ++J) {
18714 ExtractedLdValues[J] = Builder.CreateInsertVector(
18715 VTy, ExtractedLdValues[J], Builder.CreateExtractValue(LdN, J), Idx);
18722 for (unsigned J = 0; J < Factor; ++J)
18723 Result = Builder.CreateInsertValue(Result, ExtractedLdValues[J], J);
18726 Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
18728 Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
18739 unsigned Factor = InterleavedValues.size();
18740 if (Factor != 2 && Factor != 3 && Factor != 4) {
18741 LLVM_DEBUG(dbgs() << "Matching st2, st3 and st4 patterns failed\n");
18747 assert(!Mask && "Unexpected mask on plain store");
18767 Type *PtrTy = SI->getPointerOperandType();
18769 UseScalable, StTy, PtrTy);
18773 Value *BaseAddr = SI->getPointerOperand();
18774 Value *Pred = nullptr;
18778 Builder.CreateVectorSplat(StTy->getElementCount(), Builder.getTrue());
18780 auto ExtractedValues = InterleavedValues;
18785 for (unsigned I = 0; I < NumStores; ++I) {
18787 if (NumStores > 1) {
18792 for (unsigned J = 0; J < Factor; J++) {
18794 Builder.CreateExtractVector(StTy, ExtractedValues[J], Idx);
18797 StoreOperands[StoreOperands.size() - 1] = Address;
18799 Builder.CreateCall(StNFunc, StoreOperands);
18806 const AttributeList &FuncAttributes) const {
18807 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
18808 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
18809 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
18814 bool IsSmallZeroMemset = Op.isMemset() && Op.size() < 32 && Op.isZeroMemset();
18815 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
18816 if (Op.isAligned(AlignCheck))
18826 if (CanUseNEON && Op.isMemset() && !IsSmallZeroMemset &&
18827 AlignmentIsAcceptable(MVT::v16i8, Align(1)))
18829 if (CanUseFP && !IsSmallZeroMemset &&
18830 AlignmentIsAcceptable(MVT::f128, Align(16)))
18832 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
18834 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
18840 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
18841 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
18842 const AttributeList &FuncAttributes, EVT *LargestVT) const {
18846 if (VT == MVT::v16i8 && Op.isMemset() && !Op.isZeroMemset() &&
18848 unsigned Size = Op.size();
18849 unsigned RemainingSize = Size;
18855 while (RemainingSize > 0) {
18859 if (RemainingSize >= 8) {
18860 TargetVT = MVT::i64;
18861 RemainingSize -= 8;
18862 } else if (RemainingSize >= 4) {
18863 TargetVT = MVT::i32;
18864 RemainingSize -= 4;
18865 } else if (RemainingSize >= 2) {
18866 TargetVT = MVT::i16;
18867 RemainingSize -= 2;
18868 } else if (RemainingSize >= 1) {
18869 TargetVT = MVT::i8;
18870 RemainingSize -= 1;
18876 MemOps.push_back(TargetVT);
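// Editorial note: the loop above decomposes a non-zero memset that would
// otherwise use a v16i8 store into i64/i32/i16/i8 chunks, largest first, until
// the remaining size reaches zero.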
18882 if (RemainingSize == 0 && !MemOps.empty()) {
18893 Context, MemOps, Limit, Op, DstAS, SrcAS, FuncAttributes, LargestVT);
18897 const MemOp &Op, const AttributeList &FuncAttributes) const {
18898 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
18899 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
18900 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
18905 bool IsSmallZeroMemset = Op.isMemset() && Op.size() < 32 && Op.isZeroMemset();
18906 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
18907 if (Op.isAligned(AlignCheck))
18917 if (CanUseNEON && Op.isMemset() && !IsSmallZeroMemset &&
18918 AlignmentIsAcceptable(MVT::v16i8, Align(1)))
18920 if (CanUseFP && !IsSmallZeroMemset &&
18921 AlignmentIsAcceptable(MVT::f128, Align(16)))
18923 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
18925 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
18932 if (Immed == std::numeric_limits<int64_t>::min()) {
18941 if (!Subtarget->hasSVE2())
18960 return std::abs(Imm / 8) <= 16;
18963 return std::abs(Imm / 4) <= 16;
18966 return std::abs(Imm / 2) <= 16;
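// Editorial note (inferred from the divisors checked above): scalable add
// immediates are only accepted with SVE2, and only when they are a multiple of
// 8, 4 or 2 with a multiplier whose magnitude is at most 16, i.e. values that
// inc/dec-style instructions can materialise.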
18993 if (Insn.size() > 1)
19030 if (AM.Scale == 1) {
19033 } else if (AM.Scale == 2) {
19045 if (Ty->isScalableTy()) {
19051 uint64_t VecNumBytes = DL.getTypeSizeInBits(Ty).getKnownMinValue() / 8;
19073 if (Ty->isSized()) {
19074 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
19075 NumBytes = NumBits / 8;
19080 return Subtarget->getInstrInfo()->isLegalAddressingMode(NumBytes, AM.BaseOffs,
19088 int64_t MaxOffset) const {
19089 int64_t HighPart = MinOffset & ~0xfffULL;
19112 return Subtarget->hasFullFP16();
19118 Subtarget->isNonStreamingSVEorSME2Available();
19128 switch (Ty->getScalarType()->getTypeID()) {
19148 static const MCPhysReg ScratchRegs[] = {
19149 AArch64::X16, AArch64::X17, AArch64::LR, 0
19151 return ScratchRegs;
19155 static const MCPhysReg RCRegs[] = {AArch64::FPCR};
19164 "Expected shift op");
19166 SDValue ShiftLHS = N->getOperand(0);
19167 EVT VT = N->getValueType(0);
19188 return SRLC->getZExtValue() == SHLC->getZExtValue();
19200 (N->getOperand(0).getOpcode() == ISD::SHL ||
19201 N->getOperand(0).getOpcode() == ISD::SRL) &&
19202 "Expected XOR(SHIFT) pattern");
19207 if (XorC && ShiftC) {
19208 unsigned MaskIdx, MaskLen;
19209 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
19210 unsigned ShiftAmt = ShiftC->getZExtValue();
19211 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
19212 if (N->getOperand(0).getOpcode() == ISD::SHL)
19213 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
19214 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
19224 N->getOperand(0).getOpcode() == ISD::SRL) ||
19226 N->getOperand(0).getOpcode() == ISD::SHL)) &&
19227 "Expected shift-shift mask");
19229 if (!N->getOperand(0)->hasOneUse())
19233 EVT VT = N->getValueType(0);
19234 if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
19237 return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
19242 if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
19244 unsigned ShlAmt = C2->getZExtValue();
19245 if (auto ShouldADD = *N->user_begin();
19246 ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
19248 EVT MemVT = Load->getMemoryVT();
19250 if (Load->getValueType(0).isScalableVector())
19264 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
19272 assert(Ty->isIntegerTy());
19274 unsigned BitSize = Ty->getPrimitiveSizeInBits();
19278 int64_t Val = Imm.getSExtValue();
19285 Val &= (1LL << 32) - 1;
19293 unsigned Index) const {
19315 EVT VT = N->getValueType(0);
19316 if (!Subtarget->hasNEON() || !VT.isVector())
19330 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
19365 if (N->getValueType(0) != MVT::i32)
19368 SDValue VecReduceOp0 = N->getOperand(0);
19369 bool SawTrailingZext = false;
19375 SawTrailingZext = true;
19380 MVT AbsInputVT = SawTrailingZext ? MVT::v16i16 : MVT::v16i32;
19382 unsigned Opcode = VecReduceOp0.getOpcode();
19388 if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
19389 ABS->getOperand(0)->getValueType(0) != AbsInputVT)
19392 SDValue SUB = ABS->getOperand(0);
19393 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
19394 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
19396 if (SUB->getOperand(0)->getValueType(0) != AbsInputVT ||
19397 SUB->getOperand(1)->getValueType(0) != AbsInputVT)
19401 bool IsZExt = false;
19409 SDValue EXT0 = SUB->getOperand(0);
19410 SDValue EXT1 = SUB->getOperand(1);
19427 UABDHigh8Op0, UABDHigh8Op1);
19438 UABDLo8Op0, UABDLo8Op1);
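// Editorial note: a 32-bit add-reduction of |a - b| over extended v16i8 inputs
// appears to be split above into UABD operations on the low and high 8-element
// halves, a sum-of-absolute-differences style expansion, before the final
// reduction.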
19459 if (!N->getValueType(0).isScalableVector() ||
19460 !ST->isSVEorStreamingSVEAvailable() ||
19461 !(ST->hasSVE2p1() || ST->hasSME2()))
19466 return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR;
19469 auto MaskEC = N->getValueType(0).getVectorElementCount();
19470 if (!MaskEC.isKnownMultipleOf(NumExts))
19484 if (Use->getValueType(0).getVectorElementCount() != ExtMinEC)
19488 unsigned Offset = Use->getConstantOperandVal(1);
19490 if (Extracts[Part] != nullptr)
19493 Extracts[Part] = Use;
19509 EVT ExtVT = Extracts[0]->getValueType(0);
19513 DCI.CombineTo(Extracts[0], R.getValue(0));
19514 DCI.CombineTo(Extracts[1], R.getValue(1));
19518 if (NumExts == 2) {
19519 assert(N->getValueType(0) == DoubleExtVT);
19525 for (unsigned I = 2; I < NumExts; I += 2) {
19530 DCI.CombineTo(Extracts[I + 1], R.getValue(1));
19532 R.getValue(0), R.getValue(1)));
19564 if (!ST->isNeonAvailable())
19567 if (!ST->hasDotProd())
19585 unsigned DotOpcode;
19589 if (A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
19591 auto OpCodeA = A.getOpcode();
19595 auto OpCodeB = B.getOpcode();
19599 if (OpCodeA == OpCodeB) {
19604 if (!ST->hasMatMulInt8())
19606 DotOpcode = AArch64ISD::USDOT;
19611 DotOpcode = AArch64ISD::UDOT;
19613 DotOpcode = AArch64ISD::SDOT;
19618 EVT Op0VT = A.getOperand(0).getValueType();
19621 if (!IsValidElementCount || !IsValidSize)
19630 B = B.getOperand(0);
19633 unsigned NumOfVecReduce;
19635 if (IsMultipleOf16) {
19637 TargetType = MVT::v4i32;
19640 TargetType = MVT::v2i32;
19643 if (NumOfVecReduce == 1) {
19646 A.getOperand(0), B);
19653 for (; I < VecReduce16Num; I += 1) {
19672 if (VecReduce8Num == 0)
19673 return VecReduceAdd16;
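// Editorial note: the combine above turns an add-reduction of
// extended-and-multiplied i8 vectors into dot-product nodes: UDOT/SDOT when
// both operands use the same extension, and USDOT (requiring the i8mm feature)
// when they differ; inputs are consumed in groups of 16 lanes into v4i32
// accumulators, with a v2i32 form for the remainder.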
19695 auto DetectAddExtract = [&](SDValue A) {
19699 EVT VT = A.getValueType();
19724 : AArch64ISD::SADDLP;
19728 if (SDValue R = DetectAddExtract(A))
19731 if (A.getOperand(0).getOpcode() == ISD::ADD && A.getOperand(0).hasOneUse())
19735 if (A.getOperand(1).getOpcode() == ISD::ADD && A.getOperand(1).hasOneUse())
19748 EVT VT = A.getValueType();
19749 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
19760 if (ExtVT0 != ExtVT1 ||
19775 return DAG.getNode(AArch64ISD::NVCAST, SDLoc(A), MVT::v8i16, Uaddlv);
19792 MVT OpVT = A.getSimpleValueType();
19793 assert(N->getSimpleValueType(0) == OpVT &&
19794 "The operand type should be consistent with the result type of UADDV");
19798 if (KnownLeadingLanes.isZero())
19808 APInt DemandedElts =
19827AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
19834 EVT VT = N->getValueType(0);
19839 if (VT.isVector() && Subtarget->isSVEorStreamingSVEAvailable())
19843 if ((VT != MVT::i32 && VT != MVT::i64) ||
19849 if (Divisor == 2 ||
19850 Divisor == APInt(Divisor.getBitWidth(), -2, true))
19857AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
19864 EVT VT = N->getValueType(0);
19872 if ((VT != MVT::i32 && VT != MVT::i64) ||
19888 CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, And, And, CCVal, Cmp);
19899 CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, AndPos, AndNeg, CCVal,
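// Editorial note: the power-of-two SREM expansion above masks the low bits of
// the dividend and uses CSNEG to select between the positive and negated
// remainder depending on the comparison result.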
19914 case Intrinsic::aarch64_sve_cntb:
19915 case Intrinsic::aarch64_sve_cnth:
19916 case Intrinsic::aarch64_sve_cntw:
19917 case Intrinsic::aarch64_sve_cntd:
19927 if (IID == Intrinsic::aarch64_sve_cntp)
19928 return Op.getOperand(1).getValueType().getVectorElementCount();
19930 case Intrinsic::aarch64_sve_cntd:
19932 case Intrinsic::aarch64_sve_cntw:
19934 case Intrinsic::aarch64_sve_cnth:
19936 case Intrinsic::aarch64_sve_cntb:
19939 return std::nullopt;
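// Editorial note: the switch above reports the element count implied by the
// SVE counting intrinsics (cntb/cnth/cntw/cntd, plus cntp via its predicate
// operand); anything else yields std::nullopt.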
19966 return TypeNode->getVT();
19976 if (Mask == UCHAR_MAX)
19978 else if (Mask == USHRT_MAX)
19980 else if (Mask == UINT_MAX)
20002 unsigned ExtendOpcode = Extend.getOpcode();
20018 if (PreExtendType == MVT::Other ||
20023 bool SeenZExtOrSExt = !IsAnyExt;
20031 unsigned Opc = Op.getOpcode();
20042 if (SeenZExtOrSExt && OpcIsSExt != IsSExt)
20045 IsSExt = OpcIsSExt;
20046 SeenZExtOrSExt = true;
20054 EVT PreExtendLegalType =
20060 PreExtendLegalType));
20071 unsigned ExtOpc = !SeenZExtOrSExt
20074 return DAG.getNode(ExtOpc, DL, VT, NBV);
20081 EVT VT = Mul->getValueType(0);
20082 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
20093 return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0),
20094 Op1 ? Op1 : Mul->getOperand(1));
20109 EVT VT = Mul->getValueType(0);
20111 int ConstMultiplier =
20117 unsigned AbsConstValue = abs(ConstMultiplier);
20118 unsigned OperandShift =
20127 unsigned B = ConstMultiplier < 0 ? 32 : 31;
20128 unsigned CeilAxOverB = (AbsConstValue + (B - 1)) / B;
20132 if (LowerBound > UpperBound)
20137 int Shift = std::min(std::max(0, LowerBound), UpperBound);
20140 int32_t RdsvlMul = (AbsConstValue >> (OperandShift + Shift)) *
20141 (ConstMultiplier < 0 ? -1 : 1);
20142 auto Rdsvl = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
20155 EVT VT = N->getValueType(0);
20156 if (VT != MVT::v2i64 && VT != MVT::v1i64 && VT != MVT::v2i32 &&
20157 VT != MVT::v4i32 && VT != MVT::v4i16 && VT != MVT::v8i16)
20159 if (N->getOperand(0).getOpcode() != ISD::AND ||
20160 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
20173 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
20174 V3 != (HalfSize - 1))
20185 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, CM);
20193 EVT VT = N->getValueType(0);
20199 N->getOperand(0).getOperand(0).getValueType() !=
20200 N->getOperand(1).getOperand(0).getValueType())
20204 N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
20207 SDValue N0 = N->getOperand(0).getOperand(0);
20208 SDValue N1 = N->getOperand(1).getOperand(0);
20213 if ((S2 == MVT::i32 && S1 == MVT::i8) ||
20214 (S2 == MVT::i64 && (S1 == MVT::i8 || S1 == MVT::i16))) {
20247 EVT VT = N->getValueType(0);
20251 unsigned AddSubOpc;
20253 auto IsAddSubWith1 = [&](SDValue V) -> bool {
20254 AddSubOpc = V->getOpcode();
20266 if (IsAddSubWith1(N0)) {
20268 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
20271 if (IsAddSubWith1(N1)) {
20273 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
20284 const APInt &ConstValue = C->getAPIntValue();
20291 if (ConstValue.sge(1) && ConstValue.sle(16))
20306 unsigned TrailingZeroes = ConstValue.countr_zero();
20307 if (TrailingZeroes) {
20315 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ADD ||
20316 N->user_begin()->getOpcode() == ISD::SUB))
20321 APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
20324 auto Shl = [&](SDValue N0, unsigned N1) {
20355 for (unsigned i = 1; i < BitWidth / 2; i++) {
20375 unsigned TrailingZeroes = CVMinus1.countr_zero();
20376 APInt SCVMinus1 = CVMinus1.ashr(TrailingZeroes) - 1;
20392 unsigned TrailingZeroes = CVMinus1.countr_zero();
20393 APInt CVPlus1 = CVMinus1.ashr(TrailingZeroes) + 1;
20413 APInt SCVMinus1 = ShiftedConstValue - 1;
20414 APInt SCVPlus1 = ShiftedConstValue + 1;
20415 APInt CVPlus1 = ConstValue + 1;
20419 return Shl(Add(Shl(N0, ShiftAmt), N0), TrailingZeroes);
20422 return Sub(Shl(N0, ShiftAmt), N0);
20424 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
20425 return Sub(Shl(N0, ShiftAmt), Shl(N0, TrailingZeroes));
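// Editorial note: the cases above rewrite multiplies by constants of the form
// (2^N + 1) or (2^N - 1), possibly shifted left by the constant's trailing
// zeros, as shift-and-add or shift-and-sub sequences instead of a MUL.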
20427 if (Subtarget->hasALULSLFast() &&
20428 isPowPlusPlusConst(ConstValue, CVM, CVN)) {
20429 APInt CVMMinus1 = CVM - 1;
20430 APInt CVNMinus1 = CVN - 1;
20431 unsigned ShiftM1 = CVMMinus1.logBase2();
20432 unsigned ShiftN1 = CVNMinus1.logBase2();
20434 if (ShiftM1 <= 4 && ShiftN1 <= 4) {
20436 return Add(Shl(MVal, ShiftN1), MVal);
20439 if (Subtarget->hasALULSLFast() &&
20440 isPowPlusPlusOneConst(ConstValue, CVM, CVN)) {
20444 if (ShiftM <= 4 && ShiftN <= 4) {
20450 if (Subtarget->hasALULSLFast() &&
20451 isPowMinusMinusOneConst(ConstValue, CVM, CVN)) {
20455 if (ShiftM <= 4 && ShiftN <= 4) {
20464 APInt SCVPlus1 = -ShiftedConstValue + 1;
20465 APInt CVNegPlus1 = -ConstValue + 1;
20466 APInt CVNegMinus1 = -ConstValue - 1;
20469 return Sub(N0, Shl(N0, ShiftAmt));
20471 ShiftAmt = CVNegMinus1.logBase2();
20472 return Negate(Add(Shl(N0, ShiftAmt), N0));
20474 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
20475 return Sub(Shl(N0, TrailingZeroes), Shl(N0, ShiftAmt));
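// Editorial note: the negative-constant counterparts, -(2^N - 1) and
// -(2^N + 1), are handled the same way just above, folding the negation into a
// subtract or an explicit negate of the shift-and-add form.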
20495 EVT VT = N->getValueType(0);
20497 N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
20498 VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
20508 if (!BV->isConstant())
20513 EVT IntVT = BV->getValueType(0);
20520 N->getOperand(0)->getOperand(0), MaskConst);
20534 if (N->isStrictFPOpcode())
20545 return !VT.isVector() && VT != MVT::bf16 && VT != MVT::f128;
20548 SDValue SrcVal = N->getOperand(0);
20550 EVT DestTy = N->getValueType(0);
20557 if (DestTy.bitsGT(SrcTy)) {
20566 if (SrcVecTy == MVT::nxv2i32 || DestVecTy == MVT::nxv2i32)
20572 DAG.getPOISON(SrcVecTy), SrcVal, ZeroIdx);
20589 EVT VT = N->getValueType(0);
20590 if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64)
20592 if (VT == MVT::f16 && !Subtarget->hasFullFP16())
20596 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits()