#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-lower"

STATISTIC(NumOptimizedImms,
          "Number of times immediates were optimized");

    cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
    cl::desc("Enable AArch64 logical imm instruction "
    cl::desc("Combine extends of AArch64 masked "
             "gather intrinsics"),
    cl::desc("Combine ext and trunc to TBL"),
    cl::desc("Enable / disable SVE scalable vectors in Global ISel"),
    cl::desc("Generate ISD::PTRADD nodes for pointer arithmetic in "
             "SelectionDAG for FEAT_CPA"),

    AArch64::X3, AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7};
    AArch64::Q3, AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
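// The cl::desc strings above belong to this file's command-line options
// (local-dynamic TLS codegen, logical-immediate optimization, masked-gather
// extend combining, ext/trunc-to-TBL combining, SVE in GlobalISel, and
// FEAT_CPA PTRADD generation); the cl::opt declarations themselves are not
// shown in this excerpt. The two register arrays appear to be the tails of
// the AAPCS64 argument-register lists: integer arguments in X0-X7 and
// vector/FP arguments in Q0-Q7.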
    return MVT::nxv8bf16;

  switch (EC.getKnownMinValue()) {

         "Expected scalable predicate vector type!");
         "Expected legal vector type!");
         "Expected legal type!");
  return VT == MVT::nxv16i1;
         "Unexpected fixed-size unpacked type.");
  case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
  case AArch64ISD::BSWAP_MERGE_PASSTHRU:
  case AArch64ISD::REVH_MERGE_PASSTHRU:
  case AArch64ISD::REVW_MERGE_PASSTHRU:
  case AArch64ISD::REVD_MERGE_PASSTHRU:
  case AArch64ISD::CTLZ_MERGE_PASSTHRU:
  case AArch64ISD::CTPOP_MERGE_PASSTHRU:
  case AArch64ISD::DUP_MERGE_PASSTHRU:
  case AArch64ISD::ABS_MERGE_PASSTHRU:
  case AArch64ISD::NEG_MERGE_PASSTHRU:
  case AArch64ISD::FNEG_MERGE_PASSTHRU:
  case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
  case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
  case AArch64ISD::FCEIL_MERGE_PASSTHRU:
  case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
  case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
  case AArch64ISD::FRINT_MERGE_PASSTHRU:
  case AArch64ISD::FRINT32_MERGE_PASSTHRU:
  case AArch64ISD::FRINT64_MERGE_PASSTHRU:
  case AArch64ISD::FROUND_MERGE_PASSTHRU:
  case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
  case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
  case AArch64ISD::FTRUNC32_MERGE_PASSTHRU:
  case AArch64ISD::FTRUNC64_MERGE_PASSTHRU:
  case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
  case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
  case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
  case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
  case AArch64ISD::FCVTX_MERGE_PASSTHRU:
  case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
  case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
  case AArch64ISD::FSQRT_MERGE_PASSTHRU:
  case AArch64ISD::FRECPX_MERGE_PASSTHRU:
  case AArch64ISD::FABS_MERGE_PASSTHRU:
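  // All of the cases above are AArch64ISD nodes with merging predication:
  // each takes a governing predicate plus a passthru operand, and inactive
  // lanes take their value from the passthru. This list appears to feed a
  // helper (isMergePassthruOpcode) used when such nodes are built or
  // combined.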
  switch (Op.getOpcode()) {
  case AArch64ISD::PTRUE:
  case AArch64ISD::SETCC_MERGE_ZERO:
    switch (Op.getConstantOperandVal(0)) {
    case Intrinsic::aarch64_sve_ptrue:
    case Intrinsic::aarch64_sve_pnext:
    case Intrinsic::aarch64_sve_cmpeq:
    case Intrinsic::aarch64_sve_cmpne:
    case Intrinsic::aarch64_sve_cmpge:
    case Intrinsic::aarch64_sve_cmpgt:
    case Intrinsic::aarch64_sve_cmphs:
    case Intrinsic::aarch64_sve_cmphi:
    case Intrinsic::aarch64_sve_cmpeq_wide:
    case Intrinsic::aarch64_sve_cmpne_wide:
    case Intrinsic::aarch64_sve_cmpge_wide:
    case Intrinsic::aarch64_sve_cmpgt_wide:
    case Intrinsic::aarch64_sve_cmplt_wide:
    case Intrinsic::aarch64_sve_cmple_wide:
    case Intrinsic::aarch64_sve_cmphs_wide:
    case Intrinsic::aarch64_sve_cmphi_wide:
    case Intrinsic::aarch64_sve_cmplo_wide:
    case Intrinsic::aarch64_sve_cmpls_wide:
    case Intrinsic::aarch64_sve_fcmpeq:
    case Intrinsic::aarch64_sve_fcmpne:
    case Intrinsic::aarch64_sve_fcmpge:
    case Intrinsic::aarch64_sve_fcmpgt:
    case Intrinsic::aarch64_sve_fcmpuo:
    case Intrinsic::aarch64_sve_facgt:
    case Intrinsic::aarch64_sve_facge:
    case Intrinsic::aarch64_sve_whilege:
    case Intrinsic::aarch64_sve_whilegt:
    case Intrinsic::aarch64_sve_whilehi:
    case Intrinsic::aarch64_sve_whilehs:
    case Intrinsic::aarch64_sve_whilele:
    case Intrinsic::aarch64_sve_whilelo:
    case Intrinsic::aarch64_sve_whilels:
    case Intrinsic::aarch64_sve_whilelt:
    case Intrinsic::aarch64_sve_match:
    case Intrinsic::aarch64_sve_nmatch:
    case Intrinsic::aarch64_sve_whilege_x2:
    case Intrinsic::aarch64_sve_whilegt_x2:
    case Intrinsic::aarch64_sve_whilehi_x2:
    case Intrinsic::aarch64_sve_whilehs_x2:
    case Intrinsic::aarch64_sve_whilele_x2:
    case Intrinsic::aarch64_sve_whilelo_x2:
    case Intrinsic::aarch64_sve_whilels_x2:
    case Intrinsic::aarch64_sve_whilelt_x2:
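    // These node types and SVE intrinsics all produce predicate results in
    // which the lanes outside the governing predicate are known to be zero;
    // this appears to be the isZeroingInactiveLanes() query, which lets
    // later code skip an explicit AND with the predicate.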
static std::tuple<SDValue, SDValue>

  if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))

    AddrDisc = DAG->getRegister(AArch64::NoRegister, MVT::i64);

  return std::make_tuple(
  if (Subtarget->hasLS64()) {

  if (Subtarget->hasFPARMv8()) {

  if (Subtarget->hasNEON()) {
    addDRType(MVT::v2f32);
    addDRType(MVT::v8i8);
    addDRType(MVT::v4i16);
    addDRType(MVT::v2i32);
    addDRType(MVT::v1i64);
    addDRType(MVT::v1f64);
    addDRType(MVT::v4f16);
    addDRType(MVT::v4bf16);

    addQRType(MVT::v4f32);
    addQRType(MVT::v2f64);
    addQRType(MVT::v16i8);
    addQRType(MVT::v8i16);
    addQRType(MVT::v4i32);
    addQRType(MVT::v2i64);
    addQRType(MVT::v8f16);
    addQRType(MVT::v8bf16);
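    // addDRType()/addQRType() register the 64-bit (D / FPR64) and 128-bit
    // (Q / FPR128) NEON vector types; as their definitions later in this
    // file show, the NEON-specific setup they perform is gated on
    // Subtarget->isNeonAvailable().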
  if (Subtarget->isSVEorStreamingSVEAvailable()) {
  if (Subtarget->useSVEForFixedLengthVectors()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasCSSC()) {
  if (Subtarget->hasFullFP16()) {
  if (Subtarget->hasFullFP16()) {

  auto LegalizeNarrowFP = [this](MVT ScalarVT) {

  if (!Subtarget->hasFullFP16()) {
    LegalizeNarrowFP(MVT::f16);
  LegalizeNarrowFP(MVT::bf16);

  for (MVT Ty : {MVT::f32, MVT::f64})
  if (Subtarget->hasFullFP16())
  for (MVT Ty : {MVT::f32, MVT::f64})
  if (Subtarget->hasFullFP16())
  if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
  if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
  if (Subtarget->outlineAtomics() && !Subtarget->hasLSFE()) {
  if (Subtarget->hasLSE128()) {
  if (Subtarget->hasLSE2()) {
  if (WideVT.getScalarSizeInBits() > NarrowVT.getScalarSizeInBits()) {
  if (Subtarget->hasFPARMv8()) {
  if (!Subtarget->isTargetWindows())
  if (Subtarget->hasSME())
  if (Subtarget->isNeonAvailable()) {
    for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
    if (Subtarget->hasFullFP16()) {
    for (auto VT : {MVT::v1i64, MVT::v2i64}) {
    for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64,
                   MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
    for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
    for (MVT VT : {MVT::v4f16, MVT::v2f32,
                   MVT::v8f16, MVT::v4f32, MVT::v2f64}) {
      if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
    if (Subtarget->hasFullFP16())
    for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32,
                   MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
      if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
    for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
    if (Subtarget->hasFullFP16())
      for (MVT Ty : {MVT::v4f16, MVT::v8f16})
    for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
    if (Subtarget->hasFullFP16())
      for (MVT Ty : {MVT::v4f16, MVT::v8f16})
    for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64})
    for (MVT VT : {MVT::v16f16, MVT::v8f32, MVT::v4f64})
    if (Subtarget->hasDotProd()) {
    if (Subtarget->hasMatMulInt8()) {
      if (VT.is128BitVector() || VT.is64BitVector()) {
    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
  if (Subtarget->hasSME()) {
  if (Subtarget->isSVEorStreamingSVEAvailable()) {
         {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
    for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
      if (Subtarget->hasSVE2p1() ||
          (Subtarget->hasSME2() && Subtarget->isStreaming()))
    for (auto VT : {MVT::v16i8, MVT::v8i8, MVT::v4i16, MVT::v2i32})
    for (auto VT : {MVT::v8f16, MVT::v4f32, MVT::v2f64})
  if (Subtarget->isSVEorStreamingSVEAvailable()) {
    for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
      if (!Subtarget->isLittleEndian())
      if (Subtarget->hasSVE2() ||
          (Subtarget->hasSME() && Subtarget->isStreaming()))
    for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
    for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32})
         {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
          MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16})
         {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
      if (VT != MVT::nxv16i1) {
         {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
          MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
          MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
    for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                    MVT::nxv4f32, MVT::nxv2f64}) {
    for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
    if (Subtarget->hasSVEB16B16() &&
        Subtarget->isNonStreamingSVEorSME2Available()) {
      for (auto VT : {MVT::v4bf16, MVT::v8bf16, MVT::nxv2bf16, MVT::nxv4bf16,
    if (!Subtarget->hasSVEB16B16() ||
        !Subtarget->isNonStreamingSVEorSME2Available()) {
      for (MVT VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
        if (VT != MVT::nxv2bf16 && Subtarget->hasBF16())
    if (Subtarget->hasBF16() && Subtarget->isNeonAvailable())
    for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
                    MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
  if (Subtarget->useSVEForFixedLengthVectors()) {
                                      VT, !Subtarget->isNeonAvailable()))
        addTypeForFixedLengthSVE(VT);
                                      VT, !Subtarget->isNeonAvailable()))
        addTypeForFixedLengthSVE(VT);
    for (auto VT : {MVT::v8i8, MVT::v4i16})
    for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
    for (auto VT : {MVT::v8f16, MVT::v4f32, MVT::v8bf16})
    for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
                    MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
    for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
    for (auto VT : {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1})
  if (Subtarget->isSVEorStreamingSVEAvailable()) {
    if (Subtarget->hasMatMulInt8()) {
                         MVT::nxv16i8, Legal);
    if (Subtarget->hasSVE2() || Subtarget->hasSME()) {
    if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
                         MVT::nxv8f16, Legal);
  if (Subtarget->hasSVE2() ||
      (Subtarget->hasSME() && Subtarget->isStreaming())) {
    for (auto VT : {MVT::v2i32, MVT::v4i16, MVT::v8i8, MVT::v16i8}) {
    for (auto VT : {MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, MVT::nxv16i1}) {
  if (Subtarget->isSVEAvailable()) {
    for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64,
                    MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                    MVT::nxv4f32, MVT::nxv2f64, MVT::nxv2bf16, MVT::nxv4bf16,
                    MVT::nxv8bf16, MVT::v4f16, MVT::v8f16, MVT::v2f32,
                    MVT::v4f32, MVT::v1f64, MVT::v2f64, MVT::v8i8,
                    MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
                    MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
    for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                    MVT::nxv4f32, MVT::nxv2f64, MVT::v4f16, MVT::v8f16,
                    MVT::v2f32, MVT::v4f32, MVT::v2f64})
        {MVT::nxv4i32, MVT::nxv2i64, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv2f64})
    for (auto VT : {MVT::v2i32, MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32,
    for (auto VT : {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64,
                    MVT::nxv2f32, MVT::nxv2f64, MVT::nxv4i8, MVT::nxv4i16,
                    MVT::nxv4i32, MVT::nxv4f32}) {
  if (Subtarget->hasSVE2()) {
  if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
  if (Subtarget->hasSVE()) {
  if (Subtarget->isTargetWindows()) {
void AArch64TargetLowering::addTypeForNEON(MVT VT) {

  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {

  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
      ((VT == MVT::v4bf16 || VT == MVT::v8bf16 || VT == MVT::v4f16 ||
        VT == MVT::v8f16) &&
       Subtarget->hasFullFP16()))

  if (VT != MVT::v8i8 && VT != MVT::v16i8)

  for (unsigned Opcode :
  for (unsigned Opcode :

  if (Subtarget->isLittleEndian()) {
  if (Subtarget->hasD128()) {

  if (!Subtarget->isSVEorStreamingSVEAvailable() ||
        (OpVT != MVT::i32 && OpVT != MVT::i64))))

  if (!Subtarget->isSVEorStreamingSVEAvailable())

  return VT != MVT::nxv16i1 && VT != MVT::nxv8i1 && VT != MVT::nxv4i1 &&
         VT != MVT::nxv2i1 && VT != MVT::v16i1 && VT != MVT::v8i1 &&
         VT != MVT::v4i1 && VT != MVT::v2i1;

                                                    unsigned SearchSize) const {
  if (!Subtarget->hasSVE2() || !Subtarget->isSVEAvailable())

  if (VT == MVT::nxv8i16 || VT == MVT::v8i16)
    return SearchSize != 8;
  if (VT == MVT::nxv16i8 || VT == MVT::v16i8 || VT == MVT::v8i8)
    return SearchSize != 8 && SearchSize != 16;
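  // This appears to be shouldExpandVectorMatch(): the SVE2 MATCH instruction
  // only handles 8-bit elements with a 16-element search segment and 16-bit
  // elements with an 8-element segment, so any other (type, search-size)
  // combination is expanded generically.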
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {

  while (InnerVT != VT) {
  while (InnerVT != VT) {
  bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();
  if (Subtarget->hasMatMulInt8()) {

void AArch64TargetLowering::addDRType(MVT VT) {
  if (Subtarget->isNeonAvailable())

void AArch64TargetLowering::addQRType(MVT VT) {
  if (Subtarget->isNeonAvailable())
    Imm = C->getZExtValue();

  case AArch64ISD::SQDMULH:

  return N->getOpcode() == Opc &&

                                     const APInt &Demanded,

  uint64_t OldImm = Imm, NewImm, Enc;

  if (Imm == 0 || Imm == Mask ||

  unsigned EltSize = Size;

      ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
  uint64_t Sum = RotatedImm + NonDemandedBits;
  bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
  uint64_t Ones = (Sum + Carry) & NonDemandedBits;
  NewImm = (Imm | Ones) & Mask;

  while (EltSize < Size) {
    NewImm |= NewImm << EltSize;

         "demanded bits should never be altered");
  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
  EVT VT = Op.getValueType();

  if (NewImm == 0 || NewImm == OrigMask) {

  EVT VT = Op.getValueType();

  switch (Op.getOpcode()) {
    NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
    NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
    NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
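  // When a cheaper immediate is found, the node appears to be re-emitted
  // directly as the W- or X-form logical-immediate machine instruction
  // (ANDri/ORRri/EORri), selected by the operand size.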
  switch (Op.getOpcode()) {
  case AArch64ISD::DUP: {
    if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
      assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
             "Expected DUP implicit truncation");
      Known = Known.trunc(Op.getScalarValueSizeInBits());
  case AArch64ISD::CSEL: {
  case AArch64ISD::CSNEG:
  case AArch64ISD::CSINC:
  case AArch64ISD::CSINV: {
    if (Op.getOpcode() == AArch64ISD::CSINC)
    else if (Op.getOpcode() == AArch64ISD::CSINV)
    else if (Op.getOpcode() == AArch64ISD::CSNEG)
                                   Op.getScalarValueSizeInBits())));
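    // For the conditional-select family the known bits are the intersection
    // of what is known about both possible results; CSINC/CSINV/CSNEG
    // additionally apply the +1 / bitwise-NOT / negation to the false
    // operand before intersecting (this is computeKnownBitsForTargetNode).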
  case AArch64ISD::BICi: {
        ~(Op->getConstantOperandAPInt(1) << Op->getConstantOperandAPInt(2))
  case AArch64ISD::VLSHR: {
  case AArch64ISD::VASHR: {
  case AArch64ISD::VSHL: {
  case AArch64ISD::MOVI: {
  case AArch64ISD::MOVIshift: {
                                  << Op->getConstantOperandVal(1)));
  case AArch64ISD::MOVImsl: {
        Known.getBitWidth(), ~(~Op->getConstantOperandVal(0) << ShiftAmt)));
  case AArch64ISD::MOVIedit: {
  case AArch64ISD::MVNIshift: {
        ~(Op->getConstantOperandVal(0) << Op->getConstantOperandVal(1)),
  case AArch64ISD::MVNImsl: {
  case AArch64ISD::LOADgot:
  case AArch64ISD::ADDlow: {
    if (!Subtarget->isTargetILP32())
  case AArch64ISD::ASSERT_ZEXT_BOOL: {
    case Intrinsic::aarch64_ldaxr:
    case Intrinsic::aarch64_ldxr: {
    unsigned IntNo = Op.getConstantOperandVal(0);
    case Intrinsic::aarch64_neon_uaddlv: {
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
        unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
    case Intrinsic::aarch64_neon_umaxv:
    case Intrinsic::aarch64_neon_uminv: {
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
      } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
                                                        unsigned Depth) const {
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  case AArch64ISD::FCMEQ:
  case AArch64ISD::FCMGE:
  case AArch64ISD::FCMGT:
  case AArch64ISD::VASHR: {
    return std::min<uint64_t>(Tmp + Op.getConstantOperandVal(1), VTBits);
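    // An arithmetic right shift copies the sign bit into each vacated
    // position, so VASHR has at least (sign bits of the input + shift
    // amount) known sign bits, clamped to the element's bit width.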
                                                          unsigned *Fast) const {
  if (ElementSizeBits % 8 == 0 && Alignment >= Align(ElementSizeBits / 8))

  if (Subtarget->requiresStrictAlign())

    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||

                                                          unsigned *Fast) const {
  if (Subtarget->requiresStrictAlign())

    *Fast = !Subtarget->isMisaligned128StoreSlow() ||
            Ty.getSizeInBytes() != 16 ||
  Register DestReg = MI.getOperand(0).getReg();
  Register IfTrueReg = MI.getOperand(1).getReg();
  Register IfFalseReg = MI.getOperand(2).getReg();
  unsigned CondCode = MI.getOperand(3).getImm();
  bool NZCVKilled = MI.getOperand(4).isKill();

  MBB->addSuccessor(TrueBB);
  MBB->addSuccessor(EndBB);

  MI.eraseFromParent();

         "SEH does not use catchret!");

  Register TargetReg = MI.getOperand(0).getReg();
      TII.probedStackAlloc(MBBI, TargetReg, false);
  MI.eraseFromParent();
  return NextInst->getParent();

  Register RegVL_GPR = MRI.createVirtualRegister(RC_GPR);
  Register RegVL_GPRsp = MRI.createVirtualRegister(RC_GPRsp);
  Register RegSVL_GPR = MRI.createVirtualRegister(RC_GPR);
  Register RegSVL_GPRsp = MRI.createVirtualRegister(RC_GPRsp);

  MBB->addSuccessor(TrapBB);
  MBB->addSuccessor(PassBB);

  MI.eraseFromParent();
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(3));
  MIB.add(MI.getOperand(4));
  MIB.add(MI.getOperand(5));

  MI.eraseFromParent();

  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(1));

  MI.eraseFromParent();

                                                     bool Op0IsDef) const {
  for (unsigned I = 1; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));

  MI.eraseFromParent();
  unsigned StartIdx = 0;

  bool HasTile = BaseReg != AArch64::ZA;
  bool HasZPROut = HasTile && MI.getOperand(0).isReg();
    MIB.add(MI.getOperand(StartIdx));
    MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm(),
    MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm());
    if (MI.getOperand(0).isReg() && !MI.getOperand(1).isImm()) {
      MIB.add(MI.getOperand(StartIdx));

  for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));

  MI.eraseFromParent();

  MIB.add(MI.getOperand(0));

  unsigned Mask = MI.getOperand(0).getImm();
  for (unsigned I = 0; I < 8; I++) {
    if (Mask & (1 << I))

  MI.eraseFromParent();
3204 if (TPIDR2.Uses > 0) {
3207 if (!Subtarget->isLittleEndian())
3209 "TPIDR2 block initialization is not supported on big-endian targets");
3237 "Lazy ZA save is not yet supported on Windows");
3241 if (TPIDR2.
Uses > 0) {
3247 Register SP =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3248 BuildMI(*BB,
MI,
MI.getDebugLoc(),
TII->get(TargetOpcode::COPY), SP)
3252 auto Size =
MI.getOperand(1).getReg();
3253 auto Dest =
MI.getOperand(0).getReg();
3254 BuildMI(*BB,
MI,
MI.getDebugLoc(),
TII->get(AArch64::MSUBXrrr), Dest)
3278 "Lazy ZA save is not yet supported on Windows");
3283 auto Size =
MI.getOperand(1).getReg();
3284 auto Dest =
MI.getOperand(0).getReg();
3285 BuildMI(*BB,
MI,
MI.getDebugLoc(),
TII->get(AArch64::SUBXrx64), AArch64::SP)
3289 BuildMI(*BB,
MI,
MI.getDebugLoc(),
TII->get(TargetOpcode::COPY), Dest)
3295 BuildMI(*BB,
MI,
MI.getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF),
3296 MI.getOperand(0).getReg());
3310 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
3317 MI.getOperand(0).getReg())
3321 MI.getOperand(0).getReg())
3333 Register ResultReg =
MI.getOperand(0).getReg();
3336 }
else if (Subtarget->hasSME()) {
3338 .
addImm(AArch64SysReg::SVCR)
3341 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;
3350 MI.eraseFromParent();
  while (Reg.isVirtual()) {
    assert(DefMI && "Virtual register definition not found");
    unsigned Opcode = DefMI->getOpcode();

    if (Opcode == AArch64::COPY) {
      Reg = DefMI->getOperand(1).getReg();
      if (Reg.isPhysical())

    if (Opcode == AArch64::SUBREG_TO_REG) {
      Reg = DefMI->getOperand(2).getReg();

  int64_t IntDisc = IntDiscOp.getImm();
  assert(IntDisc == 0 && "Blend components are already expanded");

  case AArch64::MOVKXi:
  case AArch64::MOVi32imm:
  case AArch64::MOVi64imm:
    AddrDisc = AArch64::NoRegister;

  if (AddrDisc == AArch64::XZR)
    AddrDisc = AArch64::NoRegister;

  if (AddrDisc && MRI.getRegClass(AddrDisc) != AddrDiscRC) {
    Register TmpReg = MRI.createVirtualRegister(AddrDiscRC);

  AddrDiscOp.setReg(AddrDisc);
  IntDiscOp.setImm(IntDisc);
3435 if (SMEOrigInstr != -1) {
3439 switch (SMEMatrixType) {
3455 switch (
MI.getOpcode()) {
3461 case AArch64::InitTPIDR2Obj:
3463 case AArch64::AllocateZABuffer:
3465 case AArch64::AllocateSMESaveBuffer:
3467 case AArch64::GetSMESaveSize:
3469 case AArch64::EntryPStateSM:
3471 case AArch64::F128CSEL:
3473 case TargetOpcode::STATEPOINT:
3479 MI.addOperand(*
MI.getMF(),
3485 case TargetOpcode::STACKMAP:
3486 case TargetOpcode::PATCHPOINT:
3489 case TargetOpcode::PATCHABLE_EVENT_CALL:
3490 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
3493 case AArch64::CATCHRET:
3496 case AArch64::PROBED_STACKALLOC_DYN:
3499 case AArch64::CHECK_MATCHING_VL_PSEUDO:
3502 case AArch64::LD1_MXIPXX_H_PSEUDO_B:
3503 return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0,
MI, BB);
3504 case AArch64::LD1_MXIPXX_H_PSEUDO_H:
3505 return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0,
MI, BB);
3506 case AArch64::LD1_MXIPXX_H_PSEUDO_S:
3507 return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0,
MI, BB);
3508 case AArch64::LD1_MXIPXX_H_PSEUDO_D:
3509 return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0,
MI, BB);
3510 case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
3511 return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0,
MI, BB);
3512 case AArch64::LD1_MXIPXX_V_PSEUDO_B:
3513 return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0,
MI, BB);
3514 case AArch64::LD1_MXIPXX_V_PSEUDO_H:
3515 return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0,
MI, BB);
3516 case AArch64::LD1_MXIPXX_V_PSEUDO_S:
3517 return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0,
MI, BB);
3518 case AArch64::LD1_MXIPXX_V_PSEUDO_D:
3519 return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0,
MI, BB);
3520 case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
3521 return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0,
MI, BB);
3522 case AArch64::LDR_ZA_PSEUDO:
3524 case AArch64::LDR_TX_PSEUDO:
3526 case AArch64::STR_TX_PSEUDO:
3528 case AArch64::ZERO_M_PSEUDO:
3530 case AArch64::ZERO_T_PSEUDO:
3532 case AArch64::MOVT_TIZ_PSEUDO:
3537 &AArch64::GPR64noipRegClass);
    N = N->getOperand(0).getNode();

  if (N->getOpcode() != AArch64ISD::DUP)

  auto Opnd0 = N->getOperand(0);

                                 CondCode, CondCode2);

  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);

                    << " legal: " << (IsLegal ? "yes\n" : "no\n"));

  if (Op->getFlags().hasNoSignedWrap())

      (isIntEqualitySetCC(CC) ||

  EVT VT = LHS.getValueType();
  if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {
  Chain = RHS.getValue(1);
      IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;

  EVT VT = LHS.getValueType();
  if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {
  unsigned Opcode = AArch64ISD::SUBS;
    Opcode = AArch64ISD::ADDS;
      isIntEqualitySetCC(CC)) {
    Opcode = AArch64ISD::ADDS;
                      LHS.getOperand(0), LHS.getOperand(1));
  } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
    return LHS.getValue(1);
3920 unsigned Opcode = 0;
3923 if (
LHS.getValueType().isFloatingPoint()) {
3924 assert(
LHS.getValueType() != MVT::f128);
3925 if ((
LHS.getValueType() == MVT::f16 && !FullFP16) ||
3926 LHS.getValueType() == MVT::bf16) {
3930 Opcode = AArch64ISD::FCCMP;
3932 APInt Imm = Const->getAPIntValue();
3933 if (Imm.isNegative() && Imm.sgt(-32)) {
3934 Opcode = AArch64ISD::CCMN;
3938 Opcode = AArch64ISD::CCMN;
3941 isIntEqualitySetCC(CC)) {
3944 Opcode = AArch64ISD::CCMN;
3948 Opcode = AArch64ISD::CCMP;
3974 bool &CanNegate,
bool &MustBeFirst,
3975 bool &PreferFirst,
bool WillNegate,
3976 unsigned Depth = 0) {
3982 if (VT == MVT::f128)
3985 MustBeFirst =
false;
3989 {Val->getOperand(0), Val->getOperand(1)});
3996 bool IsOR = Opcode ==
ISD::OR;
4012 if (MustBeFirstL && MustBeFirstR)
4018 if (!CanNegateL && !CanNegateR)
4022 CanNegate = WillNegate && CanNegateL && CanNegateR;
4025 MustBeFirst = !CanNegate;
4030 MustBeFirst = MustBeFirstL || MustBeFirstR;
4032 PreferFirst = PreferFirstL || PreferFirstR;
4055 bool isInteger =
LHS.getValueType().isInteger();
4057 CC = getSetCCInverse(CC,
LHS.getValueType());
4063 assert(
LHS.getValueType().isFloatingPoint());
4089 bool IsOR = Opcode ==
ISD::OR;
4096 PreferFirstL, IsOR);
4097 assert(ValidL &&
"Valid conjunction/disjunction tree");
4105 PreferFirstR, IsOR);
4106 assert(ValidR &&
"Valid conjunction/disjunction tree");
4109 bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR;
4112 if (MustBeFirstL || ShouldFirstL) {
4113 assert(!MustBeFirstR &&
"Valid conjunction/disjunction tree");
4122 bool NegateAfterAll;
4126 assert(CanNegateR &&
"at least one side must be negatable");
4127 assert(!MustBeFirstR &&
"invalid conjunction/disjunction tree");
4131 NegateAfterR =
true;
4134 NegateR = CanNegateR;
4135 NegateAfterR = !CanNegateR;
4138 NegateAfterAll = !Negate;
4140 assert(Opcode ==
ISD::AND &&
"Valid conjunction/disjunction tree");
4141 assert(!Negate &&
"Valid conjunction/disjunction tree");
4145 NegateAfterR =
false;
4146 NegateAfterAll =
false;
4166 bool DummyCanNegate;
4167 bool DummyMustBeFirst;
4168 bool DummyPreferFirst;
4170 DummyPreferFirst,
false))
  auto isSupportedExtend = [&](SDValue V) {
      uint64_t Mask = MaskCst->getZExtValue();
      return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
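      // An AND with 0xFF, 0xFFFF or 0xFFFFFFFF is equivalent to a UXTB,
      // UXTH or UXTW extend, which the cmp/add/sub "extended register"
      // forms absorb for free, so such operands are counted as profitable
      // to fold into the compare.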
  if (!Op.hasOneUse())

  if (isSupportedExtend(Op))

  unsigned Opc = Op.getOpcode();
    uint64_t Shift = ShiftCst->getZExtValue();
    if (isSupportedExtend(Op.getOperand(0)))
      return (Shift <= 4) ? 2 : 1;
    EVT VT = Op.getValueType();
    if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))

  if (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != AArch64ISD::ANDS)

  EVT VT = RHS.getValueType();
  APInt C = RHSC->getAPIntValue();

  if (!C.isMinSignedValue()) {
  assert(!C.isZero() && "C should not be zero here");
  if (!C.isMaxSignedValue()) {
  if (!C.isAllOnes()) {

  bool LHSIsCMN = isCMN(LHS, CC, DAG);
  bool RHSIsCMN = isCMN(RHS, CC, DAG);

      LHS.getNode()->hasNUsesOfValue(1, 0)) {
    int16_t ValueofRHS = RHS->getAsZExtVal();
4379static std::pair<SDValue, SDValue>
4381 assert((
Op.getValueType() == MVT::i32 ||
Op.getValueType() == MVT::i64) &&
4382 "Unsupported value type");
4388 switch (
Op.getOpcode()) {
4392 Opc = AArch64ISD::ADDS;
4396 Opc = AArch64ISD::ADDS;
4400 Opc = AArch64ISD::SUBS;
4404 Opc = AArch64ISD::SUBS;
4412 if (
Op.getValueType() == MVT::i32) {
4435 assert(
Op.getValueType() == MVT::i64 &&
"Expected an i64 value type");
4445 Overflow = DAG.
getNode(AArch64ISD::SUBS,
DL, VTs, UpperBits, LowerBits)
4464 Overflow =
Value.getValue(1);
4466 return std::make_pair(
Value, Overflow);
4471 !Subtarget->isNeonAvailable()))
4472 return LowerToScalableOp(
Op, DAG);
4496 return DAG.
getNode(AArch64ISD::CSEL,
DL,
Op.getValueType(), TVal, FVal,
4519 if (
LHS.getValueType() != MVT::i32 &&
LHS.getValueType() != MVT::i64)
4526 if (!CFVal || !CTVal)
4563 return Cmp.getValue(1);
4576 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, One, Zero, CC, Glue);
4586 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, One, Zero, CC, Glue);
4592 unsigned Opcode,
bool IsSigned) {
4593 EVT VT0 =
Op.getValue(0).getValueType();
4594 EVT VT1 =
Op.getValue(1).getValueType();
4596 if (VT0 != MVT::i32 && VT0 != MVT::i64)
4599 bool InvertCarry = Opcode == AArch64ISD::SBCS;
4618 bool LastOperandIsImm =
false) {
4619 if (
Op.getValueType().isVector())
4624 const unsigned NumOperands =
Op.getNumOperands();
4625 auto getFloatVT = [](
EVT VT) {
4626 assert((VT == MVT::i32 || VT == MVT::i64) &&
"Unexpected VT");
4627 return VT == MVT::i32 ? MVT::f32 : MVT::f64;
4629 auto bitcastToFloat = [&](
SDValue Val) {
4630 return DAG.
getBitcast(getFloatVT(Val.getValueType()), Val);
4634 for (
unsigned I = 1;
I < NumOperands; ++
I) {
4636 const bool KeepInt = LastOperandIsImm && (
I == NumOperands - 1);
4637 NewOps.
push_back(KeepInt ? Val : bitcastToFloat(Val));
4639 EVT OrigVT =
Op.getValueType();
4664 DAG.
getNode(AArch64ISD::CSEL,
DL, MVT::i32, FVal, TVal, CCVal, Overflow);
  unsigned IsWrite = Op.getConstantOperandVal(2);
  unsigned Locality = Op.getConstantOperandVal(3);
  unsigned IsData = Op.getConstantOperandVal(4);
  bool IsStream = !Locality;

  assert(Locality <= 3 && "Prefetch locality out-of-range");
    Locality = 3 - Locality;
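  // ISD::PREFETCH encodes locality as 0 (none) .. 3 (extremely local), while
  // the AArch64 PRFM hint counts target cache levels the other way (L1 is
  // the most local), so the value is flipped before being packed into the
  // PRFM operand below.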
  unsigned PrfOp = (IsWrite << 4) |

  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),

  if (LHSConstOp && RHSConst) {
4713 uint64_t NewMaskValue = LHSConstValue & ~(RHSConstant - 1);
4726 EVT VT =
Op.getValueType();
4730 if (VT == MVT::nxv2f64 && SrcVal.
getValueType() == MVT::nxv2bf16) {
4738 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
4742 return LowerFixedLengthFPExtendToSVE(
Op, DAG);
4744 bool IsStrict =
Op->isStrictFPOpcode();
4745 SDValue Op0 =
Op.getOperand(IsStrict ? 1 : 0);
4747 if (VT == MVT::f64) {
4749 if (Op0VT == MVT::f32 || Op0VT == MVT::f16)
4752 if (Op0VT == MVT::bf16 && IsStrict) {
4755 {Op0,
Op.getOperand(0)});
4759 if (Op0VT == MVT::bf16)
4765 assert(
Op.getValueType() == MVT::f128 &&
"Unexpected lowering");
4771 EVT VT =
Op.getValueType();
4772 bool IsStrict =
Op->isStrictFPOpcode();
4773 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
4775 bool Trunc =
Op.getConstantOperandVal(IsStrict ? 2 : 1) == 1;
4779 if (SrcVT == MVT::nxv8f32)
4783 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
4786 constexpr EVT
I32 = MVT::nxv4i32;
4792 if (SrcVT == MVT::nxv2f32 || SrcVT == MVT::nxv4f32) {
4793 if (Subtarget->hasBF16())
4794 return LowerToPredicatedOp(
Op, DAG,
4795 AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
4797 Narrow = getSVESafeBitCast(I32, SrcVal, DAG);
4802 }
else if (SrcVT == MVT::nxv2f64 &&
4803 (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())) {
4806 Narrow = DAG.
getNode(AArch64ISD::FCVTX_MERGE_PASSTHRU,
DL, MVT::nxv2f32,
4807 Pg, SrcVal, DAG.
getUNDEF(MVT::nxv2f32));
4813 NewOps.
push_back(
Op.getOperand(IsStrict ? 2 : 1));
4814 return DAG.
getNode(
Op.getOpcode(),
DL, VT, NewOps,
Op->getFlags());
4831 IsNaN = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, I1, IsNaN);
4832 Narrow = DAG.
getSelect(
DL, I32, IsNaN, NaN, Narrow);
4837 return getSVESafeBitCast(VT, Narrow, DAG);
4841 return LowerFixedLengthFPRoundToSVE(
Op, DAG);
4846 !((Subtarget->hasNEON() || Subtarget->hasSME()) &&
4847 Subtarget->hasBF16())) {
4862 Narrow = DAG.
getNode(AArch64ISD::FCVTXN,
DL,
F32, Narrow);
4883 Narrow = DAG.
getSelect(
DL, I32, IsNaN, NaN, Narrow);
4900 if (SrcVT != MVT::f128) {
4917 bool IsStrict =
Op->isStrictFPOpcode();
4918 EVT InVT =
Op.getOperand(IsStrict ? 1 : 0).getValueType();
4919 EVT VT =
Op.getValueType();
4922 "Unimplemented SVE support for STRICT_FP_to_INT!");
4931 {
Op.getOperand(0),
Op.getOperand(1)});
4932 return DAG.
getNode(
Op.getOpcode(),
DL, {VT, MVT::Other},
4933 {Ext.getValue(1), Ext.getValue(0)});
4936 Op.getOpcode(),
DL,
Op.getValueType(),
4950 if (InVT == MVT::nxv8f32)
4954 ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
4955 : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
4956 return LowerToPredicatedOp(
Op, DAG, Opcode);
4961 return LowerFixedLengthFPToIntToSVE(
Op, DAG);
4965 if (VTSize < InVTSize) {
4970 {Op.getOperand(0), Op.getOperand(1)});
4980 if (VTSize > InVTSize) {
4987 {
Op.getOperand(0),
Op.getOperand(1)});
4988 return DAG.
getNode(
Op.getOpcode(),
DL, {VT, MVT::Other},
4989 {Ext.getValue(1), Ext.getValue(0)});
5004 return DAG.
getNode(
Op.getOpcode(),
DL, {ScalarVT, MVT::Other},
5005 {Op.getOperand(0), Extract});
5006 return DAG.
getNode(
Op.getOpcode(),
DL, ScalarVT, Extract);
5015 bool IsStrict =
Op->isStrictFPOpcode();
5016 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
5019 return LowerVectorFP_TO_INT(
Op, DAG);
5022 if ((SrcVal.
getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
5028 {
Op.getOperand(0), SrcVal});
5029 return DAG.
getNode(
Op.getOpcode(),
DL, {Op.getValueType(), MVT::Other},
5030 {Ext.getValue(1), Ext.getValue(0)});
5045AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(
SDValue Op,
5051 EVT DstVT =
Op.getValueType();
5057 assert(SatWidth <= DstElementWidth &&
5058 "Saturation width cannot exceed result width");
5071 if ((SrcElementVT == MVT::f16 &&
5072 (!Subtarget->hasFullFP16() || DstElementWidth > 16)) ||
5073 SrcElementVT == MVT::bf16) {
5083 SrcElementVT = MVT::f32;
5084 SrcElementWidth = 32;
5085 }
else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
5086 SrcElementVT != MVT::f16 && SrcElementVT != MVT::bf16)
5091 if (SatWidth == 64 && SrcElementWidth < 64) {
5095 SrcElementVT = MVT::f64;
5096 SrcElementWidth = 64;
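    // For a 64-bit saturation width with a narrower FP source, the code
    // appears to widen the source elements to f64 first, presumably so the
    // conversion has enough range before the saturation clamp is applied.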
5099 if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth) {
5114 if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
5121 SrcVal2 ? DAG.
getNode(
Op.getOpcode(),
DL, IntVT, SrcVal2,
5157 return LowerVectorFP_TO_INT_SAT(
Op, DAG);
5159 EVT DstVT =
Op.getValueType();
5163 assert(SatWidth <= DstWidth &&
"Saturation width cannot exceed result width");
5166 if ((SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) || SrcVT == MVT::bf16) {
5169 }
else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16 &&
5175 if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
5176 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
5177 DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
5178 return DAG.
getNode(
Op.getOpcode(),
DL, DstVT, SrcVal,
5184 if (DstWidth < SatWidth)
5187 if (SrcVT == MVT::f16 && SatVT == MVT::i16 && DstVT == MVT::i32) {
5190 DAG.
getNode(AArch64ISD::FCVTZS_HALF,
DL, MVT::f32, SrcVal);
5195 SDValue CVTf32 = DAG.
getNode(AArch64ISD::FCVTZU_HALF,
DL, MVT::f32, SrcVal);
5220 EVT VT =
Op.getValueType();
5227 *DAG.
getContext(), Src.getValueType().getVectorElementType());
5243 bool IsStrict =
Op->isStrictFPOpcode();
5244 EVT VT =
Op.getValueType();
5247 EVT InVT =
In.getValueType();
5248 unsigned Opc =
Op.getOpcode();
5252 "Unimplemented SVE support for ISD:::STRICT_INT_TO_FP!");
5267 {Op.getOperand(0), In});
5269 {
Op.getValueType(), MVT::Other},
5280 if (VT == MVT::nxv8f32)
5283 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
5284 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
5285 return LowerToPredicatedOp(
Op, DAG, Opcode);
5290 return LowerFixedLengthIntToFPToSVE(
Op, DAG);
5294 if (VTSize < InVTSize) {
5300 bool IsTargetf16 =
false;
5301 if (
Op.hasOneUse() &&
5306 SDNode *
U = *
Op->user_begin();
5307 if (
U->hasOneUse() &&
U->user_begin()->getOpcode() ==
ISD::FP_ROUND) {
5308 EVT TmpVT =
U->user_begin()->getValueType(0);
5314 if (IsTargetf32 && !IsTargetf16) {
5324 {
In.getValue(1),
In.getValue(0),
5332 if (VTSize > InVTSize) {
5349 return DAG.
getNode(
Op.getOpcode(),
DL, {ScalarVT, MVT::Other},
5350 {Op.getOperand(0), Extract});
5351 return DAG.
getNode(
Op.getOpcode(),
DL, ScalarVT, Extract);
5359 if (
Op.getValueType().isVector())
5360 return LowerVectorINT_TO_FP(
Op, DAG);
5362 bool IsStrict =
Op->isStrictFPOpcode();
5363 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
5368 auto IntToFpViaPromotion = [&](EVT PromoteVT) {
5372 {Op.getOperand(0), SrcVal});
5374 {
Op.getValueType(), MVT::Other},
5379 DAG.
getNode(
Op.getOpcode(),
DL, PromoteVT, SrcVal),
5383 if (
Op.getValueType() == MVT::bf16) {
5384 unsigned MaxWidth = IsSigned
5388 if (MaxWidth <= 24) {
5389 return IntToFpViaPromotion(MVT::f32);
5393 if (MaxWidth <= 53) {
5394 return IntToFpViaPromotion(MVT::f64);
5445 IsStrict ? DAG.
getNode(
Op.getOpcode(),
DL, {MVT::f64, MVT::Other},
5446 {Op.getOperand(0), ToRound})
5447 : DAG.
getNode(
Op.getOpcode(),
DL, MVT::f64, ToRound);
5474 {
Op.getValueType(), MVT::Other},
5478 DAG.getIntPtrConstant(0,
DL,
true));
5483 if (
Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
5484 return IntToFpViaPromotion(MVT::f32);
5493 if (
Op.getValueType() != MVT::f128)
5501AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(
SDValue Op,
5503 assert((Subtarget->hasSVE2() ||
5504 (Subtarget->hasSME() && Subtarget->isStreaming())) &&
5505 "Lowering loop_dependence_raw_mask or loop_dependence_war_mask "
5506 "requires SVE or SME");
5509 EVT VT =
Op.getValueType();
5510 unsigned LaneOffset =
Op.getConstantOperandVal(3);
5512 uint64_t EltSizeInBytes =
Op.getConstantOperandVal(2);
5515 if (LaneOffset != 0 || !
is_contained({1u, 2u, 4u, 8u}, EltSizeInBytes))
5546 EVT OpVT =
Op.getValueType();
5547 EVT ArgVT =
Op.getOperand(0).getValueType();
5550 return LowerFixedLengthBitcastToSVE(
Op, DAG);
5558 "Expected int->fp bitcast!");
5571 return getSVESafeBitCast(OpVT, ExtResult, DAG);
5582 return getSVESafeBitCast(OpVT,
Op.getOperand(0), DAG);
5585 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
5589 if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
5592 assert(ArgVT == MVT::i16);
5602static std::optional<uint64_t>
5606 return std::nullopt;
5611 return std::nullopt;
5613 return C->getZExtValue();
5618 EVT VT =
N.getValueType();
5623 for (
const SDValue &Elt :
N->op_values()) {
5626 unsigned HalfSize = EltSize / 2;
5628 if (!
isIntN(HalfSize,
C->getSExtValue()))
5631 if (!
isUIntN(HalfSize,
C->getZExtValue()))
5643 EVT VT =
N.getValueType();
5665 unsigned Opcode =
N.getOpcode();
5676 unsigned Opcode =
N.getOpcode();
5818 if (IsN0SExt && IsN1SExt)
5819 return AArch64ISD::SMULL;
5824 if (IsN0ZExt && IsN1ZExt)
5825 return AArch64ISD::UMULL;
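  // If both multiplicands are sign-extended from a half-width type the MUL
  // can become SMULL, and if both are zero-extended it can become UMULL; the
  // following cases appear to handle mixed and constant operands that can
  // still be proven to fit in the narrow element type.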
5831 if (IsN0ZExt || IsN1ZExt) {
5833 return AArch64ISD::UMULL;
5838 return AArch64ISD::UMULL;
5841 if (IsN0SExt || IsN1SExt) {
5843 return AArch64ISD::SMULL;
5846 return AArch64ISD::SMULL;
5849 if (!IsN1SExt && !IsN1ZExt)
5856 return AArch64ISD::SMULL;
5860 return AArch64ISD::UMULL;
5865 return AArch64ISD::UMULL;
5871 EVT VT =
Op.getValueType();
5873 bool OverrideNEON = !Subtarget->isNeonAvailable();
5875 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MUL_PRED);
5880 "unexpected type for custom-lowering ISD::MUL");
5896 if (VT == MVT::v1i64) {
5897 if (Subtarget->hasSVE())
5898 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MUL_PRED);
5914 if (Subtarget->hasSVE())
5915 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MUL_PRED);
5930 "unexpected types for extended operands to VMULL");
5953 if (
Pattern == AArch64SVEPredPattern::all)
5962 if (PatNumElts == (NumElts * VScale))
5966 return DAG.
getNode(AArch64ISD::PTRUE,
DL, VT,
5971 bool IsSigned,
bool IsEqual) {
5975 if (!
N->getValueType(0).isScalableVector() ||
5980 APInt Y =
N->getConstantOperandAPInt(Op1);
5985 if (IsSigned ?
Y.isMaxSignedValue() :
Y.isMaxValue())
5991 APInt X =
N->getConstantOperandAPInt(Op0);
5994 APInt NumActiveElems =
5995 IsSigned ?
Y.ssub_ov(
X, Overflow) :
Y.usub_ov(
X, Overflow);
6002 NumActiveElems = IsSigned ? NumActiveElems.
sadd_ov(One, Overflow)
6003 : NumActiveElems.
uadd_ov(One, Overflow);
6008 std::optional<unsigned> PredPattern =
6010 unsigned MinSVEVectorSize = std::max(
6012 unsigned ElementSize = 128 /
N->getValueType(0).getVectorMinNumElements();
6013 if (PredPattern != std::nullopt &&
6014 NumActiveElems.
getZExtValue() <= (MinSVEVectorSize / ElementSize))
6015 return getPTrue(DAG,
DL,
N->getValueType(0), *PredPattern);
6024 EVT InVT =
Op.getValueType();
6028 "Expected a predicate-to-predicate bitcast");
6032 "Only expect to cast between legal scalable predicate types!");
6042 Op.getConstantOperandVal(0) == Intrinsic::aarch64_sve_convert_to_svbool &&
6043 Op.getOperand(1).getValueType().bitsGT(VT))
6044 Op =
Op.getOperand(1);
6062 Mask = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, VT, Mask);
6069 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;
6075 TargetLowering::CallLoweringInfo CLI(DAG);
6077 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
6080 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
6126 SDValue TileSlice =
N->getOperand(2);
6129 int32_t ConstAddend = 0;
6138 ConstAddend = ImmNode->getSExtValue();
6142 int32_t ImmAddend = ConstAddend % 16;
6143 if (int32_t
C = (ConstAddend - ImmAddend)) {
6145 VarAddend = VarAddend
6152 auto SVL = DAG.
getNode(AArch64ISD::RDSVL,
DL, MVT::i64,
6164 return DAG.
getNode(IsLoad ? AArch64ISD::SME_ZA_LDR : AArch64ISD::SME_ZA_STR,
6166 {
N.getOperand(0), TileSlice,
Base,
6175 auto Op1 =
Op.getOperand(1);
6176 auto Op2 =
Op.getOperand(2);
6177 auto Mask =
Op.getOperand(3);
6180 EVT Op2VT = Op2.getValueType();
6181 EVT ResVT =
Op.getValueType();
6185 "Expected 8-bit or 16-bit characters.");
6199 Op2 = DAG.
getNode(AArch64ISD::DUPLANE128,
DL, OpContainerVT, Op2,
6227 ID, Mask, Op1, Op2);
6238 unsigned IntNo =
Op.getConstantOperandVal(1);
6243 case Intrinsic::aarch64_prefetch: {
6247 unsigned IsWrite =
Op.getConstantOperandVal(3);
6248 unsigned Locality =
Op.getConstantOperandVal(4);
6249 unsigned IsStream =
Op.getConstantOperandVal(5);
6250 unsigned IsData =
Op.getConstantOperandVal(6);
6251 unsigned PrfOp = (IsWrite << 4) |
6256 return DAG.
getNode(AArch64ISD::PREFETCH,
DL, MVT::Other, Chain,
6259 case Intrinsic::aarch64_range_prefetch: {
6263 unsigned IsWrite =
Op.getConstantOperandVal(3);
6264 unsigned IsStream =
Op.getConstantOperandVal(4);
6265 unsigned PrfOp = (IsStream << 2) | IsWrite;
6268 return DAG.
getNode(AArch64ISD::RANGE_PREFETCH,
DL, MVT::Other, Chain,
6272 case Intrinsic::aarch64_sme_str:
6273 case Intrinsic::aarch64_sme_ldr: {
6276 case Intrinsic::aarch64_sme_za_enable:
6278 AArch64ISD::SMSTART,
DL, DAG.
getVTList(MVT::Other, MVT::Glue),
6281 case Intrinsic::aarch64_sme_za_disable:
6283 AArch64ISD::SMSTOP,
DL, DAG.
getVTList(MVT::Other, MVT::Glue),
6291 unsigned IntNo =
Op.getConstantOperandVal(1);
6296 case Intrinsic::aarch64_mops_memset_tag: {
6303 auto Alignment =
Node->getMemOperand()->getAlign();
6304 bool IsVol =
Node->isVolatile();
6305 auto DstPtrInfo =
Node->getPointerInfo();
6309 SDValue MS = SDI.EmitMOPS(AArch64::MOPSMemorySetTaggingPseudo, DAG,
DL,
6310 Chain, Dst, Val,
Size, Alignment, IsVol,
6311 DstPtrInfo, MachinePointerInfo{});
6324 unsigned IntNo =
Op.getConstantOperandVal(0);
6328 case Intrinsic::thread_pointer: {
6330 return DAG.
getNode(AArch64ISD::THREAD_POINTER,
DL, PtrVT);
6332 case Intrinsic::aarch64_sve_whilewr_b:
6334 Op.getOperand(1),
Op.getOperand(2),
6337 case Intrinsic::aarch64_sve_whilewr_h:
6339 Op.getOperand(1),
Op.getOperand(2),
6342 case Intrinsic::aarch64_sve_whilewr_s:
6344 Op.getOperand(1),
Op.getOperand(2),
6347 case Intrinsic::aarch64_sve_whilewr_d:
6349 Op.getOperand(1),
Op.getOperand(2),
6352 case Intrinsic::aarch64_sve_whilerw_b:
6354 Op.getOperand(1),
Op.getOperand(2),
6357 case Intrinsic::aarch64_sve_whilerw_h:
6359 Op.getOperand(1),
Op.getOperand(2),
6362 case Intrinsic::aarch64_sve_whilerw_s:
6364 Op.getOperand(1),
Op.getOperand(2),
6367 case Intrinsic::aarch64_sve_whilerw_d:
6369 Op.getOperand(1),
Op.getOperand(2),
6372 case Intrinsic::aarch64_neon_abs: {
6373 EVT Ty =
Op.getValueType();
6374 if (Ty == MVT::i64) {
  case Intrinsic::aarch64_neon_pmull64: {
    std::optional<uint64_t> LHSLane =
    std::optional<uint64_t> RHSLane =

    assert((!LHSLane || *LHSLane < 2) && "Expect lane to be None or 0 or 1");
    assert((!RHSLane || *RHSLane < 2) && "Expect lane to be None or 0 or 1");

    auto TryVectorizeOperand = [](SDValue N, std::optional<uint64_t> NLane,
                                  std::optional<uint64_t> OtherLane,
                                  SelectionDAG &DAG) -> SDValue {
      if (OtherLane == 1) {
            DAG.getNode(AArch64ISD::DUPLANE64, DL, MVT::v2i64,
      return DAG.getNode(AArch64ISD::DUP, DL, MVT::v1i64, N);

      assert(N.getValueType() == MVT::i64 &&
             "Intrinsic aarch64_neon_pmull64 requires i64 parameters");

    LHS = TryVectorizeOperand(LHS, LHSLane, RHSLane, DL, DAG);
    RHS = TryVectorizeOperand(RHS, RHSLane, LHSLane, DL, DAG);
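    // PMULL/PMULL2 read the 64-bit scalar from either lane 0 or lane 1 of a
    // v2i64 register. If the other operand already lives in lane 1, the
    // scalar appears to be broadcast with DUPLANE64 so the pmull2 form can
    // be used; otherwise it is simply wrapped into a v1i64 with DUP.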
6443 case Intrinsic::aarch64_neon_smax:
6446 case Intrinsic::aarch64_neon_umax:
6449 case Intrinsic::aarch64_neon_smin:
6452 case Intrinsic::aarch64_neon_umin:
6455 case Intrinsic::aarch64_neon_scalar_sqxtn:
6456 case Intrinsic::aarch64_neon_scalar_sqxtun:
6457 case Intrinsic::aarch64_neon_scalar_uqxtn: {
6458 assert(
Op.getValueType() == MVT::i32 ||
Op.getValueType() == MVT::f32);
6459 if (
Op.getValueType() == MVT::i32)
6464 Op.getOperand(1))));
6467 case Intrinsic::aarch64_neon_sqxtn:
6470 case Intrinsic::aarch64_neon_sqxtun:
6473 case Intrinsic::aarch64_neon_uqxtn:
6476 case Intrinsic::aarch64_neon_sqshrn:
6477 if (
Op.getValueType().isVector())
6480 Op.getOperand(1).getValueType(),
6481 Op.getOperand(1),
Op.getOperand(2)));
6484 case Intrinsic::aarch64_neon_sqshrun:
6485 if (
Op.getValueType().isVector())
6488 Op.getOperand(1).getValueType(),
6489 Op.getOperand(1),
Op.getOperand(2)));
6492 case Intrinsic::aarch64_neon_uqshrn:
6493 if (
Op.getValueType().isVector())
6496 Op.getOperand(1).getValueType(),
6497 Op.getOperand(1),
Op.getOperand(2)));
6500 case Intrinsic::aarch64_neon_sqrshrn:
6501 if (
Op.getValueType().isVector())
6504 Op.getOperand(1).getValueType(),
6505 Op.getOperand(1),
Op.getOperand(2)));
6508 case Intrinsic::aarch64_neon_sqrshrun:
6509 if (
Op.getValueType().isVector())
6512 Op.getOperand(1).getValueType(),
6513 Op.getOperand(1),
Op.getOperand(2)));
6516 case Intrinsic::aarch64_neon_uqrshrn:
6517 if (
Op.getValueType().isVector())
6520 Op.getOperand(1).getValueType(),
6521 Op.getOperand(1),
Op.getOperand(2)));
6524 case Intrinsic::aarch64_neon_sqdmulh:
6526 case Intrinsic::aarch64_neon_sqrdmulh:
6528 case Intrinsic::aarch64_neon_sqrdmlah:
6530 case Intrinsic::aarch64_neon_sqrdmlsh:
6532 case Intrinsic::aarch64_neon_sqrshl:
6534 case Intrinsic::aarch64_neon_sqshl:
6536 case Intrinsic::aarch64_neon_uqrshl:
6538 case Intrinsic::aarch64_neon_uqshl:
6540 case Intrinsic::aarch64_neon_sqadd:
6541 if (
Op.getValueType().isVector())
6546 case Intrinsic::aarch64_neon_sqsub:
6547 if (
Op.getValueType().isVector())
6552 case Intrinsic::aarch64_neon_uqadd:
6553 if (
Op.getValueType().isVector())
6557 case Intrinsic::aarch64_neon_uqsub:
6558 if (
Op.getValueType().isVector())
6562 case Intrinsic::aarch64_neon_sqdmulls_scalar:
6564 case Intrinsic::aarch64_sve_whilelt:
6567 case Intrinsic::aarch64_sve_whilels:
6570 case Intrinsic::aarch64_sve_whilele:
6573 case Intrinsic::aarch64_sve_sunpkhi:
6574 return DAG.
getNode(AArch64ISD::SUNPKHI,
DL,
Op.getValueType(),
6576 case Intrinsic::aarch64_sve_sunpklo:
6577 return DAG.
getNode(AArch64ISD::SUNPKLO,
DL,
Op.getValueType(),
6579 case Intrinsic::aarch64_sve_uunpkhi:
6580 return DAG.
getNode(AArch64ISD::UUNPKHI,
DL,
Op.getValueType(),
6582 case Intrinsic::aarch64_sve_uunpklo:
6583 return DAG.
getNode(AArch64ISD::UUNPKLO,
DL,
Op.getValueType(),
6585 case Intrinsic::aarch64_sve_clasta_n:
6586 return DAG.
getNode(AArch64ISD::CLASTA_N,
DL,
Op.getValueType(),
6587 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6588 case Intrinsic::aarch64_sve_clastb_n:
6589 return DAG.
getNode(AArch64ISD::CLASTB_N,
DL,
Op.getValueType(),
6590 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6591 case Intrinsic::aarch64_sve_lasta:
6592 return DAG.
getNode(AArch64ISD::LASTA,
DL,
Op.getValueType(),
6593 Op.getOperand(1),
Op.getOperand(2));
6594 case Intrinsic::aarch64_sve_lastb:
6595 return DAG.
getNode(AArch64ISD::LASTB,
DL,
Op.getValueType(),
6596 Op.getOperand(1),
Op.getOperand(2));
6597 case Intrinsic::aarch64_sve_tbl:
6598 return DAG.
getNode(AArch64ISD::TBL,
DL,
Op.getValueType(),
Op.getOperand(1),
6600 case Intrinsic::aarch64_sve_trn1:
6601 return DAG.
getNode(AArch64ISD::TRN1,
DL,
Op.getValueType(),
6602 Op.getOperand(1),
Op.getOperand(2));
6603 case Intrinsic::aarch64_sve_trn2:
6604 return DAG.
getNode(AArch64ISD::TRN2,
DL,
Op.getValueType(),
6605 Op.getOperand(1),
Op.getOperand(2));
6606 case Intrinsic::aarch64_sve_uzp1:
6607 return DAG.
getNode(AArch64ISD::UZP1,
DL,
Op.getValueType(),
6608 Op.getOperand(1),
Op.getOperand(2));
6609 case Intrinsic::aarch64_sve_uzp2:
6610 return DAG.
getNode(AArch64ISD::UZP2,
DL,
Op.getValueType(),
6611 Op.getOperand(1),
Op.getOperand(2));
6612 case Intrinsic::aarch64_sve_zip1:
6613 return DAG.
getNode(AArch64ISD::ZIP1,
DL,
Op.getValueType(),
6614 Op.getOperand(1),
Op.getOperand(2));
6615 case Intrinsic::aarch64_sve_zip2:
6616 return DAG.
getNode(AArch64ISD::ZIP2,
DL,
Op.getValueType(),
6617 Op.getOperand(1),
Op.getOperand(2));
6618 case Intrinsic::aarch64_sve_splice:
6619 return DAG.
getNode(AArch64ISD::SPLICE,
DL,
Op.getValueType(),
6620 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6621 case Intrinsic::aarch64_sve_ptrue:
6622 return getPTrue(DAG,
DL,
Op.getValueType(),
Op.getConstantOperandVal(1));
6623 case Intrinsic::aarch64_sve_clz:
6624 return DAG.
getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6625 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6626 case Intrinsic::aarch64_sme_cntsd: {
6632 case Intrinsic::aarch64_sve_cnt: {
6635 if (
Data.getValueType().isFloatingPoint())
6637 return DAG.
getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6638 Op.getOperand(2),
Data,
Op.getOperand(1));
6640 case Intrinsic::aarch64_sve_dupq_lane:
6641 return LowerDUPQLane(
Op, DAG);
6642 case Intrinsic::aarch64_sve_convert_from_svbool:
6643 if (
Op.getValueType() == MVT::aarch64svcount)
6646 case Intrinsic::aarch64_sve_convert_to_svbool:
6647 if (
Op.getOperand(1).getValueType() == MVT::aarch64svcount)
6650 case Intrinsic::aarch64_sve_fneg:
6651 return DAG.
getNode(AArch64ISD::FNEG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6652 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6653 case Intrinsic::aarch64_sve_frintp:
6654 return DAG.
getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6655 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6656 case Intrinsic::aarch64_sve_frintm:
6657 return DAG.
getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6658 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6659 case Intrinsic::aarch64_sve_frinti:
6660 return DAG.
getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU,
DL,
6661 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6663 case Intrinsic::aarch64_sve_frintx:
6664 return DAG.
getNode(AArch64ISD::FRINT_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6665 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6666 case Intrinsic::aarch64_sve_frint32x:
6667 return DAG.
getNode(AArch64ISD::FRINT32_MERGE_PASSTHRU,
DL,
6668 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6670 case Intrinsic::aarch64_sve_frint64x:
6671 return DAG.
getNode(AArch64ISD::FRINT64_MERGE_PASSTHRU,
DL,
6672 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6674 case Intrinsic::aarch64_sve_frinta:
6675 return DAG.
getNode(AArch64ISD::FROUND_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6676 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6677 case Intrinsic::aarch64_sve_frintn:
6678 return DAG.
getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU,
DL,
6679 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6681 case Intrinsic::aarch64_sve_frintz:
6682 return DAG.
getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6683 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6684 case Intrinsic::aarch64_sve_frint32z:
6685 return DAG.
getNode(AArch64ISD::FTRUNC32_MERGE_PASSTHRU,
DL,
6686 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6688 case Intrinsic::aarch64_sve_frint64z:
6689 return DAG.
getNode(AArch64ISD::FTRUNC64_MERGE_PASSTHRU,
DL,
6690 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6692 case Intrinsic::aarch64_sve_ucvtf:
6693 return DAG.
getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU,
DL,
6694 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6696 case Intrinsic::aarch64_sve_scvtf:
6697 return DAG.
getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU,
DL,
6698 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6700 case Intrinsic::aarch64_sve_fcvtzu:
6701 return DAG.
getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6702 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6703 case Intrinsic::aarch64_sve_fcvtzs:
6704 return DAG.
getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6705 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6706 case Intrinsic::aarch64_sve_fsqrt:
6707 return DAG.
getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6708 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6709 case Intrinsic::aarch64_sve_frecpx:
6710 return DAG.
getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6711 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6712 case Intrinsic::aarch64_sve_frecpe_x:
6713 return DAG.
getNode(AArch64ISD::FRECPE,
DL,
Op.getValueType(),
6715 case Intrinsic::aarch64_sve_frecps_x:
6716 return DAG.
getNode(AArch64ISD::FRECPS,
DL,
Op.getValueType(),
6717 Op.getOperand(1),
Op.getOperand(2));
6718 case Intrinsic::aarch64_sve_frsqrte_x:
6719 return DAG.
getNode(AArch64ISD::FRSQRTE,
DL,
Op.getValueType(),
6721 case Intrinsic::aarch64_sve_frsqrts_x:
6722 return DAG.
getNode(AArch64ISD::FRSQRTS,
DL,
Op.getValueType(),
6723 Op.getOperand(1),
Op.getOperand(2));
6724 case Intrinsic::aarch64_sve_fabs:
6725 return DAG.
getNode(AArch64ISD::FABS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6726 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6727 case Intrinsic::aarch64_sve_abs:
6728 return DAG.
getNode(AArch64ISD::ABS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6729 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6730 case Intrinsic::aarch64_sve_neg:
6731 return DAG.
getNode(AArch64ISD::NEG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6732 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6733 case Intrinsic::aarch64_sve_insr: {
6735 EVT ScalarTy =
Scalar.getValueType();
6736 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
6739 return DAG.
getNode(AArch64ISD::INSR,
DL,
Op.getValueType(),
6740 Op.getOperand(1), Scalar);
6742 case Intrinsic::aarch64_sve_rbit:
6743 return DAG.
getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
DL,
6744 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6746 case Intrinsic::aarch64_sve_revb:
6747 return DAG.
getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6748 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6749 case Intrinsic::aarch64_sve_revh:
6750 return DAG.
getNode(AArch64ISD::REVH_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6751 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6752 case Intrinsic::aarch64_sve_revw:
6753 return DAG.
getNode(AArch64ISD::REVW_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6754 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6755 case Intrinsic::aarch64_sve_revd:
6756 return DAG.
getNode(AArch64ISD::REVD_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6757 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6758 case Intrinsic::aarch64_sve_sxtb:
6760 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6761 Op.getOperand(2),
Op.getOperand(3),
6765 case Intrinsic::aarch64_sve_sxth:
6767 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6768 Op.getOperand(2),
Op.getOperand(3),
6772 case Intrinsic::aarch64_sve_sxtw:
6774 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6775 Op.getOperand(2),
Op.getOperand(3),
6779 case Intrinsic::aarch64_sve_uxtb:
6781 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6782 Op.getOperand(2),
Op.getOperand(3),
6786 case Intrinsic::aarch64_sve_uxth:
6788 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6789 Op.getOperand(2),
Op.getOperand(3),
6793 case Intrinsic::aarch64_sve_uxtw:
6795 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6796 Op.getOperand(2),
Op.getOperand(3),
  case Intrinsic::localaddress: {
    const auto &MF = DAG.getMachineFunction();
    const auto *RegInfo = Subtarget->getRegisterInfo();
    unsigned Reg = RegInfo->getLocalAddressRegister(MF);
    return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg,
                              Op.getSimpleValueType());
  }

  case Intrinsic::eh_recoverfp: {
    SDValue FnOp = Op.getOperand(1);
    SDValue IncomingFPOp = Op.getOperand(2);
    GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
    auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
    if (!Fn)
      report_fatal_error(
          "llvm.eh.recoverfp must take a function as the first argument");
    return IncomingFPOp;
  }
  case Intrinsic::aarch64_neon_vsri:
  case Intrinsic::aarch64_neon_vsli:
  case Intrinsic::aarch64_sve_sri:
  case Intrinsic::aarch64_sve_sli: {
    EVT Ty = Op.getValueType();

    bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri ||
                        IntNo == Intrinsic::aarch64_sve_sri;
    unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
    return DAG.getNode(Opcode, DL, Ty, Op.getOperand(1), Op.getOperand(2),
                       Op.getOperand(3));
  }
  case Intrinsic::aarch64_neon_srhadd:
  case Intrinsic::aarch64_neon_urhadd:
  case Intrinsic::aarch64_neon_shadd:
  case Intrinsic::aarch64_neon_uhadd: {
    bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
                        IntNo == Intrinsic::aarch64_neon_shadd);
    bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
                          IntNo == Intrinsic::aarch64_neon_urhadd);
    unsigned Opcode = IsSignedAdd
                          ? (IsRoundingAdd ? ISD::AVGCEILS : ISD::AVGFLOORS)
                          : (IsRoundingAdd ? ISD::AVGCEILU : ISD::AVGFLOORU);
    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1),
                       Op.getOperand(2));
  }
  case Intrinsic::aarch64_neon_saddlp:
  case Intrinsic::aarch64_neon_uaddlp: {
    unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
                          ? AArch64ISD::UADDLP
                          : AArch64ISD::SADDLP;
    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1));
  }
  case Intrinsic::aarch64_neon_sdot:
  case Intrinsic::aarch64_neon_udot:
  case Intrinsic::aarch64_sve_sdot:
  case Intrinsic::aarch64_sve_udot: {
    unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
                       IntNo == Intrinsic::aarch64_sve_udot)
                          ? AArch64ISD::UDOT
                          : AArch64ISD::SDOT;
    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1),
                       Op.getOperand(2), Op.getOperand(3));
  }
  case Intrinsic::aarch64_neon_usdot:
  case Intrinsic::aarch64_sve_usdot: {
    return DAG.getNode(AArch64ISD::USDOT, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  }
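  // The dot-product intrinsics map one-to-one onto target nodes and all take
  // their operands as (accumulator, lhs, rhs), matching the intrinsic operand
  // order, so lowering is a straight opcode swap. A compact sketch of the
  // opcode choice, using a hypothetical helper that is not part of this file:
  //
  //   static unsigned selectDotOpcode(unsigned IntNo) {
  //     if (IntNo == Intrinsic::aarch64_neon_udot ||
  //         IntNo == Intrinsic::aarch64_sve_udot)
  //       return AArch64ISD::UDOT;
  //     if (IntNo == Intrinsic::aarch64_neon_usdot ||
  //         IntNo == Intrinsic::aarch64_sve_usdot)
  //       return AArch64ISD::USDOT;
  //     return AArch64ISD::SDOT;
  //   }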
  case Intrinsic::aarch64_neon_saddlv:
  case Intrinsic::aarch64_neon_uaddlv: {
    EVT OpVT = Op.getOperand(1).getValueType();
    EVT ResVT = Op.getValueType();
    assert(
        ((ResVT == MVT::i32 && (OpVT == MVT::v8i8 || OpVT == MVT::v16i8 ||
                                OpVT == MVT::v8i16 || OpVT == MVT::v4i16)) ||
         (ResVT == MVT::i64 && (OpVT == MVT::v4i32 || OpVT == MVT::v2i32))) &&
        "Unexpected aarch64_neon_u/saddlv type");
    // SADDLV/UADDLV produce a wide vector result; the scalar value is read
    // back out of lane 0.
    SDValue UADDLV = DAG.getNode(
        IntNo == Intrinsic::aarch64_neon_uaddlv ? AArch64ISD::UADDLV
                                                : AArch64ISD::SADDLV,
        DL, ResVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64, Op.getOperand(1));
    SDValue EXTRACT_VEC_ELT =
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, UADDLV,
                    DAG.getConstant(0, DL, MVT::i64));
    return EXTRACT_VEC_ELT;
  }
6896 case Intrinsic::experimental_cttz_elts: {
6910 DAG.
getNode(AArch64ISD::CTTZ_ELTS,
DL, MVT::i64, CttzOp);
6913 case Intrinsic::experimental_vector_match: {
6916 case Intrinsic::aarch64_cls:
6917 case Intrinsic::aarch64_cls64:
6924bool AArch64TargetLowering::shouldExtendGSIndex(
EVT VT,
EVT &EltTy)
const {
6933bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(
SDValue Extend,
6954 if (LD->isVolatile())
6957 EVT MemVT = LD->getMemoryVT();
6958 if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8 && MemVT != MVT::v2i16)
6961 Align Alignment = LD->getAlign();
6963 if (Subtarget.requiresStrictAlign() && Alignment < RequiredAlignment)
6969bool AArch64TargetLowering::isVectorLoadExtDesirable(
SDValue ExtVal)
const {
6977 if (!ExtVT.
isScalableVector() && !Subtarget->useSVEForFixedLengthVectors())
6992 unsigned NumExtMaskedLoads = 0;
6993 for (
auto *U : Ld->getMask()->users())
6995 NumExtMaskedLoads++;
6997 if (NumExtMaskedLoads <= 1)
7003 return PreExtScalarVT == MVT::i8 || PreExtScalarVT == MVT::i16 ||
7004 PreExtScalarVT == MVT::i32 || PreExtScalarVT == MVT::i64;
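  // The final check above only reports an extending masked load as desirable
  // when the pre-extension element type is a legal SVE element size
  // (i8/i16/i32/i64), since those are the sizes the predicated LD1* forms can
  // load and widen directly. For example, a zero-extended nxv4i16 masked load
  // into nxv4i32 can be selected as a single LD1H into 32-bit elements, so
  // keeping the extend attached to the load costs nothing.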
  std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
      {std::make_tuple(false, false, false),
       AArch64ISD::GLD1_MERGE_ZERO},
      {std::make_tuple(false, false, true),
       AArch64ISD::GLD1_UXTW_MERGE_ZERO},
      {std::make_tuple(false, true, false),
       AArch64ISD::GLD1_MERGE_ZERO},
      {std::make_tuple(false, true, true),
       AArch64ISD::GLD1_SXTW_MERGE_ZERO},
      {std::make_tuple(true, false, false),
       AArch64ISD::GLD1_SCALED_MERGE_ZERO},
      {std::make_tuple(true, false, true),
       AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
      {std::make_tuple(true, true, false),
       AArch64ISD::GLD1_SCALED_MERGE_ZERO},
      {std::make_tuple(true, true, true),
       AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
  };
  auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
  return AddrModes.find(Key)->second;
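  // The lookup key is (IsScaled, IsSigned, NeedsExtend). When the index does
  // not need extending (a 64-bit index) its signedness is irrelevant, which
  // is why the two no-extend rows intentionally map to the same opcode. For
  // example, a scaled gather with sign-extended 32-bit indices looks up
  // {true, true, true} and selects AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO.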
  case AArch64ISD::GLD1_MERGE_ZERO:
    return AArch64ISD::GLD1S_MERGE_ZERO;
  case AArch64ISD::GLD1_IMM_MERGE_ZERO:
    return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
  case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
    return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
  case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
    return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
  case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
    return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
  case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
    return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
  case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
    return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
7063 EVT VT =
Op.getValueType();
7087 EVT IndexVT =
Index.getValueType();
7099 assert(Subtarget->useSVEForFixedLengthVectors() &&
7100 "Cannot lower when not using SVE for fixed vectors!");
7109 Index.getValueType().getVectorElementType() == MVT::i64 ||
7110 Mask.getValueType().getVectorElementType() == MVT::i64)
7176 EVT IndexVT =
Index.getValueType();
7188 assert(Subtarget->useSVEForFixedLengthVectors() &&
7189 "Cannot lower when not using SVE for fixed vectors!");
7201 Index.getValueType().getVectorElementType() == MVT::i64 ||
7202 Mask.getValueType().getVectorElementType() == MVT::i64)
7212 if (PromotedVT != VT)
7237 assert(LoadNode &&
"Expected custom lowering of a masked load node");
7238 EVT VT =
Op->getValueType(0);
7241 return LowerFixedLengthVectorMLoadToSVE(
Op, DAG);
7265 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
7278 {Undef, Undef, Undef, Undef});
7288 return DAG.
getStore(ST->getChain(),
DL, ExtractTrunc,
7289 ST->getBasePtr(), ST->getMemOperand());
7295 MVT DestVT =
Op.getSimpleValueType();
7299 unsigned SrcAS =
N->getSrcAddressSpace();
7300 unsigned DestAS =
N->getDestAddressSpace();
7301 assert(SrcAS != DestAS &&
7302 "addrspacecast must be between different address spaces");
7305 "addrspacecast must be between different ptr sizes");
7331 assert (StoreNode &&
"Can only custom lower store nodes");
7335 EVT VT =
Value.getValueType();
7341 Subtarget->useSVEForFixedLengthVectors()))
7342 return LowerFixedLengthVectorStoreToSVE(
Op, DAG);
7354 MemVT == MVT::v4i8) {
7381 AArch64ISD::STNP, Dl, DAG.
getVTList(MVT::Other),
7382 {StoreNode->getChain(), DAG.getBitcast(MVT::v2i64, Lo),
7383 DAG.getBitcast(MVT::v2i64, Hi), StoreNode->getBasePtr()},
7387 }
else if (MemVT == MVT::i128 && StoreNode->
isVolatile()) {
7388 return LowerStore128(
Op, DAG);
7389 }
else if (MemVT == MVT::i64x8) {
7394 EVT PtrVT =
Base.getValueType();
7395 for (
unsigned i = 0; i < 8; i++) {
7396 SDValue Part = DAG.
getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
7416 bool IsStoreRelease =
7419 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
7420 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
7430 unsigned Opcode = IsStoreRelease ? AArch64ISD::STILP : AArch64ISD::STP;
7432 std::swap(StoreValue.first, StoreValue.second);
7435 {StoreNode->getChain(), StoreValue.first, StoreValue.second,
7436 StoreNode->getBasePtr()},
7448 EVT MemVT = Load->getMemoryVT();
7449 EVT ResVT = Load->getValueType(0);
7455 switch (Load->getExtensionType()) {
7468 SDValue Chain = Load->getChain();
7469 SDValue BasePtr = Load->getBasePtr();
7471 Align Alignment = Load->getAlign();
7477 DAG.
getLoad(ScalarLoadType,
DL, Chain, BasePtr, PtrInfo, Alignment);
7489 while (CurrentEltBits < DstEltBits) {
7491 CurrentNumElts = CurrentNumElts / 2;
7497 CurrentEltBits = CurrentEltBits * 2;
7500 Res = DAG.
getNode(ExtOpcode,
DL, ExtVT, Res);
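  // Each iteration of the loop above performs one widening step: the element
  // count is halved, the element width is doubled, and a single extend node
  // is emitted, so the total vector width stays constant. For example, going
  // from 8-bit to 32-bit destination elements takes two iterations:
  //
  //   CurrentEltBits: 8 -> 16 -> 32, CurrentNumElts: N -> N/2 -> N/4
  //
  // and the code after the loop extracts the originally loaded number of
  // elements once CurrentNumElts no longer matches NumElts.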
7503 if (CurrentNumElts != NumElts) {
7516 assert(LoadNode &&
"Expected custom lowering of a load node");
7525 EVT PtrVT =
Base.getValueType();
7526 for (
unsigned i = 0; i < 8; i++) {
7532 Ops.push_back(Part);
7542SDValue AArch64TargetLowering::LowerFixedLengthVectorCompressToSVE(
7545 EVT VT =
Op.getValueType();
7560 EVT VT =
Op.getValueType();
7561 if (!Subtarget->isSVEAvailable())
7565 return LowerFixedLengthVectorCompressToSVE(
Op, DAG);
7571 EVT MaskVT =
Mask.getValueType();
7598 MVT VT =
Op.getSimpleValueType();
7601 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
7609 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT,
Op.getOperand(0), Neg,
7622 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
7637 MVT VT =
Op.getSimpleValueType();
7641 if (NewShiftNo == 0)
7642 return Op.getOperand(0);
7651 if (NewShiftNo == 0)
7652 return Op.getOperand(1);
7654 if (ShiftNo->getZExtValue() == NewShiftNo)
7669 EVT XScalarTy =
X.getValueType();
7674 switch (
Op.getSimpleValueType().SimpleTy) {
7683 ExpVT = MVT::nxv4i32;
7687 ExpVT = MVT::nxv2i64;
7705 if (
X.getValueType() != XScalarTy)
7713 return Op.getOperand(0);
7748 const char FptrReg = 0x11;
7754 Chain,
DL, DAG.
getConstant(0x58000080u | NestReg,
DL, MVT::i32), Addr,
7755 MachinePointerInfo(TrmpAddr));
7760 Chain,
DL, DAG.
getConstant(0x580000b0u | FptrReg,
DL, MVT::i32), Addr,
7761 MachinePointerInfo(TrmpAddr, 4));
7767 MachinePointerInfo(TrmpAddr, 8));
7772 DAG.
getStore(Chain,
DL, Nest, Addr, MachinePointerInfo(TrmpAddr, 16));
7777 DAG.
getStore(Chain,
DL, FPtr, Addr, MachinePointerInfo(TrmpAddr, 24));
7791 EVT VT =
Op.getValueType();
7793 (Subtarget->hasSVEB16B16() &&
7794 Subtarget->isNonStreamingSVEorSME2Available()))
7795 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMUL_PRED);
7797 assert(Subtarget->hasBF16() &&
"Expected +bf16 for custom FMUL lowering");
7798 assert((VT == MVT::nxv4bf16 || VT == MVT::nxv8bf16 || VT == MVT::v8bf16) &&
7799 "Unexpected FMUL VT");
7802 return [&, IID](EVT VT,
auto...
Ops) {
7809 EVT SrcVT =
Value.getValueType();
7820 auto FCVT = MakeGetIntrinsic(Intrinsic::aarch64_sve_fcvt_bf16f32_v2);
7821 auto FCVTNT = MakeGetIntrinsic(Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2);
7826 MakeGetIntrinsic(UseSVEBFMLAL ? Intrinsic::aarch64_sve_bfmlalb
7827 : Intrinsic::aarch64_neon_bfmlalb);
7829 MakeGetIntrinsic(UseSVEBFMLAL ? Intrinsic::aarch64_sve_bfmlalt
7830 : Intrinsic::aarch64_neon_bfmlalt);
7832 EVT AccVT = UseSVEBFMLAL ? MVT::nxv4f32 : MVT::v4f32;
7844 LHS = Reinterpret(
LHS, MVT::nxv8bf16);
7845 RHS = Reinterpret(
RHS, MVT::nxv8bf16);
7848 SDValue BottomF32 = Reinterpret(BFMLALB(AccVT, Zero,
LHS,
RHS), MVT::nxv4f32);
7850 FCVT(MVT::nxv8bf16, DAG.
getPOISON(MVT::nxv8bf16), Pg, BottomF32);
7852 if (VT == MVT::nxv4bf16)
7853 return Reinterpret(BottomBF16, VT);
7855 SDValue TopF32 = Reinterpret(BFMLALT(AccVT, Zero,
LHS,
RHS), MVT::nxv4f32);
7856 SDValue TopBF16 = FCVTNT(MVT::nxv8bf16, BottomBF16, Pg, TopF32);
7857 return Reinterpret(TopBF16, VT);
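  // To summarise the path above: with no direct bf16 multiply, the product is
  // computed per lane in f32 and narrowed back. BFMLALB/BFMLALT against the
  // zero accumulator yield the even/odd lanes of LHS*RHS in f32, FCVT narrows
  // the even lanes to bf16, and FCVTNT interleaves the narrowed odd lanes
  // into the same register. Dataflow sketch in terms of the lambdas defined
  // above:
  //
  //   BottomBF16 = FCVT(nxv8bf16, poison, Pg,
  //                     BFMLALB(AccVT, Zero, LHS, RHS))
  //   Result     = FCVTNT(nxv8bf16, BottomBF16, Pg,
  //                       BFMLALT(AccVT, Zero, LHS, RHS))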
7864 EVT VT =
Op.getValueType();
7867 assert(VT.
isVector() &&
"Scalar fma lowering should be handled by patterns");
7870 if (VT != MVT::v8f16 && VT != MVT::v4f32 && VT != MVT::v2f64)
7871 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMA_PRED);
7875 ? LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMA_PRED)
7885 auto ConvertToScalableFnegMt = [&](
SDValue Op) {
7887 Op = LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
7891 OpA = ConvertToScalableFnegMt(OpA);
7892 OpB = ConvertToScalableFnegMt(OpB);
7893 OpC = ConvertToScalableFnegMt(OpC);
7896 DAG.
getNode(AArch64ISD::FMA_PRED,
DL, ContainerVT, Pg, OpA, OpB, OpC);
7905 switch (
Op.getOpcode()) {
7911 return LowerLOOP_DEPENDENCE_MASK(
Op, DAG);
7913 return LowerBITCAST(
Op, DAG);
7915 return LowerGlobalAddress(
Op, DAG);
7917 return LowerGlobalTLSAddress(
Op, DAG);
7919 return LowerPtrAuthGlobalAddress(
Op, DAG);
7921 return LowerADJUST_TRAMPOLINE(
Op, DAG);
7923 return LowerINIT_TRAMPOLINE(
Op, DAG);
7927 return LowerSETCC(
Op, DAG);
7929 return LowerSETCCCARRY(
Op, DAG);
7933 return LowerBR_CC(
Op, DAG);
7935 return LowerSELECT(
Op, DAG);
7937 return LowerSELECT_CC(
Op, DAG);
7939 return LowerJumpTable(
Op, DAG);
7941 return LowerBR_JT(
Op, DAG);
7943 return LowerBRIND(
Op, DAG);
7945 return LowerConstantPool(
Op, DAG);
7947 return LowerBlockAddress(
Op, DAG);
7949 return LowerVASTART(
Op, DAG);
7951 return LowerVACOPY(
Op, DAG);
7953 return LowerVAARG(
Op, DAG);
7970 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FADD_PRED);
7972 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FSUB_PRED);
7974 return LowerFMUL(
Op, DAG);
7976 return LowerFMA(
Op, DAG);
7978 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FDIV_PRED);
7980 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
7982 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
7984 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
7986 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
7988 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
7990 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
7992 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
7994 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
7996 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
7998 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
8001 return LowerFP_ROUND(
Op, DAG);
8004 return LowerFP_EXTEND(
Op, DAG);
8006 return LowerFRAMEADDR(
Op, DAG);
8008 return LowerSPONENTRY(
Op, DAG);
8010 return LowerRETURNADDR(
Op, DAG);
8012 return LowerADDROFRETURNADDR(
Op, DAG);
8014 return LowerCONCAT_VECTORS(
Op, DAG);
8016 return LowerINSERT_VECTOR_ELT(
Op, DAG);
8018 return LowerEXTRACT_VECTOR_ELT(
Op, DAG);
8020 return LowerBUILD_VECTOR(
Op, DAG);
8023 return LowerEXTEND_VECTOR_INREG(
Op, DAG);
8025 return LowerZERO_EXTEND_VECTOR_INREG(
Op, DAG);
8027 return LowerVECTOR_SHUFFLE(
Op, DAG);
8029 return LowerSPLAT_VECTOR(
Op, DAG);
8031 return LowerEXTRACT_SUBVECTOR(
Op, DAG);
8033 return LowerINSERT_SUBVECTOR(
Op, DAG);
8036 return LowerDIV(
Op, DAG);
8041 return LowerMinMax(
Op, DAG);
8045 return LowerVectorSRA_SRL_SHL(
Op, DAG);
8049 return LowerShiftParts(
Op, DAG);
8052 return LowerCTPOP_PARITY(
Op, DAG);
8054 return LowerFCOPYSIGN(
Op, DAG);
8056 return LowerVectorOR(
Op, DAG);
8058 return LowerXOR(
Op, DAG);
8065 return LowerINT_TO_FP(
Op, DAG);
8070 return LowerFP_TO_INT(
Op, DAG);
8073 return LowerFP_TO_INT_SAT(
Op, DAG);
8075 return LowerGET_ROUNDING(
Op, DAG);
8077 return LowerSET_ROUNDING(
Op, DAG);
8079 return LowerGET_FPMODE(
Op, DAG);
8081 return LowerSET_FPMODE(
Op, DAG);
8083 return LowerRESET_FPMODE(
Op, DAG);
8085 return LowerMUL(
Op, DAG);
8087 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MULHS_PRED);
8089 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MULHU_PRED);
8091 return LowerINTRINSIC_W_CHAIN(
Op, DAG);
8093 return LowerINTRINSIC_WO_CHAIN(
Op, DAG);
8095 return LowerINTRINSIC_VOID(
Op, DAG);
8098 assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
8099 return LowerStore128(
Op, DAG);
8103 return LowerSTORE(
Op, DAG);
8105 return LowerMSTORE(
Op, DAG);
8107 return LowerMGATHER(
Op, DAG);
8109 return LowerMSCATTER(
Op, DAG);
8111 return LowerVECREDUCE_SEQ_FADD(
Op, DAG);
8125 return LowerVECREDUCE(
Op, DAG);
8128 return LowerVECREDUCE_MUL(
Op, DAG);
8130 return LowerATOMIC_LOAD_AND(
Op, DAG);
8132 return LowerDYNAMIC_STACKALLOC(
Op, DAG);
8134 return LowerVSCALE(
Op, DAG);
8136 return LowerVECTOR_COMPRESS(
Op, DAG);
8140 return LowerFixedLengthVectorIntExtendToSVE(
Op, DAG);
8147 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
8148 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
8151 return LowerToPredicatedOp(
Op, DAG,
8152 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
8155 return LowerTRUNCATE(
Op, DAG);
8157 return LowerMLOAD(
Op, DAG);
8160 !Subtarget->isNeonAvailable()))
8161 return LowerFixedLengthVectorLoadToSVE(
Op, DAG);
8162 return LowerLOAD(
Op, DAG);
8166 return LowerToScalableOp(
Op, DAG);
8168 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMAX_PRED);
8170 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMAXNM_PRED);
8172 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMIN_PRED);
8174 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMINNM_PRED);
8176 return LowerFixedLengthVectorSelectToSVE(
Op, DAG);
8178 return LowerABS(
Op, DAG);
8180 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABDS_PRED);
8182 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABDU_PRED);
8184 return LowerAVG(
Op, DAG, AArch64ISD::HADDS_PRED);
8186 return LowerAVG(
Op, DAG, AArch64ISD::HADDU_PRED);
8188 return LowerAVG(
Op, DAG, AArch64ISD::RHADDS_PRED);
8190 return LowerAVG(
Op, DAG, AArch64ISD::RHADDU_PRED);
8192 return LowerBitreverse(
Op, DAG);
8194 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
8196 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU);
8198 return LowerCTTZ(
Op, DAG);
8201 return LowerVECTOR_SPLICE(
Op, DAG);
8203 return LowerVECTOR_DEINTERLEAVE(
Op, DAG);
8205 return LowerVECTOR_INTERLEAVE(
Op, DAG);
8207 return LowerGET_ACTIVE_LANE_MASK(
Op, DAG);
8210 if (
Op.getValueType().isVector())
8211 return LowerVectorXRINT(
Op, DAG);
8215 assert((
Op.getOperand(0).getValueType() == MVT::f16 ||
8216 Op.getOperand(0).getValueType() == MVT::bf16) &&
8217 "Expected custom lowering of rounding operations only for f16");
8220 return DAG.
getNode(
Op.getOpcode(),
DL,
Op.getValueType(), Ext);
8226 assert((
Op.getOperand(1).getValueType() == MVT::f16 ||
8227 Op.getOperand(1).getValueType() == MVT::bf16) &&
8228 "Expected custom lowering of rounding operations only for f16");
8231 {
Op.getOperand(0),
Op.getOperand(1)});
8232 return DAG.
getNode(
Op.getOpcode(),
DL, {Op.getValueType(), MVT::Other},
8233 {Ext.getValue(1), Ext.getValue(0)});
8236 assert(
Op.getOperand(2).getValueType() == MVT::i128 &&
8237 "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
8242 std::pair<SDValue, SDValue> Pair =
8247 SysRegName, Pair.first, Pair.second);
8257 return LowerVECTOR_HISTOGRAM(
Op, DAG);
8262 return LowerPARTIAL_REDUCE_MLA(
Op, DAG);
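  // The switch above is the single entry point SelectionDAG uses for every
  // operation this target marks as Custom: each case simply forwards Op to a
  // dedicated Lower* helper, or to LowerToPredicatedOp/LowerAVG together with
  // the matching predicated SVE opcode. A minimal sketch of the pattern, with
  // a hypothetical class name and only two representative cases:
  //
  //   SDValue MyTargetLowering::LowerOperation(SDValue Op,
  //                                            SelectionDAG &DAG) const {
  //     switch (Op.getOpcode()) {
  //     default:
  //       llvm_unreachable("unimplemented operand");
  //     case ISD::FADD:
  //       return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
  //     case ISD::SETCC:
  //       return LowerSETCC(Op, DAG);
  //     }
  //   }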
8267 return !Subtarget->useSVEForFixedLengthVectors();
8271 EVT VT,
bool OverrideNEON)
const {
8294 return Subtarget->isSVEorStreamingSVEAvailable();
8301 if (!Subtarget->useSVEForFixedLengthVectors())
8321 unsigned Opcode =
N->getOpcode();
8326 unsigned IID =
N->getConstantOperandVal(0);
8327 if (IID < Intrinsic::num_intrinsics)
8341 if (IID == Intrinsic::aarch64_neon_umull ||
8343 IID == Intrinsic::aarch64_neon_smull ||
8352 bool IsVarArg)
const {
8375 if (Subtarget->isTargetWindows()) {
8377 if (Subtarget->isWindowsArm64EC())
8383 if (!Subtarget->isTargetDarwin())
8391 if (Subtarget->isWindowsArm64EC())
8397 if (Subtarget->isWindowsArm64EC())
8421 if (Subtarget->isWindowsArm64EC())
8457 IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE;
8475 RTLIB::Libcall LC = RTLIB::SMEABI_TPIDR2_RESTORE;
8493 Chain = DAG.
getCopyToReg(Chain,
DL, AArch64::X0, TPIDR2Block, Glue);
8495 DAG.
getNode(AArch64ISD::RESTORE_ZA,
DL, MVT::Other,
8496 {Chain, TPIDR2_EL0, DAG.
getRegister(AArch64::X0, MVT::i64),
8497 RestoreRoutine, RegMask, Chain.
getValue(1)});
8513 auto &FuncInfo = *MF.
getInfo<AArch64FunctionInfo>();
8514 auto &Subtarget = DAG.
getSubtarget<AArch64Subtarget>();
8515 const AArch64RegisterInfo &
TRI = *Subtarget.getRegisterInfo();
8517 SMEAttrs SMEFnAttrs = FuncInfo.getSMEFnAttrs();
8543 if (
getTM().useNewSMEABILowering())
8553 AArch64ISD::SMSTART,
DL, DAG.
getVTList(MVT::Other, MVT::Glue), Chain,
8562 {Chain, DAG.getConstant(0, DL, MVT::i32), ZT0FrameIndex});
8573SDValue AArch64TargetLowering::LowerFormalArguments(
8581 Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
8583 (isVarArg && Subtarget->isWindowsArm64EC());
8584 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
8594 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.
getContext());
8602 unsigned NumArgs = Ins.
size();
8604 unsigned CurArgIdx = 0;
8605 bool UseVarArgCC =
false;
8607 UseVarArgCC = isVarArg;
8611 for (
unsigned i = 0; i != NumArgs; ++i) {
8612 MVT ValVT = Ins[i].VT;
8613 if (Ins[i].isOrigArg()) {
8614 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
8615 CurArgIdx = Ins[i].getOrigArgIndex();
8622 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
8624 else if (ActualMVT == MVT::i16)
8628 Ins[i].OrigTy, CCInfo);
8629 assert(!Res &&
"Call operand has unhandled type");
8634 bool IsLocallyStreaming =
8635 !
Attrs.hasStreamingInterface() &&
Attrs.hasStreamingBody();
8639 unsigned ExtraArgLocs = 0;
8640 for (
unsigned i = 0, e = Ins.
size(); i != e; ++i) {
8641 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
8643 if (Ins[i].
Flags.isByVal()) {
8647 int Size = Ins[i].Flags.getByValSize();
8648 unsigned NumRegs = (
Size + 7) / 8;
8660 if (Ins[i].
Flags.isSwiftAsync())
8661 MF.
getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(
true);
8667 const TargetRegisterClass *RC;
8669 if (RegVT == MVT::i32)
8670 RC = &AArch64::GPR32RegClass;
8671 else if (RegVT == MVT::i64)
8672 RC = &AArch64::GPR64RegClass;
8673 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
8674 RC = &AArch64::FPR16RegClass;
8675 else if (RegVT == MVT::f32)
8676 RC = &AArch64::FPR32RegClass;
8678 RC = &AArch64::FPR64RegClass;
8680 RC = &AArch64::FPR128RegClass;
8684 RC = &AArch64::PPRRegClass;
8685 }
else if (RegVT == MVT::aarch64svcount) {
8687 RC = &AArch64::PPRRegClass;
8690 RC = &AArch64::ZPRRegClass;
8697 if (IsLocallyStreaming) {
8712 DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
8731 "Indirect arguments should be scalable on most subtargets");
8753 uint32_t BEAlign = 0;
8754 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
8755 !Ins[i].Flags.isInConsecutiveRegs())
8756 BEAlign = 8 - ArgSize;
8759 MachinePointerInfo PtrInfo;
8765 unsigned ObjOffset = ArgOffset + BEAlign;
8795 "Indirect arguments should be scalable on most subtargets");
8815 Subtarget->isWindowsArm64EC()) &&
8816 "Indirect arguments should be scalable on most subtargets");
8819 unsigned NumParts = 1;
8820 if (Ins[i].
Flags.isInConsecutiveRegs()) {
8821 while (!Ins[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
8830 while (NumParts > 0) {
8831 ArgValue = DAG.
getLoad(PartLoad,
DL, Chain, Ptr, MachinePointerInfo());
8844 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
8850 if (Ins[i].isOrigArg()) {
8851 Argument *OrigArg =
F.getArg(Ins[i].getOrigArgIndex());
8853 if (!Ins[i].
Flags.isZExt()) {
8854 ArgValue = DAG.
getNode(AArch64ISD::ASSERT_ZEXT_BOOL,
DL,
8865 if (
Attrs.hasStreamingCompatibleInterface()) {
8867 DAG.
getNode(AArch64ISD::ENTRY_PSTATE_SM,
DL,
8868 DAG.
getVTList(MVT::i64, MVT::Other), {Chain});
8880 if (IsLocallyStreaming) {
8881 if (
Attrs.hasStreamingCompatibleInterface())
8890 for (
unsigned I=0;
I<InVals.
size(); ++
I) {
8902 if (!Subtarget->isTargetDarwin() || IsWin64) {
8908 saveVarArgRegisters(CCInfo, DAG,
DL, Chain);
8912 unsigned VarArgsOffset = CCInfo.getStackSize();
8915 alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
8926 SmallVectorImpl<ForwardedRegister> &Forwards =
8928 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
8932 if (!CCInfo.isAllocated(AArch64::X8)) {
8934 Forwards.
push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
8943 for (
unsigned I = 0,
E = Ins.
size();
I !=
E; ++
I) {
8945 Ins[
I].Flags.isInReg()) &&
8946 Ins[
I].Flags.isSRet()) {
8961 unsigned StackArgSize = CCInfo.getStackSize();
8963 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
8967 StackArgSize =
alignTo(StackArgSize, 16);
8981 if (Subtarget->hasCustomCallingConv())
8982 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
8984 if (
getTM().useNewSMEABILowering()) {
8987 if (
Attrs.hasZAState()) {
8991 }
else if (
Attrs.hasAgnosticZAInterface()) {
8992 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
8997 auto *RetTy = EVT(MVT::i64).getTypeForEVT(*DAG.
getContext());
8998 TargetLowering::CallLoweringInfo CLI(DAG);
8999 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
9007 {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
9013 Chain = DAG.
getNode(AArch64ISD::SME_STATE_ALLOC,
DL,
9024 if (
Attrs.hasZAState()) {
9031 Buffer = DAG.
getNode(AArch64ISD::ALLOCATE_ZA_BUFFER,
DL,
9032 DAG.
getVTList(MVT::i64, MVT::Other), {Chain, SVL});
9037 {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
9043 AArch64ISD::INIT_TPIDR2OBJ,
DL, DAG.
getVTList(MVT::Other),
9044 { Buffer.getValue(1), Buffer.getValue(0),
9046 }
else if (
Attrs.hasAgnosticZAInterface()) {
9049 DAG.
getNode(AArch64ISD::GET_SME_SAVE_SIZE,
DL,
9050 DAG.
getVTList(MVT::i64, MVT::Other), Chain);
9054 Buffer = DAG.
getNode(AArch64ISD::ALLOC_SME_SAVE_BUFFER,
DL,
9056 {Chain, BufferSize});
9061 {Chain, BufferSize, DAG.getConstant(1, DL, MVT::i64)});
9073 for (
const ISD::InputArg &
I : Ins) {
9074 if (
I.Flags.isSwiftSelf() ||
I.Flags.isSwiftError() ||
9075 I.Flags.isSwiftAsync()) {
9079 "Swift attributes can't be used with preserve_none",
9089void AArch64TargetLowering::saveVarArgRegisters(
CCState &CCInfo,
9095 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9099 Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
9105 if (Subtarget->isWindowsArm64EC()) {
9112 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
9114 if (GPRSaveSize != 0) {
9117 if (GPRSaveSize & 15)
9124 if (Subtarget->isWindowsArm64EC()) {
9137 for (
unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
9143 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
9153 if (Subtarget->hasFPARMv8() && !IsWin64) {
9155 const unsigned NumFPRArgRegs =
FPRArgRegs.size();
9158 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
9160 if (FPRSaveSize != 0) {
9165 for (
unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
9180 if (!MemOps.
empty()) {
9187SDValue AArch64TargetLowering::LowerCallResult(
9191 SDValue ThisVal,
bool RequiresSMChange)
const {
9192 DenseMap<unsigned, SDValue> CopiedRegs;
9194 for (
unsigned i = 0; i != RVLocs.
size(); ++i) {
9195 CCValAssign VA = RVLocs[i];
9199 if (i == 0 && isThisReturn) {
9201 "unexpected return calling convention register assignment");
9237 Val = DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
9304 unsigned NumArgs = Outs.
size();
9305 for (
unsigned i = 0; i != NumArgs; ++i) {
9306 MVT ArgVT = Outs[i].VT;
9309 bool UseVarArgCC =
false;
9313 if (IsCalleeWin64) {
9327 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
9329 else if (ActualMVT == MVT::i16)
9337 Outs[i].OrigTy, CCInfo);
9338 assert(!Res &&
"Call operand has unhandled type");
9353bool AArch64TargetLowering::isEligibleForTailCallOptimization(
9354 const CallLoweringInfo &CLI)
const {
9360 bool IsVarArg = CLI.IsVarArg;
9364 const SelectionDAG &DAG = CLI.DAG;
9371 SMECallAttrs CallAttrs =
9384 MF.
getInfo<AArch64FunctionInfo>()->isSVECC())
9387 bool CCMatch = CallerCC == CalleeCC;
9402 if (i->hasByValAttr())
9411 if (i->hasInRegAttr()) {
9412 unsigned ArgIdx = i - CallerF.
arg_begin();
9413 if (!CLI.CB || CLI.CB->arg_size() <= ArgIdx)
9415 AttributeSet
Attrs = CLI.CB->getParamAttributes(ArgIdx);
9416 if (!
Attrs.hasAttribute(Attribute::InReg) ||
9417 !
Attrs.hasAttribute(Attribute::StructRet) || !i->hasStructRetAttr() ||
9418 CLI.CB->getArgOperand(ArgIdx) != i) {
9435 const GlobalValue *GV =
G->getGlobal();
9438 (!
TT.isOSWindows() ||
TT.isOSBinFormatELF() ||
TT.isOSBinFormatMachO()))
9458 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
9459 const uint32_t *CallerPreserved =
TRI->getCallPreservedMask(MF, CallerCC);
9461 const uint32_t *CalleePreserved =
TRI->getCallPreservedMask(MF, CalleeCC);
9462 if (Subtarget->hasCustomCallingConv()) {
9463 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
9464 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
9466 if (!
TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9475 CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs,
C);
9479 if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
9487 for (
const CCValAssign &ArgLoc : ArgLocs)
9488 if (!ArgLoc.isRegLoc())
9492 const AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9500 A.getValVT().isScalableVector() ||
9501 Subtarget->isWindowsArm64EC()) &&
9502 "Expected value to be scalable");
9522 int ClobberedFI)
const {
9525 int64_t LastByte = FirstByte + MFI.
getObjectSize(ClobberedFI) - 1;
9536 if (FI->getIndex() < 0) {
9538 int64_t InLastByte = InFirstByte;
9541 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
9542 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
9550bool AArch64TargetLowering::DoesCalleeRestoreStack(
CallingConv::ID CallCC,
9551 bool TailCallOpt)
const {
9562 APInt RequiredZero(SizeInBits, 0xFE);
9564 bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero;
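  // RequiredZero == 0xFE asks computeKnownBits to prove that bits [7:1] of
  // the argument are already zero, i.e. that its low byte is 0 or 1, which is
  // all a zero-extended i1 argument needs under the calling convention; in
  // that case no extra masking has to be emitted before the call. Worked
  // check:
  //
  //   Bits.Zero = ...11111110 -> (Bits.Zero & 0xFE) == 0xFE -> ZExtBool = true
  //   Bits.Zero = ...11110000 -> (0xF0 & 0xFE) == 0xF0      -> ZExtBool = false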
9568void AArch64TargetLowering::AdjustInstrPostInstrSelection(
MachineInstr &
MI,
9574 if (
MI.getOpcode() == AArch64::MSRpstatesvcrImm1 ||
9575 MI.getOpcode() == AArch64::MSRpstatePseudo) {
9576 for (
unsigned I =
MI.getNumOperands() - 1;
I > 0; --
I)
9577 if (MachineOperand &MO =
MI.getOperand(
I);
9578 MO.isReg() && MO.isImplicit() && MO.isDef() &&
9579 (AArch64::GPR32RegClass.contains(MO.getReg()) ||
9580 AArch64::GPR64RegClass.contains(MO.getReg())))
9581 MI.removeOperand(
I);
9585 if (
MI.getOperand(0).getImm() == AArch64SVCR::SVCRSM ||
9586 MI.getOperand(0).getImm() == AArch64SVCR::SVCRSMZA) {
9601 const MachineFunction &MF = *
MI.getMF();
9602 if (MF.
getInfo<AArch64FunctionInfo>()->hasStreamingModeChanges() &&
9603 (
MI.getOpcode() == AArch64::ADDXri ||
9604 MI.getOpcode() == AArch64::SUBXri)) {
9605 const MachineOperand &MO =
MI.getOperand(1);
9614 unsigned Condition,
bool InsertVectorLengthCheck)
const {
9622 Ops.push_back(InGlue);
9623 return DAG.
getNode(AArch64ISD::CHECK_MATCHING_VL,
DL,
9627 if (InsertVectorLengthCheck &&
Enable) {
9630 SDValue CheckVL = GetCheckVL(Chain, InGlue);
9643 assert(PStateReg.
isValid() &&
"PStateSM Register is invalid");
9650 Opcode =
Enable ? AArch64ISD::COND_SMSTART : AArch64ISD::COND_SMSTOP;
9651 Ops.push_back(ConditionOp);
9652 Ops.push_back(PStateSM);
9654 Opcode =
Enable ? AArch64ISD::SMSTART : AArch64ISD::SMSTOP;
9656 Ops.push_back(RegMask);
9659 Ops.push_back(InGlue);
9664 if (!InsertVectorLengthCheck ||
Enable)
9691 if (Flags.isZExt() || Flags.isSExt())
9698 Arg->
isAssert() ||
Op == AArch64ISD::ASSERT_ZEXT_BOOL) {
9710 int FI = FINode->getIndex();
9728AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
9730 SelectionDAG &DAG = CLI.DAG;
9737 bool &IsTailCall = CLI.IsTailCall;
9739 bool IsVarArg = CLI.IsVarArg;
9740 const CallBase *CB = CLI.CB;
9743 MachineFunction::CallSiteInfo CSInfo;
9744 bool IsThisReturn =
false;
9746 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9748 bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
9749 bool IsSibCall =
false;
9750 bool GuardWithBTI =
false;
9752 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
9753 !Subtarget->noBTIAtReturnTwice()) {
9759 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.
getContext());
9762 unsigned NumArgs = Outs.
size();
9764 for (
unsigned i = 0; i != NumArgs; ++i) {
9765 if (Outs[i].
Flags.isVarArg() && Outs[i].VT.isScalableVector())
9767 "currently not supported");
9778 RetCCInfo.AnalyzeCallResult(Ins, RetCC);
9782 CSInfo = MachineFunction::CallSiteInfo(*CB);
9787 auto HasSVERegLoc = [](CCValAssign &Loc) {
9788 if (!Loc.isRegLoc())
9790 return AArch64::ZPRRegClass.contains(Loc.getLocReg()) ||
9791 AArch64::PPRRegClass.contains(Loc.getLocReg());
9793 if (
any_of(RVLocs, HasSVERegLoc) ||
any_of(ArgLocs, HasSVERegLoc))
9798 SMECallAttrs CallAttrs =
9801 std::optional<unsigned> ZAMarkerNode;
9804 if (UseNewSMEABILowering) {
9807 ZAMarkerNode = AArch64ISD::REQUIRES_ZA_SAVE;
9809 ZAMarkerNode = AArch64ISD::REQUIRES_ZT0_SAVE;
9812 ZAMarkerNode = AArch64ISD::INOUT_ZA_USE;
9817 IsTailCall = isEligibleForTailCallOptimization(CLI);
9821 if (!ZAMarkerNode && !TailCallOpt && IsTailCall &&
9829 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
9831 "site marked musttail");
9849 if (IsTailCall && !IsSibCall) {
9854 NumBytes =
alignTo(NumBytes, 16);
9859 FPDiff = NumReusableBytes - NumBytes;
9863 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (
unsigned)-FPDiff)
9871 assert(FPDiff % 16 == 0 &&
"unaligned stack on tail call");
9874 auto DescribeCallsite =
9875 [&](OptimizationRemarkAnalysis &
R) -> OptimizationRemarkAnalysis & {
9878 R <<
ore::NV(
"Callee", ES->getSymbol());
9879 else if (CLI.CB && CLI.CB->getCalledFunction())
9880 R <<
ore::NV(
"Callee", CLI.CB->getCalledFunction()->getName());
9882 R <<
"unknown callee";
9887 bool RequiresLazySave = !UseNewSMEABILowering && CallAttrs.
requiresLazySave();
9888 bool RequiresSaveAllZA =
9890 if (RequiresLazySave) {
9901 auto R = CLI.CB ? OptimizationRemarkAnalysis(
"sme",
"SMELazySaveZA",
9903 : OptimizationRemarkAnalysis(
"sme",
"SMELazySaveZA",
9905 return DescribeCallsite(R) <<
" sets up a lazy save for ZA";
9907 }
else if (RequiresSaveAllZA) {
9909 "Cannot share state that may not exist");
9915 if (RequiresSMChange) {
9918 auto R = CLI.CB ? OptimizationRemarkAnalysis(
"sme",
"SMETransition",
9920 : OptimizationRemarkAnalysis(
"sme",
"SMETransition",
9922 DescribeCallsite(R) <<
" requires a streaming mode transition";
9929 bool ShouldPreserveZT0 =
9934 if (ShouldPreserveZT0) {
9938 {Chain, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
9945 assert((!DisableZA || !RequiresLazySave) &&
9946 "Lazy-save should have PSTATE.SM=1 on entry to the function");
9950 AArch64ISD::SMSTOP,
DL, DAG.
getVTList(MVT::Other, MVT::Glue), Chain,
9955 assert((!IsSibCall || !ZAMarkerNode) &&
"ZA markers require CALLSEQ_START");
9965 {Chain, Chain.getValue(1)});
9973 SmallSet<unsigned, 8> RegsUsed;
9977 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
9979 for (
const auto &
F : Forwards) {
9986 unsigned ExtraArgLocs = 0;
9987 for (
unsigned i = 0, e = Outs.
size(); i != e; ++i) {
9988 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
9990 ISD::ArgFlagsTy
Flags = Outs[i].Flags;
10005 if (Outs[i].ArgVT == MVT::i1) {
10027 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
10043 assert((isScalable || Subtarget->isWindowsArm64EC()) &&
10044 "Indirect arguments should be scalable on most subtargets");
10047 TypeSize PartSize = StoreSize;
10048 unsigned NumParts = 1;
10049 if (Outs[i].
Flags.isInConsecutiveRegs()) {
10050 while (!Outs[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
10052 StoreSize *= NumParts;
10061 bool IsPred = VA.
getValVT() == MVT::aarch64svcount ||
10079 if (NumParts > 0) {
10095 if (i == 0 &&
Flags.isReturned() && !
Flags.isSwiftSelf() &&
10096 Outs[0].VT == MVT::i64) {
10098 "unexpected calling convention register assignment");
10100 "unexpected use of 'returned'");
10101 IsThisReturn =
true;
10110 [=](
const std::pair<unsigned, SDValue> &Elt) {
10119 [&VA](MachineFunction::ArgRegPair ArgReg) {
10120 return ArgReg.Reg == VA.getLocReg();
10127 Arg = DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
10132 if (
Options.EmitCallSiteInfo)
10139 MachinePointerInfo DstInfo;
10143 uint32_t BEAlign = 0;
10149 OpSize =
Flags.isByVal() ?
Flags.getByValSize() * 8
10151 OpSize = (OpSize + 7) / 8;
10152 if (!Subtarget->isLittleEndian() && !
Flags.isByVal() &&
10153 !
Flags.isInConsecutiveRegs()) {
10155 BEAlign = 8 - OpSize;
10158 int32_t
Offset = LocMemOffset + BEAlign;
10175 Chain = addTokenForArgument(Chain, DAG, MF.
getFrameInfo(), FI);
10183 if (Outs[i].
Flags.isByVal()) {
10187 Chain,
DL, DstAddr, Arg, SizeNode,
10188 Outs[i].
Flags.getNonZeroByValAlign(),
10190 nullptr, std::nullopt, DstInfo, MachinePointerInfo());
10207 if (IsVarArg && Subtarget->isWindowsArm64EC() &&
10208 !(CLI.CB && CLI.CB->isMustTailCall())) {
10226 if (!MemOpChains.
empty())
10230 if (RequiresSMChange) {
10231 bool InsertVectorLengthCheck =
10241 for (
auto &RegToPass : RegsToPass) {
10243 RegToPass.second, InGlue);
10250 const GlobalValue *CalledGlobal =
nullptr;
10251 unsigned OpFlags = 0;
10253 CalledGlobal =
G->getGlobal();
10254 OpFlags = Subtarget->classifyGlobalFunctionReference(CalledGlobal,
10260 const GlobalValue *GV =
G->getGlobal();
10265 Subtarget->isTargetMachO()) ||
10267 const char *Sym = S->getSymbol();
10280 if (IsTailCall && !IsSibCall) {
10285 unsigned Opc = IsTailCall ? AArch64ISD::TC_RETURN : AArch64ISD::CALL;
10287 std::vector<SDValue>
Ops;
10288 Ops.push_back(Chain);
10289 Ops.push_back(Callee);
10296 "tail calls cannot be marked with clang.arc.attachedcall");
10297 Opc = AArch64ISD::CALL_RVMARKER;
10303 Ops.insert(
Ops.begin() + 1, GA);
10310 Ops.insert(
Ops.begin() + 2, DoEmitMarker);
10312 Opc = AArch64ISD::CALL_ARM64EC_TO_X64;
10313 }
else if (GuardWithBTI) {
10314 Opc = AArch64ISD::CALL_BTI;
10325 const uint64_t
Key = CLI.PAI->Key;
10327 "Invalid auth call key");
10331 std::tie(IntDisc, AddrDisc) =
10334 if (
Opc == AArch64ISD::CALL_RVMARKER)
10335 Opc = AArch64ISD::AUTH_CALL_RVMARKER;
10337 Opc = IsTailCall ? AArch64ISD::AUTH_TC_RETURN : AArch64ISD::AUTH_CALL;
10339 Ops.push_back(IntDisc);
10340 Ops.push_back(AddrDisc);
10345 for (
auto &RegToPass : RegsToPass)
10347 RegToPass.second.getValueType()));
10350 const uint32_t *
Mask;
10351 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10352 if (IsThisReturn) {
10354 Mask =
TRI->getThisReturnPreservedMask(MF, CallConv);
10356 IsThisReturn =
false;
10357 Mask =
TRI->getCallPreservedMask(MF, CallConv);
10360 Mask =
TRI->getCallPreservedMask(MF, CallConv);
10362 if (Subtarget->hasCustomCallingConv())
10363 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
10365 if (
TRI->isAnyArgRegReserved(MF))
10366 TRI->emitReservedArgRegCallError(MF);
10368 assert(Mask &&
"Missing call preserved mask for calling convention");
10372 Ops.push_back(InGlue);
10374 if (CLI.DeactivationSymbol)
10387 if (CalledGlobal &&
10401 if (CalledGlobal &&
10405 uint64_t CalleePopBytes =
10406 DoesCalleeRestoreStack(CallConv, TailCallOpt) ?
alignTo(NumBytes, 16) : 0;
10414 Chain, InGlue, CallConv, IsVarArg, RVLocs,
DL, DAG, InVals, IsThisReturn,
10415 IsThisReturn ? OutVals[0] :
SDValue(), RequiresSMChange);
10420 if (RequiresSMChange) {
10426 if (!UseNewSMEABILowering &&
10430 AArch64ISD::SMSTART,
DL, DAG.
getVTList(MVT::Other, MVT::Glue), Result,
10433 if (ShouldPreserveZT0)
10436 {Result, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
10438 if (RequiresLazySave) {
10440 }
else if (RequiresSaveAllZA) {
10445 if (RequiresSMChange || RequiresLazySave || ShouldPreserveZT0 ||
10446 RequiresSaveAllZA) {
10447 for (
unsigned I = 0;
I < InVals.
size(); ++
I) {
10462 for (
const ISD::OutputArg &O : Outs) {
10463 if (
O.Flags.isSwiftSelf() ||
O.Flags.isSwiftError() ||
10464 O.Flags.isSwiftAsync()) {
10468 "Swift attributes can't be used with preserve_none",
10469 DL.getDebugLoc()));
10478bool AArch64TargetLowering::CanLowerReturn(
10481 const Type *RetTy)
const {
10484 CCState CCInfo(CallConv, isVarArg, MF, RVLocs,
Context);
10495 auto *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
10499 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.
getContext());
10505 SmallSet<unsigned, 4> RegsUsed;
10506 for (
unsigned i = 0, realRVLocIdx = 0; i != RVLocs.
size();
10507 ++i, ++realRVLocIdx) {
10508 CCValAssign &VA = RVLocs[i];
10510 SDValue Arg = OutVals[realRVLocIdx];
10516 if (Outs[i].ArgVT == MVT::i1) {
10532 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
10541 llvm::find_if(RetVals, [=](
const std::pair<unsigned, SDValue> &Elt) {
10551 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10567 for (
auto &RetVal : RetVals) {
10571 DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
10572 DAG.
getVTList(RetVal.second.getValueType(), MVT::Glue),
10574 Chain = DAG.
getCopyToReg(Chain,
DL, RetVal.first, RetVal.second, Glue);
10577 DAG.
getRegister(RetVal.first, RetVal.second.getValueType()));
10588 unsigned RetValReg = AArch64::X0;
10590 RetValReg = AArch64::X8;
10601 if (AArch64::GPR64RegClass.
contains(*
I))
10603 else if (AArch64::FPR64RegClass.
contains(*
I))
10614 RetOps.push_back(Glue);
10625 MachinePointerInfo());
10626 RetOps.insert(RetOps.begin() + 1, Arm64ECRetDest);
10628 return DAG.
getNode(AArch64ISD::TC_RETURN,
DL, MVT::Other, RetOps);
10631 return DAG.
getNode(AArch64ISD::RET_GLUE,
DL, MVT::Other, RetOps);
10640 unsigned Flag)
const {
10642 N->getOffset(), Flag);
10647 unsigned Flag)
const {
10653 unsigned Flag)
const {
10655 N->getOffset(), Flag);
10660 unsigned Flag)
const {
10666 unsigned Flag)
const {
10671template <
class NodeTy>
10673 unsigned Flags)
const {
10681 .
getInfo<AArch64FunctionInfo>()
10682 ->hasELFSignedGOT())
10685 return DAG.
getNode(AArch64ISD::LOADgot,
DL, Ty, GotAddr);
10689template <
class NodeTy>
10691 unsigned Flags)
const {
10697 AArch64ISD::WrapperLarge,
DL, Ty,
10705template <
class NodeTy>
10707 unsigned Flags)
const {
10715 return DAG.
getNode(AArch64ISD::ADDlow,
DL, Ty, ADRP,
Lo);
10719template <
class NodeTy>
10721 unsigned Flags)
const {
10725 SDValue Sym = getTargetNode(
N, Ty, DAG, Flags);
10726 return DAG.
getNode(AArch64ISD::ADR,
DL, Ty, Sym);
10732 const GlobalValue *GV = GN->
getGlobal();
10733 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV,
getTargetMachine());
10737 "unexpected offset in global node");
10742 return getGOT(GN, DAG, OpFlags);
10748 Result = getAddrLarge(GN, DAG, OpFlags);
10750 Result = getAddrTiny(GN, DAG, OpFlags);
10752 Result = getAddr(GN, DAG, OpFlags);
10791AArch64TargetLowering::LowerDarwinGlobalTLSAddress(
SDValue Op,
10793 assert(Subtarget->isTargetDarwin() &&
10794 "This function expects a Darwin target");
10809 PtrMemVT,
DL, Chain, DescAddr,
10824 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10825 const uint32_t *
Mask =
TRI->getTLSCallPreservedMask();
10826 if (Subtarget->hasCustomCallingConv())
10834 unsigned Opcode = AArch64ISD::CALL;
10836 Ops.push_back(Chain);
10837 Ops.push_back(FuncTLVGet);
10841 Opcode = AArch64ISD::AUTH_CALL;
10963SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(
SDValue SymAddr,
10968 auto *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
10972 SDVTList NodeTys = DAG.
getVTList(MVT::Other, MVT::Glue);
10975 bool RequiresSMChange = TLSCallAttrs.requiresSMChange();
10977 auto ChainAndGlue = [](
SDValue Chain) -> std::pair<SDValue, SDValue> {
10978 return {Chain, Chain.
getValue(1)};
10981 if (RequiresSMChange)
10982 std::tie(Chain, Glue) =
10988 ? AArch64ISD::TLSDESC_AUTH_CALLSEQ
10989 : AArch64ISD::TLSDESC_CALLSEQ;
10991 std::tie(Chain, Glue) = ChainAndGlue(DAG.
getNode(
10994 if (TLSCallAttrs.requiresLazySave())
10995 std::tie(Chain, Glue) = ChainAndGlue(DAG.
getNode(
10996 AArch64ISD::REQUIRES_ZA_SAVE,
DL, NodeTys, {Chain, Chain.getValue(1)}));
10998 if (RequiresSMChange)
10999 std::tie(Chain, Glue) =
11007AArch64TargetLowering::LowerELFGlobalTLSAddress(
SDValue Op,
11009 assert(Subtarget->isTargetELF() &&
"This function expects an ELF target");
11012 AArch64FunctionInfo *MFI =
11027 "in local exec TLS model");
11038 const GlobalValue *GV = GA->
getGlobal();
11043 return LowerELFTLSLocalExec(GV, ThreadBase,
DL, DAG);
11046 TPOff = DAG.
getNode(AArch64ISD::LOADgot,
DL, PtrVT, TPOff);
11064 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
11071 GV,
DL, MVT::i64, 0,
11088 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
11096AArch64TargetLowering::LowerWindowsGlobalTLSAddress(
SDValue Op,
11098 assert(Subtarget->isTargetWindows() &&
"Windows specific TLS lowering");
11110 TLSArray = DAG.
getLoad(PtrVT,
DL, Chain, TLSArray, MachinePointerInfo());
11123 DAG.
getNode(AArch64ISD::ADDlow,
DL, PtrVT, ADRP, TLSIndexLo);
11124 TLSIndex = DAG.
getLoad(MVT::i32,
DL, Chain, TLSIndex, MachinePointerInfo());
11134 MachinePointerInfo());
11135 Chain =
TLS.getValue(1);
11138 const GlobalValue *GV = GA->
getGlobal();
11150 Addr = DAG.
getNode(AArch64ISD::ADDlow,
DL, PtrVT, Addr, TGALo);
11160 if (Subtarget->isTargetDarwin())
11161 return LowerDarwinGlobalTLSAddress(
Op, DAG);
11162 if (Subtarget->isTargetELF())
11163 return LowerELFGlobalTLSAddress(
Op, DAG);
11164 if (Subtarget->isTargetWindows())
11165 return LowerWindowsGlobalTLSAddress(
Op, DAG);
11203 assert(TGN->getGlobal()->hasExternalWeakLinkage());
11209 if (TGN->getOffset() != 0)
11211 "unsupported non-zero offset in weak ptrauth global reference");
11218 {TGA, Key, Discriminator}),
11223AArch64TargetLowering::LowerPtrAuthGlobalAddress(
SDValue Op,
11226 uint64_t KeyC =
Op.getConstantOperandVal(1);
11227 SDValue AddrDiscriminator =
Op.getOperand(2);
11228 uint64_t DiscriminatorC =
Op.getConstantOperandVal(3);
11229 EVT VT =
Op.getValueType();
11239 "constant discriminator in ptrauth global out of range [0, 0xffff]");
11242 if (!Subtarget->isTargetELF() && !Subtarget->isTargetMachO())
11245 int64_t PtrOffsetC = 0;
11251 const GlobalValue *PtrGV = PtrN->getGlobal();
11254 const unsigned OpFlags =
11258 "unsupported non-GOT op flags on ptrauth global reference");
11261 PtrOffsetC += PtrN->getOffset();
11264 assert(PtrN->getTargetFlags() == 0 &&
11265 "unsupported target flags on ptrauth global");
11270 ? AddrDiscriminator
11274 if (!NeedsGOTLoad) {
11278 {TPtr, Key, TAddrDiscriminator, Discriminator}),
11287 {TPtr, Key, TAddrDiscriminator, Discriminator}),
11315 SDValue Dest,
unsigned Opcode,
11327 uint64_t Mask =
Op.getConstantOperandVal(1);
11332 if (
Op.getOperand(0).getOpcode() ==
ISD::SHL) {
11333 auto Op00 =
Op.getOperand(0).getOperand(0);
11336 Op.getOperand(1),
Op.getOperand(0).getOperand(1));
11337 return DAG.
getNode(Opcode,
DL, MVT::Other, Chain, Shr,
11357 bool ProduceNonFlagSettingCondBr =
11363 if (
LHS.getValueType() == MVT::f128) {
11368 if (!
RHS.getNode()) {
11388 OFCC = getInvertedCondCode(OFCC);
11391 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
11395 if (
LHS.getValueType().isInteger()) {
11397 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
11402 if (RHSC && RHSC->
getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
11408 return DAG.
getNode(AArch64ISD::CBZ,
DL, MVT::Other, Chain,
LHS, Dest);
11414 return DAG.
getNode(AArch64ISD::CBNZ,
DL, MVT::Other, Chain,
LHS, Dest);
11419 uint64_t SignBitPos;
11421 return DAG.
getNode(AArch64ISD::TBNZ,
DL, MVT::Other, Chain,
LHS,
11426 LHS.getOpcode() !=
ISD::AND && ProduceNonFlagSettingCondBr) {
11430 uint64_t SignBitPos;
11432 return DAG.
getNode(AArch64ISD::TBZ,
DL, MVT::Other, Chain,
LHS,
11438 if (Subtarget->hasCMPBR() &&
11440 ProduceNonFlagSettingCondBr) {
11449 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
11453 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::bf16 ||
11454 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
11463 DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CC1Val, Cmp);
11466 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, BR1, Dest, CC2Val,
11475 if (!Subtarget->isNeonAvailable() &&
11476 !Subtarget->useSVEForFixedLengthVectors())
11479 EVT VT =
Op.getValueType();
11507 if (!VT.
isVector() && !Subtarget->isNeonAvailable() &&
11508 Subtarget->isSVEorStreamingSVEAvailable()) {
11509 if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64 && VT != MVT::bf16)
11524 auto BitCast = [
this](EVT VT,
SDValue Op, SelectionDAG &DAG) {
11526 return getSVESafeBitCast(VT,
Op, DAG);
11533 auto SetVecVal = [&](
int Idx = -1) {
11540 VecVal1 = BitCast(VecVT, In1, DAG);
11541 VecVal2 = BitCast(VecVT, In2, DAG);
11547 }
else if (VT == MVT::f64) {
11548 VecVT = MVT::v2i64;
11549 SetVecVal(AArch64::dsub);
11550 }
else if (VT == MVT::f32) {
11551 VecVT = MVT::v4i32;
11552 SetVecVal(AArch64::ssub);
11553 }
else if (VT == MVT::f16 || VT == MVT::bf16) {
11554 VecVT = MVT::v8i16;
11555 SetVecVal(AArch64::hsub);
11566 if (VT == MVT::f64 || VT == MVT::v2f64) {
11574 DAG.
getNode(AArch64ISD::BSP,
DL, VecVT, SignMaskV, VecVal1, VecVal2);
11575 if (VT == MVT::f16 || VT == MVT::bf16)
11577 if (VT == MVT::f32)
11579 if (VT == MVT::f64)
11582 return BitCast(VT, BSP, DAG);
11588 Attribute::NoImplicitFloat))
11591 EVT VT =
Op.getValueType();
11594 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
11602 if (VT == MVT::i32 && IsParity)
11605 if (Subtarget->isSVEorStreamingSVEAvailable()) {
11606 if (VT == MVT::i32 || VT == MVT::i64) {
11607 EVT ContainerVT = VT == MVT::i32 ? MVT::nxv4i32 : MVT::nxv2i64;
11619 if (VT == MVT::i128) {
11632 if (!Subtarget->isNeonAvailable())
11643 if (VT == MVT::i32 || VT == MVT::i64) {
11644 if (VT == MVT::i32)
11650 AddV = DAG.
getNode(AArch64ISD::NVCAST,
DL,
11651 VT == MVT::i32 ? MVT::v2i32 : MVT::v1i64, AddV);
11657 }
else if (VT == MVT::i128) {
11663 DAG.
getNode(AArch64ISD::NVCAST,
DL, MVT::v2i64, AddV),
11671 assert(!IsParity &&
"ISD::PARITY of vector types not supported");
11673 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
11674 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
11675 "Unexpected type for custom ctpop lowering");
  EVT DT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
  if (VT == MVT::v2i64) {
    Val = DAG.getNode(AArch64ISD::UDOT, DL, DT, Zeros, Ones, Val);
    Val = DAG.getNode(AArch64ISD::UADDLP, DL, VT, Val);
  } else if (VT == MVT::v2i32) {
    Val = DAG.getNode(AArch64ISD::UDOT, DL, DT, Zeros, Ones, Val);
  } else if (VT == MVT::v4i32) {
    Val = DAG.getNode(AArch64ISD::UDOT, DL, DT, Zeros, Ones, Val);
11702 unsigned EltSize = 8;
11708 Val = DAG.
getNode(AArch64ISD::UADDLP,
DL, WidenVT, Val);
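  // When the dot-product shortcut is not taken, the vector popcount is built
  // from the byte-wise count followed by repeated UADDLP pairwise widening
  // adds until the element width of the result type is reached. For example,
  // for a v4i32 result the chain of value types is:
  //
  //   v16i8 (per-byte counts) -> UADDLP -> v8i16 -> UADDLP -> v4i32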
11715 EVT VT =
Op.getValueType();
11718 VT, Subtarget->useSVEForFixedLengthVectors()));
11728 EVT VT =
Op.getValueType();
11730 unsigned Opcode =
Op.getOpcode();
11757 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SMAX_PRED);
11759 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SMIN_PRED);
11761 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::UMAX_PRED);
11763 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::UMIN_PRED);
11775 EVT VT =
Op.getValueType();
11779 VT, Subtarget->useSVEForFixedLengthVectors()))
11780 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU);
11792 REVB = DAG.
getNode(AArch64ISD::REV32,
DL, VST,
Op.getOperand(0));
11799 REVB = DAG.
getNode(AArch64ISD::REV32,
DL, VST,
Op.getOperand(0));
11806 REVB = DAG.
getNode(AArch64ISD::REV64,
DL, VST,
Op.getOperand(0));
11813 REVB = DAG.
getNode(AArch64ISD::REV64,
DL, VST,
Op.getOperand(0));
11819 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT,
11826 SmallVector<std::pair<SDValue, SDValue>, 16> &WorkList) {
11832 N =
N->getOperand(0);
11836 WorkList.push_back(std::make_pair(
N->getOperand(0),
N->getOperand(1)));
11842 if (
N->getOpcode() !=
ISD::OR || !
N->hasOneUse())
11856 EVT VT =
N->getValueType(0);
11866 unsigned NumXors = 0;
11871 std::tie(XOR0, XOR1) = WorkList[0];
11874 for (
unsigned I = 1;
I < WorkList.
size();
I++) {
11875 std::tie(XOR0, XOR1) = WorkList[
I];
11877 Cmp = DAG.
getNode(LogicOp,
DL, VT, Cmp, CmpChain);
11889 if (
Op.getValueType().isVector())
11890 return LowerVSETCC(
Op, DAG);
11892 bool IsStrict =
Op->isStrictFPOpcode();
11894 unsigned OpNo = IsStrict ? 1 : 0;
11897 Chain =
Op.getOperand(0);
11904 EVT VT =
Op.getValueType();
11910 if (
LHS.getValueType() == MVT::f128) {
11915 if (!
RHS.getNode()) {
11916 assert(
LHS.getValueType() ==
Op.getValueType() &&
11917 "Unexpected setcc expansion!");
11922 if (
LHS.getValueType().isInteger()) {
11938 SDValue Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, FVal, TVal, CCVal, Cmp);
11943 assert(
LHS.getValueType() == MVT::bf16 ||
LHS.getValueType() == MVT::f16 ||
11944 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
11965 Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, FVal, TVal, CC1Val, Cmp);
11975 DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, FVal, CC1Val, Cmp);
11978 Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, CS1, CC2Val, Cmp);
11988 EVT VT =
LHS.getValueType();
11989 if (VT != MVT::i32 && VT != MVT::i64)
11999 EVT OpVT =
Op.getValueType();
12008 return DAG.
getNode(AArch64ISD::CSEL,
DL, OpVT, FVal, TVal, CCVal,
12017 "function only supposed to emit natural comparisons");
12026 if (!
LHS.getValueType().isVector()) {
12031 DAG.
getUNDEF(VecVT), Fcmeq, Zero);
12065 assert(!
LHS.getValueType().isVector());
12066 assert(!
RHS.getValueType().isVector());
12070 if (!CTVal || !CFVal)
12084 bool OneNaN =
false;
12100 bool ShouldInvert =
false;
12109 if (!Cmp2 && !ShouldInvert)
12127SDValue AArch64TargetLowering::LowerSELECT_CC(
12133 if (
LHS.getValueType() == MVT::f128) {
12138 if (!
RHS.getNode()) {
12145 if ((
LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
12146 LHS.getValueType() == MVT::bf16) {
12152 if (
LHS.getValueType().isInteger()) {
12154 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
12166 LHS.getValueType() ==
RHS.getValueType()) {
12167 EVT VT =
LHS.getValueType();
12173 Shift = DAG.
getNOT(
DL, Shift, VT);
12187 uint64_t SignBitPos;
12189 EVT TestVT =
LHS.getValueType();
12193 LHS, SignBitConst);
12221 unsigned Opcode = AArch64ISD::CSEL;
12229 }
else if (CTVal && CFVal && CTVal->
isOne() && CFVal->
isZero()) {
12249 }
else if (CTVal && CFVal) {
12257 if (TrueVal == ~FalseVal) {
12258 Opcode = AArch64ISD::CSINV;
12259 }
else if (FalseVal > std::numeric_limits<int64_t>::min() &&
12260 TrueVal == -FalseVal) {
12261 Opcode = AArch64ISD::CSNEG;
12271 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
12272 Opcode = AArch64ISD::CSINC;
12274 if (TrueVal32 > FalseVal32) {
12280 const uint64_t TrueVal64 =
TrueVal;
12281 const uint64_t FalseVal64 =
FalseVal;
12283 if ((TrueVal64 == FalseVal64 + 1) || (TrueVal64 + 1 == FalseVal64)) {
12284 Opcode = AArch64ISD::CSINC;
12286 if (TrueVal > FalseVal) {
12299 if (Opcode != AArch64ISD::CSEL) {
12312 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->
isOne() &&
12317 if (CTVal && CTVal == RHSVal && AArch64CC ==
AArch64CC::EQ)
12319 else if (CFVal && CFVal == RHSVal && AArch64CC ==
AArch64CC::NE)
12321 }
else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->
isOne()) {
12322 assert (CTVal && CFVal &&
"Expected constant operands for CSNEG.");
12327 Opcode = AArch64ISD::CSINV;
12336 return DAG.
getNode(Opcode,
DL, VT, TVal, FVal, CCVal, Cmp);
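  // When both arms of the select are constants, the code above upgrades CSEL
  // to a cheaper conditional form so only one constant has to be
  // materialised:
  //   TrueVal == ~FalseVal    -> CSINV
  //   TrueVal == -FalseVal    -> CSNEG
  //   TrueVal == FalseVal + 1 -> CSINC (or the commuted case with the
  //                              condition inverted)
  // For example, select(cc, 1, -1) hits the CSNEG case: with a single
  // register W holding 1, "csneg Wd, W, W, cc" produces 1 when cc holds and
  // -1 otherwise.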
12340 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::f32 ||
12341 LHS.getValueType() == MVT::f64);
12348 if (Subtarget->isNeonAvailable() &&
all_of(
Users, [](
const SDNode *U) {
12349 switch (
U->getOpcode()) {
12354 case AArch64ISD::DUP:
12372 if (
Flags.hasNoSignedZeros()) {
12376 if (RHSVal && RHSVal->
isZero()) {
12384 CFVal && CFVal->
isZero() &&
12392 SDValue CS1 = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, FVal, CC1Val, Cmp);
12398 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, CS1, CC2Val, Cmp);
12407 EVT Ty =
Op.getValueType();
12408 auto Idx =
Op.getConstantOperandAPInt(2);
12409 int64_t IdxVal = Idx.getSExtValue();
12411 "Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
12420 std::optional<unsigned> PredPattern;
12432 return DAG.
getNode(AArch64ISD::SPLICE,
DL, Ty, Pred,
Op.getOperand(0),
12452 SDNodeFlags
Flags =
Op->getFlags();
12454 return LowerSELECT_CC(CC,
LHS,
RHS, TVal, FVal,
Op->users(), Flags,
DL, DAG);
12464 EVT Ty =
Op.getValueType();
12465 if (Ty == MVT::aarch64svcount) {
12502 return DAG.
getNode(AArch64ISD::CSEL,
DL,
Op.getValueType(), TVal, FVal,
12521 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
12529 Op->getFlags(),
DL, DAG);
12531 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
12546 !Subtarget->isTargetMachO())
12547 return getAddrLarge(JT, DAG);
12549 return getAddrTiny(JT, DAG);
12550 return getAddr(JT, DAG);
12563 AFI->setJumpTableEntryInfo(JTI, 4,
nullptr);
12568 "aarch64-jump-table-hardening")) {
12570 if (Subtarget->isTargetMachO()) {
12575 assert(Subtarget->isTargetELF() &&
12576 "jump table hardening only supported on MachO/ELF");
12607 std::optional<uint16_t> BADisc =
12608 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(MF.
getFunction());
12619 {Dest,
Key, Disc, AddrDisc, Chain});
12629 if (Subtarget->isTargetMachO()) {
12630 return getGOT(CP, DAG);
12633 return getAddrLarge(CP, DAG);
12635 return getAddrTiny(CP, DAG);
12637 return getAddr(CP, DAG);
12645 if (std::optional<uint16_t> BADisc =
12646 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(
12660 {TargetBA,
Key, AddrDisc, Disc});
12668 return getAddrLarge(BAN, DAG);
12670 return getAddrTiny(BAN, DAG);
12672 return getAddr(BAN, DAG);
  AArch64FunctionInfo *FuncInfo =
                      MachinePointerInfo(SV));
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  if (Subtarget->isWindowsArm64EC()) {
  uint64_t StackOffset;
                      MachinePointerInfo(SV));
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
                                MachinePointerInfo(SV), Align(PtrSize)));
                                MachinePointerInfo(SV, Offset),
                                MachinePointerInfo(SV, Offset),
                                GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
                                VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
  if (Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()))
    return LowerWin64_VASTART(Op, DAG);
  else if (Subtarget->isTargetDarwin())
    return LowerDarwin_VASTART(Op, DAG);
  return LowerAAPCS_VASTART(Op, DAG);
  unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
  unsigned VaListSize =
      (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
          : Subtarget->isTargetILP32() ? 20 : 32;
      Align(PtrSize), false, false, nullptr,
      std::nullopt, MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
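  // Illustrative sketch, not from the upstream source: the VaListSize choice
  // above reflects the different va_list ABIs. Darwin and Windows use a
  // single pointer-sized slot, while the AAPCS64 va_list is a 32-byte struct
  // (20 bytes under ILP32) holding __stack, __gr_top, __vr_top, __gr_offs and
  // __vr_offs, so VACOPY must copy the whole structure.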
  assert(Subtarget->isTargetDarwin() &&
         "automatic va_arg instruction only works on Darwin");
  EVT VT = Op.getValueType();
  MaybeAlign Align(Op.getConstantOperandVal(3));
  unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
      DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
                    "currently not supported");
  if (Align && *Align > MinSlotSize) {
  ArgSize = std::max(ArgSize, MinSlotSize);
  bool NeedFPTrunc = false;
    NeedFPTrunc = true;
      DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
        DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
  return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
  EVT VT = Op.getValueType();
  unsigned Depth = Op.getConstantOperandVal(0);
                            MachinePointerInfo());
  if (Subtarget->isTargetILP32())
#define GET_REGISTER_MATCHER
#include "AArch64GenAsmMatcher.inc"
  if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
    unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
    if (!Subtarget->isXRegisterReserved(DwarfRegNum) &&
        !MRI->isReservedReg(MF, Reg))
  EVT VT = Op.getValueType();
  EVT VT = Op.getValueType();
  unsigned Depth = Op.getConstantOperandVal(0);
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
  if (Subtarget->hasPAuth()) {
                                          bool OptForSize) const {
  bool IsLegal = false;
  const APInt ImmInt = Imm.bitcastToAPInt();
  if (VT == MVT::f64)
  else if (VT == MVT::f32)
  else if (VT == MVT::f16 || VT == MVT::bf16)
  if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
           "Should be able to build any value with at most 4 moves");
    unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 4 : 2));
    IsLegal = Insn.size() <= Limit;
             << " imm value: "; Imm.dump(););
  if ((ST->hasNEON() &&
       (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
        VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
        VT == MVT::v4f32)) ||
      (VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
    constexpr unsigned AccurateBits = 8;
    ExtraSteps = DesiredBits <= AccurateBits
    return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
  EVT VT = Op.getValueType();
AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
                                        bool Reciprocal) const {
                                        DAG, ExtraSteps)) {
    SDNodeFlags Flags =
    for (int i = ExtraSteps; i > 0; --i) {
      Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
                                        int &ExtraSteps) const {
                                        DAG, ExtraSteps)) {
    for (int i = ExtraSteps; i > 0; --i) {
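  // Illustrative sketch, not from the upstream source: each ExtraSteps
  // iteration above is one Newton-Raphson refinement of the FRSQRTE seed.
  // FRSQRTS computes (3 - a*b)/2, and every refinement roughly doubles the
  // number of accurate bits, which is why ExtraSteps is derived from the
  // requested precision versus the ~8 accurate bits of the estimate.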
const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
  if (!Subtarget->hasFPARMv8())
static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
      (Constraint[1] != 'p' && Constraint[1] != 'z'))
    return std::nullopt;
  bool IsPredicate = Constraint[1] == 'p';
  Constraint = Constraint.substr(2, Constraint.size() - 3);
  bool IsPredicateAsCount = IsPredicate && Constraint.starts_with("n");
  if (IsPredicateAsCount)
    return std::nullopt;
  if (IsPredicateAsCount)
    return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
    return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
  return std::make_pair(AArch64::Z0 + V, &AArch64::ZPRRegClass);
static std::optional<PredicateConstraint>
  if (VT != MVT::aarch64svcount &&
  switch (Constraint) {
    return VT == MVT::aarch64svcount ? &AArch64::PNR_p8to15RegClass
                                     : &AArch64::PPR_p8to15RegClass;
    return VT == MVT::aarch64svcount ? &AArch64::PNR_3bRegClass
                                     : &AArch64::PPR_3bRegClass;
    return VT == MVT::aarch64svcount ? &AArch64::PNRRegClass
                                     : &AArch64::PPRRegClass;
static std::optional<ReducedGprConstraint>
  switch (Constraint) {
    return &AArch64::MatrixIndexGPR32_8_11RegClass;
    return &AArch64::MatrixIndexGPR32_12_15RegClass;
  return DAG.getNode(AArch64ISD::CSINC, DL, MVT::i32,
                     getCondCode(DAG, getInvertedCondCode(CC)), NZCV);
SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
      OpInfo.ConstraintVT.getSizeInBits() < 8)
  if (OpInfo.ConstraintVT.getSizeInBits() <= 32)
AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
AArch64TargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  Value *CallOperandVal = info.CallOperandVal;
  if (!CallOperandVal)
  switch (*constraint) {
std::pair<unsigned, const TargetRegisterClass *>
AArch64TargetLowering::getRegForInlineAsmConstraint(
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
      return std::make_pair(0U, nullptr);
      return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
        return std::make_pair(0U, &AArch64::GPR64commonRegClass);
      return std::make_pair(0U, &AArch64::GPR32commonRegClass);
      if (!Subtarget->hasFPARMv8())
        return std::make_pair(0U, &AArch64::ZPRRegClass);
        return std::make_pair(0U, nullptr);
      if (VT == MVT::Other)
        return std::make_pair(0U, &AArch64::FPR16RegClass);
        return std::make_pair(0U, &AArch64::FPR32RegClass);
        return std::make_pair(0U, &AArch64::FPR64RegClass);
        return std::make_pair(0U, &AArch64::FPR128RegClass);
      if (!Subtarget->hasFPARMv8())
        return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
        return std::make_pair(0U, &AArch64::FPR128_loRegClass);
      if (!Subtarget->hasFPARMv8())
        return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
    if (AArch64::ZPRRegClass.hasSubClassEq(P->second) &&
        !Subtarget->isSVEorStreamingSVEAvailable())
      return std::make_pair(TRI->getSubReg(P->first, AArch64::zsub),
                            &AArch64::FPR128RegClass);
      return std::make_pair(0U, RegClass);
      return std::make_pair(0U, RegClass);
  if (StringRef("{cc}").equals_insensitive(Constraint) ||
    return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
  if (Constraint == "{za}") {
    return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass);
  if (Constraint == "{zt0}") {
    return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
  std::pair<unsigned, const TargetRegisterClass *> Res;
    unsigned Size = Constraint.size();
    if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
        tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
      if (!Failed && RegNo >= 0 && RegNo <= 31) {
          Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
          Res.second = &AArch64::FPR64RegClass;
          Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
          Res.second = &AArch64::FPR128RegClass;
  if (Res.second && !Subtarget->hasFPARMv8() &&
      !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
      !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
    return std::make_pair(0U, nullptr);
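  // Illustrative sketch, not from the upstream source: the single-letter
  // constraints above map 'w' to the full FP/SIMD register file, 'x' to the
  // lower half and 'y' to an even smaller subset, so inline assembly such as
  //   asm("fmov %d0, %d1" : "=w"(out) : "w"(in));
  // is allocated FPR64/FPR128 registers, while the named {za}/{zt0}
  // constraints pin the SME state registers directly.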
                                   bool AllowUnknown) const {
  if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
    return EVT(MVT::i64x8);
void AArch64TargetLowering::LowerAsmOperandForConstraint(
  if (Constraint.size() != 1)
  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
    if (Op.getValueType() == MVT::i64)
      Result = DAG.getRegister(AArch64::XZR, MVT::i64);
      Result = DAG.getRegister(AArch64::WZR, MVT::i32);
    switch (ConstraintLetter) {
      CVal = C->getSExtValue();
      if ((CVal & 0xFFFF) == CVal)
      if ((CVal & 0xFFFF0000ULL) == CVal)
      uint64_t NCVal = ~(uint32_t)CVal;
      if ((NCVal & 0xFFFFULL) == NCVal)
      if ((NCVal & 0xFFFF0000ULL) == NCVal)
      if ((CVal & 0xFFFFULL) == CVal)
      if ((CVal & 0xFFFF0000ULL) == CVal)
      if ((CVal & 0xFFFF00000000ULL) == CVal)
      if ((CVal & 0xFFFF000000000000ULL) == CVal)
      uint64_t NCVal = ~CVal;
      if ((NCVal & 0xFFFFULL) == NCVal)
      if ((NCVal & 0xFFFF0000ULL) == NCVal)
      if ((NCVal & 0xFFFF00000000ULL) == NCVal)
      if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
  Ops.push_back(Result);
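  // Illustrative sketch, not from the upstream source: the 0xFFFF... masks
  // above test whether the constant (or its complement) fits entirely within
  // one 16-bit halfword, i.e. whether a single MOVZ or MOVN with an optional
  // LSL #16/#32/#48 can materialise it, which is what these immediate
  // constraints require.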
  EVT VT = Op.getValueType();
         "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
  if (VT != MVT::v16i8 && VT != MVT::v8i8)
  assert((NumElts == 8 || NumElts == 16) &&
         "Need to have exactly 8 or 16 elements in vector.");
  for (unsigned i = 0; i < NumElts; ++i) {
      SourceVec = OperandSourceVec;
    else if (SourceVec != OperandSourceVec)
  } else if (!AndMaskConstants.empty()) {
    if (!MaskSourceVec) {
    } else if (MaskSourceVec != MaskSource->getOperand(0)) {
  if (!AndMaskConstants.empty())
                     SourceVec, MaskSourceVec);
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
  EVT VT = Op.getValueType();
         "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
  struct ShuffleSourceInfo {
    ShuffleSourceInfo(SDValue Vec)
        : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
          ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
13874 for (
unsigned i = 0; i < NumElts; ++i) {
13880 V.getOperand(0).getValueType().isScalableVector()) {
13882 dbgs() <<
"Reshuffle failed: "
13883 "a shuffle can only come from building a vector from "
13884 "various elements of other fixed-width vectors, provided "
13885 "their indices are constant\n");
13891 auto Source =
find(Sources, SourceVec);
13892 if (Source == Sources.
end())
13893 Source = Sources.
insert(Sources.
end(), ShuffleSourceInfo(SourceVec));
13896 unsigned EltNo = V.getConstantOperandVal(1);
13897 Source->MinElt = std::min(Source->MinElt, EltNo);
13898 Source->MaxElt = std::max(Source->MaxElt, EltNo);
13903 if ((Sources.
size() == 3 || Sources.
size() == 4) && NumElts > 4) {
13908 for (
unsigned I = 0;
I < NumElts; ++
I) {
13911 for (
unsigned OF = 0; OF < OutputFactor; OF++)
13912 Mask.push_back(-1);
13918 unsigned Lane = V.getConstantOperandVal(1);
13919 for (
unsigned S = 0; S < Sources.
size(); S++) {
13920 if (V.getOperand(0) == Sources[S].Vec) {
13921 unsigned InputSize = Sources[S].Vec.getScalarValueSizeInBits();
13922 unsigned InputBase = 16 * S + Lane * InputSize / 8;
13923 for (
unsigned OF = 0; OF < OutputFactor; OF++)
13924 Mask.push_back(InputBase + OF);
13934 ? Intrinsic::aarch64_neon_tbl3
13935 : Intrinsic::aarch64_neon_tbl4,
13937 for (
unsigned i = 0; i < Sources.
size(); i++) {
13938 SDValue Src = Sources[i].Vec;
13939 EVT SrcVT = Src.getValueType();
13942 "Expected a legally typed vector");
13950 for (
unsigned i = 0; i < Mask.size(); i++)
13952 assert((Mask.size() == 8 || Mask.size() == 16) &&
13953 "Expected a v8i8 or v16i8 Mask");
13955 Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8,
DL, TBLMask));
13959 Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
13963 if (Sources.
size() > 2) {
13964 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: currently only do something "
13965 <<
"sensible when at most two source vectors are "
13973 for (
auto &Source : Sources) {
13974 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
13975 if (SrcEltTy.
bitsLT(SmallestEltTy)) {
13976 SmallestEltTy = SrcEltTy;
13979 unsigned ResMultiplier =
13988 for (
auto &Src : Sources) {
13989 EVT SrcVT = Src.ShuffleVec.getValueType();
14002 assert(2 * SrcVTSize == VTSize);
14007 DAG.
getUNDEF(Src.ShuffleVec.getValueType()));
14013 dbgs() <<
"Reshuffle failed: result vector too small to extract\n");
14017 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
14019 dbgs() <<
"Reshuffle failed: span too large for a VEXT to cope\n");
14023 if (Src.MinElt >= NumSrcElts) {
14028 Src.WindowBase = -NumSrcElts;
14029 }
else if (Src.MaxElt < NumSrcElts) {
14046 dbgs() <<
"Reshuffle failed: don't know how to lower AArch64ISD::EXT "
14047 "for SVE vectors.");
14052 DAG.
getNode(AArch64ISD::EXT,
DL, DestVT, VEXTSrc1, VEXTSrc2,
14054 Src.WindowBase = -Src.MinElt;
14061 for (
auto &Src : Sources) {
14063 if (SrcEltTy == SmallestEltTy)
14068 DAG.
getNode(AArch64ISD::NVCAST,
DL, ShuffleVT, Src.ShuffleVec);
14074 Src.WindowBase *= Src.WindowScale;
14079 for (
auto Src : Sources)
14080 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
14088 if (Entry.isUndef())
14091 auto Src =
find(Sources, Entry.getOperand(0));
14100 int LanesDefined = BitsDefined / BitsPerShuffleLane;
14104 int *LaneMask = &Mask[i * ResMultiplier];
14106 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
14107 ExtractBase += NumElts * (Src - Sources.
begin());
14108 for (
int j = 0; j < LanesDefined; ++j)
14109 LaneMask[j] = ExtractBase + j;
14114 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: illegal shuffle mask\n");
14119 for (
unsigned i = 0; i < Sources.
size(); ++i)
14126 V = DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Shuffle);
14132 dbgs() <<
"Reshuffle, creating node: "; V.dump(););
14151 unsigned ExpectedElt = Imm;
14152 for (
unsigned i = 1; i < NumElts; ++i) {
14156 if (ExpectedElt == NumElts)
14161 if (ExpectedElt !=
static_cast<unsigned>(M[i]))
14172 if (V.getValueType() != MVT::v16i8)
14174 assert(V.getNumOperands() == 16 &&
"Expected 16 operands on the BUILDVECTOR");
14176 for (
unsigned X = 0;
X < 4;
X++) {
14188 for (
unsigned Y = 1;
Y < 4;
Y++) {
14204 V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
14205 V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
14207 if (V.getValueType() == MVT::v4i32)
14223 unsigned &DupLaneOp) {
14225 "Only possible block sizes for wide DUP are: 16, 32, 64");
14244 for (
size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
14245 for (
size_t I = 0;
I < NumEltsPerBlock;
I++) {
14246 int Elt = M[BlockIndex * NumEltsPerBlock +
I];
14250 if ((
unsigned)Elt >= SingleVecNumElements)
14252 if (BlockElts[
I] < 0)
14253 BlockElts[
I] = Elt;
14254 else if (BlockElts[
I] != Elt)
14263 auto FirstRealEltIter =
find_if(BlockElts, [](
int Elt) {
return Elt >= 0; });
14264 assert(FirstRealEltIter != BlockElts.
end() &&
14265 "Shuffle with all-undefs must have been caught by previous cases, "
14267 if (FirstRealEltIter == BlockElts.
end()) {
14273 size_t FirstRealIndex = FirstRealEltIter - BlockElts.
begin();
14275 if ((
unsigned)*FirstRealEltIter < FirstRealIndex)
14278 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
14281 if (Elt0 % NumEltsPerBlock != 0)
14285 for (
size_t I = 0;
I < NumEltsPerBlock;
I++)
14286 if (BlockElts[
I] >= 0 && (
unsigned)BlockElts[
I] != Elt0 +
I)
14289 DupLaneOp = Elt0 / NumEltsPerBlock;
14298 const int *FirstRealElt =
find_if(M, [](
int Elt) {
return Elt >= 0; });
14303 APInt ExpectedElt =
APInt(MaskBits, *FirstRealElt + 1,
false,
14307 bool FoundWrongElt = std::any_of(FirstRealElt + 1, M.end(), [&](
int Elt) {
14308 return Elt != ExpectedElt++ && Elt >= 0;
14340 if (NumElts % 2 != 0)
14342 WhichResult = (M[0] == 0 ? 0 : 1);
14343 unsigned Idx = WhichResult * NumElts / 2;
14344 for (
unsigned i = 0; i != NumElts; i += 2) {
14345 if ((M[i] >= 0 && (
unsigned)M[i] != Idx) ||
14346 (M[i + 1] >= 0 && (
unsigned)M[i + 1] != Idx))
14359 WhichResult = (M[0] == 0 ? 0 : 1);
14360 for (
unsigned j = 0; j != 2; ++j) {
14361 unsigned Idx = WhichResult;
14362 for (
unsigned i = 0; i != Half; ++i) {
14363 int MIdx = M[i + j * Half];
14364 if (MIdx >= 0 && (
unsigned)MIdx != Idx)
14378 if (NumElts % 2 != 0)
14380 WhichResult = (M[0] == 0 ? 0 : 1);
14381 for (
unsigned i = 0; i < NumElts; i += 2) {
14382 if ((M[i] >= 0 && (
unsigned)M[i] != i + WhichResult) ||
14383 (M[i + 1] >= 0 && (
unsigned)M[i + 1] != i + WhichResult))
14390 bool &DstIsLeft,
int &Anomaly) {
14391 if (M.size() !=
static_cast<size_t>(NumInputElements))
14394 int NumLHSMatch = 0, NumRHSMatch = 0;
14395 int LastLHSMismatch = -1, LastRHSMismatch = -1;
14397 for (
int i = 0; i < NumInputElements; ++i) {
14407 LastLHSMismatch = i;
14409 if (M[i] == i + NumInputElements)
14412 LastRHSMismatch = i;
14415 if (NumLHSMatch == NumInputElements - 1) {
14417 Anomaly = LastLHSMismatch;
14419 }
else if (NumRHSMatch == NumInputElements - 1) {
14421 Anomaly = LastRHSMismatch;
14434 for (
int I = 0,
E = NumElts / 2;
I !=
E;
I++) {
14439 int Offset = NumElts / 2;
14440 for (
int I = NumElts / 2,
E = NumElts;
I !=
E;
I++) {
14441 if (Mask[
I] !=
I + SplitLHS *
Offset)
  EVT VT = Op.getValueType();
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
  unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
    if (LHSID == (1 * 9 + 2) * 9 + 3)
    assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
  if (OpNum == OP_MOVLANE) {
    auto getPFIDLane = [](unsigned ID, int Elt) -> int {
      assert(Elt < 4 && "Expected Perfect Lanes to be less than 4");
      return (ID % 9 == 8) ? -1 : ID % 9;
    assert(RHSID < 8 && "Expected a lane index for RHSID!");
    unsigned ExtLane = 0;
      int MaskElt = getPFIDLane(ID, (RHSID & 0x01) << 1) >> 1;
        MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
      assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
      ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
      Input = MaskElt < 2 ? V1 : V2;
             "Expected 16 or 32 bit shuffle elements");
      int MaskElt = getPFIDLane(ID, RHSID);
      assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
      ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
      Input = MaskElt < 4 ? V1 : V2;
    if (VT == MVT::v4i16) {
                       Input.getValueType().getVectorElementType(),
    return DAG.getNode(AArch64ISD::REV64, DL, VT, OpLHS);
    return DAG.getNode(AArch64ISD::REV32, DL, VT, OpLHS);
    return DAG.getNode(AArch64ISD::REV16, DL, VT, OpLHS);
    if (EltTy == MVT::i8)
      Opcode = AArch64ISD::DUPLANE8;
    else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
      Opcode = AArch64ISD::DUPLANE16;
    else if (EltTy == MVT::i32 || EltTy == MVT::f32)
      Opcode = AArch64ISD::DUPLANE32;
    else if (EltTy == MVT::i64 || EltTy == MVT::f64)
      Opcode = AArch64ISD::DUPLANE64;
    return DAG.getNode(Opcode, DL, VT, OpLHS, Lane);
    return DAG.getNode(AArch64ISD::EXT, DL, VT, OpLHS, OpRHS,
    return DAG.getNode(AArch64ISD::UZP1, DL, VT, OpLHS, OpRHS);
    return DAG.getNode(AArch64ISD::UZP2, DL, VT, OpLHS, OpRHS);
    return DAG.getNode(AArch64ISD::ZIP1, DL, VT, OpLHS, OpRHS);
    return DAG.getNode(AArch64ISD::ZIP2, DL, VT, OpLHS, OpRHS);
    return DAG.getNode(AArch64ISD::TRN1, DL, VT, OpLHS, OpRHS);
    return DAG.getNode(AArch64ISD::TRN2, DL, VT, OpLHS, OpRHS);
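  // Illustrative sketch, not from the upstream source: PFEntry is one entry
  // of the generated perfect-shuffle table. Bits [29:26] select the operation
  // (REV, DUPLANE, EXT, UZP/ZIP/TRN, ...) and the two 13-bit fields describe
  // how to build the left and right operands, so a four-element shuffle
  // expands recursively into a very short instruction sequence.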
14649 EVT EltVT =
Op.getValueType().getVectorElementType();
14662 MVT IndexVT = MVT::v8i8;
14663 unsigned IndexLen = 8;
14664 if (
Op.getValueSizeInBits() == 128) {
14665 IndexVT = MVT::v16i8;
14670 for (
int Val : ShuffleMask) {
14671 for (
unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
14672 unsigned Offset = Byte + Val * BytesPerElt;
14675 if (IsUndefOrZero &&
Offset >= IndexLen)
14685 if (IsUndefOrZero) {
14694 if (IndexLen == 8) {
14719 if (EltType == MVT::i8)
14720 return AArch64ISD::DUPLANE8;
14721 if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
14722 return AArch64ISD::DUPLANE16;
14723 if (EltType == MVT::i32 || EltType == MVT::f32)
14724 return AArch64ISD::DUPLANE32;
14725 if (EltType == MVT::i64 || EltType == MVT::f64)
14726 return AArch64ISD::DUPLANE64;
14734 auto getScaledOffsetDup = [](
SDValue BitCast,
int &LaneC,
MVT &CastVT) {
14745 unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
14747 if (ExtIdxInBits % CastedEltBitWidth != 0)
14755 LaneC += ExtIdxInBits / CastedEltBitWidth;
14762 unsigned SrcVecNumElts =
14769 if (getScaledOffsetDup(V, Lane, CastVT)) {
14770 V = DAG.
getBitcast(CastVT, V.getOperand(0).getOperand(0));
14772 V.getOperand(0).getValueType().is128BitVector()) {
14775 Lane += V.getConstantOperandVal(1);
14776 V = V.getOperand(0);
14802 EVT VT =
Op.getValueType();
14812 if (ElementSize > 32 || ElementSize == 1)
14842 EVT VT =
Op.getValueType();
14859 for (
unsigned I = 0;
I < 16;
I++) {
14860 if (ShuffleMask[
I] < 16)
14866 TBLMaskParts[
I] = DAG.
getConstant(
C->getSExtValue() + 32,
DL, MVT::i32);
14880AArch64TargetLowering::LowerEXTEND_VECTOR_INREG(
SDValue Op,
14883 EVT VT =
Op.getValueType();
14887 unsigned UnpackOpcode =
Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
14895 Val = DAG.
getNode(UnpackOpcode,
DL, MVT::nxv8i16, Val);
14896 if (VT == MVT::nxv8i16)
14900 Val = DAG.
getNode(UnpackOpcode,
DL, MVT::nxv4i32, Val);
14901 if (VT == MVT::nxv4i32)
14905 Val = DAG.
getNode(UnpackOpcode,
DL, MVT::nxv2i64, Val);
14906 assert(VT == MVT::nxv2i64 &&
"Unexpected result type!");
14917AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(
SDValue Op,
14920 EVT VT =
Op.getValueType();
14923 return LowerEXTEND_VECTOR_INREG(
Op, DAG);
14928 "Unexpected extension factor.");
14935 DAG.
getNode(AArch64ISD::ZIP1,
DL, SrcVT, SrcOp, Zeros));
  EVT VT = Op.getValueType();
    return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
  ArrayRef<int> ShuffleMask = SVN->getMask();
         "Unexpected VECTOR_SHUFFLE mask size!");
  for (unsigned LaneSize : {64U, 32U, 16U}) {
      unsigned Opcode = LaneSize == 64   ? AArch64ISD::DUPLANE64
                        : LaneSize == 32 ? AArch64ISD::DUPLANE32
                                         : AArch64ISD::DUPLANE16;
  if (isREVMask(ShuffleMask, EltSize, NumElts, 64))
  if (isREVMask(ShuffleMask, EltSize, NumElts, 32))
  if (isREVMask(ShuffleMask, EltSize, NumElts, 16))
  if (((NumElts == 8 && EltSize == 16) || (NumElts == 16 && EltSize == 8)) &&
    return DAG.getNode(AArch64ISD::EXT, DL, VT, Rev, Rev,
  bool ReverseEXT = false;
  if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
  unsigned WhichResult;
  unsigned OperandOrder;
  if (isZIPMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
    unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
                       OperandOrder == 0 ? V2 : V1);
  if (isUZPMask(ShuffleMask, NumElts, WhichResult)) {
    unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
  if (isTRNMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
    unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
                       OperandOrder == 0 ? V2 : V1);
    unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
    unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
    unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
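  // Illustrative sketch, not from the upstream source: the mask classifiers
  // above correspond one-to-one to the permute instructions; for v4i32,
  //   zip1 -> <0,4,1,5>, uzp1 -> <0,2,4,6>, trn1 -> <0,4,2,6>,
  // so a shuffle whose mask has one of these shapes becomes a single
  // instruction rather than a generic TBL.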
15069 if (
isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
15070 SDValue DstVec = DstIsLeft ? V1 : V2;
15074 int SrcLane = ShuffleMask[Anomaly];
15075 if (SrcLane >= NumInputElements) {
15077 SrcLane -= NumElts;
15084 ScalarVT = MVT::i32;
15097 if (NumElts == 4) {
15098 unsigned PFIndexes[4];
15099 for (
unsigned i = 0; i != 4; ++i) {
15100 if (ShuffleMask[i] < 0)
15103 PFIndexes[i] = ShuffleMask[i];
15107 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
15108 PFIndexes[2] * 9 + PFIndexes[3];
15118 "Expected larger vector element sizes to be handled already");
15120 for (
int M : ShuffleMask)
15122 M >=
static_cast<int>(NumElts) ? 0 : 0xffffffff,
DL, MVT::i32));
15136 EVT VT =
Op.getValueType();
15139 return LowerToScalableOp(
Op, DAG);
15142 "Unexpected vector type!");
15157 if (VT == MVT::nxv1i1)
15169 EVT VT =
Op.getValueType();
15182 if (CIdx && (CIdx->getZExtValue() <= 3)) {
15184 return DAG.
getNode(AArch64ISD::DUPLANE128,
DL, VT,
Op.getOperand(1), CI);
15206 SDValue TBL = DAG.
getNode(AArch64ISD::TBL,
DL, MVT::nxv2i64, V, ShuffleMask);
15212 APInt &UndefBits) {
15214 APInt SplatBits, SplatUndef;
15215 unsigned SplatBitSize;
15217 if (BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
15220 for (
unsigned i = 0; i < NumSplats; ++i) {
15221 CnstBits <<= SplatBitSize;
15222 UndefBits <<= SplatBitSize;
15224 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.
getSizeInBits());
15235 const APInt &Bits) {
15236 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15238 EVT VT =
Op.getValueType();
15247 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15258 EVT VT =
Op.getValueType();
15263 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15266 bool isAdvSIMDModImm =
false;
15286 if (isAdvSIMDModImm) {
15300 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15311 EVT VT =
Op.getValueType();
15316 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15319 bool isAdvSIMDModImm =
false;
15331 if (isAdvSIMDModImm) {
15345 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15355 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15357 EVT VT =
Op.getValueType();
15359 bool isAdvSIMDModImm =
false;
15371 if (isAdvSIMDModImm) {
15376 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15385 const APInt &Bits) {
15386 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15388 EVT VT =
Op.getValueType();
15397 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15406 const APInt &Bits) {
15407 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15409 EVT VT =
Op.getValueType();
15412 bool isAdvSIMDModImm =
false;
15416 MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
15421 MovTy = MVT::v2f64;
15424 if (isAdvSIMDModImm) {
15428 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15448 for (
unsigned i = 1; i < NumElts; ++i)
15457 while (
N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
15458 N =
N.getOperand(0);
15464 unsigned NumElts =
N.getValueType().getVectorMinNumElements();
15467 while (
N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
15468 N =
N.getOperand(0);
15471 if (
N.getValueType().getVectorMinNumElements() < NumElts)
15481 if (
N.getOpcode() == AArch64ISD::PTRUE &&
15482 N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
15483 return N.getValueType().getVectorMinNumElements() >= NumElts;
15495 EVT VT =
N->getValueType(0);
15505 SDValue FirstOp =
N->getOperand(0);
15506 unsigned FirstOpc = FirstOp.
getOpcode();
15507 SDValue SecondOp =
N->getOperand(1);
15508 unsigned SecondOpc = SecondOp.
getOpcode();
15515 if ((FirstOpc ==
ISD::AND || FirstOpc == AArch64ISD::BICi) &&
15516 (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR ||
15517 SecondOpc == AArch64ISD::SHL_PRED ||
15518 SecondOpc == AArch64ISD::SRL_PRED)) {
15522 }
else if ((SecondOpc ==
ISD::AND || SecondOpc == AArch64ISD::BICi) &&
15523 (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR ||
15524 FirstOpc == AArch64ISD::SHL_PRED ||
15525 FirstOpc == AArch64ISD::SRL_PRED)) {
15532 bool IsShiftRight = Shift.
getOpcode() == AArch64ISD::VLSHR ||
15533 Shift.
getOpcode() == AArch64ISD::SRL_PRED;
15534 bool ShiftHasPredOp = Shift.
getOpcode() == AArch64ISD::SHL_PRED ||
15535 Shift.
getOpcode() == AArch64ISD::SRL_PRED;
15539 if (ShiftHasPredOp) {
15545 C2 =
C.getZExtValue();
15548 C2 = C2node->getZExtValue();
15562 assert(C1nodeImm && C1nodeShift);
15564 C1AsAPInt = C1AsAPInt.
zextOrTrunc(ElemSizeInBits);
15570 if (C2 > ElemSizeInBits)
15575 if (C1AsAPInt != RequiredC1)
15583 unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
15588 EVT VT =
N->getValueType(0);
15589 assert(VT.
isVector() &&
"Expected vector type in tryLowerToBSL\n");
15607 for (
int i = 1; i >= 0; --i) {
15608 for (
int j = 1; j >= 0; --j) {
15634 if (
Sub.getOperand(1) !=
Add.getOperand(0))
15637 return DAG.
getNode(AArch64ISD::BSP,
DL, VT,
Sub, SubSibling, AddSibling);
15645 for (
int i = 1; i >= 0; --i)
15646 for (
int j = 1; j >= 0; --j) {
15657 if (!BVN0 || !BVN1)
15660 bool FoundMatch =
true;
15664 if (!CN0 || !CN1 ||
15667 FoundMatch =
false;
15682 !Subtarget->isNeonAvailable()))
15683 return LowerToScalableOp(
Op, DAG);
15692 EVT VT =
Op.getValueType();
15697 BuildVectorSDNode *BVN =
15701 LHS =
Op.getOperand(1);
15719 UndefBits, &
LHS)) ||
15735 EVT VT =
Op.getValueType();
15749 CstLane->getAPIntValue().trunc(EltTy.
getSizeInBits()).getZExtValue(),
15751 }
else if (Lane.getNode()->isUndef()) {
15754 assert(Lane.getValueType() == MVT::i32 &&
15755 "Unexpected BUILD_VECTOR operand type");
15757 Ops.push_back(Lane);
15764 EVT VT =
Op.getValueType();
15772 int32_t ImmVal, ShiftVal;
15782 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Res);
15787 EVT VT =
Op.getValueType();
15789 "Expected a legal NEON vector");
15795 auto TryMOVIWithBits = [&](
APInt DefBits) {
15809 APInt NotDefBits = ~DefBits;
15819 if (
SDValue R = TryMOVIWithBits(DefBits))
15821 if (
SDValue R = TryMOVIWithBits(UndefBits))
15829 auto TryWithFNeg = [&](
APInt DefBits,
MVT FVT) {
15835 unsigned NumElts = VT.
getSizeInBits() / FVT.getScalarSizeInBits();
15836 for (
unsigned i = 0; i < NumElts; i++)
15837 NegBits |= Neg << (FVT.getScalarSizeInBits() * i);
15838 NegBits = DefBits ^ NegBits;
15842 if (
SDValue NewOp = TryMOVIWithBits(NegBits)) {
15846 AArch64ISD::NVCAST,
DL, VT,
15848 DAG.
getNode(AArch64ISD::NVCAST,
DL, VFVT, NewOp)));
15853 if ((R = TryWithFNeg(DefBits, MVT::f32)) ||
15854 (R = TryWithFNeg(DefBits, MVT::f64)) ||
15855 (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
15862SDValue AArch64TargetLowering::LowerFixedLengthBuildVectorToSVE(
15864 EVT VT =
Op.getValueType();
15888 NumElems -
count_if(
Op->op_values(), IsExtractElt) > 4)
15895 return Op.isUndef() ? Undef
15896 : DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
15897 ContainerVT, Undef, Op, ZeroI64);
15901 while (Intermediates.
size() > 1) {
15904 for (
unsigned I = 0;
I < Intermediates.
size();
I += 2) {
15907 Intermediates[
I / 2] =
15909 : DAG.
getNode(AArch64ISD::ZIP1,
DL, ZipVT, Op0, Op1);
15912 Intermediates.
resize(Intermediates.
size() / 2);
15923 EVT VT =
Op.getValueType();
15925 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
15928 return LowerFixedLengthBuildVectorToSVE(
Op, DAG);
15946 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
15947 if (Val.isZero() || (VT.
isInteger() && Val.isAllOnes()))
15951 if (
Const->isZero() && !
Const->isNegative())
15972 bool isOnlyLowElement =
true;
15973 bool usesOnlyOneValue =
true;
15974 bool usesOnlyOneConstantValue =
true;
15976 bool AllLanesExtractElt =
true;
15977 unsigned NumConstantLanes = 0;
15978 unsigned NumDifferentLanes = 0;
15979 unsigned NumUndefLanes = 0;
15982 SmallMapVector<SDValue, unsigned, 16> DifferentValueMap;
15983 unsigned ConsecutiveValCount = 0;
15985 for (
unsigned i = 0; i < NumElts; ++i) {
15988 AllLanesExtractElt =
false;
15994 isOnlyLowElement =
false;
15999 ++NumConstantLanes;
16000 if (!ConstantValue.
getNode())
16002 else if (ConstantValue != V)
16003 usesOnlyOneConstantValue =
false;
16006 if (!
Value.getNode())
16008 else if (V !=
Value) {
16009 usesOnlyOneValue =
false;
16010 ++NumDifferentLanes;
16013 if (PrevVal != V) {
16014 ConsecutiveValCount = 0;
16029 DifferentValueMap[
V] = ++ConsecutiveValCount;
16032 if (!
Value.getNode()) {
16034 dbgs() <<
"LowerBUILD_VECTOR: value undefined, creating undef node\n");
16042 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: only low element used, creating 1 "
16043 "SCALAR_TO_VECTOR node\n");
16047 if (AllLanesExtractElt) {
16048 SDNode *
Vector =
nullptr;
16053 for (
unsigned i = 0; i < NumElts; ++i) {
16055 const SDNode *
N =
V.getNode();
16080 if (Val == 2 * i) {
16084 if (Val - 1 == 2 * i) {
16111 if (usesOnlyOneValue) {
16114 Value.getValueType() != VT) {
16116 dbgs() <<
"LowerBUILD_VECTOR: use DUP for non-constant splats\n");
16124 if (
Value.getValueSizeInBits() == 64) {
16126 dbgs() <<
"LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
16138 assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
16139 EltTy == MVT::f64) &&
"Unsupported floating-point vector type");
16141 dbgs() <<
"LowerBUILD_VECTOR: float constant splats, creating int "
16142 "BITCASTS, and try again\n");
16144 for (
unsigned i = 0; i < NumElts; ++i)
16148 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: trying to lower new vector: ";
16150 Val = LowerBUILD_VECTOR(Val, DAG);
16160 bool PreferDUPAndInsert =
16162 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
16163 NumDifferentLanes >= NumConstantLanes;
16169 if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
16173 APInt ConstantValueAPInt(1, 0);
16175 ConstantValueAPInt =
C->getAPIntValue().zextOrTrunc(BitSize);
16177 !ConstantValueAPInt.isAllOnes()) {
16181 Val = DAG.
getNode(AArch64ISD::DUP,
DL, VT, ConstantValue);
16185 for (
unsigned i = 0; i < NumElts; ++i) {
16199 dbgs() <<
"LowerBUILD_VECTOR: all elements are constant, use default "
16211 if (NumElts >= 4) {
16219 if (PreferDUPAndInsert) {
16224 for (
unsigned I = 0;
I < NumElts; ++
I)
16235 if (DifferentValueMap.
size() == 2 && NumUndefLanes == 0) {
16247 bool canUseVECTOR_CONCAT =
true;
16248 for (
auto Pair : DifferentValueMap) {
16250 if (Pair.second != NumElts / 2)
16251 canUseVECTOR_CONCAT =
false;
16264 if (canUseVECTOR_CONCAT) {
16287 if (NumElts >= 8) {
16288 SmallVector<int, 16> MaskVec;
16290 SDValue FirstLaneVal =
Op.getOperand(0);
16291 for (
unsigned i = 0; i < NumElts; ++i) {
16293 if (FirstLaneVal == Val)
16317 dbgs() <<
"LowerBUILD_VECTOR: alternatives failed, creating sequence "
16318 "of INSERT_VECTOR_ELT\n");
16335 LLVM_DEBUG(
dbgs() <<
"Creating node for op0, it is not undefined:\n");
16341 dbgs() <<
"Creating nodes for the other vector elements:\n";
16343 for (; i < NumElts; ++i) {
16354 dbgs() <<
"LowerBUILD_VECTOR: use default expansion, failed to find "
16355 "better alternative\n");
16362 !Subtarget->isNeonAvailable()))
16363 return LowerFixedLengthConcatVectorsToSVE(
Op, DAG);
16365 assert(
Op.getValueType().isScalableVector() &&
16367 "Expected legal scalable vector type!");
16372 "Unexpected number of operands in CONCAT_VECTORS");
16374 if (NumOperands == 2)
16379 while (ConcatOps.size() > 1) {
16380 for (
unsigned I = 0,
E = ConcatOps.size();
I !=
E;
I += 2) {
16388 ConcatOps.resize(ConcatOps.size() / 2);
16390 return ConcatOps[0];
16401 !Subtarget->isNeonAvailable()))
16402 return LowerFixedLengthInsertVectorElt(
Op, DAG);
16404 EVT VT =
Op.getOperand(0).getValueType();
16418 ExtendedValue,
Op.getOperand(2));
16431AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(
SDValue Op,
16434 EVT VT =
Op.getOperand(0).getValueType();
16440 if (VT == MVT::nxv1i1) {
16444 WidenedPred,
Op.getOperand(1));
16451 MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
16453 Extend,
Op.getOperand(1));
16458 return LowerFixedLengthExtractVectorElt(
Op, DAG);
16466 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16467 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
16468 VT == MVT::v8f16 || VT == MVT::v8bf16)
16471 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
16472 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
16483 if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
16493 EVT VT =
Op.getValueType();
16495 "Only cases that extract a fixed length vector are supported!");
16496 EVT InVT =
Op.getOperand(0).getValueType();
16504 unsigned Idx =
Op.getConstantOperandVal(1);
16523 if (PackedVT != InVT) {
16547 assert(
Op.getValueType().isScalableVector() &&
16548 "Only expect to lower inserts into scalable vectors!");
16550 EVT InVT =
Op.getOperand(1).getValueType();
16551 unsigned Idx =
Op.getConstantOperandVal(2);
16556 EVT VT =
Op.getValueType();
16572 if (Idx < (NumElts / 2))
16598 Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
16599 Vec1 = getSVESafeBitCast(NarrowVT, Vec1, DAG);
16603 Vec1 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, Vec1);
16612 HiVec0 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, HiVec0);
16613 Narrow = DAG.
getNode(AArch64ISD::UZP1,
DL, NarrowVT, Vec1, HiVec0);
16616 "Invalid subvector index!");
16618 LoVec0 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, LoVec0);
16619 Narrow = DAG.
getNode(AArch64ISD::UZP1,
DL, NarrowVT, LoVec0, Vec1);
16622 return getSVESafeBitCast(VT, Narrow, DAG);
16630 std::optional<unsigned> PredPattern =
16642 if (
Op.getOpcode() != AArch64ISD::DUP &&
16655 SplatVal =
Op->getConstantOperandVal(0);
16656 if (
Op.getValueType().getVectorElementType() != MVT::i64)
16657 SplatVal = (int32_t)SplatVal;
16665 SplatVal = -SplatVal;
16673 EVT VT =
Op.getValueType();
16677 return LowerFixedLengthVectorIntDivideToSVE(
Op, DAG);
16682 unsigned PredOpcode =
Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
16691 DAG.
getNode(AArch64ISD::ASRD_MERGE_OP1,
DL, VT, Pg,
Op->getOperand(0),
16699 if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
16700 return LowerToPredicatedOp(
Op, DAG, PredOpcode);
16705 if (VT == MVT::nxv16i8)
16706 WidenedVT = MVT::nxv8i16;
16707 else if (VT == MVT::nxv8i16)
16708 WidenedVT = MVT::nxv4i32;
16712 unsigned UnpkLo =
Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
16713 unsigned UnpkHi =
Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
16722 return DAG.
getNode(AArch64ISD::UZP1,
DL, VT, ResultLoCast, ResultHiCast);
16725bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
16726 EVT VT,
unsigned DefinedValues)
const {
16727 if (!Subtarget->isNeonAvailable())
16746 unsigned DummyUnsigned;
16754 isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
16756 isTRNMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
16757 isUZPMask(M, NumElts, DummyUnsigned) ||
16758 isZIPMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
16762 isINSMask(M, NumElts, DummyBool, DummyInt) ||
16778 Op =
Op.getOperand(0);
16780 APInt SplatBits, SplatUndef;
16781 unsigned SplatBitSize;
16783 if (!BVN || !BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
16784 HasAnyUndefs, ElementBits) ||
16785 SplatBitSize > ElementBits)
16796 assert(VT.
isVector() &&
"vector shift count is not a vector type");
16800 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
16807 assert(VT.
isVector() &&
"vector shift count is not a vector type");
16811 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
16816 EVT VT =
Op.getValueType();
16821 EVT OpVT =
Op.getOperand(0).getValueType();
16832 !Subtarget->isNeonAvailable()))
16833 return LowerFixedLengthVectorTruncateToSVE(
Op, DAG);
16843 unsigned &ShiftValue,
16856 ShiftValue = ShiftOp1->getZExtValue();
16865 "ResVT must be truncated or same type as the shift.");
16868 if (ShiftValue > ExtraBits && !
Add->getFlags().hasNoUnsignedWrap())
16875 uint64_t AddValue = AddOp1->getZExtValue();
16876 if (AddValue != 1ULL << (ShiftValue - 1))
16879 RShOperand =
Add->getOperand(0);
16885 EVT VT =
Op.getValueType();
16889 if (!
Op.getOperand(1).getValueType().isVector())
16893 switch (
Op.getOpcode()) {
16897 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SHL_PRED);
16899 if (
isVShiftLImm(
Op.getOperand(1), VT,
false, Cnt) && Cnt < EltSize)
16900 return DAG.
getNode(AArch64ISD::VSHL,
DL, VT,
Op.getOperand(0),
16905 Op.getOperand(0),
Op.getOperand(1));
16909 (Subtarget->hasSVE2() ||
16910 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
16912 unsigned ShiftValue;
16914 return DAG.
getNode(AArch64ISD::URSHR_I_PRED,
DL, VT,
16921 unsigned Opc =
Op.getOpcode() ==
ISD::SRA ? AArch64ISD::SRA_PRED
16922 : AArch64ISD::SRL_PRED;
16923 return LowerToPredicatedOp(
Op, DAG,
Opc);
16927 if (
isVShiftRImm(
Op.getOperand(1), VT,
false, Cnt) && Cnt < EltSize) {
16929 (
Op.getOpcode() ==
ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
16938 unsigned Opc = (
Op.getOpcode() ==
ISD::SRA) ? Intrinsic::aarch64_neon_sshl
16939 : Intrinsic::aarch64_neon_ushl;
16947 return NegShiftLeft;
16955 if (
Op.getValueType().isScalableVector())
16956 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
16959 !Subtarget->isNeonAvailable()))
16960 return LowerFixedLengthVectorSetccToSVE(
Op, DAG);
16965 EVT CmpVT =
LHS.getValueType().changeVectorElementTypeToInteger();
16968 if (
LHS.getValueType().getVectorElementType().isInteger())
16971 assert(((!Subtarget->hasFullFP16() &&
16972 LHS.getValueType().getVectorElementType() != MVT::f16) ||
16973 LHS.getValueType().getVectorElementType() != MVT::bf16 ||
16974 LHS.getValueType().getVectorElementType() != MVT::f128) &&
16975 "Unexpected type!");
16980 bool OneNaN =
false;
17004 if (!
Cmp.getNode())
17033 unsigned ScalarOpcode;
17051 "Expected power-of-2 length vector");
17059 if (ElemVT == MVT::i1) {
17061 if (NumElems > 16) {
17064 EVT HalfVT =
Lo.getValueType();
17075 unsigned ExtendedWidth = 64;
17078 ExtendedWidth = 128;
17083 unsigned ExtendOp =
17092 NumElems == 2 && ExtendedWidth == 128) {
17093 Extended = DAG.
getBitcast(MVT::v4i32, Extended);
17094 ExtendedVT = MVT::i32;
17096 switch (ScalarOpcode) {
17117 VecVT =
Lo.getValueType();
17133 for (
unsigned Shift = NumElems / 2; Shift > 0; Shift /= 2) {
17138 Scalar = DAG.
getNode(ScalarOpcode,
DL, ScalarVT, Scalar, Shifted);
17150 EVT SrcVT = Src.getValueType();
17155 SrcVT == MVT::v2f16) {
  bool OverrideNEON = !Subtarget->isNeonAvailable() ||
          SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
      return LowerPredReductionToSVE(Op, DAG);
    switch (Op.getOpcode()) {
      return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
      return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
      return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
      return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
      return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
      return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
      return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
      return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
      return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
      return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
      return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
      return LowerReductionToSVE(AArch64ISD::FMAXV_PRED, Op, DAG);
      return LowerReductionToSVE(AArch64ISD::FMINV_PRED, Op, DAG);
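  // Illustrative sketch, not from the upstream source: each VECREDUCE_*
  // opcode is mapped onto the matching predicated SVE reduction node, and the
  // reduction runs under a governing predicate so that fixed-length vectors
  // widened into a scalable container only reduce their real elements.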
17211 switch (
Op.getOpcode()) {
17216 Op.getValueType(),
DL, DAG);
17236 EVT SrcVT = Src.getValueType();
17239 SDVTList SrcVTs = DAG.
getVTList(SrcVT, SrcVT);
17251 for (
unsigned I = 0;
I < Stages; ++
I) {
17253 Src = DAG.
getNode(BaseOpc,
DL, SrcVT, Src.getValue(0), Src.getValue(1));
17261 auto &Subtarget = DAG.
getSubtarget<AArch64Subtarget>();
17263 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
17268 MVT VT =
Op.getSimpleValueType();
17269 assert(VT != MVT::i128 &&
"Handled elsewhere, code replicated.");
17274 Op.getOperand(0),
Op.getOperand(1),
RHS,
17279AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
SDValue Op,
17284 SDNode *
Node =
Op.getNode();
17289 EVT VT =
Node->getValueType(0);
17292 "no-stack-arg-probe")) {
17294 Chain =
SP.getValue(1);
17304 RTLIB::LibcallImpl ChkStkImpl =
getLibcallImpl(RTLIB::STACK_PROBE);
17305 if (ChkStkImpl == RTLIB::Unsupported)
17314 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
17315 const uint32_t *
Mask =
TRI->getWindowsStackProbePreservedMask();
17316 if (Subtarget->hasCustomCallingConv())
17324 Chain, Callee, DAG.
getRegister(AArch64::X15, MVT::i64),
17335 Chain =
SP.getValue(1);
17349AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(
SDValue Op,
17352 SDNode *
Node =
Op.getNode();
17359 EVT VT =
Node->getValueType(0);
17363 Chain =
SP.getValue(1);
17370 Chain = DAG.
getNode(AArch64ISD::PROBED_ALLOCA,
DL, MVT::Other, Chain, SP);
17376AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(
SDValue Op,
17380 if (Subtarget->isTargetWindows())
17381 return LowerWindowsDYNAMIC_STACKALLOC(
Op, DAG);
17383 return LowerInlineDYNAMIC_STACKALLOC(
Op, DAG);
17389 unsigned NewOp)
const {
17390 if (Subtarget->hasSVE2())
17391 return LowerToPredicatedOp(
Op, DAG, NewOp);
17399 EVT VT =
Op.getValueType();
17400 assert(VT != MVT::i64 &&
"Expected illegal VSCALE node");
17403 APInt MulImm =
Op.getConstantOperandAPInt(0);
17409template <
unsigned NumVecs>
17419 for (
unsigned I = 0;
I < NumVecs; ++
I)
17428 Info.align.reset();
17440 auto &
DL =
I.getDataLayout();
17442 case Intrinsic::aarch64_sve_st2:
17444 case Intrinsic::aarch64_sve_st3:
17446 case Intrinsic::aarch64_sve_st4:
17448 case Intrinsic::aarch64_neon_ld2:
17449 case Intrinsic::aarch64_neon_ld3:
17450 case Intrinsic::aarch64_neon_ld4:
17451 case Intrinsic::aarch64_neon_ld1x2:
17452 case Intrinsic::aarch64_neon_ld1x3:
17453 case Intrinsic::aarch64_neon_ld1x4: {
17455 uint64_t NumElts =
DL.getTypeSizeInBits(
I.getType()) / 64;
17457 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17459 Info.align.reset();
17464 case Intrinsic::aarch64_neon_ld2lane:
17465 case Intrinsic::aarch64_neon_ld3lane:
17466 case Intrinsic::aarch64_neon_ld4lane:
17467 case Intrinsic::aarch64_neon_ld2r:
17468 case Intrinsic::aarch64_neon_ld3r:
17469 case Intrinsic::aarch64_neon_ld4r: {
17472 Type *RetTy =
I.getType();
17474 unsigned NumElts = StructTy->getNumElements();
17475 Type *VecTy = StructTy->getElementType(0);
17478 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17480 Info.align.reset();
17485 case Intrinsic::aarch64_neon_st2:
17486 case Intrinsic::aarch64_neon_st3:
17487 case Intrinsic::aarch64_neon_st4:
17488 case Intrinsic::aarch64_neon_st1x2:
17489 case Intrinsic::aarch64_neon_st1x3:
17490 case Intrinsic::aarch64_neon_st1x4: {
17492 unsigned NumElts = 0;
17493 for (
const Value *Arg :
I.args()) {
17494 Type *ArgTy = Arg->getType();
17497 NumElts +=
DL.getTypeSizeInBits(ArgTy) / 64;
17500 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17502 Info.align.reset();
17507 case Intrinsic::aarch64_neon_st2lane:
17508 case Intrinsic::aarch64_neon_st3lane:
17509 case Intrinsic::aarch64_neon_st4lane: {
17511 unsigned NumElts = 0;
17513 Type *VecTy =
I.getArgOperand(0)->getType();
17516 for (
const Value *Arg :
I.args()) {
17517 Type *ArgTy = Arg->getType();
17524 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17526 Info.align.reset();
17531 case Intrinsic::aarch64_ldaxr:
17532 case Intrinsic::aarch64_ldxr: {
17533 Type *ValTy =
I.getParamElementType(0);
17536 Info.ptrVal =
I.getArgOperand(0);
17538 Info.align =
DL.getABITypeAlign(ValTy);
17542 case Intrinsic::aarch64_stlxr:
17543 case Intrinsic::aarch64_stxr: {
17544 Type *ValTy =
I.getParamElementType(1);
17547 Info.ptrVal =
I.getArgOperand(1);
17549 Info.align =
DL.getABITypeAlign(ValTy);
17553 case Intrinsic::aarch64_ldaxp:
17554 case Intrinsic::aarch64_ldxp:
17556 Info.memVT = MVT::i128;
17557 Info.ptrVal =
I.getArgOperand(0);
17559 Info.align =
Align(16);
17562 case Intrinsic::aarch64_stlxp:
17563 case Intrinsic::aarch64_stxp:
17565 Info.memVT = MVT::i128;
17566 Info.ptrVal =
I.getArgOperand(2);
17568 Info.align =
Align(16);
17571 case Intrinsic::aarch64_sve_ldnt1: {
17575 Info.ptrVal =
I.getArgOperand(1);
17577 Info.align =
DL.getABITypeAlign(ElTy);
17581 case Intrinsic::aarch64_sve_stnt1: {
17585 Info.memVT =
MVT::getVT(
I.getOperand(0)->getType());
17586 Info.ptrVal =
I.getArgOperand(2);
17588 Info.align =
DL.getABITypeAlign(ElTy);
17592 case Intrinsic::aarch64_mops_memset_tag: {
17593 Value *Dst =
I.getArgOperand(0);
17594 Value *Val =
I.getArgOperand(1);
17599 Info.align =
I.getParamAlign(0).valueOrOne();
17614 std::optional<unsigned> ByteOffset)
const {
17631 Base.getOperand(1).hasOneUse() &&
17638 uint64_t ShiftAmount =
Base.getOperand(1).getConstantOperandVal(1);
17640 if (ShiftAmount ==
Log2_32(LoadBytes))
17650 if ((VT == MVT::i64 || VT == MVT::i32) && Extend->
use_size()) {
17669 return NumBits1 > NumBits2;
17676 return NumBits1 > NumBits2;
17683 if (
I->getOpcode() != Instruction::FMul)
17686 if (!
I->hasOneUse())
17691 if (!(
User->getOpcode() == Instruction::FSub ||
17692 User->getOpcode() == Instruction::FAdd))
17703 I->getFastMathFlags().allowContract()));
17713 return NumBits1 == 32 && NumBits2 == 64;
17720 return NumBits1 == 32 && NumBits2 == 64;
17738bool AArch64TargetLowering::isExtFreeImpl(
const Instruction *Ext)
const {
17746 for (
const Use &U : Ext->
uses()) {
17754 switch (Instr->getOpcode()) {
17755 case Instruction::Shl:
17759 case Instruction::GetElementPtr: {
17762 std::advance(GTI, U.getOperandNo()-1);
17775 if (ShiftAmt == 0 || ShiftAmt > 4)
17779 case Instruction::Trunc:
17796 unsigned NumElts,
bool IsLittleEndian,
17798 if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth > 64)
17801 assert(DstWidth % SrcWidth == 0 &&
17802 "TBL lowering is not supported for a conversion instruction with this "
17803 "source and destination element type.");
17805 unsigned Factor = DstWidth / SrcWidth;
17806 unsigned MaskLen = NumElts * Factor;
17809 Mask.resize(MaskLen, NumElts);
17811 unsigned SrcIndex = 0;
17812 for (
unsigned I = IsLittleEndian ? 0 : Factor - 1;
I < MaskLen;
I += Factor)
17813 Mask[
I] = SrcIndex++;
17821 bool IsLittleEndian) {
17823 unsigned NumElts = SrcTy->getNumElements();
17831 auto *FirstEltZero = Builder.CreateInsertElement(
17833 Value *Result = Builder.CreateShuffleVector(
Op, FirstEltZero, Mask);
17834 Result = Builder.CreateBitCast(Result, DstTy);
17835 if (DstTy != ZExtTy)
17836 Result = Builder.CreateZExt(Result, ZExtTy);
17842 bool IsLittleEndian) {
17849 !IsLittleEndian, Mask))
17852 auto *FirstEltZero = Builder.CreateInsertElement(
17855 return Builder.CreateShuffleVector(
Op, FirstEltZero, Mask);
17864 assert(SrcTy->getElementType()->isIntegerTy() &&
17865 "Non-integer type source vector element is not supported");
17866 assert(DstTy->getElementType()->isIntegerTy(8) &&
17867 "Unsupported destination vector element type");
17868 unsigned SrcElemTySz =
17870 unsigned DstElemTySz =
17872 assert((SrcElemTySz % DstElemTySz == 0) &&
17873 "Cannot lower truncate to tbl instructions for a source element size "
17874 "that is not divisible by the destination element size");
17875 unsigned TruncFactor = SrcElemTySz / DstElemTySz;
17876 assert((SrcElemTySz == 16 || SrcElemTySz == 32 || SrcElemTySz == 64) &&
17877 "Unsupported source vector element type size");
17885 for (
int Itr = 0; Itr < 16; Itr++) {
17886 if (Itr < NumElements)
17888 IsLittleEndian ? Itr * TruncFactor
17889 : Itr * TruncFactor + (TruncFactor - 1)));
17891 MaskConst.
push_back(Builder.getInt8(255));
17894 int MaxTblSz = 128 * 4;
17895 int MaxSrcSz = SrcElemTySz * NumElements;
17897 (MaxTblSz > MaxSrcSz) ? NumElements : (MaxTblSz / SrcElemTySz);
17898 assert(ElemsPerTbl <= 16 &&
17899 "Maximum elements selected using TBL instruction cannot exceed 16!");
17901 int ShuffleCount = 128 / SrcElemTySz;
17903 for (
int i = 0; i < ShuffleCount; ++i)
17910 while (ShuffleLanes.
back() < NumElements) {
17912 Builder.CreateShuffleVector(TI->
getOperand(0), ShuffleLanes), VecTy));
17914 if (Parts.
size() == 4) {
17917 Builder.CreateIntrinsic(Intrinsic::aarch64_neon_tbl4, VecTy, Parts));
17921 for (
int i = 0; i < ShuffleCount; ++i)
17922 ShuffleLanes[i] += ShuffleCount;
17926 "Lowering trunc for vectors requiring different TBL instructions is "
17930 if (!Parts.
empty()) {
17932 switch (Parts.
size()) {
17934 TblID = Intrinsic::aarch64_neon_tbl1;
17937 TblID = Intrinsic::aarch64_neon_tbl2;
17940 TblID = Intrinsic::aarch64_neon_tbl3;
17945 Results.push_back(Builder.CreateIntrinsic(TblID, VecTy, Parts));
17950 assert(
Results.size() <= 2 &&
"Trunc lowering does not support generation of "
17951 "more than 2 tbl instructions!");
17954 if (ElemsPerTbl < 16) {
17956 std::iota(FinalMask.
begin(), FinalMask.
end(), 0);
17957 FinalResult = Builder.CreateShuffleVector(
Results[0], FinalMask);
17961 if (ElemsPerTbl < 16) {
17962 std::iota(FinalMask.
begin(), FinalMask.
begin() + ElemsPerTbl, 0);
17963 std::iota(FinalMask.
begin() + ElemsPerTbl, FinalMask.
end(), 16);
17965 std::iota(FinalMask.
begin(), FinalMask.
end(), 0);
17979 if (!
EnableExtToTBL || Subtarget->useSVEForFixedLengthVectors())
17987 if (!L || L->getHeader() !=
I->getParent() ||
F->hasOptSize())
17992 if (!SrcTy || !DstTy)
17999 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
18000 auto DstWidth = DstTy->getElementType()->getScalarSizeInBits();
18001 if (DstWidth % 8 != 0)
18004 auto *TruncDstType =
18008 auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits();
18009 if (
TTI.getCastInstrCost(
I->getOpcode(), DstTy, TruncDstType,
18012 if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits())
18015 DstTy = TruncDstType;
18023 if (SrcWidth * 4 <= DstWidth) {
18024 if (
all_of(
I->users(), [&](
auto *U) {
18025 using namespace llvm::PatternMatch;
18026 auto *SingleUser = cast<Instruction>(&*U);
18027 if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
18029 if (match(SingleUser,
18030 m_Intrinsic<Intrinsic::vector_partial_reduce_add>(
18031 m_Value(), m_Specific(I))))
18038 if (DstTy->getScalarSizeInBits() >= 64)
18044 DstTy, Subtarget->isLittleEndian());
18047 ZExt->replaceAllUsesWith(Result);
18048 ZExt->eraseFromParent();
18053 if (UIToFP && ((SrcTy->getElementType()->isIntegerTy(8) &&
18054 DstTy->getElementType()->isFloatTy()) ||
18055 (SrcTy->getElementType()->isIntegerTy(16) &&
18056 DstTy->getElementType()->isDoubleTy()))) {
18061 assert(ZExt &&
"Cannot fail for the i8 to float conversion");
18062 auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
18063 I->replaceAllUsesWith(UI);
18064 I->eraseFromParent();
18069 if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
18070 DstTy->getElementType()->isFloatTy()) {
18074 Subtarget->isLittleEndian());
18075 assert(Shuffle &&
"Cannot fail for the i8 to float conversion");
18077 auto *AShr = Builder.CreateAShr(Cast, 24,
"",
true);
18078 auto *
SI = Builder.CreateSIToFP(AShr, DstTy);
18079 I->replaceAllUsesWith(
SI);
18080 I->eraseFromParent();
18088 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
18089 SrcTy->getElementType()->isFloatTy() &&
18090 DstTy->getElementType()->isIntegerTy(8)) {
18092 auto *WideConv = Builder.CreateFPToUI(FPToUI->getOperand(0),
18094 auto *TruncI = Builder.CreateTrunc(WideConv, DstTy);
18095 I->replaceAllUsesWith(TruncI);
18096 I->eraseFromParent();
18106 if (TI && DstTy->getElementType()->isIntegerTy(8) &&
18107 ((SrcTy->getElementType()->isIntegerTy(32) ||
18108 SrcTy->getElementType()->isIntegerTy(64)) &&
18109 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
18118 Align &RequiredAlignment)
const {
18123 RequiredAlignment =
Align(1);
18125 return NumBits == 32 || NumBits == 64;
18132 unsigned VecSize = 128;
18136 VecSize = std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
18137 return std::max<unsigned>(1, (MinElts * ElSize + 127) / VecSize);
18142 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
18152 unsigned MinElts = EC.getKnownMinValue();
18154 UseScalable =
false;
18157 (!Subtarget->useSVEForFixedLengthVectors() ||
18162 !Subtarget->isSVEorStreamingSVEAvailable())
18170 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
18173 if (EC.isScalable()) {
18174 UseScalable =
true;
18175 return isPowerOf2_32(MinElts) && (MinElts * ElSize) % 128 == 0;
18178 unsigned VecSize =
DL.getTypeSizeInBits(VecTy);
18179 if (Subtarget->useSVEForFixedLengthVectors()) {
18180 unsigned MinSVEVectorSize =
18181 std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
18182 if (VecSize % MinSVEVectorSize == 0 ||
18184 (!Subtarget->isNeonAvailable() || VecSize > 128))) {
18185 UseScalable =
true;
18192 return Subtarget->isNeonAvailable() && (VecSize == 64 || VecSize % 128 == 0);
                                        bool Scalable, Type *LDVTy,
  assert(Factor >= 2 && Factor <= 4 && "Invalid interleave factor");
  static const Intrinsic::ID SVELoads[3] = {Intrinsic::aarch64_sve_ld2_sret,
                                            Intrinsic::aarch64_sve_ld3_sret,
                                            Intrinsic::aarch64_sve_ld4_sret};
  static const Intrinsic::ID NEONLoads[3] = {Intrinsic::aarch64_neon_ld2,
                                             Intrinsic::aarch64_neon_ld3,
                                             Intrinsic::aarch64_neon_ld4};
                                         bool Scalable, Type *STVTy,
  assert(Factor >= 2 && Factor <= 4 && "Invalid interleave factor");
  static const Intrinsic::ID SVEStores[3] = {Intrinsic::aarch64_sve_st2,
                                             Intrinsic::aarch64_sve_st3,
                                             Intrinsic::aarch64_sve_st4};
  static const Intrinsic::ID NEONStores[3] = {Intrinsic::aarch64_neon_st2,
                                              Intrinsic::aarch64_neon_st3,
                                              Intrinsic::aarch64_neon_st4};
18272 "Invalid interleave factor");
18273 assert(!Shuffles.empty() && "Empty shufflevector input");
18275 "Unmatched number of shufflevectors and indices");
18280 assert(!Mask && GapMask.popcount() == Factor && "Unexpected mask on a load");
18299 SI->getType()->getScalarSizeInBits() * 4 ==
18300 SI->user_back()->getType()->getScalarSizeInBits();
18310 Type *EltTy = FVTy->getElementType();
18318 FVTy->getNumElements() / NumLoads);
18326 Value *BaseAddr = LI->getPointerOperand();
18328 Type *PtrTy = LI->getPointerOperandType();
18330 LDVTy->getElementCount());
18333 UseScalable, LDVTy, PtrTy);
18340 Value *PTrue = nullptr;
18342 std::optional<unsigned> PgPattern =
18344 if (Subtarget->getMinSVEVectorSizeInBits() ==
18345 Subtarget->getMaxSVEVectorSizeInBits() &&
18346 Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
18347 PgPattern = AArch64SVEPredPattern::all;
18351 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
18355 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
18360 BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
18361 FVTy->getNumElements() * Factor);
18365 LdN = Builder.CreateCall(LdNFunc, {PTrue, BaseAddr}, "ldN");
18367 LdN = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
18370 for (unsigned i = 0; i < Shuffles.size(); i++) {
18372 unsigned Index = Indices[i];
18374 Value *SubVec = Builder.CreateExtractValue(LdN, Index);
18377 SubVec = Builder.CreateExtractVector(FVTy, SubVec, uint64_t(0));
18381 SubVec = Builder.CreateIntToPtr(
18383 FVTy->getNumElements()));
18385 SubVecs[SVI].push_back(SubVec);
18394 auto &SubVec = SubVecs[SVI];
18397 SVI->replaceAllUsesWith(WideVec);
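// --- Editor's note: illustrative sketch, not part of the original file. ---
// The routine above replaces de-interleaving shufflevectors fed by one wide
// load with a single ld2/ld3/ld4 structure load. Roughly the same effect,
// expressed with the ACLE NEON intrinsics for a factor-2 float de-interleave
// (interleaved {re, im} pairs), as a hedged usage example:
#include <arm_neon.h>
static void deinterleave2Sketch(const float *Src, float32x4_t &Re, float32x4_t &Im) {
  float32x4x2_t V = vld2q_f32(Src);   // one LD2 reads four pairs and splits them
  Re = V.val[0];                      // elements 0, 2, 4, 6 of the interleaved data
  Im = V.val[1];                      // elements 1, 3, 5, 7
}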
18403 template <typename Iter>
18405 int MaxLookupDist = 20;
18406 unsigned IdxWidth = DL.getIndexSizeInBits(0);
18407 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
18408 const Value *PtrA1 =
18411 while (++It != End) {
18412 if (It->isDebugOrPseudoInst())
18414 if (MaxLookupDist-- == 0)
18417 const Value *PtrB1 =
18418 SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(
18420 if (PtrA1 == PtrB1 &&
18421 (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.sextOrTrunc(IdxWidth))
18460 const APInt &GapMask) const {
18463 "Invalid interleave factor");
18468 "Unexpected mask on store");
18471 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
18473 unsigned LaneLen = VecTy->getNumElements() / Factor;
18474 Type *EltTy = VecTy->getElementType();
18495 Type *IntTy = DL.getIntPtrType(EltTy);
18496 unsigned NumOpElts =
18501 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
18502 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
18509 LaneLen /= NumStores;
18516 Value *BaseAddr = SI->getPointerOperand();
18530 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
18538 Type *PtrTy = SI->getPointerOperandType();
18540 STVTy->getElementCount());
18543 UseScalable, STVTy, PtrTy);
18545 Value *PTrue = nullptr;
18547 std::optional<unsigned> PgPattern =
18549 if (Subtarget->getMinSVEVectorSizeInBits() ==
18550 Subtarget->getMaxSVEVectorSizeInBits() &&
18551 Subtarget->getMinSVEVectorSizeInBits() ==
18552 DL.getTypeSizeInBits(SubVecTy))
18553 PgPattern = AArch64SVEPredPattern::all;
18557 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
18561 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
18566 for (unsigned i = 0; i < Factor; i++) {
18568 unsigned IdxI = StoreCount * LaneLen * Factor + i;
18569 if (Mask[IdxI] >= 0) {
18570 Shuffle = Builder.CreateShuffleVector(
18573 unsigned StartMask = 0;
18574 for (unsigned j = 1; j < LaneLen; j++) {
18575 unsigned IdxJ = StoreCount * LaneLen * Factor + j * Factor + i;
18576 if (Mask[IdxJ] >= 0) {
18577 StartMask = Mask[IdxJ] - j;
18586 Shuffle = Builder.CreateShuffleVector(
18594 Ops.push_back(Shuffle);
18598 Ops.push_back(PTrue);
18602 if (StoreCount > 0)
18603 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
18604 BaseAddr, LaneLen * Factor);
18606 Ops.push_back(BaseAddr);
18607 Builder.CreateCall(StNFunc, Ops);
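// --- Editor's note: illustrative sketch, not part of the original file. ---
// The store path mirrors the load path: a re-interleaving shufflevector in
// front of a wide store becomes a single st2/st3/st4. The same factor-2 case
// via ACLE NEON intrinsics, as a hedged usage example:
#include <arm_neon.h>
static void interleave2Sketch(float *Dst, float32x4_t Re, float32x4_t Im) {
  float32x4x2_t V = {{Re, Im}};
  vst2q_f32(Dst, V);                  // one ST2 writes re0, im0, re1, im1, ...
}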
18615 if (Factor != 2 && Factor != 3 && Factor != 4) {
18616 LLVM_DEBUG(dbgs() << "Matching ld2, ld3 and ld4 patterns failed\n");
18622 assert(!Mask && "Unexpected mask on a load\n");
18626 const DataLayout &DL = LI->getModule()->getDataLayout();
18641 Type *PtrTy = LI->getPointerOperandType();
18643 UseScalable, LdTy, PtrTy);
18646 Value *Pred = nullptr;
18649 Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue());
18651 Value *BaseAddr = LI->getPointerOperand();
18652 Value *Result = nullptr;
18653 if (NumLoads > 1) {
18656 for (unsigned I = 0; I < NumLoads; ++I) {
18660 Value *LdN = nullptr;
18662 LdN = Builder.CreateCall(LdNFunc, {Pred, Address}, "ldN");
18664 LdN = Builder.CreateCall(LdNFunc, Address, "ldN");
18667 for (unsigned J = 0; J < Factor; ++J) {
18668 ExtractedLdValues[J] = Builder.CreateInsertVector(
18669 VTy, ExtractedLdValues[J], Builder.CreateExtractValue(LdN, J), Idx);
18676 for (unsigned J = 0; J < Factor; ++J)
18677 Result = Builder.CreateInsertValue(Result, ExtractedLdValues[J], J);
18680 Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
18682 Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
18693 unsigned Factor = InterleavedValues.size();
18694 if (Factor != 2 && Factor != 3 && Factor != 4) {
18695 LLVM_DEBUG(dbgs() << "Matching st2, st3 and st4 patterns failed\n");
18701 assert(!Mask && "Unexpected mask on plain store");
18721 Type *PtrTy = SI->getPointerOperandType();
18723 UseScalable, StTy, PtrTy);
18727 Value *BaseAddr = SI->getPointerOperand();
18728 Value *Pred = nullptr;
18732 Builder.CreateVectorSplat(StTy->getElementCount(), Builder.getTrue());
18734 auto ExtractedValues = InterleavedValues;
18739 for (unsigned I = 0; I < NumStores; ++I) {
18741 if (NumStores > 1) {
18746 for (unsigned J = 0; J < Factor; J++) {
18748 Builder.CreateExtractVector(StTy, ExtractedValues[J], Idx);
18751 StoreOperands[StoreOperands.size() - 1] = Address;
18753 Builder.CreateCall(StNFunc, StoreOperands);
18760 const AttributeList &FuncAttributes) const {
18761 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
18762 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
18763 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
18767 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
18768 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
18769 if (Op.isAligned(AlignCheck))
18777 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
18778 AlignmentIsAcceptable(MVT::v16i8, Align(16)))
18780 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
18782 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
18784 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
18790 const MemOp &Op, const AttributeList &FuncAttributes) const {
18791 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
18792 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
18793 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
18797 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
18798 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
18799 if (Op.isAligned(AlignCheck))
18807 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
18808 AlignmentIsAcceptable(MVT::v2i64, Align(16)))
18810 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
18812 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
18814 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
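// --- Editor's note: illustrative sketch, not part of the original file. ---
// Both overloads above encode the same preference ladder for memcpy/memset
// expansion: a 16-byte vector when NEON is usable and the memset is large and
// aligned enough, then f128, then i64, then i32. A hedged restatement of that
// order (the helper name is an assumption, and the real alignment predicate
// also allows fast misaligned accesses, which this sketch ignores):
static const char *pickMemOpTypeSketch(bool CanUseNEON, bool CanUseFP,
                                       bool IsSmallMemset, bool IsMemset,
                                       unsigned Size, unsigned Alignment) {
  if (CanUseNEON && IsMemset && !IsSmallMemset && Alignment >= 16)
    return "16-byte NEON vector";
  if (CanUseFP && !IsSmallMemset && Alignment >= 16)
    return "f128";
  if (Size >= 8 && Alignment >= 8)
    return "i64";
  if (Size >= 4 && Alignment >= 4)
    return "i32";
  return "i8";   // fall back to byte-sized operations
}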
18821 if (Immed == std::numeric_limits<int64_t>::min()) {
18830 if (!Subtarget->hasSVE2())
18849 return std::abs(Imm / 8) <= 16;
18852 return std::abs(Imm / 4) <= 16;
18855 return std::abs(Imm / 2) <= 16;
18882 if (Insn.size() > 1)
18919 if (AM.Scale == 1) {
18922 } else if (AM.Scale == 2) {
18934 if (Ty->isScalableTy()) {
18940 uint64_t VecNumBytes = DL.getTypeSizeInBits(Ty).getKnownMinValue() / 8;
18962 if (Ty->isSized()) {
18963 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
18964 NumBytes = NumBits / 8;
18969 return Subtarget->getInstrInfo()->isLegalAddressingMode(NumBytes, AM.BaseOffs,
18977 int64_t MaxOffset) const {
18978 int64_t HighPart = MinOffset & ~0xfffULL;
19001 return Subtarget->hasFullFP16();
19007 Subtarget->isNonStreamingSVEorSME2Available();
19017 switch (Ty->getScalarType()->getTypeID()) {
19037 static const MCPhysReg ScratchRegs[] = {
19038 AArch64::X16, AArch64::X17, AArch64::LR, 0
19040 return ScratchRegs;
19044 static const MCPhysReg RCRegs[] = {AArch64::FPCR};
19053 "Expected shift op");
19055 SDValue ShiftLHS = N->getOperand(0);
19056 EVT VT = N->getValueType(0);
19077 return SRLC->getZExtValue() == SHLC->getZExtValue();
19089 (N->getOperand(0).getOpcode() == ISD::SHL ||
19090 N->getOperand(0).getOpcode() == ISD::SRL) &&
19091 "Expected XOR(SHIFT) pattern");
19096 if (XorC && ShiftC) {
19097 unsigned MaskIdx, MaskLen;
19098 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
19099 unsigned ShiftAmt = ShiftC->getZExtValue();
19100 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
19101 if (N->getOperand(0).getOpcode() == ISD::SHL)
19102 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
19103 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
19113 N->getOperand(0).getOpcode() == ISD::SRL) ||
19115 N->getOperand(0).getOpcode() == ISD::SHL)) &&
19116 "Expected shift-shift mask");
19118 if (!N->getOperand(0)->hasOneUse())
19122 EVT VT = N->getValueType(0);
19123 if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
19126 return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
19131 if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
19133 unsigned ShlAmt = C2->getZExtValue();
19134 if (auto ShouldADD = *N->user_begin();
19135 ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
19137 EVT MemVT = Load->getMemoryVT();
19139 if (Load->getValueType(0).isScalableVector())
19153 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
19161 assert(Ty->isIntegerTy());
19163 unsigned BitSize = Ty->getPrimitiveSizeInBits();
19167 int64_t Val = Imm.getSExtValue();
19174 Val &= (1LL << 32) - 1;
19182 unsigned Index) const {
19204 EVT VT = N->getValueType(0);
19205 if (!Subtarget->hasNEON() || !VT.isVector())
19219 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
19254 if (N->getValueType(0) != MVT::i32)
19257 SDValue VecReduceOp0 = N->getOperand(0);
19258 bool SawTrailingZext = false;
19264 SawTrailingZext = true;
19269 MVT AbsInputVT = SawTrailingZext ? MVT::v16i16 : MVT::v16i32;
19271 unsigned Opcode = VecReduceOp0.getOpcode();
19277 if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
19278 ABS->getOperand(0)->getValueType(0) != AbsInputVT)
19281 SDValue SUB = ABS->getOperand(0);
19282 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
19283 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
19285 if (SUB->getOperand(0)->getValueType(0) != AbsInputVT ||
19286 SUB->getOperand(1)->getValueType(0) != AbsInputVT)
19290 bool IsZExt = false;
19298 SDValue EXT0 = SUB->getOperand(0);
19299 SDValue EXT1 = SUB->getOperand(1);
19316 UABDHigh8Op0, UABDHigh8Op1);
19327 UABDLo8Op0, UABDLo8Op1);
19348 if (!N->getValueType(0).isScalableVector() ||
19349 (!ST->hasSVE2p1() && !(ST->hasSME2() && ST->isStreaming())))
19354 return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR;
19357 auto MaskEC = N->getValueType(0).getVectorElementCount();
19358 if (!MaskEC.isKnownMultipleOf(NumExts))
19372 if (Use->getValueType(0).getVectorElementCount() != ExtMinEC)
19376 unsigned Offset = Use->getConstantOperandVal(1);
19378 if (Extracts[Part] != nullptr)
19381 Extracts[Part] = Use;
19397 EVT ExtVT = Extracts[0]->getValueType(0);
19401 DCI.CombineTo(Extracts[0], R.getValue(0));
19402 DCI.CombineTo(Extracts[1], R.getValue(1));
19406 if (NumExts == 2) {
19407 assert(N->getValueType(0) == DoubleExtVT);
19413 for (unsigned I = 2; I < NumExts; I += 2) {
19418 DCI.CombineTo(Extracts[I + 1], R.getValue(1));
19420 R.getValue(0), R.getValue(1)));
19434 if (!ST->isNeonAvailable())
19437 if (!ST->hasDotProd())
19448 unsigned DotOpcode;
19452 if (A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
19454 auto OpCodeA = A.getOpcode();
19458 auto OpCodeB = B.getOpcode();
19462 if (OpCodeA == OpCodeB) {
19467 if (!ST->hasMatMulInt8())
19469 DotOpcode = AArch64ISD::USDOT;
19474 DotOpcode = AArch64ISD::UDOT;
19476 DotOpcode = AArch64ISD::SDOT;
19481 EVT Op0VT = A.getOperand(0).getValueType();
19484 if (!IsValidElementCount || !IsValidSize)
19493 B = B.getOperand(0);
19496 unsigned NumOfVecReduce;
19498 if (IsMultipleOf16) {
19500 TargetType = MVT::v4i32;
19503 TargetType = MVT::v2i32;
19506 if (NumOfVecReduce == 1) {
19509 A.getOperand(0), B);
19516 for (; I < VecReduce16Num; I += 1) {
19535 if (VecReduce8Num == 0)
19536 return VecReduceAdd16;
19558 auto DetectAddExtract = [&](SDValue A) {
19562 EVT VT = A.getValueType();
19587 : AArch64ISD::SADDLP;
19591 if (SDValue R = DetectAddExtract(A))
19594 if (A.getOperand(0).getOpcode() == ISD::ADD && A.getOperand(0).hasOneUse())
19598 if (A.getOperand(1).getOpcode() == ISD::ADD && A.getOperand(1).hasOneUse())
19611 EVT VT = A.getValueType();
19612 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
19623 if (ExtVT0 != ExtVT1 ||
19638 return DAG.getNode(AArch64ISD::NVCAST, SDLoc(A), MVT::v8i16, Uaddlv);
19655 MVT OpVT = A.getSimpleValueType();
19656 assert(N->getSimpleValueType(0) == OpVT &&
19657 "The operand type should be consistent with the result type of UADDV");
19661 if (KnownLeadingLanes.isZero())
19671 APInt DemandedElts =
19690 AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
19697 EVT VT = N->getValueType(0);
19702 if (VT.isVector() && Subtarget->isSVEorStreamingSVEAvailable())
19706 if ((VT != MVT::i32 && VT != MVT::i64) ||
19712 if (Divisor == 2 ||
19713 Divisor == APInt(Divisor.getBitWidth(), -2, true))
19720 AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
19727 EVT VT = N->getValueType(0);
19735 if ((VT != MVT::i32 && VT != MVT::i64) ||
19751 CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, And, And, CCVal, Cmp);
19762 CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, AndPos, AndNeg, CCVal,
19777 case Intrinsic::aarch64_sve_cntb:
19778 case Intrinsic::aarch64_sve_cnth:
19779 case Intrinsic::aarch64_sve_cntw:
19780 case Intrinsic::aarch64_sve_cntd:
19790 if (IID == Intrinsic::aarch64_sve_cntp)
19791 return Op.getOperand(1).getValueType().getVectorElementCount();
19793 case Intrinsic::aarch64_sve_cntd:
19795 case Intrinsic::aarch64_sve_cntw:
19797 case Intrinsic::aarch64_sve_cnth:
19799 case Intrinsic::aarch64_sve_cntb:
19802 return std::nullopt;
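// --- Editor's note: illustrative sketch, not part of the original file. ---
// The cases above fold the SVE element-count intrinsics to a known constant
// when the vector length is fixed: cntb/cnth/cntw/cntd (with the "all"
// pattern) report VL/8, VL/16, VL/32 and VL/64 elements respectively.
// A worked instance, assuming a 256-bit vector length:
//   cntb -> 32, cnth -> 16, cntw -> 8, cntd -> 4
static constexpr unsigned sveCntwSketch(unsigned VLBits) { return VLBits / 32; }
static_assert(sveCntwSketch(256) == 8, "a 256-bit VL holds eight 32-bit lanes");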
19829 return TypeNode->getVT();
19839 if (Mask == UCHAR_MAX)
19841 else if (Mask == USHRT_MAX)
19843 else if (Mask == UINT_MAX)
19865 unsigned ExtendOpcode = Extend.getOpcode();
19881 if (PreExtendType == MVT::Other ||
19886 bool SeenZExtOrSExt = !IsAnyExt;
19894 unsigned Opc = Op.getOpcode();
19901 if (SeenZExtOrSExt && OpcIsSExt != IsSExt)
19904 IsSExt = OpcIsSExt;
19905 SeenZExtOrSExt = true;
19913 EVT PreExtendLegalType =
19919 PreExtendLegalType));
19930 unsigned ExtOpc = !SeenZExtOrSExt
19933 return DAG.getNode(ExtOpc, DL, VT, NBV);
19940 EVT VT = Mul->getValueType(0);
19941 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
19952 return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0),
19953 Op1 ? Op1 : Mul->getOperand(1));
19968 EVT VT = Mul->getValueType(0);
19970 int ConstMultiplier =
19976 unsigned AbsConstValue = abs(ConstMultiplier);
19977 unsigned OperandShift =
19986 unsigned B = ConstMultiplier < 0 ? 32 : 31;
19987 unsigned CeilAxOverB = (AbsConstValue + (B - 1)) / B;
19991 if (LowerBound > UpperBound)
19996 int Shift = std::min(std::max(0, LowerBound), UpperBound);
19999 int32_t RdsvlMul = (AbsConstValue >> (OperandShift + Shift)) *
20000 (ConstMultiplier < 0 ? -1 : 1);
20001 auto Rdsvl = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
20014 EVT VT = N->getValueType(0);
20015 if (VT != MVT::v2i64 && VT != MVT::v1i64 && VT != MVT::v2i32 &&
20016 VT != MVT::v4i32 && VT != MVT::v4i16 && VT != MVT::v8i16)
20018 if (N->getOperand(0).getOpcode() != ISD::AND ||
20019 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
20032 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
20033 V3 != (HalfSize - 1))
20044 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, CM);
20052 EVT VT = N->getValueType(0);
20058 N->getOperand(0).getOperand(0).getValueType() !=
20059 N->getOperand(1).getOperand(0).getValueType())
20063 N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
20066 SDValue N0 = N->getOperand(0).getOperand(0);
20067 SDValue N1 = N->getOperand(1).getOperand(0);
20072 if ((S2 == MVT::i32 && S1 == MVT::i8) ||
20073 (S2 == MVT::i64 && (S1 == MVT::i8 || S1 == MVT::i16))) {
20106 EVT VT = N->getValueType(0);
20110 unsigned AddSubOpc;
20112 auto IsAddSubWith1 = [&](SDValue V) -> bool {
20113 AddSubOpc = V->getOpcode();
20125 if (IsAddSubWith1(N0)) {
20127 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
20130 if (IsAddSubWith1(N1)) {
20132 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
20143 const APInt &ConstValue = C->getAPIntValue();
20150 if (ConstValue.sge(1) && ConstValue.sle(16))
20165 unsigned TrailingZeroes = ConstValue.countr_zero();
20166 if (TrailingZeroes) {
20174 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ADD ||
20175 N->user_begin()->getOpcode() == ISD::SUB))
20180 APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
20183 auto Shl = [&](SDValue N0, unsigned N1) {
20214 for (unsigned i = 1; i < BitWidth / 2; i++) {
20234 unsigned TrailingZeroes = CVMinus1.countr_zero();
20235 APInt SCVMinus1 = CVMinus1.ashr(TrailingZeroes) - 1;
20251 unsigned TrailingZeroes = CVMinus1.countr_zero();
20252 APInt CVPlus1 = CVMinus1.ashr(TrailingZeroes) + 1;
20272 APInt SCVMinus1 = ShiftedConstValue - 1;
20273 APInt SCVPlus1 = ShiftedConstValue + 1;
20274 APInt CVPlus1 = ConstValue + 1;
20278 return Shl(Add(Shl(N0, ShiftAmt), N0), TrailingZeroes);
20281 return Sub(Shl(N0, ShiftAmt), N0);
20283 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
20284 return Sub(Shl(N0, ShiftAmt), Shl(N0, TrailingZeroes));
20286 if (Subtarget->hasALULSLFast() &&
20287 isPowPlusPlusConst(ConstValue, CVM, CVN)) {
20288 APInt CVMMinus1 = CVM - 1;
20289 APInt CVNMinus1 = CVN - 1;
20290 unsigned ShiftM1 = CVMMinus1.logBase2();
20291 unsigned ShiftN1 = CVNMinus1.logBase2();
20293 if (ShiftM1 <= 4 && ShiftN1 <= 4) {
20295 return Add(Shl(MVal, ShiftN1), MVal);
20298 if (Subtarget->hasALULSLFast() &&
20299 isPowPlusPlusOneConst(ConstValue, CVM, CVN)) {
20303 if (ShiftM <= 4 && ShiftN <= 4) {
20309 if (Subtarget->hasALULSLFast() &&
20310 isPowMinusMinusOneConst(ConstValue, CVM, CVN)) {
20314 if (ShiftM <= 4 && ShiftN <= 4) {
20323 APInt SCVPlus1 = -ShiftedConstValue + 1;
20324 APInt CVNegPlus1 = -ConstValue + 1;
20325 APInt CVNegMinus1 = -ConstValue - 1;
20328 return Sub(N0, Shl(N0, ShiftAmt));
20330 ShiftAmt = CVNegMinus1.logBase2();
20331 return Negate(Add(Shl(N0, ShiftAmt), N0));
20333 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
20334 return Sub(Shl(N0, TrailingZeroes), Shl(N0, ShiftAmt));
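// --- Editor's note: illustrative sketch, not part of the original file. ---
// The decompositions above turn a multiply by certain constants into one or
// two shift-and-add/sub steps (each maps to an ADD/SUB with a shifted
// operand). Two worked instances of the identities being used:
#include <cstdint>
static uint64_t mulBy45Sketch(uint64_t X) {   // 45 == (2^2 + 1) * (2^3 + 1)
  uint64_t T = (X << 2) + X;                  // X * 5
  return (T << 3) + T;                        // (X * 5) * 9 == X * 45
}
static uint64_t mulBy14Sketch(uint64_t X) {   // 14 == (2^3 - 1) * 2
  return (X << 4) - (X << 1);                 // X*16 - X*2 == X * 14
}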
20354 EVT VT = N->getValueType(0);
20356 N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
20357 VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
20367 if (!BV->isConstant())
20372 EVT IntVT = BV->getValueType(0);
20379 N->getOperand(0)->getOperand(0), MaskConst);
20393 if (N->isStrictFPOpcode())
20404 return !VT.isVector() && VT != MVT::bf16 && VT != MVT::f128;
20407 SDValue SrcVal = N->getOperand(0);
20409 EVT DestTy = N->getValueType(0);
20416 if (DestTy.bitsGT(SrcTy)) {
20425 if (SrcVecTy == MVT::nxv2i32 || DestVecTy == MVT::nxv2i32)
20431 DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
20448 EVT VT = N->getValueType(0);
20449 if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64)
20451 if (VT == MVT::f16 && !Subtarget->hasFullFP16())
20455 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
20476 (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
20495 if (!N->getValueType(0).isSimple())
20499 if (!Op.getValueType().isSimple() || Op.getOpcode() != ISD::FMUL)
20502 if (!Op.getValueType().is64BitVector() && !Op.getValueType().is128BitVector())
20509 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
20511 if (FloatBits != 32 && FloatBits != 64 &&
20512 (FloatBits != 16 || !Subtarget->hasFullFP16()))
20515 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
20516 uint32_t IntBits = IntTy.getSizeInBits();
20517 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
20521 if (IntBits > FloatBits)
20526 int32_t Bits = IntBits == 64 ? 64 : 32;
20528 if (C == -1 || C == 0 || C > Bits)
20531 EVT ResTy = Op.getValueType().changeVectorElementTypeToInteger();
20545 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
20546 : Intrinsic::aarch64_neon_vcvtfp2fxu;
20552 if (IntBits < FloatBits)
20569 EVT VT = N->getValueType(0);
20573 if (CSel0.getOpcode() != AArch64ISD::CSEL ||
20592 if (Cmp1.getOpcode() != AArch64ISD::SUBS &&
20593 Cmp0.getOpcode() == AArch64ISD::SUBS) {
20598 if (Cmp1.getOpcode() != AArch64ISD::SUBS)
20605 if (N->getOpcode() == ISD::AND || N->getOpcode() == AArch64ISD::ANDS) {
20618 if (Op1 && Op1->getAPIntValue().isNegative() &&
20619 Op1->getAPIntValue().sgt(-32)) {
20626 AbsOp1, NZCVOp, Condition, Cmp0);
20629 Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
20653 MaskForTy = 0xffull;
20656 MaskForTy = 0xffffull;
20659 MaskForTy = 0xffffffffull;
20668 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
20676 while (Op.getOpcode() == AArch64ISD::REINTERPRET_CAST &&
20678 Op = Op->getOperand(0);
20688 unsigned Opc = Src->getOpcode();
20691 if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
20705 auto MaskAndTypeMatch = [ExtVal](EVT VT) -> bool {
20706 return ((ExtVal == 0xFF && VT == MVT::i8) ||
20707 (ExtVal == 0xFFFF && VT == MVT::i16) ||
20708 (ExtVal == 0xFFFFFFFF && VT == MVT::i32));
20714 if (MaskAndTypeMatch(EltTy))
20720 if (MaskedLoadOp && (MaskedLoadOp->getExtensionType() == ISD::ZEXTLOAD ||
20723 if (MaskAndTypeMatch(EltTy))
20747 return N->getOperand(1);
20749 return N->getOperand(0);
20756 if (!Src.hasOneUse())
20764 case AArch64ISD::LD1_MERGE_ZERO:
20765 case AArch64ISD::LDNF1_MERGE_ZERO:
20766 case AArch64ISD::LDFF1_MERGE_ZERO:
20769 case AArch64ISD::GLD1_MERGE_ZERO:
20770 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
20771 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
20772 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
20773 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
20774 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
20775 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
20776 case AArch64ISD::GLDFF1_MERGE_ZERO:
20777 case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
20778 case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
20779 case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
20780 case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
20781 case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
20782 case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
20783 case AArch64ISD::GLDNT1_MERGE_ZERO:
20804 EVT VT = N->getValueType(0);
20810 for (auto U : N->users())
20841 EVT VT = N->getValueType(0);
20881 DefBits = ~(DefBits | ZeroSplat);
20888 UndefBits = ~(UndefBits | ZeroSplat);
20890 UndefBits, &LHS)) ||
20904 EVT VT = N->getValueType(0);
20907 if (!N->getFlags().hasAllowReassociation())
20914 unsigned Opc = A.getConstantOperandVal(0);
20915 if (Opc != Intrinsic::aarch64_neon_vcmla_rot0 &&
20916 Opc != Intrinsic::aarch64_neon_vcmla_rot90 &&
20917 Opc != Intrinsic::aarch64_neon_vcmla_rot180 &&
20918 Opc != Intrinsic::aarch64_neon_vcmla_rot270)
20923 A.getOperand(2), A.getOperand(3));
20939 return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
20941 return VT == MVT::i64;
20955 (N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
20956 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege_x2 ||
20957 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
20958 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt_x2 ||
20959 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
20960 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi_x2 ||
20961 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
20962 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs_x2 ||
20963 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
20964 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele_x2 ||
20965 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
20966 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo_x2 ||
20967 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
20968 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels_x2 ||
20969 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
20970 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt_x2)))
21050 MVT::bf16, MVT::f32, MVT::f64}),
21059 return DAG.getNode(AArch64ISD::LASTB, SDLoc(N), N->getValueType(0), Mask,
21075 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
21077 EVT VT = N->getValueType(0);
21078 const bool FullFP16 = Subtarget->hasFullFP16();
21110 if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
21125 {N0->getOperand(0), Extract1, Extract2});
21144 unsigned OffsetElts = 0;
21160 Load->getMemoryVT().isByteSized() &&
21162 return U.getResNo() != N0.getResNo() ||
21163 (U.getUser()->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21164 !any_of(U.getUser()->uses(), [](const SDUse &U2) {
21165 return U2.getUser()->getOpcode() ==
21166 ISD::INSERT_VECTOR_ELT ||
21167 U2.getUser()->getOpcode() == ISD::BUILD_VECTOR ||
21168 U2.getUser()->getOpcode() == ISD::SCALAR_TO_VECTOR;
21175 unsigned Offset = (OffsetElts + N->getConstantOperandVal(1)) *
21176 Load->getValueType(0).getScalarSizeInBits() / 8;
21185 DAG.getExtLoad(ExtType, DL, VT, Load->getChain(), BasePtr,
21186 Load->getPointerInfo().getWithOffset(Offset),
21187 Load->getValueType(0).getScalarType(),
21189 Load->getMemOperand()->getFlags(), Load->getAAInfo());
21202 EVT VT = N->getValueType(0);
21203 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
21209 N->getNumOperands() == 2 && N0Opc == AArch64ISD::ZIP1 &&
21219 return DAG.getNode(AArch64ISD::TRN1, DL, VT, Op0MoreElems, Op1MoreElems);
21244 (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
21246 MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
21248 for (size_t i = 0; i < Mask.size(); ++i)
21270 if (N00Opc == AArch64ISD::VLSHR && N10Opc == AArch64ISD::VLSHR &&
21276 NScalarSize = N->getValueType(0).getScalarSizeInBits();
21278 if (N001ConstVal == N101ConstVal && N001ConstVal > NScalarSize) {
21279 N000 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, N000);
21280 N100 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, N100);
21285 return DAG.getNode(AArch64ISD::VLSHR, DL, VT, Uzp, NewShiftConstant);
21290 if (N->getOperand(0).getValueType() == MVT::v4i8 ||
21291 N->getOperand(0).getValueType() == MVT::v2i16 ||
21292 N->getOperand(0).getValueType() == MVT::v2i8) {
21293 EVT SrcVT = N->getOperand(0).getValueType();
21297 if (N->getNumOperands() % 2 == 0 &&
21299 if (V.getValueType() != SrcVT)
21303 LoadSDNode *LD = dyn_cast<LoadSDNode>(V);
21304 return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
21305 LD->getExtensionType() == ISD::NON_EXTLOAD;
21307 EVT FVT = SrcVT == MVT::v2i8 ? MVT::f16 : MVT::f32;
21311 for (unsigned i = 0; i < N->getNumOperands(); i++) {
21318 LD->getBasePtr(), LD->getMemOperand());
21320 Ops.push_back(NewLoad);
21339 auto isBitwiseVectorNegate = [](SDValue V) {
21340 return V->getOpcode() == ISD::XOR &&
21366 if (N->getNumOperands() == 2 && N0Opc == N1Opc && VT.is128BitVector() &&
21378 return DAG.getNode(N0Opc, DL, VT, Concat0, Concat1);
21382 auto IsRSHRN = [](SDValue Shr) {
21383 if (Shr.getOpcode() != AArch64ISD::VLSHR)
21386 EVT VT = Op.getValueType();
21387 unsigned ShtAmt = Shr.getConstantOperandVal(1);
21392 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
21394 Op.getOperand(1).getConstantOperandVal(0)
21395 << Op.getOperand(1).getConstantOperandVal(1));
21396 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
21399 Op.getOperand(1).getConstantOperandVal(0));
21403 if (Imm != 1ULL << (ShtAmt - 1))
21409 if (N->getNumOperands() == 2 && IsRSHRN(N0) &&
21417 X.getValueType().getDoubleNumVectorElementsVT(*DCI.DAG.getContext());
21428 if (N->getNumOperands() == 2 && N0Opc == AArch64ISD::ZIP1 &&
21435 return DAG.getNode(AArch64ISD::ZIP1, DL, VT, E0, E1);
21459 MVT RHSTy = RHS.getValueType().getSimpleVT();
21465 dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
21481 EVT VT = N->getValueType(0);
21503 SDValue SubVec = N->getOperand(1);
21504 uint64_t IdxVal = N->getConstantOperandVal(2);
21521 if (IdxVal == 0 && Vec.isUndef())
21527 (IdxVal != 0 && IdxVal != NumSubElts))
21572 EVT ResTy = N->getValueType(0);
21583 VecResTy = MVT::v4f32;
21585 VecResTy = MVT::v2f64;
21610 MVT VT = N.getSimpleValueType();
21612 N.getConstantOperandVal(1) == 0)
21613 N = N.getOperand(0);
21615 switch (N.getOpcode()) {
21616 case AArch64ISD::DUP:
21617 case AArch64ISD::DUPLANE8:
21618 case AArch64ISD::DUPLANE16:
21619 case AArch64ISD::DUPLANE32:
21620 case AArch64ISD::DUPLANE64:
21621 case AArch64ISD::MOVI:
21622 case AArch64ISD::MOVIshift:
21623 case AArch64ISD::MOVIedit:
21624 case AArch64ISD::MOVImsl:
21625 case AArch64ISD::MVNIshift:
21626 case AArch64ISD::MVNImsl:
21640 if (N.getValueType().is64BitVector()) {
21652 N = N.getOperand(0);
21655 if (N.getOperand(0).getValueType().isScalableVector())
21657 return N.getConstantOperandAPInt(1) ==
21658 N.getOperand(0).getValueType().getVectorNumElements() / 2;
21707 if (Op.getOpcode() != AArch64ISD::CSEL)
21723 if (!TValue || !FValue)
21727 if (!TValue->isOne()) {
21774 if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
21791 EVT VT = Op->getValueType(0);
21798 EVT VT = N->getValueType(0);
21811 if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
21818 if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
21819 Op2.getOpcode() != AArch64ISD::UADDV ||
21829 DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
21837 EVT VT = N->getValueType(0);
21845 if (LHS.getOpcode() != AArch64ISD::CSEL &&
21846 LHS.getOpcode() != AArch64ISD::CSNEG) {
21848 if (LHS.getOpcode() != AArch64ISD::CSEL &&
21849 LHS.getOpcode() != AArch64ISD::CSNEG) {
21854 if (!LHS.hasOneUse())
21864 if (!CTVal || !CFVal)
21867 if (!(LHS.getOpcode() == AArch64ISD::CSEL &&
21869 !(LHS.getOpcode() == AArch64ISD::CSNEG &&
21874 if (LHS.getOpcode() == AArch64ISD::CSEL && CTVal->isOne() &&
21882 if (LHS.getOpcode() == AArch64ISD::CSNEG && CTVal->isOne() &&
21897 assert(((LHS.getOpcode() == AArch64ISD::CSEL && CFVal->isOne()) ||
21898 (LHS.getOpcode() == AArch64ISD::CSNEG && CFVal->isAllOnes())) &&
21899 "Unexpected constant value");
21905 return DAG.getNode(AArch64ISD::CSINC, DL, VT, NewNode, RHS, CCVal, Cmp);
21910 EVT VT = N->getValueType(0);
21917 auto isZeroDot = [](SDValue Dot) {
21918 return (Dot.getOpcode() == AArch64ISD::UDOT ||
21920 Dot.getOpcode() == AArch64ISD::USDOT) &&
21923 if (!isZeroDot(Dot))
21925 if (!isZeroDot(Dot))
21986 MVT VT = N->getSimpleValueType(0);
21998 LHS.getOpcode() != RHS.getOpcode())
22001 unsigned ExtType = LHS.getOpcode();
22007 if (!RHS.getNode())
22013 if (!LHS.getNode())
22023 return Op.getOpcode() == AArch64ISD::SUBS &&
22024 !Op.getNode()->hasAnyUseOfValue(0);
22030 if (Op.getOpcode() != AArch64ISD::CSEL)
22031 return std::nullopt;
22034 return std::nullopt;
22040 return getInvertedCondCode(CC);
22042 return std::nullopt;
22066 Op->getOperand(0), Op->getOperand(1),
22079 EVT VT = N->getValueType(0);
22091 EVT VT = N->getValueType(0);
22094 (VT == MVT::v4f16 || VT == MVT::v4bf16)) {
22095 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1),
22096 Elt2 = N->getOperand(2), Elt3 = N->getOperand(3);
22110 Elt1->getOperand(0)->getConstantOperandVal(1) == 1) {
22114 if (Elt2->isUndef() && Elt3->isUndef()) {
22120 Elt2->getConstantOperandVal(1) ==
22121 Elt3->getConstantOperandVal(1) &&
22122 Elt2->getOperand(0)->getOpcode() ==
22124 Elt3->getOperand(0)->getOpcode() ==
22129 Elt2->getOperand(0)->getOperand(0) ==
22130 Elt3->getOperand(0)->getOperand(0) &&
22131 Elt2->getOperand(0)->getConstantOperandVal(1) == 0 &&
22132 Elt3->getOperand(0)->getConstantOperandVal(1) == 1) {
22135 DAG.getNode(AArch64ISD::FCVTXN, DL, MVT::v2f32, HighLanesSrcVec);
22138 SDValue DoubleToSingleSticky =
22139 DAG.getNode(AArch64ISD::FCVTXN, DL, MVT::v2f32, LowLanesSrcVec);
22141 DoubleToSingleSticky, HighLanes);
22149 if (VT == MVT::v2f64) {
22150 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
22161 Elt1->getOperand(0)->getConstantOperandVal(1) &&
22176 HalfToSingle, SubvectorIdx);
22191 if (VT != MVT::v2i32)
22194 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
22232 EVT DestVT = N->getValueType(0);
22244 unsigned ShiftAmt = 0;
22246 case (1ULL << 15) - 1:
22247 ScalarType = MVT::i16;
22250 case (1ULL << 31) - 1:
22251 ScalarType = MVT::i32;
22263 if (!RightShiftVec)
22267 if (SExtValue != (ShiftAmt - 1))
22284 if (SExt0Type != SExt1Type || SExt0Type.getScalarType() != ScalarType ||
22319 EVT VT = N->getValueType(0);
22342 "Unexpected legalisation result!");
22344 EVT SrcVectorType = Op.getValueType();
22347 assert((SrcVectorType == MVT::v2i64 || SrcVectorType == MVT::nxv2i64) &&
22348 "Unexpected legalisation result!");
22350 unsigned ExtractIndex =
22364 unsigned Opcode = N.getOpcode();
22370 SrcVT = N.getOperand(0).getValueType();
22372 return SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8;
22378 return AndMask == 0xff || AndMask == 0xffff || AndMask == 0xffffffff;
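// --- Editor's note: illustrative sketch, not part of the original file. ---
// The two predicates above recognise values that already behave like a
// zero-extend of i8/i16/i32: either a narrow source type, or an AND with
// 0xff/0xffff/0xffffffff. Those are the widths the AArch64 extended-register
// forms can fold directly, e.g.:
//   add x0, x1, w2, uxtb      // x0 = x1 + zext(low 8 bits of w2)
// A minimal restatement of the mask check (the name is an assumption):
static bool isZeroExtendMaskSketch(uint64_t AndMask) {
  return AndMask == 0xff || AndMask == 0xffff || AndMask == 0xffffffff;
}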
22390 auto IsOneUseExtend = [](SDValue N) {
22401 if (SUB.getOpcode() != ISD::SUB || !SUB.hasOneUse())
22404 SDValue Shift = SUB.getOperand(0);
22405 if (!IsOneUseExtend(Shift))
22409 EVT VT = N->getValueType(0);
22425 EVT VT = N->getValueType(0);
22426 if (VT != MVT::i32 && VT != MVT::i64)
22449 RHSImm > 4 && LHS.hasOneUse())
22466 if (!Add.hasOneUse())
22473 if (M1.getOpcode() != ISD::MUL && M1.getOpcode() != AArch64ISD::SMULL &&
22474 M1.getOpcode() != AArch64ISD::UMULL)
22480 EVT VT = N->getValueType(0);
22505 if (!N->getValueType(0).isFixedLengthVector())
22516 if (MulValue.getOpcode() != AArch64ISD::MUL_PRED)
22528 DAG.getNode(N->getOpcode(), SDLoc(N), ScalableVT, {ScaledOp, MulValue});
22532 if (SDValue res = performOpt(N->getOperand(0), N->getOperand(1)))
22535 return performOpt(N->getOperand(1), N->getOperand(0));
22543 EVT VT = N->getValueType(0);
22544 if (VT != MVT::i64 ||
22572 DAG.getNode(N->getOpcode(), DL, MVT::v1i64, Op0, Op1),
22581 if (!Ld || !Ld->isSimple())
22612 B.getOperand(1).getNumOperands() != 4)
22616 int NumElts = B.getValueType().getVectorNumElements();
22617 int NumSubElts = NumElts / 4;
22618 for (int I = 0; I < NumSubElts; I++) {
22620 if (SV1->getMaskElt(I) != I ||
22621 SV1->getMaskElt(I + NumSubElts) != I + NumSubElts ||
22622 SV1->getMaskElt(I + NumSubElts * 2) != I + NumSubElts * 2 ||
22623 SV1->getMaskElt(I + NumSubElts * 3) != I + NumElts)
22626 if (SV2->getMaskElt(I) != I ||
22627 SV2->getMaskElt(I + NumSubElts) != I + NumSubElts ||
22628 SV2->getMaskElt(I + NumSubElts * 2) != I + NumElts)
22635 if (!Ld0 || !Ld1 || !Ld2 || !Ld3 || !Ld0->isSimple() || !Ld1->isSimple() ||
22636 !Ld2->isSimple() || !Ld3->isSimple())
22649 unsigned &NumSubLoads) {
22656 if (NumSubLoads && Loads0.size() != NumSubLoads)
22658 NumSubLoads = Loads0.size();
22659 return Loads0.size() == Loads1.size() &&
22660 all_of(zip(Loads0, Loads1), [&DAG](auto L) {
22661 unsigned Size = get<0>(L)->getValueType(0).getSizeInBits();
22662 return Size == get<1>(L)->getValueType(0).getSizeInBits() &&
22675 DAG, NumSubLoads) &&
22703 EVT VT = N->getValueType(0);
22721 Other.getOperand(0).getValueType() ||
22728 unsigned NumSubLoads = 0;
22737 unsigned NumSubElts = NumElts / NumSubLoads;
22759 for (const auto &[L0, L1] : zip(Loads0, Loads1)) {
22761 L0->getBasePtr(), L0->getPointerInfo(),
22762 L0->getBaseAlign());
22772 Ops.push_back(GenCombinedTree(O0, O1, DAG));
22775 SDValue NewOp = GenCombinedTree(Op0, Op1, DAG);
22778 int Hi = NumSubElts, Lo = 0;
22779 for (unsigned i = 0; i < NumSubLoads; i++) {
22780 for (unsigned j = 0; j < NumSubElts; j++) {
22781 LowMask[i * NumSubElts + j] = Lo++;
22782 HighMask[i * NumSubElts + j] = Hi++;
22817 return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift);
22828 EVT VT = N->getValueType(0);
22829 if (VT != MVT::i32 && VT != MVT::i64)
22839 if (Flags.getOpcode() != AArch64ISD::SUBS)
22859 EVT VT = N->getValueType(0);
22860 if (VT != MVT::v2i32 && VT != MVT::v4i16 && VT != MVT::v8i8)
22866 if (AShr.getOpcode() != AArch64ISD::VASHR)
22868 if (AShr.getOpcode() != AArch64ISD::VASHR ||
22869 LShr.getOpcode() != AArch64ISD::VLSHR ||
22878 AArch64ISD::VLSHR, DL, VT, Trunc,
22930 assert(LHS.getValueType().is64BitVector() &&
22931 RHS.getValueType().is64BitVector() &&
22932 "unexpected shape for long operation");
22939 if (!RHS.getNode())
22943 if (!LHS.getNode())
22956 MVT ElemTy = N->getSimpleValueType(0).getScalarType();
22957 unsigned ElemBits = ElemTy.getSizeInBits();
22959 int64_t ShiftAmount;
22961 APInt SplatValue, SplatUndef;
22962 unsigned SplatBitSize;
22965 HasAnyUndefs, ElemBits) ||
22966 SplatBitSize != ElemBits)
22971 ShiftAmount = CVN->getSExtValue();
22976 if (ShiftAmount == 0 && IID != Intrinsic::aarch64_neon_sqshlu)
22977 return N->getOperand(1);
22984 case Intrinsic::aarch64_neon_sqshl:
22985 Opcode = AArch64ISD::SQSHL_I;
22986 IsRightShift = false;
22988 case Intrinsic::aarch64_neon_uqshl:
22989 Opcode = AArch64ISD::UQSHL_I;
22990 IsRightShift = false;
22992 case Intrinsic::aarch64_neon_srshl:
22993 Opcode = AArch64ISD::SRSHR_I;
22994 IsRightShift = true;
22996 case Intrinsic::aarch64_neon_urshl:
22997 Opcode = AArch64ISD::URSHR_I;
22998 IsRightShift = true;
23000 case Intrinsic::aarch64_neon_sqshlu:
23001 Opcode = AArch64ISD::SQSHLU_I;
23002 IsRightShift = false;
23004 case Intrinsic::aarch64_neon_sshl:
23005 case Intrinsic::aarch64_neon_ushl:
23009 if (ShiftAmount < 0) {
23010 Opcode = IID == Intrinsic::aarch64_neon_sshl ? AArch64ISD::VASHR
23011 : AArch64ISD::VLSHR;
23012 ShiftAmount = -ShiftAmount;
23014 Opcode = AArch64ISD::VSHL;
23015 IsRightShift = false;
23019 EVT VT = N->getValueType(0);
23022 if (VT == MVT::i64) {
23027 if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
23030 if (N->getValueType(0) == MVT::i64)
23034 } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
23037 if (N->getValueType(0) == MVT::i64)
23059 N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
23066 DAG.getNode(Opc, DL, N->getOperand(1).getSimpleValueType(),
23076 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
23077 ScalarTy = MVT::i32;
23089 SDValue Scalar = N->getOperand(3);
23090 EVT ScalarTy = Scalar.getValueType();
23092 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
23095 SDValue Passthru = N->getOperand(1);
23097 return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, DL, N->getValueType(0),
23098 Pred, Scalar, Passthru);
23104 EVT VT = N->getValueType(0);
23133 SDValue Comparator = N->getOperand(3);
23134 if (Comparator.getOpcode() == AArch64ISD::DUP ||
23137 EVT VT = N->getValueType(0);
23138 EVT CmpVT = N->getOperand(2).getValueType();
23149 case Intrinsic::aarch64_sve_cmpeq_wide:
23150 case Intrinsic::aarch64_sve_cmpne_wide:
23151 case Intrinsic::aarch64_sve_cmpge_wide:
23152 case Intrinsic::aarch64_sve_cmpgt_wide:
23153 case Intrinsic::aarch64_sve_cmplt_wide:
23154 case Intrinsic::aarch64_sve_cmple_wide: {
23156 int64_t ImmVal = CN->getSExtValue();
23157 if (ImmVal >= -16 && ImmVal <= 15)
23165 case Intrinsic::aarch64_sve_cmphs_wide:
23166 case Intrinsic::aarch64_sve_cmphi_wide:
23167 case Intrinsic::aarch64_sve_cmplo_wide:
23168 case Intrinsic::aarch64_sve_cmpls_wide: {
23170 uint64_t ImmVal = CN->getZExtValue();
23184 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred,
23196 assert(Op.getValueType().isScalableVector() &&
23198 "Expected legal scalable vector type!");
23200 "Expected same type for PTEST operands");
23208 if (Op.getValueType() != MVT::nxv16i1) {
23211 Pg = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, Pg);
23214 Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, Op);
23217 unsigned PTest = AArch64ISD::PTEST;
23219 PTest = AArch64ISD::PTEST_ANY;
23221 PTest = AArch64ISD::PTEST_FIRST;
23238 SDValue VecToReduce = N->getOperand(2);
23257 SDValue VecToReduce = N->getOperand(2);
23274 SDValue InitVal = N->getOperand(2);
23275 SDValue VecToReduce = N->getOperand(3);
23282 DAG.getUNDEF(ReduceVT), InitVal, Zero);
23294 if (N->getValueType(0) != MVT::i16)
23308 bool SwapOperands = false) {
23310 assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
23312 SDValue Op1 = N->getOperand(SwapOperands ? 3 : 2);
23313 SDValue Op2 = N->getOperand(SwapOperands ? 2 : 3);
23329 EVT VT = N->getValueType(0);
23337 case Intrinsic::aarch64_sve_bsl:
23338 return DAG.getNode(AArch64ISD::BSP, DL, VT, Op3, Op1, Op2);
23339 case Intrinsic::aarch64_sve_bsl1n:
23342 case Intrinsic::aarch64_sve_bsl2n:
23343 return DAG.getNode(AArch64ISD::BSP, DL, VT, Op3, Op1,
23345 case Intrinsic::aarch64_sve_nbsl:
23359 case Intrinsic::aarch64_neon_vcvtfxs2fp:
23360 case Intrinsic::aarch64_neon_vcvtfxu2fp:
23362 case Intrinsic::aarch64_neon_saddv:
23364 case Intrinsic::aarch64_neon_uaddv:
23366 case Intrinsic::aarch64_neon_sminv:
23368 case Intrinsic::aarch64_neon_uminv:
23370 case Intrinsic::aarch64_neon_smaxv:
23372 case Intrinsic::aarch64_neon_umaxv:
23374 case Intrinsic::aarch64_neon_fmax:
23376 N->getOperand(1), N->getOperand(2));
23377 case Intrinsic::aarch64_neon_fmin:
23379 N->getOperand(1), N->getOperand(2));
23380 case Intrinsic::aarch64_neon_fmaxnm:
23382 N->getOperand(1), N->getOperand(2));
23383 case Intrinsic::aarch64_neon_fminnm:
23385 N->getOperand(1), N->getOperand(2));
23386 case Intrinsic::aarch64_neon_smull:
23387 return DAG.getNode(AArch64ISD::SMULL, SDLoc(N), N->getValueType(0),
23388 N->getOperand(1), N->getOperand(2));
23389 case Intrinsic::aarch64_neon_umull:
23390 return DAG.getNode(AArch64ISD::UMULL, SDLoc(N), N->getValueType(0),
23391 N->getOperand(1), N->getOperand(2));
23392 case Intrinsic::aarch64_neon_pmull:
23393 return DAG.getNode(AArch64ISD::PMULL, SDLoc(N), N->getValueType(0),
23394 N->getOperand(1), N->getOperand(2));
23395 case Intrinsic::aarch64_neon_sqdmull:
23397 case Intrinsic::aarch64_neon_sqshl:
23398 case Intrinsic::aarch64_neon_uqshl:
23399 case Intrinsic::aarch64_neon_sqshlu:
23400 case Intrinsic::aarch64_neon_srshl:
23401 case Intrinsic::aarch64_neon_urshl:
23402 case Intrinsic::aarch64_neon_sshl:
23403 case Intrinsic::aarch64_neon_ushl:
23405 case Intrinsic::aarch64_neon_sabd:
23407 N->getOperand(1), N->getOperand(2));
23408 case Intrinsic::aarch64_neon_uabd:
23410 N->getOperand(1), N->getOperand(2));
23411 case Intrinsic::aarch64_neon_fcvtzs:
23413 case Intrinsic::aarch64_neon_fcvtzu:
23415 case Intrinsic::aarch64_neon_fcvtas:
23417 case Intrinsic::aarch64_neon_fcvtau:
23419 case Intrinsic::aarch64_neon_fcvtms:
23421 case Intrinsic::aarch64_neon_fcvtmu:
23423 case Intrinsic::aarch64_neon_fcvtns:
23425 case Intrinsic::aarch64_neon_fcvtnu:
23427 case Intrinsic::aarch64_neon_fcvtps:
23429 case Intrinsic::aarch64_neon_fcvtpu:
23431 case Intrinsic::aarch64_crc32b:
23432 case Intrinsic::aarch64_crc32cb:
23434 case Intrinsic::aarch64_crc32h:
23435 case Intrinsic::aarch64_crc32ch:
23437 case Intrinsic::aarch64_sve_saddv:
23439 if (N->getOperand(2).getValueType().getVectorElementType() == MVT::i64)
23443 case Intrinsic::aarch64_sve_uaddv:
23445 case Intrinsic::aarch64_sve_smaxv:
23447 case Intrinsic::aarch64_sve_umaxv:
23449 case Intrinsic::aarch64_sve_sminv:
23451 case Intrinsic::aarch64_sve_uminv:
23453 case Intrinsic::aarch64_sve_orv:
23455 case Intrinsic::aarch64_sve_eorv:
23457 case Intrinsic::aarch64_sve_andv:
23459 case Intrinsic::aarch64_sve_index:
23461 case Intrinsic::aarch64_sve_dup:
23463 case Intrinsic::aarch64_sve_dup_x:
23466 case Intrinsic::aarch64_sve_ext:
23468 case Intrinsic::aarch64_sve_mul_u:
23469 return DAG.getNode(AArch64ISD::MUL_PRED, SDLoc(N), N->getValueType(0),
23470 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23471 case Intrinsic::aarch64_sve_smulh_u:
23472 return DAG.getNode(AArch64ISD::MULHS_PRED, SDLoc(N), N->getValueType(0),
23473 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23474 case Intrinsic::aarch64_sve_umulh_u:
23475 return DAG.getNode(AArch64ISD::MULHU_PRED, SDLoc(N), N->getValueType(0),
23476 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23477 case Intrinsic::aarch64_sve_smin_u:
23478 return DAG.getNode(AArch64ISD::SMIN_PRED, SDLoc(N), N->getValueType(0),
23479 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23480 case Intrinsic::aarch64_sve_umin_u:
23481 return DAG.getNode(AArch64ISD::UMIN_PRED, SDLoc(N), N->getValueType(0),
23482 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23483 case Intrinsic::aarch64_sve_smax_u:
23484 return DAG.getNode(AArch64ISD::SMAX_PRED, SDLoc(N), N->getValueType(0),
23485 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23486 case Intrinsic::aarch64_sve_umax_u:
23487 return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0),
23488 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23489 case Intrinsic::aarch64_sve_lsl_u:
23490 return DAG.getNode(AArch64ISD::SHL_PRED, SDLoc(N), N->getValueType(0),
23491 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23492 case Intrinsic::aarch64_sve_lsr_u:
23493 return DAG.getNode(AArch64ISD::SRL_PRED, SDLoc(N), N->getValueType(0),
23494 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23495 case Intrinsic::aarch64_sve_asr_u:
23496 return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0),
23497 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23498 case Intrinsic::aarch64_sve_fadd_u:
23499 return DAG.getNode(AArch64ISD::FADD_PRED, SDLoc(N), N->getValueType(0),
23500 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23501 case Intrinsic::aarch64_sve_fdiv_u:
23502 return DAG.getNode(AArch64ISD::FDIV_PRED, SDLoc(N), N->getValueType(0),
23503 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23504 case Intrinsic::aarch64_sve_fmax_u:
23505 return DAG.getNode(AArch64ISD::FMAX_PRED, SDLoc(N), N->getValueType(0),
23506 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23507 case Intrinsic::aarch64_sve_fmaxnm_u:
23508 return DAG.getNode(AArch64ISD::FMAXNM_PRED, SDLoc(N), N->getValueType(0),
23509 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23510 case Intrinsic::aarch64_sve_fmla_u:
23511 return DAG.getNode(AArch64ISD::FMA_PRED, SDLoc(N), N->getValueType(0),
23512 N->getOperand(1), N->getOperand(3), N->getOperand(4),
23514 case Intrinsic::aarch64_sve_fmin_u:
23515 return DAG.getNode(AArch64ISD::FMIN_PRED, SDLoc(N), N->getValueType(0),
23516 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23517 case Intrinsic::aarch64_sve_fminnm_u:
23518 return DAG.getNode(AArch64ISD::FMINNM_PRED, SDLoc(N), N->getValueType(0),
23519 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23520 case Intrinsic::aarch64_sve_fmul_u:
23521 return DAG.getNode(AArch64ISD::FMUL_PRED, SDLoc(N), N->getValueType(0),
23522 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23523 case Intrinsic::aarch64_sve_fsub_u:
23524 return DAG.getNode(AArch64ISD::FSUB_PRED, SDLoc(N), N->getValueType(0),
23525 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23526 case Intrinsic::aarch64_sve_add_u:
23529 case Intrinsic::aarch64_sve_sub_u:
23532 case Intrinsic::aarch64_sve_subr:
23534 case Intrinsic::aarch64_sve_and_u:
23537 case Intrinsic::aarch64_sve_bic_u:
23538 return DAG.getNode(AArch64ISD::BIC, SDLoc(N), N->getValueType(0),
23539 N->getOperand(2), N->getOperand(3));
23540 case Intrinsic::aarch64_sve_saddwb:
23541 return DAG.getNode(AArch64ISD::SADDWB, SDLoc(N), N->getValueType(0),
23542 N->getOperand(1), N->getOperand(2));
23543 case Intrinsic::aarch64_sve_saddwt:
23544 return DAG.getNode(AArch64ISD::SADDWT, SDLoc(N), N->getValueType(0),
23545 N->getOperand(1), N->getOperand(2));
23546 case Intrinsic::aarch64_sve_uaddwb:
23547 return DAG.getNode(AArch64ISD::UADDWB, SDLoc(N), N->getValueType(0),
23548 N->getOperand(1), N->getOperand(2));
23549 case Intrinsic::aarch64_sve_uaddwt:
23550 return DAG.getNode(AArch64ISD::UADDWT, SDLoc(N), N->getValueType(0),
23551 N->getOperand(1), N->getOperand(2));
23552 case Intrinsic::aarch64_sve_eor_u:
23555 case Intrinsic::aarch64_sve_orr_u:
23558 case Intrinsic::aarch64_sve_sabd_u:
23561 return DAG.getNode(AArch64ISD::ABDS_PRED, SDLoc(N), N->getValueType(0),
23562 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23563 case Intrinsic::aarch64_sve_uabd_u:
23566 return DAG.getNode(AArch64ISD::ABDU_PRED, SDLoc(N), N->getValueType(0),
23567 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23568 case Intrinsic::aarch64_sve_sdiv_u:
23569 return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0),
23570 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23571 case Intrinsic::aarch64_sve_udiv_u:
23572 return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0),
23573 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23574 case Intrinsic::aarch64_sve_sqadd:
23576 case Intrinsic::aarch64_sve_sqsub_u:
23578 N->getOperand(2), N->getOperand(3));
23579 case Intrinsic::aarch64_sve_uqadd:
23581 case Intrinsic::aarch64_sve_uqsub_u:
23583 N->getOperand(2), N->getOperand(3));
23584 case Intrinsic::aarch64_sve_sqadd_x:
23586 N->getOperand(1), N->getOperand(2));
23587 case Intrinsic::aarch64_sve_sqsub_x:
23589 N->getOperand(1), N->getOperand(2));
23590 case Intrinsic::aarch64_sve_uqadd_x:
23592 N->getOperand(1), N->getOperand(2));
23593 case Intrinsic::aarch64_sve_uqsub_x:
23595 N->getOperand(1), N->getOperand(2));
23596 case Intrinsic::aarch64_sve_asrd:
23597 return DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, SDLoc(N), N->getValueType(0),
23598 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23599 case Intrinsic::aarch64_sve_cmphs:
23600 if (!N->getOperand(2).getValueType().isFloatingPoint())
23602 N->getValueType(0), N->getOperand(1), N->getOperand(2),
23605 case Intrinsic::aarch64_sve_cmphi:
23606 if (!N->getOperand(2).getValueType().isFloatingPoint())
23608 N->getValueType(0), N->getOperand(1), N->getOperand(2),
23611 case Intrinsic::aarch64_sve_fcmpge:
23612 case Intrinsic::aarch64_sve_cmpge:
23614 N->getValueType(0), N->getOperand(1), N->getOperand(2),
23617 case Intrinsic::aarch64_sve_fcmpgt:
23618 case Intrinsic::aarch64_sve_cmpgt:
23620 N->getValueType(0), N->getOperand(1), N->getOperand(2),
23623 case Intrinsic::aarch64_sve_fcmpeq:
23624 case Intrinsic::aarch64_sve_cmpeq:
23626 N->getValueType(0), N->getOperand(1), N->getOperand(2),
23629 case Intrinsic::aarch64_sve_fcmpne:
23630 case Intrinsic::aarch64_sve_cmpne:
23632 N->getValueType(0), N->getOperand(1), N->getOperand(2),
23635 case Intrinsic::aarch64_sve_fcmpuo:
23637 N->getValueType(0), N->getOperand(1), N->getOperand(2),
23640 case Intrinsic::aarch64_sve_fadda:
23642 case Intrinsic::aarch64_sve_faddv:
23644 case Intrinsic::aarch64_sve_fmaxnmv:
23646 case Intrinsic::aarch64_sve_fmaxv:
23648 case Intrinsic::aarch64_sve_fminnmv:
23650 case Intrinsic::aarch64_sve_fminv:
23652 case Intrinsic::aarch64_sve_sel:
23654 N->getOperand(1), N->getOperand(2), N->getOperand(3));
23655 case Intrinsic::aarch64_sve_cmpeq_wide:
23657 case Intrinsic::aarch64_sve_cmpne_wide:
23659 case Intrinsic::aarch64_sve_cmpge_wide:
23661 case Intrinsic::aarch64_sve_cmpgt_wide:
23663 case Intrinsic::aarch64_sve_cmplt_wide:
23665 case Intrinsic::aarch64_sve_cmple_wide:
23667 case Intrinsic::aarch64_sve_cmphs_wide:
23669 case Intrinsic::aarch64_sve_cmphi_wide:
23671 case Intrinsic::aarch64_sve_cmplo_wide:
23673 case Intrinsic::aarch64_sve_cmpls_wide:
23675 case Intrinsic::aarch64_sve_ptest_any:
23676 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
23678 case Intrinsic::aarch64_sve_ptest_first:
23679 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
23681 case Intrinsic::aarch64_sve_ptest_last:
23682 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
23684 case Intrinsic::aarch64_sve_whilelo:
23686 N->getOperand(1), N->getOperand(2));
23687 case Intrinsic::aarch64_sve_bsl:
23688 case Intrinsic::aarch64_sve_bsl1n:
23689 case Intrinsic::aarch64_sve_bsl2n:
23690 case Intrinsic::aarch64_sve_nbsl:
23697 unsigned OC = N->getOpcode();
23713 const SDValue SetCC = N->getOperand(0);
23735 SDLoc(SetCC), N->getValueType(0), Ext1, Ext2,
23747 EVT VT = N->getValueType(0);
23748 if ((VT != MVT::v4i32 && VT != MVT::v8i16) ||
23753 unsigned ExtOffset = N->getOperand(0).getConstantOperandVal(1);
23757 EVT InVT = N->getOperand(0).getOperand(0).getValueType();
23770 bool IsUndefDeInterleave = false;
23771 if (!IsDeInterleave)
23772 IsUndefDeInterleave =
23773 Shuffle->getOperand(1).isUndef() &&
23776 [](int M) { return M < 0; }) &&
23781 if ((!IsDeInterleave && !IsUndefDeInterleave) || Idx >= 4)
23785 Shuffle->getOperand(IsUndefDeInterleave ? 1 : 0));
23787 Shuffle->getOperand(IsUndefDeInterleave ? 0 : 1));
23788 SDValue UZP = DAG.getNode(Idx < 2 ? AArch64ISD::UZP1 : AArch64ISD::UZP2, DL,
23790 if ((Idx & 1) == 1)
23806 EVT VT = N->getValueType(0);
23808 (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16))
23812 unsigned ExtOffset = (unsigned)-1;
23814 ExtOffset = Op.getConstantOperandVal(1);
23815 Op = Op.getOperand(0);
23818 unsigned Shift = 0;
23820 Op.getValueType().getScalarSizeInBits());
23822 if (Op.getOpcode() == AArch64ISD::VLSHR) {
23823 Shift = Op.getConstantOperandVal(1);
23824 Op = Op.getOperand(0);
23825 Mask = Mask.lshr(Shift);
23829 Op = Op.getOperand(0);
23831 } else if (Op.getOpcode() == AArch64ISD::BICi) {
23832 Mask = ~APInt(Op.getValueType().getScalarSizeInBits(),
23833 Op.getConstantOperandVal(1) << Op.getConstantOperandVal(2));
23835 Op = Op.getOperand(0);
23838 if (ExtOffset == (unsigned)-1) {
23840 ExtOffset = Op.getConstantOperandVal(1);
23841 Op = Op.getOperand(0);
23848 if (Op.getOpcode() != AArch64ISD::UZP1 && Op.getOpcode() != AArch64ISD::UZP2)
23850 if (Op.getOpcode() == AArch64ISD::UZP2)
23855 Op.getOperand(ExtOffset == 0 ? 0 : 1));
23857 BC = DAG.getNode(AArch64ISD::VLSHR, DL, VT, BC,
23870 N->getOperand(0).getValueType().is64BitVector() &&
23871 (N->getOperand(0).getOpcode() == ISD::ABDU ||
23872 N->getOperand(0).getOpcode() == ISD::ABDS)) {
23873 SDNode *ABDNode = N->getOperand(0).getNode();
23887 if (N->getValueType(0).isFixedLengthVector() &&
23903 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64)) {
23907 return DAG.getNode(AArch64ISD::REV16, SDLoc(N), N->getValueType(0),
23915 SDValue SplatVal, unsigned NumVecElts) {
23934 if (BasePtr->getOpcode() == ISD::ADD &&
23937 BasePtr = BasePtr->getOperand(0);
23940 unsigned Offset = EltOffset;
23941 while (--NumVecElts) {
23957 assert(ContentTy.isSimple() && "No SVE containers for extended types");
23968 return MVT::nxv2i64;
23973 return MVT::nxv4i32;
23977 case MVT::nxv8bf16:
23978 return MVT::nxv8i16;
23980 return MVT::nxv16i8;
23986 EVT VT = N->getValueType(0);
23991 EVT ContainerVT = VT;
24004 if (ContainerVT.isInteger() && (VT != ContainerVT))
24012 EVT VT = N->getValueType(0);
24013 EVT PtrTy = N->getOperand(3).getValueType();
24022 MINode->getOperand(3), DAG.getUNDEF(PtrTy),
24024 MINode->getMemoryVT(), MINode->getMemOperand(),
24035 template <unsigned Opcode>
24037 static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
24038 Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
24039 "Unsupported opcode.");
24041 EVT VT = N->getValueType(0);
24047 SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
24060 EVT DataVT = Data.getValueType();
24068 if (Data.getValueType().isFloatingPoint())
24080 return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
24087 EVT DataVT = Data.getValueType();
24088 EVT PtrTy = N->getOperand(4).getValueType();
24096 MINode->getMemoryVT(), MINode->getMemOperand(),
24126 if (!(((NumVecElts == 2 || NumVecElts == 3) &&
24128 ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
24154 for (int I = 0; I < NumVecElts; ++I) {
24166 ZeroReg = AArch64::WZR;
24169 ZeroReg = AArch64::XZR;
24193 if (NumVecElts != 4 && NumVecElts != 2)
24204 std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
24206 for (unsigned I = 0; I < NumVecElts; ++I) {
24222 if (IndexVal >= NumVecElts)
24224 IndexNotInserted.reset(IndexVal);
24229 if (IndexNotInserted.any())
24253 return ReplacedZeroSplat;
24259 if (!Subtarget->isMisaligned128StoreSlow())
24284 return ReplacedSplat;
  assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
  if (N->getOperand(2).isUndef())
    return N->getOperand(1);
  assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
          N->getOpcode() == AArch64ISD::UUNPKLO) &&
         "Unexpected Opcode!");
  if (N->getOperand(0).isUndef())
    return DAG.getUNDEF(N->getValueType(0));
  if (N->getOperand(0).getOpcode() == ISD::MLOAD &&
      N->getOpcode() == AArch64ISD::UUNPKLO) {
        SDValue(MLD, 0).hasOneUse() && Mask->getOpcode() == AArch64ISD::PTRUE &&
      unsigned PgPattern = Mask->getConstantOperandVal(0);
      EVT VT = N->getValueType(0);
  if (N->getOpcode() != AArch64ISD::UZP1)
  EVT DstVT = N->getValueType(0);
  return (SrcVT == MVT::nxv8i16 && DstVT == MVT::nxv16i8) ||
         (SrcVT == MVT::nxv4i32 && DstVT == MVT::nxv8i16) ||
         (SrcVT == MVT::nxv2i64 && DstVT == MVT::nxv4i32);
  assert(N->getOpcode() == AArch64ISD::UZP1 && "Only UZP1 expected.");
  EVT ResVT = N->getValueType(0);
  if (RshOpc != AArch64ISD::RSHRNB_I)
  if (Lo.getOpcode() != AArch64ISD::UUNPKLO &&
      Hi.getOpcode() != AArch64ISD::UUNPKHI)
  if (OrigArg != Hi.getOperand(0))
  return DAG.getNode(AArch64ISD::URSHR_I_PRED, DL, ResVT,
  if (VT == MVT::nxv8i16)
    ResVT = MVT::nxv16i8;
  else if (VT == MVT::nxv4i32)
    ResVT = MVT::nxv8i16;
  else if (VT == MVT::nxv2i64)
    ResVT = MVT::nxv4i32;
  unsigned ShiftValue;
      AArch64ISD::RSHRNB_I, DL, ResVT,
  return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Rshrnb);
24448 if (V.getOpcode() != AArch64ISD::NVCAST)
24452 if (!
Op.getValueType().isVector() ||
24453 V.getValueType().getVectorElementCount() !=
24454 Op.getValueType().getVectorElementCount() * 2)
24465 EVT ResVT =
N->getValueType(0);
24476 if (ExtIdx0 == 0 && ExtIdx1 == NumElements / 2) {
24487 if (
N->getOpcode() == AArch64ISD::UZP2)
24492 EVT BCVT = MVT::Other, HalfVT = MVT::Other;
24498 HalfVT = MVT::v8i8;
24502 HalfVT = MVT::v4i16;
24506 HalfVT = MVT::v2i32;
24509 if (BCVT != MVT::Other) {
24522 Rshrnb = DAG.
getNode(AArch64ISD::NVCAST,
DL, ResVT, Rshrnb);
24523 return DAG.
getNode(AArch64ISD::UZP1,
DL, ResVT, Rshrnb, Op1);
24529 Rshrnb = DAG.
getNode(AArch64ISD::NVCAST,
DL, ResVT, Rshrnb);
24530 return DAG.
getNode(AArch64ISD::UZP1,
DL, ResVT, Op0, Rshrnb);
24536 if (PreCast.getOpcode() == AArch64ISD::UUNPKLO) {
24537 if (PreCast.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
24538 SDValue X = PreCast.getOperand(0).getOperand(0);
24539 return DAG.
getNode(AArch64ISD::UZP1,
DL, ResVT,
X, Op1);
24546 if (PreCast.getOpcode() == AArch64ISD::UUNPKHI) {
24547 if (PreCast.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
24548 SDValue Z = PreCast.getOperand(0).getOperand(1);
24549 return DAG.
getNode(AArch64ISD::UZP1,
DL, ResVT, Op0, Z);
24571 if (ResVT != MVT::v2i32 && ResVT != MVT::v4i16 && ResVT != MVT::v8i8)
24580 if ((ResVT == MVT::v4i16 && Op0Ty == MVT::v2i32) ||
24581 (ResVT == MVT::v8i8 && Op0Ty == MVT::v4i16)) {
24585 SourceOp0, SourceOp1);
24605 ResultTy = MVT::v4i32;
24608 ResultTy = MVT::v8i16;
24611 ResultTy = MVT::v16i8;
24622 EVT BitcastResultTy;
24626 BitcastResultTy = MVT::v2i64;
24629 BitcastResultTy = MVT::v4i32;
24632 BitcastResultTy = MVT::v8i16;
  unsigned Opc = N->getOpcode();
  const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO ||
                      Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
  const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO ||
                      Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
  const bool Extended = Opc == AArch64ISD::GLD1_SXTW_MERGE_ZERO ||
                        Opc == AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO ||
                        Opc == AArch64ISD::GLD1_UXTW_MERGE_ZERO ||
                        Opc == AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO;
24661 EVT ResVT =
N->getValueType(0);
24663 const auto OffsetOpc =
Offset.getOpcode();
24664 const bool OffsetIsZExt =
24665 OffsetOpc == AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU;
24666 const bool OffsetIsSExt =
24667 OffsetOpc == AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU;
24670 if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
24678 if (ExtPg == Pg && ExtFromEVT == MVT::i32) {
24685 return DAG.
getNode(NewOpc,
DL, {ResVT, MVT::Other},
24686 {Chain, Pg,
Base, UnextendedOffset, Ty});
  assert(N->getOpcode() == AArch64ISD::VASHR ||
         N->getOpcode() == AArch64ISD::VLSHR);
  unsigned OpScalarSize = Op.getScalarValueSizeInBits();
  unsigned ShiftImm = N->getConstantOperandVal(1);
  assert(OpScalarSize > ShiftImm && "Invalid shift imm");
  if (N->getOpcode() == AArch64ISD::VASHR &&
      Op.getOpcode() == AArch64ISD::VSHL &&
      N->getOperand(1) == Op.getOperand(1))
    return Op.getOperand(0);
  if (N->getFlags().hasExact())
  APInt DemandedMask = ~ShiftedOutBits;
24732 N->getOperand(0)->getOperand(0)->getValueType(0).getScalarType() ==
24734 SDValue CC =
N->getOperand(0)->getOperand(0);
24753 EVT VT =
N->getValueType(0);
24759 unsigned LoadIdx = IsLaneOp ? 1 : 0;
24766 if (LD->isIndexed())
24772 Lane =
N->getOperand(2);
24788 for (
SDUse &U : LD->uses()) {
24789 if (U.getResNo() == 1)
24791 if (U.getUser() !=
N)
24798 if (
N->hasOneUse()) {
24799 unsigned UseOpc =
N->user_begin()->getOpcode();
24804 SDValue Addr = LD->getOperand(1);
24815 uint32_t IncVal = CInc->getZExtValue();
24817 if (IncVal != NumBytes)
24835 Ops.push_back(LD->getOperand(0));
24838 Ops.push_back(Lane);
24840 Ops.push_back(Addr);
24841 Ops.push_back(Inc);
24843 EVT Tys[3] = { VT, MVT::i64, MVT::Other };
24845 unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
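// TBI (top-byte-ignore) lets AArch64 loads and stores disregard the top bits
// of an address. The helper below ignores only the top 4 bits when MTE tags
// (or Darwin's reserved top bits) may be live, and the full top byte
// otherwise, as the NumIgnoreBits computation shows.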
static bool performTBISimplification(SDValue Addr,
  unsigned NumIgnoreBits =
      Subtarget.hasMTE() || Subtarget.isTargetDarwin() ? 4 : 8;
24888 "Expected STORE dag node in input!");
24891 if (!
Store->isTruncatingStore() ||
Store->isIndexed())
24902 Store->getBasePtr(),
Store->getMemOperand());
24925 EVT MemVT =
LD->getMemoryVT();
24927 LD->getBaseAlign() >= 4)
24935 assert(
LD->getOffset().isUndef() &&
"undef offset expected");
24971 performTBISimplification(
N->getOperand(1), DCI, DAG);
24974 EVT RegVT =
LD->getValueType(0);
24975 EVT MemVT =
LD->getMemoryVT();
24980 unsigned AddrSpace =
LD->getAddressSpace();
24984 if (PtrVT !=
LD->getBasePtr().getSimpleValueType()) {
24988 Cast,
LD->getPointerInfo(), MemVT,
24989 LD->getBaseAlign(),
24990 LD->getMemOperand()->getFlags());
24997 if (
SDValue Res = combineV3I8LoadExt(LD, DAG))
25000 if (!
LD->isNonTemporal())
25021 for (
unsigned I = 0;
I < Num256Loads;
I++) {
25022 unsigned PtrOffset =
I * 32;
25027 NewVT,
DL, Chain, NewPtr,
LD->getPointerInfo().getWithOffset(PtrOffset),
25028 NewAlign,
LD->getMemOperand()->getFlags(),
LD->getAAInfo());
25038 unsigned PtrOffset = (MemVT.
getSizeInBits() - BitsRemaining) / 8;
25046 DAG.
getLoad(RemainingVT,
DL, Chain, NewPtr,
25047 LD->getPointerInfo().getWithOffset(PtrOffset), NewAlign,
25048 LD->getMemOperand()->getFlags(),
LD->getAAInfo());
25051 SDValue ExtendedRemainingLoad =
25053 {UndefVector, RemainingLoad, InsertIdx});
25054 LoadOps.
push_back(ExtendedRemainingLoad);
25071 EVT VecVT =
Op.getValueType();
25073 "Need boolean vector type.");
25080 return Op.getOperand(0).getValueType();
25084 for (
SDValue Operand :
Op->op_values()) {
25088 EVT OperandVT = tryGetOriginalBoolVectorType(Operand,
Depth + 1);
25090 BaseVT = OperandVT;
25091 else if (OperandVT != BaseVT)
25105 EVT VecVT = ComparisonResult.getValueType();
25109 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
25119 VecVT = tryGetOriginalBoolVectorType(ComparisonResult);
25121 unsigned BitsPerElement = std::max(64 / NumElts, 8u);
25139 VecVT == MVT::v16i8) {
25143 for (
unsigned Half = 0; Half < 2; ++Half) {
25144 for (
unsigned I = 0;
I < 8; ++
I) {
25147 unsigned MaskBit = IsLE ? (1u <<
I) : (1u << (7 -
I));
25155 SDValue UpperRepresentativeBits =
25156 DAG.
getNode(AArch64ISD::EXT,
DL, VecVT, RepresentativeBits,
25159 RepresentativeBits, UpperRepresentativeBits);
25166 for (
unsigned I = 0;
I < NumEl; ++
I) {
25167 unsigned MaskBit = IsLE ? (1u <<
I) : (1u << (NumEl - 1 -
I));
25181 if (!
Store->isTruncatingStore())
25207 Store->getMemOperand());
25224 if (
Value.getValueType().isVector())
25228 while (
Value->isAssert())
25239 if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
25243 EVT VT =
Value.getSimpleValueType();
25260 DCI.
CombineTo(
ST->getValue().getNode(), Extracted);
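// Returns true when the truncation halves the element width of a legal
// scalable integer type (nxv8i16->nxv8i8, nxv4i32->nxv4i16, nxv2i64->nxv2i32).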
bool isHalvingTruncateOfLegalScalableType(EVT SrcVT, EVT DstVT) {
  return (SrcVT == MVT::nxv8i16 && DstVT == MVT::nxv8i8) ||
         (SrcVT == MVT::nxv4i32 && DstVT == MVT::nxv4i16) ||
         (SrcVT == MVT::nxv2i64 && DstVT == MVT::nxv2i32);
}
25274 EVT ValueVT =
Value.getValueType();
25281 assert(
ST->getOffset().isUndef() &&
"undef offset expected");
25285 Value->getOperand(0).getValueType().getVectorElementType(), 4);
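// Maps a scalar value type to the matching FP/SIMD subregister index
// (bsub/hsub/ssub/dsub, i.e. the 8/16/32/64-bit lanes of a vector register).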
static unsigned getFPSubregForVT(EVT VT) {
  case MVT::aarch64mfp8:
    return AArch64::bsub;
    return AArch64::hsub;
    return AArch64::ssub;
    return AArch64::dsub;
25341 EVT ValueVT =
Value.getValueType();
25342 EVT MemVT =
ST->getMemoryVT();
25346 if (
SDValue Res = combineStoreValueFPToInt(ST, DCI, DAG, Subtarget))
25349 auto hasValidElementTypeForFPTruncStore = [](
EVT VT) {
25351 return EltVT == MVT::f32 || EltVT == MVT::f64;
25355 unsigned AddrSpace =
ST->getAddressSpace();
25362 ST->getBaseAlign(),
ST->getMemOperand()->getFlags(),
25367 if (
SDValue Res = combineI8TruncStore(ST, DAG, Subtarget))
25379 hasValidElementTypeForFPTruncStore(
Value.getOperand(0).getValueType()))
25381 ST->getMemOperand());
25387 performTBISimplification(
N->getOperand(2), DCI, DAG))
25390 if (
SDValue Store = foldTruncStoreOfExt(DAG,
N))
25393 if (
SDValue Store = combineBoolVectorAndTruncateStore(DAG, ST))
25396 if (
ST->isTruncatingStore() &&
25397 isHalvingTruncateOfLegalScalableType(ValueVT, MemVT)) {
25401 MemVT,
ST->getMemOperand());
25425 if (ValueVT != MemVT && !
ST->isTruncatingStore())
25438 !ExtCst->isZero() &&
ST->getBasePtr().getOpcode() !=
ISD::ADD)
25441 if (MemVT == MVT::i64 || MemVT == MVT::i32) {
25445 for (
const auto &
Use :
Vector->uses()) {
25446 if (
Use.getResNo() !=
Vector.getResNo())
25457 if (!ExtCst || !ExtCst->isZero()) {
25463 DAG.
getUNDEF(VectorVT), Ext, Zero);
25466 EVT FPMemVT = MemVT == MVT::i8
25470 FPMemVT, ExtVector);
25472 return DAG.
getStore(
ST->getChain(),
DL, FPSubreg,
ST->getBasePtr(),
25473 ST->getMemOperand());
25484 unsigned NumParts =
N->getNumOperands();
25493 for (
unsigned I = 0;
I < NumParts;
I++)
25494 if (
N->getOperand(
I) !=
SDValue(InterleaveOp,
I))
25503 unsigned RequiredNumParts) {
25506 if (!isSequentialConcatOfVectorInterleave(WideMask.
getNode(),
25507 MaskInterleaveOps))
25510 if (MaskInterleaveOps.
size() != RequiredNumParts)
25517 return MaskInterleaveOps[0];
25524 assert(
EC.isKnownMultipleOf(RequiredNumParts) &&
25525 "Expected element count divisible by number of parts");
25526 EC =
EC.divideCoefficientBy(RequiredNumParts);
25531static SDValue performInterleavedMaskedStoreCombine(
25547 if (!isSequentialConcatOfVectorInterleave(WideValue.
getNode(),
25548 ValueInterleaveOps))
25551 unsigned NumParts = ValueInterleaveOps.
size();
25552 if (NumParts != 2 && NumParts != 4)
25557 EVT SubVecTy = ValueInterleaveOps[0].getValueType();
25565 getNarrowMaskForInterleavedOps(DAG,
DL, MST->
getMask(), NumParts);
25570 NumParts == 2 ? Intrinsic::aarch64_sve_st2 : Intrinsic::aarch64_sve_st4;
25573 NewStOps.
append(ValueInterleaveOps);
25587 if (
SDValue Res = performInterleavedMaskedStoreCombine(
N, DCI, DAG))
25595 Value.getValueType().isInteger()) {
25600 EVT InVT =
Value.getOperand(0).getValueType();
25604 unsigned PgPattern =
Mask->getConstantOperandVal(0);
25624 EVT ValueVT =
Value->getValueType(0);
25626 if (!isHalvingTruncateOfLegalScalableType(ValueVT, MemVT))
25643 EVT IndexVT = Index.getValueType();
25653 if (Index.getOpcode() ==
ISD::ADD) {
25668 if (Index.getOpcode() ==
ISD::SHL &&
25669 Index.getOperand(0).getOpcode() ==
ISD::ADD) {
25679 Add.getOperand(0), ShiftOp);
25701 EVT IndexVT = Index.getValueType();
25706 EVT DataVT =
N->getOperand(1).getValueType();
25720 int64_t Stride = 0;
25726 else if (Index.getOpcode() ==
ISD::SHL &&
25732 Stride = Step << Shift->getZExtValue();
25740 if (Stride < std::numeric_limits<int32_t>::min() ||
25741 Stride > std::numeric_limits<int32_t>::max())
25745 unsigned MaxVScale =
25747 int64_t LastElementOffset =
25750 if (LastElementOffset < std::numeric_limits<int32_t>::min() ||
25751 LastElementOffset > std::numeric_limits<int32_t>::max())
25782 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
25795 SDValue Ops[] = {Chain, HG->getInc(), Mask, BasePtr,
25796 Index, Scale, HG->getIntID()};
25798 DL,
Ops, HG->getMemOperand(), IndexType);
  unsigned AddrOpIdx = N->getNumOperands() - 1;
  SDValue Addr = N->getOperand(AddrOpIdx);
  bool IsStore = false;
  bool IsLaneOp = false;
  bool IsDupOp = false;
  unsigned NewOpc = 0;
  unsigned NumVecs = 0;
  unsigned IntNo = N->getConstantOperandVal(1);
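  // Map each NEON structured load/store intrinsic to its post-indexed
  // (write-back) node, recording how many vectors it touches and whether it
  // is a store, a lane operation, or a duplicating load.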
  case Intrinsic::aarch64_neon_ld2:       NewOpc = AArch64ISD::LD2post;
    NumVecs = 2; break;
  case Intrinsic::aarch64_neon_ld3:       NewOpc = AArch64ISD::LD3post;
    NumVecs = 3; break;
  case Intrinsic::aarch64_neon_ld4:       NewOpc = AArch64ISD::LD4post;
    NumVecs = 4; break;
  case Intrinsic::aarch64_neon_st2:       NewOpc = AArch64ISD::ST2post;
    NumVecs = 2; IsStore = true; break;
  case Intrinsic::aarch64_neon_st3:       NewOpc = AArch64ISD::ST3post;
    NumVecs = 3; IsStore = true; break;
  case Intrinsic::aarch64_neon_st4:       NewOpc = AArch64ISD::ST4post;
    NumVecs = 4; IsStore = true; break;
  case Intrinsic::aarch64_neon_ld1x2:     NewOpc = AArch64ISD::LD1x2post;
    NumVecs = 2; break;
  case Intrinsic::aarch64_neon_ld1x3:     NewOpc = AArch64ISD::LD1x3post;
    NumVecs = 3; break;
  case Intrinsic::aarch64_neon_ld1x4:     NewOpc = AArch64ISD::LD1x4post;
    NumVecs = 4; break;
  case Intrinsic::aarch64_neon_st1x2:     NewOpc = AArch64ISD::ST1x2post;
    NumVecs = 2; IsStore = true; break;
  case Intrinsic::aarch64_neon_st1x3:     NewOpc = AArch64ISD::ST1x3post;
    NumVecs = 3; IsStore = true; break;
  case Intrinsic::aarch64_neon_st1x4:     NewOpc = AArch64ISD::ST1x4post;
    NumVecs = 4; IsStore = true; break;
  case Intrinsic::aarch64_neon_ld2r:      NewOpc = AArch64ISD::LD2DUPpost;
    NumVecs = 2; IsDupOp = true; break;
  case Intrinsic::aarch64_neon_ld3r:      NewOpc = AArch64ISD::LD3DUPpost;
    NumVecs = 3; IsDupOp = true; break;
  case Intrinsic::aarch64_neon_ld4r:      NewOpc = AArch64ISD::LD4DUPpost;
    NumVecs = 4; IsDupOp = true; break;
  case Intrinsic::aarch64_neon_ld2lane:   NewOpc = AArch64ISD::LD2LANEpost;
    NumVecs = 2; IsLaneOp = true; break;
  case Intrinsic::aarch64_neon_ld3lane:   NewOpc = AArch64ISD::LD3LANEpost;
    NumVecs = 3; IsLaneOp = true; break;
  case Intrinsic::aarch64_neon_ld4lane:   NewOpc = AArch64ISD::LD4LANEpost;
    NumVecs = 4; IsLaneOp = true; break;
  case Intrinsic::aarch64_neon_st2lane:   NewOpc = AArch64ISD::ST2LANEpost;
    NumVecs = 2; IsStore = true; IsLaneOp = true; break;
  case Intrinsic::aarch64_neon_st3lane:   NewOpc = AArch64ISD::ST3LANEpost;
    NumVecs = 3; IsStore = true; IsLaneOp = true; break;
  case Intrinsic::aarch64_neon_st4lane:   NewOpc = AArch64ISD::ST4LANEpost;
    NumVecs = 4; IsStore = true; IsLaneOp = true; break;
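  // A post-indexed node is only formed when the constant increment matches
  // the number of bytes actually transferred (the IncVal != NumBytes bail-out
  // below); for lane and dup operations the expected increment is
  // correspondingly smaller than a full set of vectors.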
    VecTy = N->getOperand(2).getValueType();
    VecTy = N->getValueType(0);
  uint32_t IncVal = CInc->getZExtValue();
  if (IsLaneOp || IsDupOp)
  if (IncVal != NumBytes)
  Ops.push_back(N->getOperand(0));
  if (IsLaneOp || IsStore)
    for (unsigned i = 2; i < AddrOpIdx; ++i)
      Ops.push_back(N->getOperand(i));
  Ops.push_back(Addr);
  Ops.push_back(Inc);
  unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
  for (n = 0; n < NumResultVecs; ++n)
  Tys[n++] = MVT::i64;
  Tys[n] = MVT::Other;
  std::vector<SDValue> NewResults;
  for (unsigned i = 0; i < NumResultVecs; ++i) {
    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
  switch (V.getNode()->getOpcode()) {
    if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8) ||
        (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
    if ((TypeNode->getVT() == MVT::i8 && width == 8) ||
        (TypeNode->getVT() == MVT::i16 && width == 16)) {
    if ((TypeNode->getVT() == MVT::i8 && width == 8) ||
        (TypeNode->getVT() == MVT::i16 && width == 16)) {
           1LL << (width - 1);
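// Checks, per condition code, whether comparing (x + AddConstant) against
// CompConstant gives the same answer whether or not x is first masked to
// `width` bits, so the masking AND can be dropped.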
                                 int CompConstant) {
  int MaxUInt = (1 << width);
    AddConstant -= (1 << (width - 1));
    if ((AddConstant == 0) ||
        (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
        (AddConstant >= 0 && CompConstant < 0) ||
        (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
    if ((AddConstant == 0) ||
        (AddConstant >= 0 && CompConstant <= 0) ||
        (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
    if ((AddConstant >= 0 && CompConstant < 0) ||
        (AddConstant <= 0 && CompConstant >= -1 &&
         CompConstant < AddConstant + MaxUInt))
    if ((AddConstant == 0) ||
        (AddConstant > 0 && CompConstant <= 0) ||
        (AddConstant < 0 && CompConstant <= AddConstant))
    if ((AddConstant >= 0 && CompConstant <= 0) ||
        (AddConstant <= 0 && CompConstant >= 0 &&
         CompConstant <= AddConstant + MaxUInt))
    if ((AddConstant > 0 && CompConstant < 0) ||
        (AddConstant < 0 && CompConstant >= 0 &&
         CompConstant < AddConstant + MaxUInt) ||
        (AddConstant >= 0 && CompConstant >= 0 &&
         CompConstant >= AddConstant) ||
        (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
26122 unsigned CCIndex,
unsigned CmpIndex,
26151 N->getOperand(CCIndex)->getValueType(0));
26159 assert((CCIndex == 2 && CmpIndex == 3) &&
26160 "Expected CCIndex to be 2 and CmpIndex to be 3.");
26161 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1), AArch64_CC,
26163 return DAG.
getNode(
N->getOpcode(),
N,
N->getVTList(),
Ops);
26170 unsigned CmpIndex) {
26172 SDNode *SubsNode =
N->getOperand(CmpIndex).getNode();
26173 unsigned CondOpcode = SubsNode->
getOpcode();
26183 unsigned MaskBits = 0;
26207 unsigned ShiftAmt = M.countl_zero();
26212 ShiftedC, ShiftedX);
26219 uint32_t CNV = CN->getZExtValue();
26222 else if (CNV == 65535)
26296 SDValue CSel = Cmp.getOperand(0);
26300 return DAG.
getNode(
N->getOpcode(),
DL,
N->getVTList(), Chain, Dest,
26306 unsigned CmpOpc = Cmp.getOpcode();
26307 if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
26312 if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
26319 "Expected the value type to be the same for both operands!");
26320 if (
LHS.getValueType() != MVT::i32 &&
LHS.getValueType() != MVT::i64)
26336 BR = DAG.
getNode(AArch64ISD::CBZ,
SDLoc(
N), MVT::Other, Chain,
LHS, Dest);
26338 BR = DAG.
getNode(AArch64ISD::CBNZ,
SDLoc(
N), MVT::Other, Chain,
LHS, Dest);
26347 unsigned CC =
N->getConstantOperandVal(2);
26352 Zero =
N->getOperand(0);
26353 CTTZ =
N->getOperand(1);
26355 Zero =
N->getOperand(1);
26356 CTTZ =
N->getOperand(0);
26362 CTTZ.getOperand(0).getOpcode() !=
ISD::CTTZ))
26365 assert((CTTZ.getValueType() == MVT::i32 || CTTZ.getValueType() == MVT::i64) &&
26366 "Illegal type in CTTZ folding");
26372 ? CTTZ.getOperand(0).getOperand(0)
26373 : CTTZ.getOperand(0);
26379 ? CTTZ.getOperand(0).getValueSizeInBits()
26380 : CTTZ.getValueSizeInBits();
26407 if (CmpRHS.
getOpcode() == AArch64ISD::CSEL)
26409 else if (CmpLHS.
getOpcode() != AArch64ISD::CSEL)
26432 else if (CmpRHS !=
X)
26441 EVT VT =
Op->getValueType(0);
26444 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, L, R, CCValue,
Cond);
26453 SDValue SubsNode =
N->getOperand(3);
26459 EVT VT =
N->getValueType(0);
26461 unsigned ExpectedOpcode;
26469 CmpOpConst->getValueType(0));
26471 CmpOpConst->getValueType(0));
26474 ExpectedOp = CmpOpToMatch;
26475 SubsOp = CmpOpToMatch;
26480 if (
Op.getOpcode() != ExpectedOpcode)
26482 if (
Op.getOperand(0).getOpcode() !=
ISD::ADD ||
26483 !
Op.getOperand(0).hasOneUse())
26487 if (
X != CmpOpOther)
26489 if (
X != CmpOpOther)
26491 if (ExpectedOp !=
Op.getOperand(1))
26499 SDValue TReassocOp = GetReassociationOp(
N->getOperand(0), ExpectedOp);
26500 SDValue FReassocOp = GetReassociationOp(
N->getOperand(1), ExpectedOp);
26501 if (!TReassocOp && !FReassocOp)
26508 auto Reassociate = [&](
SDValue ReassocOp,
unsigned OpNum) {
26510 return N->getOperand(OpNum);
26517 SDValue TValReassoc = Reassociate(TReassocOp, 0);
26518 SDValue FValReassoc = Reassociate(FReassocOp, 1);
26519 return DAG.
getNode(AArch64ISD::CSEL,
SDLoc(
N), VT, TValReassoc, FValReassoc,
26527 if (
SDValue R = Fold(CC, ExpectedOp, SubsOp))
26549 auto CheckedFold = [&](
bool Check,
APInt NewCmpConst,
26552 CmpOpConst->getValueType(0));
26554 CmpOpConst->getValueType(0));
26555 return Check ? Fold(NewCC, ExpectedOp, SubsOp) :
SDValue();
26560 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
26564 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
26567 return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
26570 return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
26573 return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
26576 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
26579 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
26582 return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
26597 if (PTest.
getOpcode() != AArch64ISD::PTEST_ANY)
26603 if (TruePred.
getOpcode() == AArch64ISD::REINTERPRET_CAST)
26606 if (AnyPred.
getOpcode() == AArch64ISD::REINTERPRET_CAST)
26627 if (
N->getOperand(0) ==
N->getOperand(1))
26628 return N->getOperand(0);
26647 Cond.hasOneUse() &&
Cond->hasNUsesOfValue(0, 0) &&
26649 {Cond.getOperand(1), Cond.getOperand(0)}) &&
26651 {Cond.getOperand(0), Cond.getOperand(1)}) &&
26659 Cond.getOperand(1),
Cond.getOperand(0));
26660 return DAG.
getNode(AArch64ISD::CSEL,
DL,
N->getVTList(),
N->getOperand(0),
26677 EVT Op0MVT =
Op->getOperand(0).getValueType();
26683 SDNode *FirstUse = *
Op->user_begin();
26690 return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT;
26705 Op->getOperand(0));
26707 Op->getOperand(0));
26708 if (Op0SExt && (isSignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
26709 Op0ExtV =
SDValue(Op0SExt, 0);
26711 }
else if (Op0ZExt && (isUnsignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
26712 Op0ExtV =
SDValue(Op0ZExt, 0);
26719 Op0ExtV, Op1ExtV,
Op->getOperand(2));
26746 EVT VT =
N->getValueType(0);
26753 LHS->getOpcode() == AArch64ISD::CSEL &&
26755 LHS->hasOneUse()) {
26759 auto NewCond = getInvertedCondCode(OldCond);
26763 LHS.getOperand(0),
LHS.getOperand(1),
26771 LHS->hasOneUse()) {
26772 EVT TstVT =
LHS->getValueType(0);
26776 uint64_t TstImm = -1ULL <<
LHS->getConstantOperandVal(1);
26791 EVT ToVT =
LHS->getValueType(0);
26792 EVT FromVT =
LHS->getOperand(0).getValueType();
26797 DL, MVT::i1,
LHS->getOperand(0));
26808 EVT CmpVT =
LHS.getValueType();
26815 SplatLHSVal.
isOne())
26825 unsigned GenericOpcode) {
26829 EVT VT =
N->getValueType(0);
26832 if (!
N->hasAnyUseOfValue(1)) {
26856 if (!
N->hasAnyUseOfValue(0))
26858 return DAG.
getNode(AArch64ISD::SUBS,
SDLoc(
N),
N->getVTList(), R,
26884 if (InnerSetCC->
getOpcode() != AArch64ISD::SETCC_MERGE_ZERO)
26892 if (Pred.getOpcode() == AArch64ISD::PTRUE &&
26893 InnerPred.
getOpcode() == AArch64ISD::PTRUE &&
26895 Pred->getConstantOperandVal(0) >= AArch64SVEPredPattern::vl1 &&
26896 Pred->getConstantOperandVal(0) <= AArch64SVEPredPattern::vl256)
26903 if (V.getOpcode() != AArch64ISD::VASHR ||
26904 V.getOperand(0).getOpcode() != AArch64ISD::VSHL)
26907 unsigned BitWidth = V->getValueType(0).getScalarSizeInBits();
26908 unsigned ShiftAmtR = V.getConstantOperandVal(1);
26909 unsigned ShiftAmtL = V.getOperand(0).getConstantOperandVal(1);
26910 return (ShiftAmtR == ShiftAmtL && ShiftAmtR == (
BitWidth - 1));
26915 assert(
N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
26916 "Unexpected opcode!");
26929 LHS->getOperand(0)->getValueType(0) ==
N->getValueType(0)) {
26933 if (
LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
26934 LHS->getOperand(0)->getOperand(0) == Pred)
26935 return LHS->getOperand(0);
26941 return LHS->getOperand(0);
26950 LHS->getOperand(0), Pred);
26967 LHS.getValueType(), L0, Shl, L2);
26968 return DAG.
getNode(AArch64ISD::SETCC_MERGE_ZERO,
DL,
N->getValueType(0),
26969 Pred, NewLHS,
RHS,
N->getOperand(3));
26983 if (!
Op->hasOneUse())
26999 Bit < Op->getOperand(0).getValueSizeInBits()) {
27003 if (
Op->getNumOperands() != 2)
27010 switch (
Op->getOpcode()) {
27016 if ((
C->getZExtValue() >> Bit) & 1)
27022 if (
C->getZExtValue() <= Bit &&
27023 (Bit -
C->getZExtValue()) <
Op->getValueType(0).getSizeInBits()) {
27024 Bit = Bit -
C->getZExtValue();
27031 Bit = Bit +
C->getZExtValue();
27032 if (Bit >=
Op->getValueType(0).getSizeInBits())
27033 Bit =
Op->getValueType(0).getSizeInBits() - 1;
27038 if ((Bit +
C->getZExtValue()) <
Op->getValueType(0).getSizeInBits()) {
27039 Bit = Bit +
C->getZExtValue();
27046 if ((
C->getZExtValue() >> Bit) & 1)
27056 unsigned Bit =
N->getConstantOperandVal(2);
27057 bool Invert =
false;
27058 SDValue TestSrc =
N->getOperand(1);
27061 if (TestSrc == NewTestSrc)
27064 unsigned NewOpc =
N->getOpcode();
27066 if (NewOpc == AArch64ISD::TBZ)
27067 NewOpc = AArch64ISD::TBNZ;
27069 assert(NewOpc == AArch64ISD::TBNZ);
27070 NewOpc = AArch64ISD::TBZ;
27075 return DAG.
getNode(NewOpc,
DL, MVT::Other,
N->getOperand(0), NewTestSrc,
27085 auto SelectA =
N->getOperand(1);
27086 auto SelectB =
N->getOperand(2);
27087 auto NTy =
N->getValueType(0);
27089 if (!NTy.isScalableVector())
27095 switch (SelectB.getOpcode()) {
27103 if (SelectA != SelectB.getOperand(0))
27109 auto InverseSetCC =
27114 {InverseSetCC, SelectB, SelectA});
27127 SDValue IfTrue =
N->getOperand(1);
27128 SDValue IfFalse =
N->getOperand(2);
27129 EVT ResVT =
N->getValueType(0);
27133 return N->getOperand(1);
27136 return N->getOperand(2);
27163 SDNode *SplatLHS =
N->getOperand(1).getNode();
27164 SDNode *SplatRHS =
N->getOperand(2).getNode();
27166 if (CmpLHS.
getValueType() ==
N->getOperand(1).getValueType() &&
27169 MVT::v2i32, MVT::v4i32, MVT::v2i64}),
27213 EVT ResVT =
N->getValueType(0);
27225 "Scalar-SETCC feeding SELECT has unexpected result type!");
27231 if (SrcVT == MVT::i1 ||
27239 if (!ResVT.
isVector() || NumMaskElts == 0)
27274 return DAG.
getSelect(
DL, ResVT, Mask,
N->getOperand(1),
N->getOperand(2));
27279 EVT VT =
N->getValueType(0);
27293 if (
N->getOpcode() == AArch64ISD::DUP) {
27304 EVT MemVT = LD->getMemoryVT();
27307 (MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) &&
27308 ElemVT != MemVT && LD->hasOneUse()) {
27324 if (
Op.getOpcode() == AArch64ISD::FCMEQ ||
27325 Op.getOpcode() == AArch64ISD::FCMGE ||
27326 Op.getOpcode() == AArch64ISD::FCMGT) {
27328 EVT ExpandedVT = VT;
27347 SDValue EXTRACT_VEC_ELT =
N->getOperand(0);
27365 if (
N->getValueType(0) ==
N->getOperand(0).getValueType())
27366 return N->getOperand(0);
27367 if (
N->getOperand(0).getOpcode() == AArch64ISD::NVCAST)
27368 return DAG.
getNode(AArch64ISD::NVCAST,
SDLoc(
N),
N->getValueType(0),
27369 N->getOperand(0).getOperand(0));
27394 MinOffset = std::min(MinOffset,
C->getZExtValue());
27413 if (
Offset >= (1 << 20))
27418 if (!
T->isSized() ||
27432 !BR.getValueType().isScalarInteger())
27444 "This method is only for scalable vectors of offsets");
27460 unsigned ScalarSizeInBytes) {
27462 if (OffsetInBytes % ScalarSizeInBytes)
27466 if (OffsetInBytes / ScalarSizeInBytes > 31)
27480 unsigned ScalarSizeInBytes) {
27488 bool OnlyPackedOffsets =
true) {
27489 const SDValue Src =
N->getOperand(2);
27490 const EVT SrcVT = Src->getValueType(0);
27492 "Scatter stores are only possible for SVE vectors");
27504 if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64) &&
27505 ((Opcode != AArch64ISD::SST1Q_PRED &&
27506 Opcode != AArch64ISD::SST1Q_INDEX_PRED) ||
27507 ((SrcVT != MVT::nxv8f16) && (SrcVT != MVT::nxv8bf16))))
27520 if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
27523 Opcode = AArch64ISD::SSTNT1_PRED;
27524 }
else if (Opcode == AArch64ISD::SST1Q_INDEX_PRED) {
27527 Opcode = AArch64ISD::SST1Q_PRED;
27535 if ((Opcode == AArch64ISD::SSTNT1_PRED || Opcode == AArch64ISD::SST1Q_PRED) &&
27536 Offset.getValueType().isVector())
27545 if (Opcode == AArch64ISD::SST1_IMM_PRED) {
27548 if (MVT::nxv4i32 ==
Base.getValueType().getSimpleVT().SimpleTy)
27549 Opcode = AArch64ISD::SST1_UXTW_PRED;
27551 Opcode = AArch64ISD::SST1_PRED;
27564 if (!OnlyPackedOffsets &&
27565 Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
27584 if (Src.getValueType().isFloatingPoint())
27601 bool OnlyPackedOffsets =
true) {
27602 const EVT RetVT =
N->getValueType(0);
27604 "Gather loads are only possible for SVE vectors");
27622 if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
27625 Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
27626 }
else if (Opcode == AArch64ISD::GLD1Q_INDEX_MERGE_ZERO) {
27629 Opcode = AArch64ISD::GLD1Q_MERGE_ZERO;
27637 if ((Opcode == AArch64ISD::GLDNT1_MERGE_ZERO ||
27638 Opcode == AArch64ISD::GLD1Q_MERGE_ZERO) &&
27639 Offset.getValueType().isVector())
27648 if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
27649 Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) {
27652 if (MVT::nxv4i32 ==
Base.getValueType().getSimpleVT().SimpleTy)
27653 Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
27654 ? AArch64ISD::GLD1_UXTW_MERGE_ZERO
27655 : AArch64ISD::GLDFF1_UXTW_MERGE_ZERO;
27657 Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
27658 ? AArch64ISD::GLD1_MERGE_ZERO
27659 : AArch64ISD::GLDFF1_MERGE_ZERO;
27672 if (!OnlyPackedOffsets &&
27673 Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
27694 if (RetVT.
isInteger() && (RetVT != HwRetVt))
27710 unsigned Opc = Src->getOpcode();
27713 if (
Opc == AArch64ISD::UUNPKHI ||
Opc == AArch64ISD::UUNPKLO) {
27715 unsigned SOpc =
Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
27716 : AArch64ISD::SUNPKLO;
27731 assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
27732 "Sign extending from an invalid type");
27739 return DAG.
getNode(SOpc,
DL,
N->getValueType(0), Ext);
27743 if (
Opc == AArch64ISD::CSEL &&
27745 EVT VT =
N->getValueType(0);
27751 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal,
27757 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT,
27771 unsigned MemVTOpNum = 4;
27773 case AArch64ISD::LD1_MERGE_ZERO:
27774 NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
27777 case AArch64ISD::LDNF1_MERGE_ZERO:
27778 NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO;
27781 case AArch64ISD::LDFF1_MERGE_ZERO:
27782 NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO;
27785 case AArch64ISD::GLD1_MERGE_ZERO:
27786 NewOpc = AArch64ISD::GLD1S_MERGE_ZERO;
27788 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
27789 NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
27791 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
27792 NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
27794 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
27795 NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
27797 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
27798 NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
27800 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
27801 NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
27803 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
27804 NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO;
27806 case AArch64ISD::GLDFF1_MERGE_ZERO:
27807 NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO;
27809 case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
27810 NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO;
27812 case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
27813 NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO;
27815 case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
27816 NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO;
27818 case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
27819 NewOpc = AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO;
27821 case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
27822 NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO;
27824 case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
27825 NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO;
27827 case AArch64ISD::GLDNT1_MERGE_ZERO:
27828 NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO;
27837 if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
27840 EVT DstVT =
N->getValueType(0);
27844 for (
unsigned I = 0;
I < Src->getNumOperands(); ++
I)
27845 Ops.push_back(Src->getOperand(
I));
27859 const unsigned OffsetPos = 4;
27863 if (
Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
27882 unsigned ScalarSizeInBytes) {
27883 const unsigned ImmPos = 4, OffsetPos = 3;
27903 switch (
Op.getOpcode()) {
27906 case AArch64ISD::ANDV_PRED:
27907 case AArch64ISD::EORV_PRED:
27908 case AArch64ISD::FADDA_PRED:
27909 case AArch64ISD::FADDV_PRED:
27910 case AArch64ISD::FMAXNMV_PRED:
27911 case AArch64ISD::FMAXV_PRED:
27912 case AArch64ISD::FMINNMV_PRED:
27913 case AArch64ISD::FMINV_PRED:
27914 case AArch64ISD::ORV_PRED:
27915 case AArch64ISD::SADDV_PRED:
27916 case AArch64ISD::SMAXV_PRED:
27917 case AArch64ISD::SMINV_PRED:
27918 case AArch64ISD::UADDV_PRED:
27919 case AArch64ISD::UMAXV_PRED:
27920 case AArch64ISD::UMINV_PRED:
27928 switch (
Op.getOpcode()) {
27931 case AArch64ISD::REINTERPRET_CAST:
27935 case AArch64ISD::PTRUE:
27936 return Op.getConstantOperandVal(0) == AArch64SVEPredPattern::all;
27942 SDValue InsertVec =
N->getOperand(0);
27943 SDValue InsertElt =
N->getOperand(1);
27944 SDValue InsertIdx =
N->getOperand(2);
27999 EVT VT =
N->getValueType(0);
28005 auto hasValidElementTypeForFPExtLoad = [](
EVT VT) {
28007 return EltVT == MVT::f32 || EltVT == MVT::f64;
28035 EVT VT =
N->getValueType(0);
28038 if (!VT.
isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME())
28054 EVT VT =
N->getValueType(0);
28056 SDValue Insert =
N->getOperand(0);
28060 if (!Insert.getOperand(0).isUndef())
28063 uint64_t IdxInsert = Insert.getConstantOperandVal(2);
28064 uint64_t IdxDupLane =
N->getConstantOperandVal(1);
28065 if (IdxInsert != 0 || IdxDupLane != 0)
28068 SDValue Bitcast = Insert.getOperand(1);
28072 SDValue Subvec = Bitcast.getOperand(0);
28082 DAG.
getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2));
28083 SDValue NewDuplane128 = DAG.
getNode(AArch64ISD::DUPLANE128,
DL, NewSubvecVT,
28084 NewInsert,
N->getOperand(1));
28109 ExtractHigh =
LHS.getOperand(0);
28116 ExtractHigh =
RHS.getOperand(0);
28127 if (TruncHighOp.
getOpcode() == AArch64ISD::DUP ||
28145 bool HasFoundMULLow =
true;
28147 if (ExtractHighSrcVec->
use_size() != 2)
28148 HasFoundMULLow =
false;
28157 HasFoundMULLow =
false;
28164 if (!ExtractLow || !ExtractLow->
hasOneUse())
28165 HasFoundMULLow =
false;
28168 if (HasFoundMULLow) {
28170 if (ExtractLowUser->
getOpcode() !=
N->getOpcode()) {
28171 HasFoundMULLow =
false;
28173 if (ExtractLowUser->
getOperand(0) == ExtractLow) {
28177 HasFoundMULLow =
false;
28182 HasFoundMULLow =
false;
28195 if (HasFoundMULLow && (TruncLowOp.
getOpcode() == AArch64ISD::DUP ||
28200 if (TruncHighOpVT != UZP1VT)
28202 if (TruncLowOpVT != UZP1VT)
28206 DAG.
getNode(AArch64ISD::UZP1,
DL, UZP1VT, TruncLowOp, TruncHighOp);
28213 if (HasFoundMULLow) {
28243 auto Mask =
N->getOperand(0);
28244 auto Pred =
N->getOperand(1);
28249 if (Pred->getOpcode() == AArch64ISD::REINTERPRET_CAST)
28250 Pred = Pred->getOperand(0);
28253 Pred = Pred->getOperand(0);
28254 Pred = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, MVT::nxv16i1, Pred);
28255 return DAG.
getNode(AArch64ISD::PTEST_FIRST,
DL,
N->getValueType(0), Mask,
28287 EVT VT =
N->getValueType(0);
28288 if (VT != MVT::v1i64)
28304 if (UADDLV.
getOpcode() != AArch64ISD::UADDLV ||
28314 DAG.
getNode(AArch64ISD::NVCAST,
DL, MVT::v1i64, EXTRACT_SUBVEC);
28324 unsigned NumParts =
N->getNumOperands();
28325 if (NumParts != 2 && NumParts != 4)
28328 EVT SubVecTy =
N->getValueType(0);
28341 for (
unsigned I = 0;
I < NumParts;
I++) {
28357 if (!MaskedLoad || !MaskedLoad->hasNUsesOfValue(NumParts, 0) ||
28359 !MaskedLoad->getOffset().isUndef() ||
28360 (!MaskedLoad->getPassThru()->isUndef() &&
28367 getNarrowMaskForInterleavedOps(DAG,
DL, MaskedLoad->getMask(), NumParts);
28371 const Intrinsic::ID IID = NumParts == 2 ? Intrinsic::aarch64_sve_ld2_sret
28372 : Intrinsic::aarch64_sve_ld4_sret;
28373 SDValue NewLdOps[] = {MaskedLoad->getChain(),
28375 MaskedLoad->getBasePtr()};
28379 {SubVecTy, SubVecTy, MVT::Other}, NewLdOps);
28382 {SubVecTy, SubVecTy, SubVecTy, SubVecTy, MVT::Other},
28387 for (
unsigned Idx = 0; Idx < NumParts; Idx++)
28418 if (
N->hasOneUse()) {
28419 unsigned UseOpc =
N->user_begin()->getOpcode();
28421 UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS)
28426 EVT VT =
N->getValueType(0);
28441 unsigned IntrinsicID =
N->getConstantOperandVal(1);
28443 (IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
28444 : AArch64SysReg::RNDRRS);
28469 EVT VT =
N->getValueType(0);
28470 EVT MaskVT = Mask.getValueType();
28489 switch (
N->getOpcode()) {
28508 case AArch64ISD::ANDS:
28510 case AArch64ISD::ADC:
28514 case AArch64ISD::SBC:
28516 case AArch64ISD::ADCS:
28520 case AArch64ISD::SBCS:
28524 case AArch64ISD::ADDS:
28526 case AArch64ISD::SUBS:
28528 case AArch64ISD::BICi:
28569 return performLOADCombine(
N, DCI, DAG, Subtarget);
28571 return performSTORECombine(
N, DCI, DAG, Subtarget);
28573 return performMSTORECombine(
N, DCI, DAG, Subtarget);
28580 case AArch64ISD::BRCOND:
28582 case AArch64ISD::TBNZ:
28583 case AArch64ISD::TBZ:
28585 case AArch64ISD::CSEL:
28587 case AArch64ISD::DUP:
28588 case AArch64ISD::DUPLANE8:
28589 case AArch64ISD::DUPLANE16:
28590 case AArch64ISD::DUPLANE32:
28591 case AArch64ISD::DUPLANE64:
28593 case AArch64ISD::DUPLANE128:
28595 case AArch64ISD::NVCAST:
28597 case AArch64ISD::SPLICE:
28599 case AArch64ISD::UUNPKLO:
28600 case AArch64ISD::UUNPKHI:
28602 case AArch64ISD::UZP1:
28603 case AArch64ISD::UZP2:
28605 case AArch64ISD::SETCC_MERGE_ZERO:
28607 case AArch64ISD::REINTERPRET_CAST:
28609 case AArch64ISD::GLD1_MERGE_ZERO:
28610 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
28611 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
28612 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
28613 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
28614 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
28615 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
28616 case AArch64ISD::GLD1S_MERGE_ZERO:
28617 case AArch64ISD::GLD1S_SCALED_MERGE_ZERO:
28618 case AArch64ISD::GLD1S_UXTW_MERGE_ZERO:
28619 case AArch64ISD::GLD1S_SXTW_MERGE_ZERO:
28620 case AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO:
28621 case AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO:
28622 case AArch64ISD::GLD1S_IMM_MERGE_ZERO:
28624 case AArch64ISD::VASHR:
28625 case AArch64ISD::VLSHR:
28627 case AArch64ISD::SUNPKLO:
28629 case AArch64ISD::BSP:
28639 case AArch64ISD::UADDV:
28641 case AArch64ISD::SMULL:
28642 case AArch64ISD::UMULL:
28643 case AArch64ISD::PMULL:
28645 case AArch64ISD::PTEST_FIRST:
28649 switch (
N->getConstantOperandVal(1)) {
28650 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
28652 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
28654 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
28656 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
28658 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
28659 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
28660 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
28661 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
28662 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
28663 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
28664 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
28665 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
28667 case Intrinsic::aarch64_neon_ld2:
28668 case Intrinsic::aarch64_neon_ld3:
28669 case Intrinsic::aarch64_neon_ld4:
28670 case Intrinsic::aarch64_neon_ld1x2:
28671 case Intrinsic::aarch64_neon_ld1x3:
28672 case Intrinsic::aarch64_neon_ld1x4:
28673 case Intrinsic::aarch64_neon_ld2lane:
28674 case Intrinsic::aarch64_neon_ld3lane:
28675 case Intrinsic::aarch64_neon_ld4lane:
28676 case Intrinsic::aarch64_neon_ld2r:
28677 case Intrinsic::aarch64_neon_ld3r:
28678 case Intrinsic::aarch64_neon_ld4r:
28679 case Intrinsic::aarch64_neon_st2:
28680 case Intrinsic::aarch64_neon_st3:
28681 case Intrinsic::aarch64_neon_st4:
28682 case Intrinsic::aarch64_neon_st1x2:
28683 case Intrinsic::aarch64_neon_st1x3:
28684 case Intrinsic::aarch64_neon_st1x4:
28685 case Intrinsic::aarch64_neon_st2lane:
28686 case Intrinsic::aarch64_neon_st3lane:
28687 case Intrinsic::aarch64_neon_st4lane:
28689 case Intrinsic::aarch64_sve_ldnt1:
28691 case Intrinsic::aarch64_sve_ld1rq:
28693 case Intrinsic::aarch64_sve_ld1ro:
28695 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
28697 case Intrinsic::aarch64_sve_ldnt1_gather:
28699 case Intrinsic::aarch64_sve_ldnt1_gather_index:
28701 AArch64ISD::GLDNT1_INDEX_MERGE_ZERO);
28702 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
28704 case Intrinsic::aarch64_sve_ld1:
28706 case Intrinsic::aarch64_sve_ldnf1:
28708 case Intrinsic::aarch64_sve_ldff1:
28710 case Intrinsic::aarch64_sve_st1:
28712 case Intrinsic::aarch64_sve_stnt1:
28714 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
28716 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
28718 case Intrinsic::aarch64_sve_stnt1_scatter:
28720 case Intrinsic::aarch64_sve_stnt1_scatter_index:
28722 case Intrinsic::aarch64_sve_ld1_gather:
28724 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
28725 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
28727 case Intrinsic::aarch64_sve_ld1q_gather_index:
28729 AArch64ISD::GLD1Q_INDEX_MERGE_ZERO);
28730 case Intrinsic::aarch64_sve_ld1_gather_index:
28732 AArch64ISD::GLD1_SCALED_MERGE_ZERO);
28733 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
28736 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
28739 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
28741 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO,
28743 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
28745 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO,
28747 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
28749 case Intrinsic::aarch64_sve_ldff1_gather:
28751 case Intrinsic::aarch64_sve_ldff1_gather_index:
28753 AArch64ISD::GLDFF1_SCALED_MERGE_ZERO);
28754 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
28756 AArch64ISD::GLDFF1_SXTW_MERGE_ZERO,
28758 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
28760 AArch64ISD::GLDFF1_UXTW_MERGE_ZERO,
28762 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
28764 AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO,
28766 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
28768 AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO,
28770 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
28772 AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
28773 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
28774 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
28776 case Intrinsic::aarch64_sve_st1q_scatter_index:
28778 case Intrinsic::aarch64_sve_st1_scatter:
28780 case Intrinsic::aarch64_sve_st1_scatter_index:
28782 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
28785 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
28788 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
28790 AArch64ISD::SST1_SXTW_SCALED_PRED,
28792 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
28794 AArch64ISD::SST1_UXTW_SCALED_PRED,
28796 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
28798 case Intrinsic::aarch64_rndr:
28799 case Intrinsic::aarch64_rndrrs:
28801 case Intrinsic::aarch64_sme_ldr_zt:
28803 DAG.
getVTList(MVT::Other),
N->getOperand(0),
28804 N->getOperand(2),
N->getOperand(3));
28805 case Intrinsic::aarch64_sme_str_zt:
28807 DAG.
getVTList(MVT::Other),
N->getOperand(0),
28808 N->getOperand(2),
N->getOperand(3));
28831bool AArch64TargetLowering::isUsedByReturnOnly(
SDNode *
N,
28833 if (
N->getNumValues() != 1)
28835 if (!
N->hasNUsesOfValue(1, 0))
28839 SDNode *Copy = *
N->user_begin();
28843 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
28846 TCChain = Copy->getOperand(0);
28850 bool HasRet =
false;
28852 if (
Node->getOpcode() != AArch64ISD::RET_GLUE)
28868bool AArch64TargetLowering::mayBeEmittedAsTailCall(
const CallInst *CI)
const {
28876 if (!CstOffset || CstOffset->isZero())
28882 return isInt<9>(CstOffset->getSExtValue());
28885bool AArch64TargetLowering::getIndexedAddressParts(
SDNode *
N,
SDNode *
Op,
28893 SDNode *ValOnlyUser =
nullptr;
28894 for (SDUse &U :
N->uses()) {
28895 if (
U.getResNo() == 1)
28897 if (ValOnlyUser ==
nullptr)
28898 ValOnlyUser =
U.getUser();
28900 ValOnlyUser =
nullptr;
28905 auto IsUndefOrZero = [](
SDValue V) {
28913 (ValOnlyUser->
getOpcode() == AArch64ISD::DUP_MERGE_PASSTHRU &&
28914 IsUndefOrZero(ValOnlyUser->
getOperand(2)))))
28917 Base =
Op->getOperand(0);
28921 int64_t RHSC =
RHS->getSExtValue();
28923 RHSC = -(uint64_t)RHSC;
28929 if (!Subtarget->isLittleEndian() && MemType.
isVector() &&
28947 VT =
LD->getMemoryVT();
28948 Ptr =
LD->getBasePtr();
28950 VT =
ST->getMemoryVT();
28951 Ptr =
ST->getBasePtr();
28961bool AArch64TargetLowering::getPostIndexedAddressParts(
28967 VT =
LD->getMemoryVT();
28968 Ptr =
LD->getBasePtr();
28970 VT =
ST->getMemoryVT();
28971 Ptr =
ST->getBasePtr();
28990 EVT VT =
N->getValueType(0);
28991 [[maybe_unused]]
EVT SrcVT =
Op.getValueType();
28993 "Must be bool vector.");
28999 bool AllUndef =
true;
29000 for (
unsigned I = 1;
I <
Op.getNumOperands(); ++
I)
29001 AllUndef &=
Op.getOperand(
I).isUndef();
29004 Op =
Op.getOperand(0);
29007 SDValue VectorBits = vectorToScalarBitmask(
Op.getNode(), DAG);
29018 EVT VT =
N->getValueType(0);
29028void AArch64TargetLowering::ReplaceBITCASTResults(
29032 EVT VT =
N->getValueType(0);
29033 EVT SrcVT =
Op.getValueType();
29035 if (VT == MVT::v2i16 && SrcVT == MVT::i32) {
29040 if (VT == MVT::v4i8 && SrcVT == MVT::i32) {
29045 if (VT == MVT::v2i8 && SrcVT == MVT::i16) {
29052 "Expected fp->int bitcast!");
29071 if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
29083 EVT VT =
N->getValueType(0);
29086 !
N->getFlags().hasAllowReassociation()) ||
29087 (VT.
getScalarType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
29095 X =
N->getOperand(1);
29100 if (Shuf->getOperand(0) !=
X || !Shuf->getOperand(1)->isUndef())
29105 for (
int I = 0,
E = Mask.size();
I <
E;
I++)
29106 if (Mask[
I] != (
I % 2 == 0 ?
I + 1 :
I - 1))
29111 assert(LoHi.first.getValueType() == LoHi.second.getValueType());
29112 SDValue Addp = DAG.
getNode(AArch64ISD::ADDP,
N, LoHi.first.getValueType(),
29113 LoHi.first, LoHi.second);
29124 DAG.
getUNDEF(LoHi.first.getValueType())),
29131 unsigned AcrossOp) {
29142void AArch64TargetLowering::ReplaceExtractSubVectorResults(
29145 EVT InVT =
In.getValueType();
29152 EVT VT =
N->getValueType(0);
29165 unsigned Index = CIndex->getZExtValue();
29170 : (unsigned)AArch64ISD::UUNPKHI;
29177void AArch64TargetLowering::ReplaceGetActiveLaneMaskResults(
29179 assert((Subtarget->hasSVE2p1() ||
29180 (Subtarget->hasSME2() && Subtarget->isStreaming())) &&
29181 "Custom lower of get.active.lane.mask missing required feature.");
29183 assert(
N->getValueType(0) == MVT::nxv32i1 &&
29184 "Unexpected result type for get.active.lane.mask");
29191 "Unexpected operand type for get.active.lane.mask");
29205 {WideMask.getValue(0), WideMask.getValue(1)}));
29211 auto [VLo, VHi] = DAG.
SplitScalar(V,
DL, MVT::i64, MVT::i64);
29218 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
29227 assert(
N->getValueType(0) == MVT::i128 &&
29228 "AtomicCmpSwap on types less than 128 should be legal");
29231 if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
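    // With LSE (or outlined atomics) a 128-bit cmpxchg is selected directly
    // to a CASP variant; the merged memory ordering picks between the plain,
    // acquire, release and acquire-release forms.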
    switch (MemOp->getMergedOrdering()) {
      Opcode = AArch64::CASPX;
      Opcode = AArch64::CASPAX;
      Opcode = AArch64::CASPLX;
      Opcode = AArch64::CASPALX;
    unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
  switch (MemOp->getMergedOrdering()) {
    Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
    Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
    Opcode = AArch64::CMP_SWAP_128_RELEASE;
    Opcode = AArch64::CMP_SWAP_128;
29297 auto Desired = DAG.
SplitScalar(
N->getOperand(2),
DL, MVT::i64, MVT::i64);
29298 auto New = DAG.
SplitScalar(
N->getOperand(3),
DL, MVT::i64, MVT::i64);
29299 SDValue Ops[] = {
N->getOperand(1), Desired.first, Desired.second,
29300 New.first, New.second,
N->getOperand(0)};
29302 Opcode,
SDLoc(
N), DAG.
getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other),
29318 "ATOMIC_LOAD_AND should be lowered to LDCLRP directly");
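  // The 128-bit LSE128 RMW instructions (LDCLRP, LDSETP, SWPP) each come in
  // four orderings; the switches below pick the encoding that matches the
  // atomic ordering of the operation.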
  switch (Ordering) {
    return AArch64::LDCLRP;
    return AArch64::LDCLRPA;
    return AArch64::LDCLRPL;
    return AArch64::LDCLRPAL;
  switch (Ordering) {
    return AArch64::LDSETP;
    return AArch64::LDSETPA;
    return AArch64::LDSETPL;
    return AArch64::LDSETPAL;
  switch (Ordering) {
    return AArch64::SWPP;
    return AArch64::SWPPA;
    return AArch64::SWPPL;
    return AArch64::SWPPAL;
29398 assert(
N->getValueType(0) == MVT::i128 &&
29399 "AtomicLoadXXX on types less than 128 should be legal");
29401 if (!Subtarget->hasLSE128())
29405 const SDValue &Chain =
N->getOperand(0);
29406 const SDValue &Ptr =
N->getOperand(1);
29407 const SDValue &Val128 =
N->getOperand(2);
29408 std::pair<SDValue, SDValue> Val2x64 =
29411 const unsigned ISDOpcode =
N->getOpcode();
29412 const unsigned MachineOpcode =
29425 SDValue Ops[] = {Val2x64.first, Val2x64.second, Ptr, Chain};
29443void AArch64TargetLowering::ReplaceNodeResults(
29445 switch (
N->getOpcode()) {
29449 ReplaceBITCASTResults(
N,
Results, DAG);
29468 case AArch64ISD::SADDV:
29471 case AArch64ISD::UADDV:
29474 case AArch64ISD::SMINV:
29477 case AArch64ISD::UMINV:
29480 case AArch64ISD::SMAXV:
29483 case AArch64ISD::UMAXV:
29489 LowerToPredicatedOp(
SDValue(
N, 0), DAG, AArch64ISD::MULHS_PRED));
29494 LowerToPredicatedOp(
SDValue(
N, 0), DAG, AArch64ISD::MULHU_PRED));
29500 assert(
N->getValueType(0) == MVT::i128 &&
"unexpected illegal conversion");
29507 assert(
N->getValueType(0) != MVT::i128 &&
29508 "128-bit ATOMIC_LOAD_AND should be lowered directly to LDCLRP");
29514 "Expected 128-bit atomicrmw.");
29530 if (LoadNode->
isNonTemporal() && Subtarget->isLittleEndian() &&
29539 AArch64ISD::LDNP, SDLoc(
N),
29540 DAG.
getVTList({MVT::v2i64, MVT::v2i64, MVT::Other}),
29541 {LoadNode->getChain(), LoadNode->getBasePtr()},
29560 bool isLoadAcquire =
29562 unsigned Opcode = isLoadAcquire ? AArch64ISD::LDIAPP : AArch64ISD::LDP;
29565 assert(Subtarget->hasFeature(AArch64::FeatureRCPC3));
29568 Opcode, SDLoc(
N), DAG.
getVTList({MVT::i64, MVT::i64, MVT::Other}),
29569 {LoadNode->getChain(), LoadNode->getBasePtr()},
29576 Result.getValue(FirstRes),
Result.getValue(1 - FirstRes));
29582 ReplaceExtractSubVectorResults(
N,
Results, DAG);
29591 ReplaceGetActiveLaneMaskResults(
N,
Results, DAG);
29594 EVT VT =
N->getValueType(0);
29601 case Intrinsic::aarch64_sve_clasta_n: {
29602 assert((VT == MVT::i8 || VT == MVT::i16) &&
29603 "custom lowering for unexpected type");
29606 auto V = DAG.
getNode(AArch64ISD::CLASTA_N,
DL, MVT::i32,
29607 N->getOperand(1), Op2,
N->getOperand(3));
29611 case Intrinsic::aarch64_sve_clastb_n: {
29612 assert((VT == MVT::i8 || VT == MVT::i16) &&
29613 "custom lowering for unexpected type");
29616 auto V = DAG.
getNode(AArch64ISD::CLASTB_N,
DL, MVT::i32,
29617 N->getOperand(1), Op2,
N->getOperand(3));
29621 case Intrinsic::aarch64_sve_lasta: {
29622 assert((VT == MVT::i8 || VT == MVT::i16) &&
29623 "custom lowering for unexpected type");
29625 auto V = DAG.
getNode(AArch64ISD::LASTA,
DL, MVT::i32,
29626 N->getOperand(1),
N->getOperand(2));
29630 case Intrinsic::aarch64_sve_lastb: {
29631 assert((VT == MVT::i8 || VT == MVT::i16) &&
29632 "custom lowering for unexpected type");
29634 auto V = DAG.
getNode(AArch64ISD::LASTB,
DL, MVT::i32,
29635 N->getOperand(1),
N->getOperand(2));
29639 case Intrinsic::aarch64_sme_in_streaming_mode: {
29644 getRuntimePStateSM(DAG, Chain,
DL,
N->getValueType(0));
29649 case Intrinsic::experimental_vector_match: {
29667 assert(
N->getValueType(0) == MVT::i128 &&
29668 "READ_REGISTER custom lowering is only for 128-bit sysregs");
29673 AArch64ISD::MRRS,
DL, DAG.
getVTList({MVT::i64, MVT::i64, MVT::Other}),
29674 Chain, SysRegName);
29688 if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
29703 if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
29713 if (!Subtarget->hasLSE2())
29717 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
29718 LI->getAlign() >=
Align(16);
29721 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
29728 if (!Subtarget->hasLSE128())
29734 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
29735 SI->getAlign() >=
Align(16) &&
29740 return RMW->getValOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
29741 RMW->getAlign() >=
Align(16) &&
29750 if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3())
29754 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
29755 LI->getAlign() >=
Align(16) &&
29759 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
29760 SI->getAlign() >=
Align(16) &&
29783 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
29803 return !Subtarget->hasLSE();
29811 unsigned Size =
SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
29864 return !Subtarget.hasFPARMv8();
29881 unsigned Size = Ty->getPrimitiveSizeInBits();
29882 assert(
Size <= 128 &&
"AtomicExpandPass should've handled larger sizes.");
29884 bool CanUseLSE128 = Subtarget->hasLSE128() &&
Size == 128 &&
29901 if (Subtarget->hasLSE()) {
29919 if (Subtarget->outlineAtomics()) {
29957 if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
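// IR-level load-exclusive: 128-bit values use ldxp/ldaxp and are recombined
// from the {lo, hi} pair as lo | (hi << 64); smaller values use ldxr/ldaxr
// followed by a truncate back to the original width.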
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  if (ValueTy->getPrimitiveSizeInBits() == 128) {
        IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
        Builder.CreateIntrinsic(Int, Addr, nullptr, "lohi");
    Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
    Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
    Lo = Builder.CreateZExt(Lo, Int128Ty, "lo64");
    Hi = Builder.CreateZExt(Hi, Int128Ty, "hi64");
    Value *Or = Builder.CreateOr(
        Lo, Builder.CreateShl(Hi, ConstantInt::get(Int128Ty, 64)), "val64");
    return Builder.CreateBitCast(Or, ValueTy);
      IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
  CallInst *CI = Builder.CreateIntrinsic(Int, Tys, Addr);
                                     Attribute::ElementType, IntEltTy));
  Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);
  return Builder.CreateBitCast(Trunc, ValueTy);
30020 Builder.CreateIntrinsic(Intrinsic::aarch64_clrex, {});
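// IR-level store-exclusive: 128-bit values are split into low and high 64-bit
// halves for stxp/stlxp; smaller values are zero-extended or bitcast to the
// register width expected by stxr/stlxr.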
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
        IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
    Value *CastVal = Builder.CreateBitCast(Val, Int128Ty);
    Value *Lo = Builder.CreateTrunc(CastVal, Int64Ty, "lo");
        Builder.CreateTrunc(Builder.CreateLShr(CastVal, 64), Int64Ty, "hi");
    return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
      IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
  Val = Builder.CreateBitCast(Val, IntValTy);
  CallInst *CI = Builder.CreateCall(
      Stxr, {Builder.CreateZExtOrBitCast(
                                     Attribute::ElementType, Val->getType()));
30068 if (!Ty->isArrayTy()) {
30069 const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
30079bool AArch64TargetLowering::shouldNormalizeToSelectSequence(
LLVMContext &,
30087 M, Intrinsic::thread_pointer, IRB.
getPtrTy());
30099 if (Subtarget->isTargetAndroid())
30104 if (Subtarget->isTargetFuchsia())
30113 RTLIB::LibcallImpl SecurityCheckCookieLibcall =
30114 Libcalls.getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
30116 RTLIB::LibcallImpl SecurityCookieVar =
30117 Libcalls.getLibcallImpl(RTLIB::STACK_CHECK_GUARD);
30118 if (SecurityCheckCookieLibcall != RTLIB::Unsupported &&
30119 SecurityCookieVar != RTLIB::Unsupported) {
30131 F->addParamAttr(0, Attribute::AttrKind::InReg);
30143 if (Subtarget->isTargetAndroid())
30152 const Constant *PersonalityFn)
const {
30154 return AArch64::X0;
30160 const Constant *PersonalityFn)
const {
30162 return AArch64::X1;
30175 return Mask->getValue().isPowerOf2();
30181 unsigned OldShiftOpcode,
unsigned NewShiftOpcode,
30185 X, XC, CC,
Y, OldShiftOpcode, NewShiftOpcode, DAG))
30188 return X.getValueType().isScalarInteger() || NewShiftOpcode ==
ISD::SHL;
30195 !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
30211 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
30220 if (AArch64::GPR64RegClass.contains(*I))
30221 RC = &AArch64::GPR64RegClass;
30222 else if (AArch64::FPR64RegClass.contains(*I))
30223 RC = &AArch64::FPR64RegClass;
30233 assert(Entry->getParent()->getFunction().hasFnAttribute(
30234 Attribute::NoUnwind) &&
30235 "Function should be nounwind in insertCopiesSplitCSR!");
30236 Entry->addLiveIn(*I);
30241 for (auto *Exit : Exits)
30243 TII->get(TargetOpcode::COPY), *I)
30256 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
30284 if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16())
30286 if (FPVT == MVT::v8bf16)
30302 "Invalid call instruction for a KCFI check");
30304 switch (MBBI->getOpcode()) {
30306 case AArch64::BLRNoIP:
30307 case AArch64::TCRETURNri:
30308 case AArch64::TCRETURNrix16x17:
30309 case AArch64::TCRETURNrix17:
30310 case AArch64::TCRETURNrinotx16:
30317 assert(Target.isReg() && "Invalid target operand for an indirect call");
30318 Target.setIsRenamable(false);
30332 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
30338void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
30364bool AArch64TargetLowering::shouldLocalize(
30366 auto &MF = *MI.getMF();
30368 auto maxUses = [](unsigned RematCost) {
30370 if (RematCost == 1)
30371 return std::numeric_limits<unsigned>::max();
30372 if (RematCost == 2)
30381 unsigned Opc = MI.getOpcode();
30383 case TargetOpcode::G_GLOBAL_VALUE: {
30392 case TargetOpcode::G_FCONSTANT:
30393 case TargetOpcode::G_CONSTANT: {
30394 const ConstantInt *CI;
30395 unsigned AdditionalCost = 0;
30397 if (Opc == TargetOpcode::G_CONSTANT)
30398 CI = MI.getOperand(1).getCImm();
30400 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
30405 auto APF = MI.getOperand(1).getFPImm()->getValueAPF();
30413 AdditionalCost = 1;
30421 RematCost += AdditionalCost;
30423 unsigned MaxUses = maxUses(RematCost);
30425 if (MaxUses == std::numeric_limits<unsigned>::max())
30427 return MRI.hasAtMostUserInstrs(Reg, MaxUses);
30431 case AArch64::ADRP:
30432 case AArch64::G_ADD_LOW:
30434 case TargetOpcode::G_PTR_ADD:
30456 if (AI->getAllocatedType()->isScalableTy())
30476 "Expected legal fixed length vector!");
30481 return EVT(MVT::nxv16i8);
30483 return EVT(MVT::nxv8i16);
30485 return EVT(MVT::nxv4i32);
30487 return EVT(MVT::nxv2i64);
30489 return EVT(MVT::nxv8bf16);
30491 return EVT(MVT::nxv8f16);
30493 return EVT(MVT::nxv4f32);
30495 return EVT(MVT::nxv2f64);
30504 "Expected legal fixed length vector!");
30506 std::optional<unsigned> PgPattern =
30508 assert(PgPattern &&
"Unexpected element count for SVE predicate");
30515 MaskVT = MVT::nxv16i1;
30520 MaskVT = MVT::nxv8i1;
30524 MaskVT = MVT::nxv4i1;
30528 MaskVT = MVT::nxv2i1;
30532 return getPTrue(DAG, DL, MaskVT, *PgPattern);
30538 "Expected legal scalable vector!");
30553 "Expected to convert into a scalable vector!");
30554 assert(V.getValueType().isFixedLengthVector() &&
30555 "Expected a fixed length vector operand!");
30564 "Expected to convert into a fixed length vector!");
30565 assert(V.getValueType().isScalableVector() &&
30566 "Expected a scalable vector operand!");
30573SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
30578 EVT VT = Op.getValueType();
30580 EVT LoadVT = ContainerVT;
30581 EVT MemVT = Load->getMemoryVT();
30591 LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg,
30593 Load->getAddressingMode(), Load->getExtensionType());
30600 Result = getSVESafeBitCast(ExtendVT, Result, DAG);
30601 Result = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
30602 Pg, Result, DAG.getUNDEF(ContainerVT));
30615 EVT InVT = Mask.getValueType();
30622 bool InvertCond = false;
30625 Mask = Mask.getOperand(0);
30646 {Pg, Op1, Op2, DAG.getCondCode(CC)});
30650SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
30655 EVT VT = Op.getValueType();
30663 "Incorrect mask type");
30669 bool IsPassThruZeroOrUndef = false;
30671 if (Load->getPassThru()->isUndef()) {
30672 PassThru = DAG.getUNDEF(ContainerVT);
30673 IsPassThruZeroOrUndef = true;
30680 IsPassThruZeroOrUndef = true;
30684 ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
30685 Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
30686 Load->getAddressingMode(), Load->getExtensionType());
30689 if (!IsPassThruZeroOrUndef) {
30701SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
30706 EVT VT = Store->getValue().getValueType();
30708 EVT MemVT = Store->getMemoryVT();
30717 NewValue = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, TruncVT, Pg,
30729 Store->getBasePtr(), Store->getOffset(), Pg, MemVT,
30730 Store->getMemOperand(), Store->getAddressingMode(),
30731 Store->isTruncatingStore());
30738 EVT VT = Store->getValue().getValueType();
30740 return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
30742 if (!Store->isCompressingStore())
30745 EVT MaskVT = Store->getMask().getValueType();
30754 if (MaskReduceVT != MVT::i64)
30765 CompressedMask, Store->getMemoryVT(),
30766 Store->getMemOperand(), Store->getAddressingMode(),
30767 Store->isTruncatingStore(),
30771SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
30776 EVT VT = Store->getValue().getValueType();
30784 Mask, Store->getMemoryVT(), Store->getMemOperand(),
30785 Store->getAddressingMode(), Store->isTruncatingStore(),
30786 Store->isCompressingStore());
30789SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
30792 EVT VT = Op.getValueType();
30796 unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
30809 DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, DL, ContainerVT, Pg, Op1, Op2);
30818 if (EltVT == MVT::i32 || EltVT == MVT::i64)
30819 return LowerToPredicatedOp(Op, DAG, PredOpcode);
30835 auto HalveAndExtendVector = [&DAG, &DL, &HalfVT, &PromVT,
30839 DAG.getConstant(HalfVT.getVectorNumElements(), DL, MVT::i64);
30842 return std::pair<SDValue, SDValue>(
30848 auto [Op0LoExt, Op0HiExt] = HalveAndExtendVector(Op.getOperand(0));
30849 auto [Op1LoExt, Op1HiExt] = HalveAndExtendVector(Op.getOperand(1));
30857SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
30859 EVT VT = Op.getValueType();
30868 unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
30875 Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
30880 Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
30885 Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
30893SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
30895 EVT VT = Op.getValueType();
30909 Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv4i32, Val, Val);
30915 Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv8i16, Val, Val);
30921 Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv16i8, Val, Val);
30929SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
30931 EVT VT = Op.getValueType();
30932 EVT InVT = Op.getOperand(0).getValueType();
30942SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
30944 EVT VT = Op.getValueType();
30948 EVT InVT = Op.getOperand(0).getValueType();
30953 Op.getOperand(1), Op.getOperand(2));
30963 unsigned NewOp) const {
30964 EVT VT = Op.getValueType();
30974 for (const SDValue &V : Op->op_values()) {
30981 EVT VTArg = VTNode->getVT().getVectorElementType();
30989 "Expected only legal fixed-width types");
30996 auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
31003 for (const SDValue &V : Op->op_values()) {
31004 assert((!V.getValueType().isVector() ||
31005 V.getValueType().isScalableVector()) &&
31006 "Only scalable vectors are supported!");
31013 return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
31021 EVT VT = Op.getValueType();
31023 "Only expected to lower fixed length vector operation!");
31028 for (const SDValue &V : Op->op_values()) {
31032 if (!V.getValueType().isVector()) {
31038 assert(V.getValueType().isFixedLengthVector() &&
31040 "Only fixed length vectors are supported!");
31044 auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
31048SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
31050 SDLoc DL(ScalarOp);
31056 EVT ContainerVT = SrcVT;
31067 DAG.getUNDEF(ContainerVT), AccOp, Zero);
31076SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
31078 SDLoc DL(ReduceOp);
31080 EVT OpVT = Op.getValueType();
31105 if (OpVT == MVT::nxv1i1) {
31107 Pg = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv2i1, Pg);
31108 Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv2i1, Op);
31119SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
31122 SDLoc DL(ScalarOp);
31128 Subtarget->useSVEForFixedLengthVectors())) {
31148 EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
31167AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
31169 EVT VT = Op.getValueType();
31172 EVT InVT = Op.getOperand(1).getValueType();
31179 EVT MaskVT = Op.getOperand(0).getValueType();
31193SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
31196 EVT InVT = Op.getOperand(0).getValueType();
31200 "Only expected to lower fixed length vector operation!");
31202 "Expected integer result of the same bit length as the inputs!");
31209 auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
31210 {Pg, Op1, Op2, Op.getOperand(2)});
31218AArch64TargetLowering::LowerFixedLengthBitcastToSVE(SDValue Op,
31221 auto SrcOp = Op.getOperand(0);
31222 EVT VT = Op.getValueType();
31224 EVT ContainerSrcVT =
31232SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
31238 "Unexpected number of operands in CONCAT_VECTORS");
31240 auto SrcOp1 = Op.getOperand(0);
31241 auto SrcOp2 = Op.getOperand(1);
31242 EVT VT = Op.getValueType();
31243 EVT SrcVT = SrcOp1.getValueType();
31249 DAG.getNode(AArch64ISD::DUPLANE128, DL, ContainerVT,
31255 if (NumOperands > 2) {
31258 for (unsigned I = 0; I < NumOperands; I += 2)
31260 Op->getOperand(I), Op->getOperand(I + 1)));
31271 Op = DAG.getNode(AArch64ISD::SPLICE, DL, ContainerVT, Pg, SrcOp1, SrcOp2);
31277AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(SDValue Op,
31279 EVT VT = Op.getValueType();
31294 Val = getSVESafeBitCast(ExtendVT, Val, DAG);
31295 Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
31296 Pg, Val, DAG.getUNDEF(ContainerVT));
31302AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
31304 EVT VT = Op.getValueType();
31316 Val = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, RoundVT, Pg, Val,
31326AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
31328 EVT VT = Op.getValueType();
31332 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
31333 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
31351 Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
31361 Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
31370AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op,
31373 EVT OpVT = Op.getValueType();
31384 for (unsigned I = 0; I < 3; ++I) {
31387 SDValue V = getSVESafeBitCast(PackedVT, Op.getOperand(I), DAG);
31399 Ops.push_back(StackPtr);
31402 SDVTList VTs = DAG.getVTList(PackedVT, PackedVT, PackedVT, MVT::Other);
31413 if (Subtarget->hasSME2() && Subtarget->isStreaming() &&
31420 IntID = Intrinsic::aarch64_sve_uzp_x2;
31423 if (Subtarget->getMinSVEVectorSizeInBits() < 256 &&
31426 IntID = Intrinsic::aarch64_sve_uzp_x4;
31432 Ops.append(Op->op_values().begin(), Op->op_values().end());
31439 if (OpVT == MVT::v1i64 || OpVT == MVT::v1f64)
31452 EVT OpVT = Op.getValueType();
31459 InVecs.push_back(getSVESafeBitCast(PackedVT, V, DAG));
31471 Ops.append(InVecs);
31473 Ops.push_back(StackPtr);
31480 for (unsigned I = 0; I < 3; ++I) {
31484 Results.push_back(getSVESafeBitCast(OpVT, L, DAG));
31493 if (Subtarget->hasSME2() && Subtarget->isStreaming() &&
31500 IntID = Intrinsic::aarch64_sve_zip_x2;
31503 if (Subtarget->getMinSVEVectorSizeInBits() < 256 &&
31506 IntID = Intrinsic::aarch64_sve_zip_x4;
31512 Ops.append(Op->op_values().begin(), Op->op_values().end());
31519 if (OpVT == MVT::v1i64 || OpVT == MVT::v1f64)
31546 "Unexpected histogram update operation");
31548 EVT IndexVT = Index.getValueType();
31555 bool ExtTrunc = IncSplatVT != MemVT;
31570 DAG.getVTList(IncSplatVT, MVT::Other), MemVT, DL, Ops, GMMO, IndexType,
31590 ScatterOps, SMMO, IndexType, ExtTrunc);
31603AArch64TargetLowering::LowerPARTIAL_REDUCE_MLA(SDValue Op,
31610 EVT ResultVT = Op.getValueType();
31611 EVT OrigResultVT = ResultVT;
31612 EVT OpVT = LHS.getValueType();
31614 bool ConvertToScalable =
31620 if (!ConvertToScalable && ResultVT == MVT::v2i32 && OpVT == MVT::v16i8) {
31625 SDValue Reduced = DAG.getNode(AArch64ISD::ADDP, DL, MVT::v4i32, Wide, Wide);
31629 if (ConvertToScalable) {
31635 Op = DAG.getNode(Op.getOpcode(), DL, ResultVT, {Acc, LHS, RHS});
31650 if (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable()) {
31651 unsigned LoOpcode = IsUnsigned ? AArch64ISD::UADDWB : AArch64ISD::SADDWB;
31652 unsigned HiOpcode = IsUnsigned ? AArch64ISD::UADDWT : AArch64ISD::SADDWT;
31654 Res = DAG.getNode(HiOpcode, DL, ResultVT, Lo, DotNode);
31657 auto [DotNodeLo, DotNodeHi] = DAG.SplitVector(DotNode, DL);
31674AArch64TargetLowering::LowerGET_ACTIVE_LANE_MASK(SDValue Op,
31676 EVT VT = Op.getValueType();
31679 assert(Subtarget->isSVEorStreamingSVEAvailable() &&
31680 "Lowering fixed length get_active_lane_mask requires SVE!");
31690 Op.getOperand(0), Op.getOperand(1));
31697AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
31699 EVT VT = Op.getValueType();
31703 unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
31704 : AArch64ISD::FCVTZU_MERGE_PASSTHRU;
31721 Val = getSVESafeBitCast(CvtVT, Val, DAG);
31722 Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
31754 if (!IsSingleOp && !Subtarget.hasSVE2())
31757 EVT VTOp1 = Op.getOperand(0).getValueType();
31759 unsigned IndexLen = MinSVESize / BitsPerElt;
31764 bool MinMaxEqual = (MinSVESize == MaxSVESize);
31765 assert(ElementsPerVectorReg <= IndexLen && ShuffleMask.size() <= IndexLen &&
31766 "Incorrectly legalised shuffle operation");
31775 if (!IsSingleOp && !MinMaxEqual && BitsPerElt == 8)
31778 for (int Index : ShuffleMask) {
31786 if ((unsigned)Index >= ElementsPerVectorReg) {
31788 Index += IndexLen - ElementsPerVectorReg;
31790 Index = Index - ElementsPerVectorReg;
31793 } else if (!MinMaxEqual)
31798 if ((unsigned)Index >= MaxOffset)
31807 for (unsigned i = 0; i < IndexLen - ElementsPerVectorReg; ++i) {
31824 else if (Subtarget.hasSVE2()) {
31825 if (!MinMaxEqual) {
31827 SDValue VScale = (BitsPerElt == 64)
31851SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
31853 EVT VT = Op.getValueType();
31857 auto ShuffleMask = SVN->getMask();
31867 auto MinLegalExtractEltScalarTy = [](EVT ScalarTy) -> EVT {
31868 if (ScalarTy == MVT::i8 || ScalarTy == MVT::i16)
31882 bool ReverseEXT = false;
31884 if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
31892 Op = DAG.getNode(AArch64ISD::INSR, DL, ContainerVT, Op2, Scalar);
31897 for (unsigned BlockSize : {64U, 32U, 16U}) {
31901 RevOp = AArch64ISD::BSWAP_MERGE_PASSTHRU;
31902 else if (EltSize == 16)
31903 RevOp = AArch64ISD::REVH_MERGE_PASSTHRU;
31905 RevOp = AArch64ISD::REVW_MERGE_PASSTHRU;
31918 if (Subtarget->hasSVE2p1() && EltSize == 64 &&
31921 SDValue Revd = DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, DL, ContainerVT,
31922 Pg, Op1, DAG.getUNDEF(ContainerVT));
31926 unsigned WhichResult;
31927 unsigned OperandOrder;
31930 WhichResult == 0) {
31932 OperandOrder == 0 ? Op1 : Op2,
31933 OperandOrder == 0 ? Op2 : Op1);
31939 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
31941 DAG.getNode(Opc, DL, ContainerVT, OperandOrder == 0 ? Op1 : Op2,
31942 OperandOrder == 0 ? Op2 : Op1);
31948 DAG, VT, DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op1));
31951 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
31953 DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
31974 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
31975 unsigned MaxSVESize = Subtarget->getMaxSVEVectorSizeInBits();
31976 if (MinSVESize == MaxSVESize && MaxSVESize == VT.getSizeInBits()) {
31985 WhichResult != 0) {
31987 OperandOrder == 0 ? Op1 : Op2,
31988 OperandOrder == 0 ? Op2 : Op1);
31993 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
31995 DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
32000 DAG, VT, DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, Op1, Op1));
32003 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
32005 DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
32008 if ((Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()) &&
32009 Subtarget->isSVEorStreamingSVEAvailable()) {
32011 "Unsupported SVE vector size");
32015 if (std::optional<unsigned> Lane =
32016 isDUPQMask(ShuffleMask, Segments, SegmentElts)) {
32023 DAG.getConstant(*Lane, DL, MVT::i64,
32037 if (MinSVESize || !Subtarget->isNeonAvailable())
32047 EVT InVT = Op.getValueType();
32051 "Only expect to cast between legal scalable vector types!");
32054 "For predicate bitcasts, use getSVEPredicateBitCast");
32070 VT == PackedVT || InVT == PackedInVT) &&
32071 "Unexpected bitcast!");
32074 if (InVT != PackedInVT)
32075 Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
32077 if (Subtarget->isLittleEndian() ||
32088 Op = DAG.getNode(AArch64ISD::NVCAST, DL, PackedVTAsInt, Op);
32095 if (VT != PackedVT)
32096 Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
32103 return ::isAllActivePredicate(DAG, N);
32107 return ::getPromotedVTForPredicate(VT);
32110bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
32112 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
32113 unsigned Depth) const {
32115 unsigned Opc = Op.getOpcode();
32117 case AArch64ISD::VSHL: {
32121 if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
32132 if (ShiftRBits != ShiftLBits)
32135 unsigned ScalarSize = Op.getScalarValueSizeInBits();
32136 assert(ScalarSize > ShiftLBits && "Invalid shift imm");
32139 APInt UnusedBits = ~OriginalDemandedBits;
32141 if ((ZeroBits & UnusedBits) != ZeroBits)
32148 case AArch64ISD::BICi: {
32152 TLO.DAG.computeKnownBits(Op0, OriginalDemandedElts, Depth + 1);
32154 APInt BitsToClear =
32155 (Op->getConstantOperandAPInt(1) << Op->getConstantOperandAPInt(2))
32157 APInt AlreadyZeroedBitsToClear = BitsToClear & KnownOp0.Zero;
32158 if (BitsToClear.isSubsetOf(AlreadyZeroedBitsToClear))
32159 return TLO.CombineTo(Op, Op0);
32168 unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
32169 if (!MaxSVEVectorSizeInBits)
32171 unsigned VscaleMax = MaxSVEVectorSizeInBits / 128;
32172 unsigned MaxValue = MaxCount->getKnownMinValue() * VscaleMax;
32187 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
32190bool AArch64TargetLowering::canCreateUndefOrPoisonForTargetNode(
32195 switch (Op.getOpcode()) {
32196 case AArch64ISD::MOVI:
32197 case AArch64ISD::MOVIedit:
32198 case AArch64ISD::MOVImsl:
32199 case AArch64ISD::MOVIshift:
32200 case AArch64ISD::MVNImsl:
32201 case AArch64ISD::MVNIshift:
32202 case AArch64ISD::VASHR:
32203 case AArch64ISD::VLSHR:
32204 case AArch64ISD::VSHL:
32211bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
32212 return Op.getOpcode() == AArch64ISD::DUP ||
32213 Op.getOpcode() == AArch64ISD::MOVI ||
32214 Op.getOpcode() == AArch64ISD::MOVIshift ||
32215 Op.getOpcode() == AArch64ISD::MOVImsl ||
32216 Op.getOpcode() == AArch64ISD::MOVIedit ||
32217 Op.getOpcode() == AArch64ISD::MVNIshift ||
32218 Op.getOpcode() == AArch64ISD::MVNImsl ||
32223 Op.getOperand(0).getOpcode() == AArch64ISD::MOVIedit &&
32224 Op.getOperand(0).getConstantOperandVal(0) == 0) ||
32226 Op.getOperand(0).getOpcode() == AArch64ISD::DUP) ||
32231 return Subtarget->hasSVE() || Subtarget->hasSVE2() ||
32232 Subtarget->hasComplxNum();
32243 if (!VTy->isScalableTy() && !Subtarget->hasComplxNum())
32247 unsigned NumElements = VTy->getElementCount().getKnownMinValue();
32253 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
32254 if ((VTyWidth < 128 && (VTy->isScalableTy() || VTyWidth != 64)) ||
32258 if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) {
32262 return ScalarWidth == 32 || ScalarWidth == 64;
32263 return 8 <= ScalarWidth && ScalarWidth <= 64;
32270 return (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) ||
32271 ScalarTy->isFloatTy() || ScalarTy->isDoubleTy();
32281 bool IsScalable = Ty->isScalableTy();
32282 bool IsInt = Ty->getElementType()->isIntegerTy();
32285 Ty->getScalarSizeInBits() * Ty->getElementCount().getKnownMinValue();
32288 "Vector type must be either 64 or a power of 2 that is at least 128");
32290 if (TyWidth > 128) {
32291 int Stride = Ty->getElementCount().getKnownMinValue() / 2;
32293 ->getElementCount()
32294 .getKnownMinValue() /
32297 auto *LowerSplitA = B.CreateExtractVector(HalfTy, InputA, uint64_t(0));
32298 auto *LowerSplitB = B.CreateExtractVector(HalfTy, InputB, uint64_t(0));
32299 auto *UpperSplitA = B.CreateExtractVector(HalfTy, InputA, Stride);
32300 auto *UpperSplitB = B.CreateExtractVector(HalfTy, InputB, Stride);
32301 Value *LowerSplitAcc = nullptr;
32302 Value *UpperSplitAcc = nullptr;
32308 UpperSplitAcc = B.CreateExtractVector(HalfAccTy, Accumulator, AccStride);
32310 B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
32312 B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
32316 return B.CreateInsertVector(FullTy, Result, UpperSplitInt, AccStride);
32322 return B.CreateIntrinsic(
32323 Intrinsic::aarch64_sve_cmla_x, Ty,
32324 {Accumulator, InputA, InputB, B.getInt32((int)Rotation * 90)});
32326 auto *Mask = B.getAllOnesMask(Ty->getElementCount());
32327 return B.CreateIntrinsic(
32328 Intrinsic::aarch64_sve_fcmla, Ty,
32329 {Mask, Accumulator, InputA, InputB, B.getInt32((int)Rotation * 90)});
32332 Intrinsic::ID IdMap[4] = {Intrinsic::aarch64_neon_vcmla_rot0,
32333 Intrinsic::aarch64_neon_vcmla_rot90,
32334 Intrinsic::aarch64_neon_vcmla_rot180,
32335 Intrinsic::aarch64_neon_vcmla_rot270};
32338 return B.CreateIntrinsic(IdMap[(int)Rotation], Ty,
32347 return B.CreateIntrinsic(
32348 Intrinsic::aarch64_sve_cadd_x, Ty,
32349 {InputA, InputB, B.getInt32((int)Rotation * 90)});
32351 auto *Mask = B.getAllOnesMask(Ty->getElementCount());
32352 return B.CreateIntrinsic(
32353 Intrinsic::aarch64_sve_fcadd, Ty,
32354 {Mask, InputA, InputB, B.getInt32((int)Rotation * 90)});
32361 IntId = Intrinsic::aarch64_neon_vcadd_rot90;
32363 IntId = Intrinsic::aarch64_neon_vcadd_rot270;
32368 return B.CreateIntrinsic(IntId, Ty, {InputA, InputB});
32373 return B.CreateIntrinsic(
32374 Intrinsic::aarch64_sve_cdot, Accumulator->getType(),
32375 {Accumulator, InputA, InputB, B.getInt32((int)Rotation * 90)});
32381bool AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const {
32382 unsigned Opc = N->getOpcode();
32385 [&](SDNode *Use) { return Use->getOpcode() == ISD::MUL; }))
32392 return Subtarget->getMinimumJumpTableEntries();
32398 bool NonUnitFixedLengthVector =
32400 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
32405 unsigned NumIntermediates;
32413 bool NonUnitFixedLengthVector =
32415 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
32420 unsigned NumIntermediates;
32422 NumIntermediates, VT2);
32427 unsigned &NumIntermediates, MVT &RegisterVT) const {
32429 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
32434 assert(Subtarget->useSVEForFixedLengthVectors() && "Unexpected mode!");
32435 assert(IntermediateVT == RegisterVT && "Unexpected VT mismatch!");
32446 IntermediateVT = NewVT;
32449 return NumIntermediates;
32456 NumIntermediates *= NumSubRegs;
32457 NumRegs *= NumSubRegs;
32463 IntermediateVT = RegisterVT = MVT::v16i8;
32466 IntermediateVT = RegisterVT = MVT::v8i16;
32469 IntermediateVT = RegisterVT = MVT::v4i32;
32472 IntermediateVT = RegisterVT = MVT::v2i64;
32475 IntermediateVT = RegisterVT = MVT::v8f16;
32478 IntermediateVT = RegisterVT = MVT::v4f32;
32481 IntermediateVT = RegisterVT = MVT::v2f64;
32484 IntermediateVT = RegisterVT = MVT::v8bf16;
32493 return !Subtarget->isTargetWindows() &&
32502 if (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32)
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static SDValue trySVESplat64(SDValue Op, SelectionDAG &DAG, const AArch64Subtarget *ST, APInt &DefBits)
static SDValue tryLowerSmallVectorExtLoad(LoadSDNode *Load, SelectionDAG &DAG)
Helper function to optimize loads of extended small vectors.
static void CustomNonLegalBITCASTResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, EVT ExtendVT, EVT CastVT)
static bool isConcatMask(ArrayRef< int > Mask, EVT VT, bool SplitLHS)
static bool isAddSubSExt(SDValue N, SelectionDAG &DAG)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2, bool &Invert)
changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC usable with the vector...
static SDValue performZExtDeinterleaveShuffleCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
static bool isSingletonEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue foldCSELofCTTZ(SDNode *N, SelectionDAG &DAG)
static SDValue performCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, unsigned CCIndex, unsigned CmpIndex)
static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG)
static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of zeros to a vector store by scalar stores of WZR/XZR.
static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG)
static SDValue performLastTrueTestVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue GenerateTBL(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performDUPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG)
static std::optional< PredicateConstraint > parsePredicateConstraint(StringRef Constraint)
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static void analyzeCallOperands(const AArch64TargetLowering &TLI, const AArch64Subtarget *Subtarget, const TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo)
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo)
Check whether or not Op is a SET_CC operation, either a generic or an AArch64 lowered one.
static bool isLegalArithImmed(uint64_t C)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performVectorDeinterleaveCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static ScalableVectorType * getSVEContainerIRType(FixedVectorType *VTy)
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG)
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend)
static SDValue performMulVectorCmpZeroCombine(SDNode *N, SelectionDAG &DAG)
static SDValue convertFixedMaskToScalableVector(SDValue Mask, SelectionDAG &DAG)
static bool isZeroingInactiveLanes(SDValue Op)
static SDValue performPTestFirstCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue trySwapVSelectOperands(SDNode *N, SelectionDAG &DAG)
static SDValue tryCombineMULLWithUZP1(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static bool isExtendedBUILD_VECTOR(SDValue N, SelectionDAG &DAG, bool isSigned)
static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG)
static bool isZerosVector(const SDNode *N)
isZerosVector - Check whether SDNode N is a zero-filled vector.
static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG)
static SDValue performNVCASTCombine(SDNode *N, SelectionDAG &DAG)
Get rid of unnecessary NVCASTs (that don't change the type).
static const TargetRegisterClass * getReducedGprRegisterClass(ReducedGprConstraint Constraint, EVT VT)
static const MachineInstr * stripVRegCopies(const MachineRegisterInfo &MRI, Register Reg)
static SDValue carryFlagToValue(SDValue Glue, EVT VT, SelectionDAG &DAG, bool Invert)
static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset, SDLoc DL, unsigned BitWidth)
static bool isPredicateCCSettingOp(SDValue N)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performSVEAndCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
bool isVectorizedBinOp(unsigned Opcode)
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG)
static SDValue overflowFlagToValue(SDValue Glue, EVT VT, SelectionDAG &DAG)
static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2, ArrayRef< int > ShuffleMask, EVT VT, EVT ContainerVT, SelectionDAG &DAG)
static SDValue performBRCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static MVT getSVEContainerType(EVT ContentTy)
static bool isMergePassthruOpcode(unsigned Opc)
static unsigned selectUmullSmull(SDValue &N0, SDValue &N1, SelectionDAG &DAG, SDLoc DL, bool &IsMLA)
static SDValue performFADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performNEONPostLDSTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Target-specific DAG combine function for NEON load/store intrinsics to merge base address updates.
static SDValue emitVectorComparison(SDValue LHS, SDValue RHS, AArch64CC::CondCode CC, bool NoNans, EVT VT, const SDLoc &DL, SelectionDAG &DAG)
Emit vector comparison for floating-point values, producing a mask.
static SDValue performVectorExtCombine(SDNode *N, SelectionDAG &DAG)
static void ReplaceCMP_SWAP_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp, SelectionDAG &DAG)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
static SDValue performSelectCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with the compare-mask instruct...
static bool isCheapToExtend(const SDValue &N)
static cl::opt< bool > EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, cl::desc("Enable AArch64 logical imm instruction " "optimization"), cl::init(true))
static SDValue performExtractLastActiveCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG)
static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes, unsigned ScalarSizeInBytes)
Check if the value of OffsetInBytes can be used as an immediate for the gather load/prefetch and scat...
static bool isUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of "vector_shuffle v,...
static bool shouldLowerTailCallStackArg(const MachineFunction &MF, const CCValAssign &VA, SDValue Arg, ISD::ArgFlagsTy Flags, int CallOffset)
Check whether a stack argument requires lowering in a tail call.
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static std::optional< ElementCount > getMaxValueForSVECntIntrinsic(SDValue Op)
static unsigned getDUPLANEOp(EVT EltType)
static void changeFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget, const TargetMachine &TM)
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, EVT VT, EVT MemVT, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool canLowerSRLToRoundingShiftForVT(SDValue Shift, EVT ResVT, SelectionDAG &DAG, unsigned &ShiftValue, SDValue &RShOperand)
static bool isExtendOrShiftOperand(SDValue N)
static bool isLanes1toNKnownZero(SDValue Op)
static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG)
static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N, SelectionDAG &DAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static EVT getPackedSVEVectorVT(EVT VT)
static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerPtrAuthGlobalAddressStatically(SDValue TGA, SDLoc DL, EVT VT, AArch64PACKey::ID KeyC, SDValue Discriminator, SDValue AddrDiscriminator, SelectionDAG &DAG)
static SDValue performVecReduceBitwiseCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performFlagSettingCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, unsigned GenericOpcode)
static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performCSELCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static void ReplaceReductionResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, unsigned InterOp, unsigned AcrossOp)
static bool isEquivalentMaskless(unsigned CC, unsigned width, ISD::LoadExtType ExtType, int AddConstant, int CompConstant)
static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG)
static SDValue constructDup(SDValue V, int Lane, SDLoc DL, EVT VT, unsigned Opcode, SelectionDAG &DAG)
static bool isCMP(SDValue Op)
static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool rmwOpMayLowerToLibcall(const AArch64Subtarget &Subtarget, const AtomicRMWInst *RMW)
static Function * getStructuredLoadFunction(Module *M, unsigned Factor, bool Scalable, Type *LDVTy, Type *PtrTy)
unsigned numberOfInstrToLoadImm(APInt C)
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue foldCSELOfCSEL(SDNode *Op, SelectionDAG &DAG)
static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc, SelectionDAG &DAG, bool UnpredOp=false, bool SwapOperands=false)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated)
static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian)
static SDValue performANDSCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, SelectionDAG &DAG)
static AArch64CC::CondCode parseConstraintCode(llvm::StringRef Constraint)
static bool isINSMask(ArrayRef< int > M, int NumInputElements, bool &DstIsLeft, int &Anomaly)
static bool callConvSupportsVarArgs(CallingConv::ID CC)
Return true if the call convention supports varargs Currently only those that pass varargs like the C...
static SDValue performBICiCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static const MCPhysReg GPRArgRegs[]
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, APInt &UndefBits)
static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG)
static SDValue performSignExtendSetCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static bool isPassedInFPR(EVT VT)
static unsigned getIntrinsicID(const SDNode *N)
static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert)
static SDValue performAddUADDVCombine(SDNode *N, SelectionDAG &DAG)
static bool IsSVECntIntrinsic(SDValue S)
static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG)
static bool findMoreOptimalIndexType(const MaskedGatherScatterSDNode *N, SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static SDValue emitRestoreZALazySave(SDValue Chain, SDLoc DL, const AArch64TargetLowering &TLI, const AArch64RegisterInfo &TRI, AArch64FunctionInfo &FuncInfo, SelectionDAG &DAG)
static bool isWideDUPMask(ArrayRef< int > M, EVT VT, unsigned BlockSize, unsigned &DupLaneOp)
Check if a vector shuffle corresponds to a DUP instructions with a larger element width than the vect...
constexpr MVT FlagsVT
Value type used for NZCV flags.
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
static cl::opt< bool > EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden, cl::desc("Combine ext and trunc to TBL"), cl::init(true))
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, SDValue SplatVal, unsigned NumVecElts)
static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG)
static std::optional< std::pair< unsigned, const TargetRegisterClass * > > parseSVERegAsConstraint(StringRef Constraint)
static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG)
static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *ST)
static SDValue tryLowerToBSL(SDValue N, SelectionDAG &DAG)
static SDValue performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue removeRedundantInsertVectorElt(SDNode *N)
static std::optional< AArch64CC::CondCode > getCSETCondCode(SDValue Op)
static bool isLane0KnownActive(SDValue Op)
static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc, SelectionDAG &DAG)
static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG)
static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG)
Legalize the gather prefetch (scalar + vector addressing mode) when the offset vector is an unpacked ...
static bool isNegatedInteger(SDValue Op)
static SDValue performFirstTrueTestVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
constexpr MVT CondCodeVT
Value type used for condition codes.
static bool isLoadOrMultipleLoads(SDValue B, SmallVector< LoadSDNode * > &Loads)
static SDValue performSubAddMULCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc)
static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16)
static SDValue performSMINCombine(SDNode *N, SelectionDAG &DAG)
SDValue LowerVectorMatch(SDValue Op, SelectionDAG &DAG)
static Function * getStructuredStoreFunction(Module *M, unsigned Factor, bool Scalable, Type *STVTy, Type *PtrTy)
static SDValue performZExtUZPCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performVectorShiftCombine(SDNode *N, const AArch64TargetLowering &TLI, TargetLowering::DAGCombinerInfo &DCI)
Optimize a vector shift instruction and its operand if shifted out bits are not used.
static SDValue performUADDVAddCombine(SDValue A, SelectionDAG &DAG)
static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG, unsigned ScalarSizeInBytes)
Combines a node carrying the intrinsic aarch64_sve_prf<T>_gather_scalar_offset into a node that uses ...
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of a scalar to a vector store by scalar stores of the scalar value.
unsigned getSignExtendedGatherOpcode(unsigned Opcode)
static bool isOrXorChain(SDValue N, unsigned &Num, SmallVector< std::pair< SDValue, SDValue >, 16 > &WorkList)
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode, bool OnlyPackedOffsets=true)
static SDValue foldOverflowCheck(SDNode *Op, SelectionDAG &DAG, bool IsAdd)
static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc, SelectionDAG &DAG)
static SDValue performDupLane128Combine(SDNode *N, SelectionDAG &DAG)
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, const APInt &Demanded, TargetLowering::TargetLoweringOpt &TLO, unsigned NewOpc)
bool isLegalCmpImmed(APInt C)
static bool isSafeSignedCMN(SDValue Op, SelectionDAG &DAG)
static unsigned getCmpOperandFoldingProfit(SDValue Op)
Returns how profitable it is to fold a comparison's operand's shift and/or extension operations.
static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode, SelectionDAG &DAG, bool LastOperandIsImm=false)
static SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue performCTPOPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG)
static SDValue performConcatVectorsCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performSVEMulAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue foldCSELofLASTB(SDNode *Op, SelectionDAG &DAG)
static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N, SelectionDAG &DAG)
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, const SDLoc &DL)
static SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG)
static SDValue performBuildShuffleExtendCombine(SDValue BV, SelectionDAG &DAG)
Combines a buildvector(sext/zext) or shuffle(sext/zext, undef) node pattern into sext/zext(buildvecto...
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static Value * createTblShuffleForZExt(IRBuilderBase &Builder, Value *Op, FixedVectorType *ZExtTy, FixedVectorType *DstTy, bool IsLittleEndian)
static SDValue performAddSubIntoVectorOp(SDNode *N, SelectionDAG &DAG)
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG)
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC, SDValue RHS={})
changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 CC
static const MCPhysReg FPRArgRegs[]
static SDValue getSETCC(AArch64CC::CondCode CC, SDValue NZCV, const SDLoc &DL, SelectionDAG &DAG)
Helper function to create 'CSET', which is equivalent to 'CSINC <Wd>, WZR, WZR, invert(<cond>)'.
static SDValue performAddTruncShiftCombine(SDNode *N, SelectionDAG &DAG)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue tryCombineNeonFcvtFP16ToI16(SDNode *N, unsigned Opcode, SelectionDAG &DAG)
static void replaceBoolVectorBitcast(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performActiveLaneMaskCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *ST)
static SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT, int Pattern)
static bool isEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseEXT, unsigned &Imm)
static std::optional< ReducedGprConstraint > parseReducedGprConstraint(StringRef Constraint)
static SDValue tryCombineFixedPointConvert(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SMECallAttrs getSMECallAttrs(const Function &Caller, const RTLIB::RuntimeLibcallsInfo &RTLCI, const TargetLowering::CallLoweringInfo &CLI)
static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG)
Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup)) making use of the vector SExt/ZE...
static SDValue performAddSubLongCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG)
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Fold a floating-point multiply by power of two into floating-point to fixed-point conversion.
static EVT calculatePreExtendType(SDValue Extend)
Calculates what the pre-extend type is, based on the extension operation node provided by Extend.
static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG)
static bool isSignExtInReg(const SDValue &V)
static EVT getPromotedVTForPredicate(EVT VT)
static void changeFPCCToANDAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert a DAG fp condition code to an AArch64 CC.
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Turn vector tests of the signbit in the form of: xor (sra X, elt_size(X)-1), -1 into: cmge X,...
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG)
static bool isAllConstantBuildVector(const SDValue &PotentialBVec, uint64_t &ConstVal)
static SDValue performExtractSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue tryToReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Tries to replace scalar FP <-> INT conversions with SVE in streaming functions, this can help to redu...
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG)
static Value * UseTlsOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG)
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG, const AArch64Subtarget *ST)
static bool isUnpackedType(EVT VT, SelectionDAG &DAG)
Returns true if the conceptual representation for VT does not map directly to its physical register r...
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op, AArch64CC::CondCode Cond)
static bool isSetCCOrZExtSetCC(const SDValue &Op, SetCCInfoAndKind &Info)
cl::opt< bool > EnableAArch64ELFLocalDynamicTLSGeneration("aarch64-elf-ldtls-generation", cl::Hidden, cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false))
static SDValue ReconstructTruncateFromBuildVector(SDValue V, SelectionDAG &DAG)
static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue getCondCode(SelectionDAG &DAG, AArch64CC::CondCode CC)
Like SelectionDAG::getCondCode(), but for AArch64 condition codes.
static SDValue foldADCToCINC(SDNode *N, SelectionDAG &DAG)
static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG)
static SDValue optimizeIncrementingWhile(SDNode *N, SelectionDAG &DAG, bool IsSigned, bool IsEqual)
static SDValue performSunpkloCombine(SDNode *N, SelectionDAG &DAG)
static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static unsigned getAtomicLoad128Opcode(unsigned ISDOpcode, AtomicOrdering Ordering)
static void ReplaceAddWithADDP(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
cl::opt< bool > EnableSVEGISel("aarch64-enable-gisel-sve", cl::Hidden, cl::desc("Enable / disable SVE scalable vectors in Global ISel"), cl::init(false))
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSetccMergeZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performPostLD1Combine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, bool IsLaneOp)
Target-specific DAG combine function for post-increment LD1 (lane) and post-increment LD1R.
std::pair< SDValue, uint64_t > lookThroughSignExtension(SDValue Val)
static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG)
bool hasNearbyPairedStore(Iter It, Iter End, Value *Ptr, const DataLayout &DL)
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG)
static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val, bool &CanNegate, bool &MustBeFirst, bool &PreferFirst, bool WillNegate, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/SETCC operations that can be expressed as a conjunction.
static bool foldIndexIntoBase(SDValue &BasePtr, SDValue &Index, SDValue Scale, SDLoc DL, SelectionDAG &DAG)
static SDValue emitFloatCompareMask(SDValue LHS, SDValue RHS, SDValue TVal, SDValue FVal, ISD::CondCode CC, bool NoNaNs, const SDLoc &DL, SelectionDAG &DAG)
For SELECT_CC, when the true/false values are (-1, 0) and the compared values are scalars,...
static SDValue getZT0FrameIndex(MachineFrameInfo &MFI, AArch64FunctionInfo &FuncInfo, SelectionDAG &DAG)
static SDValue performRNDRCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue skipExtensionForVectorMULL(SDValue N, SelectionDAG &DAG)
static SDValue performOrXorChainCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performAddCombineForShiftedOperands(SDNode *N, SelectionDAG &DAG)
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
static bool shouldBeAdjustedToZero(SDValue LHS, APInt C, ISD::CondCode &CC)
static bool isPackedPredicateType(EVT VT, SelectionDAG &DAG)
static SDValue combineSVEBitSel(unsigned IID, SDNode *N, SelectionDAG &DAG)
static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG, unsigned Opcode, bool IsSigned)
static bool isPackedVectorType(EVT VT, SelectionDAG &DAG)
Returns true if VT's elements occupy the lowest bit positions of its associated register class withou...
static bool isTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of "vector_shuffle v,...
static AArch64SME::ToggleCondition getSMToggleCondition(const SMECallAttrs &CallAttrs)
static bool isAddSubZExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
static SDValue optimizeBitTest(SDLoc DL, SDValue Op, SDValue Chain, SDValue Dest, unsigned Opcode, SelectionDAG &DAG)
static SDValue performExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performMaskedGatherScatterCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert, SelectionDAG &DAG)
static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performBuildVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode, bool OnlyPackedOffsets=true)
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG)
static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc, SelectionDAG &DAG)
static SDValue isNVCastToHalfWidthElements(SDValue V)
static bool isHalvingTruncateAndConcatOfLegalIntScalableType(SDNode *N)
static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode, SDValue Operand, SelectionDAG &DAG, int &ExtraSteps)
static SDValue performUADDVZextCombine(SDValue A, SelectionDAG &DAG)
static SDValue performAddCSelIntoCSinc(SDNode *N, SelectionDAG &DAG)
Perform the scalar expression combine in the form of: CSEL(c, 1, cc) + b => CSINC(b+c,...
static SDValue performCTLZCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static bool isEligibleForSmallVectorLoadOpt(LoadSDNode *LD, const AArch64Subtarget &Subtarget)
Helper function to check if a small vector load can be optimized.
static std::optional< uint64_t > getConstantLaneNumOfExtractHalfOperand(SDValue &Op)
static void ReplaceATOMIC_LOAD_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL, SelectionDAG &DAG, SDValue Chain, bool IsSignaling)
static bool areLoadedOffsetButOtherwiseSame(SDValue Op0, SDValue Op1, SelectionDAG &DAG, unsigned &NumSubLoads)
static SDValue performMulRdsvlCombine(SDNode *Mul, SelectionDAG &DAG)
static bool isEssentiallyExtractHighSubvector(SDValue N)
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
static Value * createTblShuffleForSExt(IRBuilderBase &Builder, Value *Op, FixedVectorType *DstTy, bool IsLittleEndian)
static unsigned getExtFactor(SDValue &V)
getExtFactor - Determine the adjustment factor for the position when generating an "extract from vect...
static bool setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL, AArch64TargetLowering::IntrinsicInfo &Info, const CallBase &CI)
Set the IntrinsicInfo for the aarch64_sve_st<N> intrinsics.
static cl::opt< unsigned > MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden, cl::desc("Maximum of xors"))
static SDValue performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue performMULLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG)
static SDValue trySimplifySrlAddToRshrnb(SDValue Srl, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performReinterpretCastCombine(SDNode *N)
static SDValue emitSMEStateSaveRestore(const AArch64TargetLowering &TLI, SelectionDAG &DAG, AArch64FunctionInfo *Info, SDLoc DL, SDValue Chain, bool IsSave)
SDValue ReconstructShuffleWithRuntimeMask(SDValue Op, SelectionDAG &DAG)
static SDValue performTBZCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
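A hedged illustration of the kind of source construct this targets; the assembly in the comment is representative of a CMP/CCMP chain, not a verbatim compiler dump:

    // For (x == 0) && (y == 0) an AArch64 compiler can use a CMP/CCMP chain
    // instead of two compares and a branch:
    //   cmp  x0, #0
    //   ccmp x1, #0, #0, eq   // if x == 0 compare y, otherwise force NZCV = 0 ("ne")
    //   cset w0, eq
    bool bothZero(long x, long y) { return x == 0 && y == 0; }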
static void simplifySetCCIntoEq(ISD::CondCode &CC, SDValue &LHS, SDValue &RHS, SelectionDAG &DAG, const SDLoc DL)
static SDValue tryCombineExtendRShTrunc(SDNode *N, SelectionDAG &DAG)
static bool isAllInactivePredicate(SDValue N)
static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT, SDLoc DL, SelectionDAG &DAG)
static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static cl::opt< bool > EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden, cl::desc("Combine extends of AArch64 masked " "gather intrinsics"), cl::init(true))
static bool isZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of "vector_shuffle v,...
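For illustration only (the array names below are invented), the canonical 8-element masks produced by zipping a vector with itself, which is the shape this predicate recognises on a vector_shuffle v, undef:

    static const int Zip1OfSelfMask[8] = {0, 0, 1, 1, 2, 2, 3, 3}; // WhichResult == 0
    static const int Zip2OfSelfMask[8] = {4, 4, 5, 5, 6, 6, 7, 7}; // WhichResult == 1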
static SDValue performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static cl::opt< bool > UseFEATCPACodegen("aarch64-use-featcpa-codegen", cl::Hidden, cl::desc("Generate ISD::PTRADD nodes for pointer arithmetic in " "SelectionDAG for FEAT_CPA"), cl::init(false))
static bool createTblShuffleMask(unsigned SrcWidth, unsigned DstWidth, unsigned NumElts, bool IsLittleEndian, SmallVectorImpl< int > &Mask)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z, SelectionDAG &DAG)
static SDValue performANDSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static const TargetRegisterClass * getPredicateRegisterClass(PredicateConstraint Constraint, EVT VT)
static SDValue performAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSubsToAndsCombine(SDNode *N, SDNode *SubsNode, SDNode *AndNode, SelectionDAG &DAG, unsigned CCIndex, unsigned CmpIndex, unsigned CC)
static std::pair< SDValue, SDValue > getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG)
#define FALKOR_STRIDED_ACCESS_MD
assert(UImm && (UImm != ~static_cast< T >(0)) && "Invalid immediate!")

This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
Module.h This file contains the declarations for the Module class.
This defines the Use class.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This file provides utility analysis objects describing memory locations.
This file defines ARC utility functions which are used by various parts of the compiler.
Contains matchers for matching SelectionDAG nodes and values.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static constexpr int Concat[]
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool branchTargetEnforcement() const
unsigned getVarArgsFPRSize() const
void setVarArgsStackOffset(unsigned Offset)
void setVarArgsStackIndex(int Index)
void setEarlyAllocSMESaveBuffer(Register Ptr)
int getZT0SpillSlotIndex() const
TPIDR2Object & getTPIDR2Obj()
void setTailCallReservedStack(unsigned bytes)
bool hasELFSignedGOT() const
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
void setIsSplitCSR(bool s)
int getVarArgsFPRIndex() const
void incNumLocalDynamicTLSAccesses()
void setBytesInStackArgArea(unsigned bytes)
int getVarArgsStackIndex() const
void setVarArgsGPRIndex(int Index)
int getVarArgsGPRIndex() const
void setPStateSMReg(Register Reg)
void setVarArgsFPRSize(unsigned Size)
unsigned getVarArgsStackOffset() const
SMEAttrs getSMEFnAttrs() const
unsigned getVarArgsGPRSize() const
void setZT0SpillSlotIndex(int FI)
unsigned getSRetReturnReg() const
Register getPStateSMReg() const
bool hasZT0SpillSlotIndex() const
void setSMESaveBufferUsed(bool Used=true)
void setSRetReturnReg(unsigned Reg)
void setSMESaveBufferAddr(Register Reg)
unsigned getBytesInStackArgArea() const
unsigned isSMESaveBufferUsed() const
void setVarArgsFPRIndex(int Index)
void setVarArgsGPRSize(unsigned Size)
void setArgumentStackToRestore(unsigned bytes)
void setHasStreamingModeChanges(bool HasChanges)
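A hedged usage sketch of this per-function state (MF and StackArgSize are assumed to be in scope; this is not a verbatim excerpt):

    AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
    FuncInfo->setBytesInStackArgArea(StackArgSize); // record the incoming stack argument area
    if (FuncInfo->getVarArgsGPRSize() > 0) {
      // a varargs GPR save area was allocated for this function
    }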
bool isTargetWindows() const
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
unsigned getMaximumJumpTableSize() const
Align getPrefLoopAlignment() const
Align getPrefFunctionAlignment() const
bool isTargetMachO() const
unsigned getMaxBytesForLoopAlignment() const
bool supportsAddressTopByteIgnored() const
CPU has TBI (top byte of addresses is ignored during HW address translation) and OS enables it.
bool isStreamingCompatible() const
Returns true if the function has a streaming-compatible body.
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
bool useSVEForFixedLengthVectors() const
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
bool isLittleEndian() const
bool isStreaming() const
Returns true if the function has a streaming body.
unsigned getSVEVectorSizeInBits() const
unsigned getMaxSVEVectorSizeInBits() const
bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const
unsigned getMinSVEVectorSizeInBits() const
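A hedged sketch of how these subtarget queries are typically combined (Subtarget assumed in scope):

    if (Subtarget->isSVEorStreamingSVEAvailable() &&
        Subtarget->getMinSVEVectorSizeInBits() >= 256) {
      // fixed-length vectors of up to 256 bits can be lowered with SVE instructions
    }
    bool TBI = Subtarget->supportsAddressTopByteIgnored(); // tagged pointers dereference safely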
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable, SDValue Chain, SDValue InGlue, unsigned Condition, bool InsertVectorLengthCheck=false) const
If a change in streaming mode is required on entry to/return from a function call it emits and return...
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset) const override
Return true if it is profitable to reduce a load to a smaller type.
Value * getIRStackGuard(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
Value * getSafeStackPointerLocation(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const override
If the target has a standard location for the unsafe stack pointer, returns the address of that locat...
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
void insertSSPDeclarations(Module &M, const LibcallLoweringInfo &Libcalls) const override
Inserts necessary declarations for SSP (stack protection) purpose.
EVT getPromotedVTForPredicate(EVT VT) const
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
unsigned getVaListSizeInBits(const DataLayout &DL) const override
Returns the size of the platform's va_list object.
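For context, a sketch of why this size is platform-dependent: the AAPCS64 va_list is a 32-byte structure, while Darwin and Windows use a plain pointer (8 bytes). The struct below mirrors the AAPCS64 description and is illustrative only:

    struct AAPCS64VaList {
      void *__stack;   // next stacked argument
      void *__gr_top;  // end of the general-register save area
      void *__vr_top;  // end of the FP/SIMD-register save area
      int   __gr_offs; // negative offset from __gr_top to the next saved GPR
      int   __vr_offs; // negative offset from __vr_top to the next saved FP/SIMD reg
    };                 // sizeof(AAPCS64VaList) == 32 on LP64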
MachineBasicBlock * EmitZAInstr(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB) const
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock * > &Exits) const override
Insert explicit copies in entry and exit blocks.
int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const override
Return the preferred common base offset.
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
MachineBasicBlock * EmitInitTPIDR2Object(MachineInstr &MI, MachineBasicBlock *BB) const
bool lowerInterleavedStore(Instruction *Store, Value *Mask, ShuffleVectorInst *SVI, unsigned Factor, const APInt &GapMask) const override
Lower an interleaved store into a stN intrinsic.
MachineBasicBlock * EmitTileLoad(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB) const
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, bool UseScalable) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override
Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
bool preferSelectsOverBooleanArithmetic(EVT VT) const override
Should we prefer selects to doing arithmetic on boolean types.
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
Provide custom lowering hooks for some operations.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context, EVT VT) const override
bool shouldInsertTrailingSeqCstFenceForAtomicStore(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert a seq_cst trailing fence without reducing the or...
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool shouldRemoveRedundantExtend(SDValue Op) const override
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC) const
Selects the correct CCAssignFn for a given CallingConvention value.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ISD::SETCC ValueType.
bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Selects the correct CCAssignFn for a given CallingConvention value.
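A hedged usage sketch from inside AArch64TargetLowering code (CCInfo, Outs, CallConv and IsVarArg assumed in scope; AnalyzeCallOperands is the generic CCState driver):

    CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
    CCInfo.AnalyzeCallOperands(Outs, AssignFn); // assigns each outgoing arg a register or stack slot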
bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, IntrinsicInst *DI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool isLegalICmpImmediate(int64_t) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
bool isOpSuitableForLSE128(const Instruction *I) const
void fixupPtrauthDiscriminator(MachineInstr &MI, MachineBasicBlock *BB, MachineOperand &IntDiscOp, MachineOperand &AddrDiscOp, const TargetRegisterClass *AddrDiscRC) const
Replace (0, vreg) discriminator components with the operands of blend or with (immediate,...
bool lowerInterleavedLoad(Instruction *Load, Value *Mask, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor, const APInt &GapMask) const override
Lower an interleaved load into a ldN intrinsic.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool fallBackToDAGISel(const Instruction &Inst) const override
bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override
Return true if the target has native support for the specified value type and it is 'desirable' to us...
bool isLegalAddScalableImmediate(int64_t) const override
Return true if adding the specified scalable immediate is legal, that is the target has add instructi...
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
MachineBasicBlock * EmitCheckMatchingVL(MachineInstr &MI, MachineBasicBlock *MBB) const
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const
Returns true if VecTy is a legal interleaved access type.
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
For some targets, an LLVM struct type must be broken down into multiple simple types,...
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
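A hedged illustration of the exclusive-access loop these two hooks produce when AtomicExpand lowers a read-modify-write on a target without LSE atomics; the assembly in the comment is representative, not a verbatim dump:

    long fetchAddRelaxed(long *p, long v) {
      return __atomic_fetch_add(p, v, __ATOMIC_RELAXED);
      // Expands roughly to:
      //   .retry:
      //     ldxr  x8, [x0]        // emitLoadLinked
      //     add   x9, x8, x1
      //     stxr  w10, x9, [x0]   // emitStoreConditional
      //     cbnz  w10, .retry     // retry if the exclusive store failed
    }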
MachineBasicBlock * EmitLoweredCatchRet(MachineInstr &MI, MachineBasicBlock *BB) const
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
bool isZExtFree(Type *Ty1, Type *Ty2) const override
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const override
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const
MachineBasicBlock * EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain of an 'and' of a value with a mask followed by a compare against zero into a single test-style instruction.
bool isProfitableToHoist(Instruction *I) const override
Check if it is profitable to hoist instruction in then/else to if.
bool isOpSuitableForRCPC3(const Instruction *I) const
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const override
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
MachineBasicBlock * EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, bool Op0IsDef) const
MachineBasicBlock * EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const
bool shouldExpandVectorMatch(EVT VT, unsigned SearchSize) const override
Return true if the @llvm.experimental.vector.match intrinsic should be expanded for vector type ‘VT’ ...
MachineBasicBlock * EmitEntryPStateSM(MachineInstr &MI, MachineBasicBlock *BB) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const override
Control the following reassociation of operands: (op (op x, c1), y) -> (op (op x, y),...
bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const override
In AArch64, true if FEAT_CPA is present.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
MachineBasicBlock * EmitF128CSEL(MachineInstr &MI, MachineBasicBlock *BB) const
LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &FuncAttributes) const override
LLT returning variant.
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
MachineBasicBlock * EmitAllocateSMESaveBuffer(MachineInstr &MI, MachineBasicBlock *BB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool needsFixedCatchObjects() const override
Used for exception handling on Win64.
MachineBasicBlock * EmitAllocateZABuffer(MachineInstr &MI, MachineBasicBlock *BB) const
const AArch64TargetMachine & getTM() const
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const override
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
bool isOpSuitableForLDPSTP(const Instruction *I) const
AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI)
MachineBasicBlock * EmitGetSMESaveSize(MachineInstr &MI, MachineBasicBlock *BB) const
bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
bool isLegalAddImmediate(int64_t) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldConsiderGEPOffsetSplit() const override
bool isVectorClearMaskLegal(ArrayRef< int > M, EVT VT) const override
Similar to isShuffleMaskLegal.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering) const override
This method returns a target specific FastISel object, or null if the target does not support "fast" ...
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool useLoadStackGuardNode(const Module &M) const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
bool lowerInterleaveIntrinsicToStore(Instruction *Store, Value *Mask, ArrayRef< Value * > InterleaveValues) const override
Lower an interleave intrinsic to a target specific store intrinsic.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool enableAggressiveFMAFusion(EVT VT) const override
Enable aggressive FMA fusion on targets that want it.
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override
Return the type to use for a scalar shift opcode, given the shifted amount type.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
MachineBasicBlock * EmitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override
Return true if the @llvm.get.active.lane.mask intrinsic should be expanded using generic code in Sele...
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
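A minimal arithmetic check of the rewrite this hook gates (the constants are chosen arbitrarily):

    constexpr long folded(long x)   { return (x + 3) * 5; } // mul(add(x, c1), c2)
    constexpr long expanded(long x) { return 5 * x + 15; }  // add(mul(x, c2), c1 * c2)
    static_assert(folded(7) == expanded(7), "same value, different instruction mix");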
bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON=false) const
bool mergeStoresAfterLegalization(EVT VT) const override
SVE code generation for fixed length vectors does not custom lower BUILD_VECTOR.
bool useNewSMEABILowering() const
Returns true if the new SME ABI lowering should be used.
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
LLVM_ABI APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
bool isMinSignedValue() const
Determine if this is the smallest signed value.
uint64_t getZExtValue() const
Get zero extended value.
static LLVM_ABI void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
unsigned popcount() const
Count the number of bits set.
LLVM_ABI APInt getHiBits(unsigned numBits) const
Compute an APInt containing numBits highbits from this APInt.
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
bool isNegative() const
Determine sign of this APInt.
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
bool sle(const APInt &RHS) const
Signed less or equal comparison.
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
unsigned countr_zero() const
Count the number of trailing zero bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isMask(unsigned numBits) const
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
bool isOne() const
Determine if this is a value of 1.
int64_t getSExtValue() const
Get sign extended value.
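A small self-contained sketch exercising a few of the APInt helpers listed above:

    #include "llvm/ADT/APInt.h"
    #include <cassert>
    using llvm::APInt;

    void apintDemo() {
      APInt Mask = APInt::getLowBitsSet(32, 8); // 0x000000FF
      assert(Mask.isMask(8) && Mask.popcount() == 8 && Mask.countr_zero() == 0);
      APInt Wide = Mask.zext(64);               // same value, 64 bits wide
      assert(Wide.getZExtValue() == 0xFF);
    }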
an instruction to allocate memory on the stack
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
An instruction that atomically checks whether a specified value is in a memory location,...
Value * getCompareOperand()
an instruction that atomically reads a memory location, combines it with another value,...
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
bool isFloatingPointOperation() const
BinOp getOperation() const
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const BlockAddress * getBlockAddress() const
Function * getFunction() const
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantFPSDNode * getConstantFPSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant FP or null if this is not a constant FP splat.
LLVM_ABI std::optional< std::pair< APInt, APInt > > isConstantSequence() const
If this BuildVector is constant and represents the numerical series "<a, a+n, a+2n,...
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
LLVM_ABI int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power or 2,...
LLVM_ABI bool isConstant() const
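A hedged usage sketch of the splat query (BVN, a BuildVectorSDNode*, assumed in scope):

    APInt SplatValue, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs) &&
        SplatBitSize <= 64) {
      // SplatValue holds the smallest element that replicates the whole vector
    }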
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static LLVM_ABI bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
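A hedged sketch of the usual return-value analysis pattern (MF, Outs and CallConv assumed in scope; RetCC_AArch64_AAPCS is assumed to be the AAPCS return-value assignment function):

    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CallConv, /*IsVarArg=*/false, MF, RVLocs,
                   MF.getFunction().getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);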
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
int64_t getLocMemOffset() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
This class represents a function call, abstracting a target machine's calling convention.
bool isZero() const
Return true if the value is positive or negative zero.
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
bool isLittleEndian() const
Layout endianness...
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Class to represent fixed width SIMD vectors.
static FixedVectorType * getInteger(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
FunctionType * getFunctionType() const
Returns the FunctionType for me.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Constant * getPersonalityFn() const
Get the personality function associated with this function.
AttributeList getAttributes() const
Return the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
const Argument * const_arg_iterator
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
int64_t getOffset() const
const GlobalValue * getGlobal() const
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Type * getValueType() const
Common base class shared among various IRBuilders.
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
BasicBlock * GetInsertBlock() const
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
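A hedged sketch of these builder helpers used together (Builder, an IRBuilderBase&, plus Obj and Callee, a Function*, are assumed to be in scope):

    Value *P    = Builder.CreatePointerCast(Obj, Builder.getPtrTy());
    Value *Byte = Builder.CreateConstGEP1_32(Builder.getInt8Ty(), P, 8);
    CallInst *CI = Builder.CreateCall(Callee->getFunctionType(), Callee, {Byte});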
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
constexpr unsigned getScalarSizeInBits() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Tracks which library functions to use for a particular subtarget.
LLVM_ABI CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall.
LLVM_ABI RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
An instruction for reading from memory.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Represents a single loop in the control flow graph.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
@ INVALID_SIMPLE_VALUE_TYPE
static auto integer_fixedlen_vector_valuetypes()
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool isScalableVT() const
Return true if the type is a scalable type.
static auto all_valuetypes()
SimpleValueType Iteration.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto scalable_vector_valuetypes()
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isFixedLengthVector() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
bool is64BitVector() const
Return true if this is a 64-bit vector type.
static auto fp_fixedlen_vector_valuetypes()
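A small sketch of the MVT queries listed above (the header path is an assumption and may differ between LLVM versions):

    #include "llvm/CodeGen/ValueTypes.h"
    #include <cassert>
    using llvm::MVT;

    void mvtDemo() {
      MVT V = MVT::getVectorVT(MVT::getIntegerVT(32), 4);     // v4i32
      assert(V.isVector() && V.is128BitVector());
      assert(V.getVectorElementType() == MVT::i32);
      assert(V.getHalfNumVectorElementsVT().is64BitVector()); // v2i32
    }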
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MachineInstr * remove_instr(MachineInstr *I)
Remove the possibly bundled instruction from the instruction list without deleting it.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
SSPLayoutKind getObjectSSPLayout(int ObjectIdx) const
LLVM_ABI void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a callframe.
void setAdjustsStack(bool V)
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
@ SSPLK_None
Did not trigger a stack protector.
void setFrameAddressIsTaken(bool T)
bool hasScalableStackID(int ObjectIdx) const
bool isImmutableObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to an immutable object.
int getStackProtectorIndex() const
Return the index for the stack protector object.
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
void setStackID(int ObjectIdx, uint8_t ID)
void setHasTailCall(bool V=true)
bool hasMustTailInVarArgFunc() const
Returns true if the function is variadic and contains a musttail call.
void setReturnAddressIsTaken(bool s)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
LLVM_ABI int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca)
Notify the MachineFrameInfo object that a variable sized object has been created.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasStackProtectorIndex() const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
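A hedged sketch of creating frame objects with the calls listed above (MF assumed in scope):

    MachineFrameInfo &MFI = MF.getFrameInfo();
    int SpillFI = MFI.CreateSpillStackObject(16, Align(16));   // 16-byte spill slot
    int ArgFI   = MFI.CreateFixedObject(8, /*SPOffset=*/0,
                                        /*IsImmutable=*/true); // incoming stack argument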
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
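A hedged sketch of the builder in use, materialising "add DstReg, SrcReg, #16" (MBB, MBBI, DL, TII, DstReg and SrcReg assumed in scope):

    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), DstReg)
        .addReg(SrcReg)
        .addImm(16)   // unsigned 12-bit immediate
        .addImm(0);   // LSL #0 on the immediate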
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
This is a base class used to represent MGATHER and MSCATTER nodes.
const SDValue & getIndex() const
bool isIndexScaled() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
bool isIndexSigned() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getInc() const
const SDValue & getScale() const
const SDValue & getMask() const
const SDValue & getIntID() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
AtomicOrdering getMergedOrdering() const
Return a single atomic ordering that is at least as strong as both the success and failure orderings ...
const SDValue & getChain() const
bool isNonTemporal() const
bool isAtomic() const
Return true if the memory operation ordering is Unordered or higher.
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
bool getRtLibUseGOT() const
Returns true if PLT should be avoided for RTLib calls.
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isStrictFPOpcode()
Test if this node is a strict floating point pseudo-op.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
void dropFlags(unsigned Mask)
iterator_range< use_iterator > uses()
size_t use_size() const
Return the number of uses of this node.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
op_iterator op_end() const
bool isAssert() const
Test if this node is an assert operation.
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
Get the index which selects a specific result in the SDNode.
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
void setNode(SDNode *N)
Set the SDNode.
unsigned getOpcode() const
unsigned getNumOperands() const
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasStreamingInterface() const
bool hasStreamingCompatibleInterface() const
bool hasAgnosticZAInterface() const
bool hasStreamingInterfaceOrBody() const
bool hasNonStreamingInterface() const
bool hasStreamingBody() const
bool hasSharedZAInterface() const
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresEnablingZAAfterCall() const
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresDisablingZABeforeCall() const
bool requiresPreservingAllZAState() const
Class to represent scalable SIMD vectors.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC)
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
LLVM_ABI SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
LLVM_ABI SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags, bool AllowCommute=false)
Get the specified node if it's already available, or else return NULL.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
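A minimal sketch of how the getSetCC helper above is typically used from lowering code; the helper name and the assumption that DAG, DL, LHS and RHS are in scope are illustrative only:

  // Build (setcc eq LHS, RHS) using the target's preferred boolean result type.
  static SDValue buildEqualityCheck(SelectionDAG &DAG, const SDLoc &DL,
                                    SDValue LHS, SDValue RHS) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                      LHS.getValueType());
    return DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETEQ);
  }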
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
LLVM_ABI SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
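As a usage note, getNOT is shorthand for an XOR against an all-ones constant; the two forms below are equivalent for integer types (V, VT, DAG and DL are assumed to be in scope and are illustrative):

  SDValue NotA = DAG.getNOT(DL, V, VT);
  SDValue NotB = DAG.getNode(ISD::XOR, DL, VT, V,
                             DAG.getAllOnesConstant(DL, VT));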
const TargetLowering & getTargetLoweringInfo() const
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
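A short sketch contrasting the constant factories listed here; plain constants participate in generic folding, while target constants are left alone and are what isel patterns expect as immediate operands (the values shown are arbitrary):

  SDValue C   = DAG.getConstant(42, DL, MVT::i64);        // foldable constant
  SDValue TC  = DAG.getTargetConstant(42, DL, MVT::i32);   // opaque to combines
  SDValue Neg = DAG.getSignedConstant(-1, DL, MVT::i64);   // sign-extended value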
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
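A common lowering pattern combines GetSplitDestVTs and SplitVector to process an over-wide vector in halves and rejoin the result; this is a sketch under the assumption that Opc is a unary opcode legal on the half-width type:

  static SDValue splitAndRejoin(SelectionDAG &DAG, const SDLoc &DL, unsigned Opc,
                                SDValue Op) {
    EVT LoVT, HiVT;
    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(Op.getValueType());
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = DAG.SplitVector(Op, DL, LoVT, HiVT);
    Lo = DAG.getNode(Opc, DL, LoVT, Lo);    // operate on the low half
    Hi = DAG.getNode(Opc, DL, HiVT, Hi);    // operate on the high half
    return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), Lo, Hi);
  }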
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
const LibcallLoweringInfo & getLibcalls() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
void addCalledGlobal(const SDNode *Node, const GlobalValue *GV, unsigned OpFlags)
Set CalledGlobal to be associated with Node.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
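For fixed-width vectors getSplatBuildVector materializes a BUILD_VECTOR of repeated elements, whereas scalable vectors use getSplatVector (SPLAT_VECTOR) instead; a sketch with an arbitrary value:

  SDValue Elt   = DAG.getConstant(7, DL, MVT::i32);
  SDValue Splat = DAG.getSplatBuildVector(MVT::v4i32, DL, Elt);
  // For a scalable type such as MVT::nxv4i32, use DAG.getSplatVector instead.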
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
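These known-bits queries are how a combine typically proves a rewrite safe; a sketch assuming Op is a 64-bit integer value in scope:

  APInt LowByte = APInt::getLowBitsSet(64, 8);
  if (DAG.MaskedValueIsZero(Op, LowByte)) {
    // The low 8 bits of Op are provably zero; a masking AND would be redundant.
  }
  KnownBits Known = DAG.computeKnownBits(Op);
  unsigned MinLeadingZeros = Known.countMinLeadingZeros();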
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getDeactivationSymbol(const GlobalValue *GV)
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
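The shuffle mask indexes the concatenation of N1 and N2, with -1 meaning an undefined lane; a sketch that reverses a v4i32 value (N1 assumed to be in scope):

  int Mask[] = {3, 2, 1, 0};                       // pick N1's lanes in reverse
  SDValue Rev = DAG.getVectorShuffle(MVT::v4i32, DL, N1,
                                     DAG.getUNDEF(MVT::v4i32), Mask);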
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
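A sketch of how the mask predicates above are driven from an IR shufflevector; SVI is an assumed ShuffleVectorInst* operating on fixed-width vectors:

  SmallVector<int, 16> Mask;
  SVI->getShuffleMask(Mask);   // the static overload takes the Constant mask instead
  int NumSrcElts =
      cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
  bool IsReverse = ShuffleVectorInst::isReverseMask(Mask, NumSrcElts);
  unsigned Index;
  bool IsDeinterleave =
      ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, /*Factor=*/2, Index);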
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
int getSplatIndex() const
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
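These two containers are usually paired as a worklist plus visited set, which is also the shape hasPredecessorHelper above expects; a sketch assuming Root is an SDNode* in scope:

  SmallVector<SDNode *, 8> Worklist;
  SmallPtrSet<SDNode *, 8> Visited;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    SDNode *N = Worklist.pop_back_val();
    if (!Visited.insert(N).second)
      continue;                                 // already processed
    for (SDValue Op : N->op_values())           // enqueue all operands
      Worklist.push_back(Op.getNode());
  }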
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
constexpr size_t size() const
size - Get the string size.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
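This is the style of parsing used for inline-asm constraints and register names; a sketch over hypothetical input strings:

  StringRef S("w13");
  unsigned RegNo = 0;
  if (S.starts_with("w") && !S.drop_front(1).getAsInteger(10, RegNo)) {
    // getAsInteger returns false on success, so RegNo is now 13.
  }
  int Kind = StringSwitch<int>(S)
                 .Case("sp", 0)
                 .Case("fp", 1)
                 .Default(-1);                  // no case matched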
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
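In this backend such hooks are invoked from the AArch64TargetLowering constructor to describe per-type operation legality; the concrete choices below are illustrative rather than the backend's actual configuration:

  // Inside a TargetLowering subclass constructor:
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);     // lowered by target code
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);   // expanded by the legalizer
  setOperationAction(ISD::ADD, MVT::v4i32, Legal);      // natively supported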
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
virtual Value * getIRStackGuard(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const
Check whether or not MI needs to be moved close to its uses.
void setMaximumJumpTableSize(unsigned)
Indicate the maximum number of entries in jump tables.
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
virtual void insertSSPDeclarations(Module &M, const LibcallLoweringInfo &Libcalls) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setMaxBytesForAlignment(unsigned MaxBytes)
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
unsigned getMaximumJumpTableSize() const
Return upper limit for number of entries in a jump table.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
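Register classes are registered before computeRegisterProperties derives the legal-type table that isTypeLegal later consults; a sketch of the constructor-time sequence (the register class names follow the AArch64 backend, and Subtarget is an assumed member):

  addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
  addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
  computeRegisterProperties(Subtarget->getRegisterInfo());
  assert(isTypeLegal(MVT::i64) && "i64 should be legal once properties are computed");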
bool EnableExtLdPromotion
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
@ ZeroOrOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
static StringRef getLibcallImplName(RTLIB::LibcallImpl Call)
Get the libcall routine name for the specified libcall implementation.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual Value * getSafeStackPointerLocation(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const
Returns the target-specific address of the unsafe stack pointer.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
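Extending loads and truncating stores are configured per (result type, memory type) pair; a sketch of the pattern, with combinations chosen for illustration only:

  // i32 results loaded from narrower memory via sign/zero extension:
  setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i8, Legal);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, MVT::i16, Legal);
  // No single instruction performs an f64-to-f32 truncating store here:
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);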
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
MVT getFrameIndexTy(const DataLayout &DL) const
Return the type for frame index, which is determined by the alloca address space specified through th...
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op; at this point only the DemandedBits bits of its result are demanded, so try to simplify it accordingly.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned TLSSize
Bit size of immediate TLS offsets (0 == use the default).
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
This class represents a truncation of integer types.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt128Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
@ HalfTyID
16-bit floating point type
@ FloatTyID
32-bit floating point type
@ BFloatTyID
16-bit floating point type (7-bit significand)
@ DoubleTyID
64-bit floating point type
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeID getTypeID() const
Return the type id for the type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
User * getUser() const
Returns the User that contains this Use.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< use_iterator > uses()
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
Base class of all SIMD vector types.
static VectorType * getHalfElementsVectorType(VectorType *VTy)
This static method returns a VectorType with half as many elements as the input type and the same ele...
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static VectorType * getTruncatedElementVectorType(VectorType *VTy)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
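TypeSize and ElementCount both carry a known-minimum value plus a scalable flag, so fixed and scalable quantities can flow through the same code; a small sketch:

  ElementCount EC = ElementCount::getScalable(4);   // <vscale x 4 x ...>
  assert(EC.isScalable() && EC.getKnownMinValue() == 4);
  ElementCount Half = EC.divideCoefficientBy(2);    // <vscale x 2 x ...>
  TypeSize Bits = TypeSize::getFixed(64);           // exactly 64 bits, not scaled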
Type * getIndexedType() const
A range adaptor for a pair of iterators.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isValidCBCond(AArch64CC::CondCode Code)
True if a given condition code can be used in a fused compare-and-branch instruction,...
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
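A sketch of how the condition-code helpers are combined when a compare must be inverted and its flags materialized; the choice of EQ is arbitrary:

  AArch64CC::CondCode CC  = AArch64CC::EQ;
  AArch64CC::CondCode Inv = AArch64CC::getInvertedCondCode(CC);   // NE
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(Inv);
  // NZCV is then suitable as the flags-immediate operand of a CCMP-style node.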
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
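Logical immediates must be validated before they are encoded; a sketch of the check-then-encode pattern with an arbitrary repeating bit pattern:

  uint64_t Imm = 0x00FF00FF00FF00FFULL;              // repeating 16-bit element
  if (AArch64_AM::isLogicalImmediate(Imm, 64)) {
    uint64_t Enc = AArch64_AM::encodeLogicalImmediate(Imm, 64);
    // Enc holds the N:immr:imms field used by AND/ORR/EOR (immediate) forms.
  }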
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint64_t decodeAdvSIMDModImmType10(uint8_t Imm)
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static bool isSVELogicalImm(unsigned SizeInBits, uint64_t ImmVal, uint64_t &Encoding)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
ArrayRef< MCPhysReg > getFPRArgRegs()
int getSMEPseudoMap(uint16_t Opcode)
static constexpr unsigned SVEMaxBitsPerVector
const unsigned RoundingBitsPos
const uint64_t ReservedFPControlBits
static constexpr unsigned SVEBitsPerBlock
ArrayRef< MCPhysReg > getGPRArgRegs()
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ ARM64EC_Thunk_Native
Calling convention used in the ARM64EC ABI to implement calls between ARM64 code and thunks.
@ AArch64_VectorCall
Used between AArch64 Advanced SIMD functions.
@ Swift
Calling convention for Swift.
@ AArch64_SVE_VectorCall
Used between AArch64 SVE functions.
@ CFGuard_Check
Special calling convention on Windows for calling the Control Flow Guard Check ICall function.
@ PreserveMost
Used for runtime calls that preserve most registers.
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2
Preserve X2-X15, X19-X29, SP, Z0-Z31, P0-P15.
@ CXX_FAST_TLS
Used for access functions.
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0
Preserve X0-X13, X19-X29, SP, Z0-Z31, P0-P15.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1
Preserve X1-X15, X19-X29, SP, Z0-Z31, P0-P15.
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ PreserveNone
Used for runtime calls that preserve no general registers.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ ARM64EC_Thunk_X64
Calling convention used in the ARM64EC ABI to implement calls between x64 code and thunks.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNormalMaskedLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed masked load.
bool isNormalMaskedStore(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed masked store.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ LOOP_DEPENDENCE_RAW_MASK
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
@ BSWAP
Byte Swap and Counting operators.
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ VECTOR_FIND_LAST_ACTIVE
Finds the index of the last active mask element Operands: Mask.
@ FMODF
FMODF - Decomposes the operand into integral and fractional parts, each having the same type and sign...
@ FATAN2
FATAN2 - atan2, inspired by libm.
@ FSINCOSPI
FSINCOSPI - Compute both the sine and cosine times pi more accurately than FSINCOS(pi*x),...
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@ FADD
Simple binary floating point operators.
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SET_ROUNDING
Set rounding mode.
@ SIGN_EXTEND
Conversion operators.
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ ADDROFRETURNADDR
ADDROFRETURNADDR - Represents the llvm.addressofreturnaddress intrinsic.
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
@ BRIND
BRIND - Indirect branch.
@ BR_JT
BR_JT - Jumptable branch.
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ GET_ACTIVE_LANE_MASK
GET_ACTIVE_LANE_MASK - This corresponds to the llvm.get.active.lane.mask intrinsic.
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ CTLS
Count leading redundant sign bits.
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ PtrAuthGlobalAddress
A ptrauth constant.
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ UBSANTRAP
UBSANTRAP - Trap with an immediate describing the kind of sanitizer failure.
@ SMULO
Same for multiplication.
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, IMM) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by IMM elements and retu...
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, IMM) - Shifts CONCAT_VECTORS(VEC1, VEC2) right by IMM elements and re...
@ STRICT_FADD
Constrained versions of the binary floating point operators.
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively places vector elements based on mask e....
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
@ CLEAR_CACHE
llvm.clear_cache intrinsic Operands: Input Chain, Start Address, End Address Outputs: Output Chain
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ EXPERIMENTAL_VECTOR_HISTOGRAM
Experimental vector histogram intrinsic Operands: Input Chain, Inc, Mask, Base, Index,...
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that behave the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
@ LOOP_DEPENDENCE_WAR_MASK
The llvm.loop.dependence.
bool isOverflowIntrOpRes(SDValue Op)
Returns true if the specified value is the overflow result from one of the overflow intrinsic nodes.
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI bool isVectorShrinkable(const SDNode *N, unsigned NewEltSize, bool Signed)
Returns true if the specified node is a vector where all elements can be truncated to the specified e...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
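As an illustration of how these two condition-code helpers relate, here is a minimal sketch; it assumes that including llvm/CodeGen/SelectionDAG.h brings the ISD declarations into scope, and the wrapper function name is hypothetical:

  #include "llvm/CodeGen/SelectionDAG.h"
  #include <cassert>
  using namespace llvm;

  static void checkCondCodeHelpers() {
    // !(X < Y) is (X >= Y) for integer comparisons; (Y < X) is (X > Y).
    EVT VT = MVT::i32;
    assert(ISD::getSetCCInverse(ISD::SETLT, VT) == ISD::SETGE);
    assert(ISD::getSetCCSwappedOperands(ISD::SETLT) == ISD::SETGT);
    (void)VT;
  }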
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
static const int LAST_INDEXED_MODE
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
CastInst_match< OpTy, UIToFPInst > m_UIToFP(const OpTy &Op)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
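A minimal IR-level sketch of how the matchers listed above compose, assuming llvm/IR/PatternMatch.h; the matched shape (a possibly zero-extended add of a specific constant) and the helper name are illustrative only:

  #include "llvm/IR/PatternMatch.h"
  #include "llvm/IR/Value.h"
  using namespace llvm;
  using namespace llvm::PatternMatch;

  // Returns the non-constant operand X if V is (zext (add X, 1)) or just
  // (add X, 1) via m_ZExtOrSelf, otherwise nullptr.
  static Value *matchAddOne(Value *V) {
    Value *X = nullptr;
    if (match(V, m_ZExtOrSelf(m_Add(m_Value(X), m_SpecificInt(1)))))
      return X;
    return nullptr;
  }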
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
const unsigned VectorBits
@ ScalablePredicateVector
initializer< Ty > init(const Ty &Val)
std::optional< Function * > getAttachedARCFunction(const CallBase *CB)
This function returns operand bundle clang_arc_attachedcall's argument, which is the address of the A...
bool attachedCallOpBundleNeedsMarker(const CallBase *CB)
This function determines whether the clang_arc_attachedcall should be emitted with or without the mar...
bool hasAttachedCallOpBundle(const CallBase *CB)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool isPackedVectorType(EVT SomeVT)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool CC_AArch64_Arm64EC_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
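A small sketch of querying isZIPMask with the zip1 mask from the description above, assuming the AArch64 backend header that declares it is in scope; the wrapper name and the expected outputs are illustrative:

  #include "llvm/ADT/ArrayRef.h"
  #include <cassert>

  static void checkZipMask() {
    // zip1 of two 8-element vectors interleaves the low halves of its inputs.
    int Mask[] = {0, 8, 1, 9, 2, 10, 3, 11};
    unsigned WhichResult = 0, OperandOrder = 0;
    bool IsZip = llvm::isZIPMask(Mask, /*NumElts=*/8, WhichResult, OperandOrder);
    assert(IsZip && WhichResult == 0);
    (void)IsZip;
  }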
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
bool CC_AArch64_Win64PCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs=nullptr, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
testing::Matcher< const detail::ErrorHolder & > Failed()
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or an FP constant.
auto map_to_vector(ContainerTy &&C, FuncTy &&F)
Map a range to a SmallVector with element types deduced from the mapping.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
std::optional< unsigned > getSVEPredPatternFromNumElements(unsigned MinNumElts)
Return specific VL predicate pattern based on the number of elements.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
bool CC_AArch64_Arm64EC_Thunk(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant bit, stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
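A self-contained sketch of several of the bit-math helpers listed in this section, assuming llvm/Support/MathExtras.h and llvm/ADT/bit.h; the constants and the wrapper function are illustrative:

  #include "llvm/ADT/bit.h"
  #include "llvm/Support/MathExtras.h"
  #include <cassert>

  static void checkBitMathHelpers() {
    assert(llvm::Log2_64(32) == 5);           // floor log base 2
    assert(llvm::countr_zero(0x8u) == 3);     // trailing zero count
    assert(llvm::isShiftedMask_64(0x0ff0));   // 0000111111110000: ones, then zeros
    assert(!llvm::isShiftedMask_64(0));       // the empty sequence is rejected
    assert(llvm::maxUIntN(8) == 255);         // largest 8-bit unsigned value
    assert(llvm::isPowerOf2_64(64) && !llvm::isPowerOf2_64(0));
  }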
unsigned M1(unsigned Val)
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool RetCC_AArch64_Arm64EC_Thunk(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
bool CC_AArch64_Arm64EC_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
ComplexDeinterleavingOperation
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
generic_gep_type_iterator<> gep_type_iterator
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
LLVM_ABI SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
constexpr int PoisonMaskElem
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
ComplexDeinterleavingRotation
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI VectorType * getDeinterleavedVectorType(IntrinsicInst *DI)
Given a deinterleaveN intrinsic, return the (narrow) vector type of each factor.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
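A brief sketch of alignTo together with commonAlignment (listed later in this section), assuming llvm/Support/Alignment.h; the concrete sizes and the wrapper are illustrative:

  #include "llvm/Support/Alignment.h"
  #include <cassert>

  static void checkAlignmentHelpers() {
    // Round 10 bytes up to the next multiple of an 8-byte alignment.
    assert(llvm::alignTo(10, llvm::Align(8)) == 16);
    // The largest alignment still guaranteed at offset 4 from a 16-byte boundary.
    assert(llvm::commonAlignment(llvm::Align(16), 4) == llvm::Align(4));
  }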
constexpr unsigned getDefRegState(bool B)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
gep_type_iterator gep_type_begin(const User *GEP)
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
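A hedged sketch of the round trip between this helper and getSVEPredPatternFromNumElements (listed earlier), assuming the AArch64 utility header that declares both is in scope; the element counts rely on the assumption that VL1-VL8 and VL16 have dedicated patterns:

  #include <cassert>
  #include <optional>

  static void checkSVEPredPatternRoundTrip() {
    // Counts with a dedicated VLn pattern should map there and back; other
    // counts are expected to yield std::nullopt from the first helper.
    for (unsigned N : {1u, 2u, 4u, 8u, 16u}) {
      std::optional<unsigned> Pat = llvm::getSVEPredPatternFromNumElements(N);
      assert(Pat && llvm::getNumElementsFromSVEPredPattern(*Pat) == N);
      (void)Pat;
    }
  }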
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
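A short sketch combining several of the range helpers listed in this section (all_of, any_of, count_if, is_contained, drop_begin, all_equal), assuming llvm/ADT/STLExtras.h; the sample data and the wrapper function are illustrative:

  #include "llvm/ADT/STLExtras.h"
  #include <cassert>
  #include <vector>

  static void checkRangeHelpers() {
    std::vector<int> V = {2, 4, 6, 8};
    assert(llvm::all_of(V, [](int X) { return X % 2 == 0; }));
    assert(!llvm::any_of(V, [](int X) { return X > 10; }));
    assert(llvm::count_if(V, [](int X) { return X > 3; }) == 3);
    assert(llvm::is_contained(V, 6));
    // drop_begin skips the first element; all_equal checks a literal list.
    assert(*llvm::drop_begin(V).begin() == 4);
    assert(llvm::all_equal({7, 7, 7}));
  }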
static const MachineMemOperand::Flags MOStridedAccess
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
@ Default
The result values are uniform if and only if all operands are uniform.
bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
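A hedged sketch of createSequentialMask, assuming llvm/Analysis/VectorUtils.h and that trailing undef lanes are encoded as -1 (an assumption consistent with the shuffle-mask convention used elsewhere in this index); the wrapper is illustrative:

  #include "llvm/Analysis/VectorUtils.h"
  #include <cassert>

  static void checkSequentialMask() {
    // Four sequential lanes starting at 2, followed by one undef (-1) lane.
    llvm::SmallVector<int, 16> Mask = llvm::createSequentialMask(2, 4, 1);
    assert(Mask.size() == 5);
    assert(Mask[0] == 2 && Mask[3] == 5 && Mask[4] == -1);
  }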
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
bool CC_AArch64_Arm64EC_Thunk_Native(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool RetCC_AArch64_Arm64EC_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
bool CC_AArch64_Preserve_None(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const unsigned PerfectShuffleTable[6561+1]
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper structure to keep track of a SET_CC lowered into AArch64 code.
Helper structure to keep track of ISD::SET_CC operands.
Helper structure to be able to read SetCC information.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represent subnormal handling kind for floating point instruction inputs and outputs.
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
uint64_t getScalarStoreSize() const
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
ElementCount getVectorElementCount() const
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
bool isPow2VectorType() const
Returns true if the given vector type has a power-of-two number of elements.
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
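A minimal sketch of building and querying EVTs with the factory functions above, assuming llvm/CodeGen/ValueTypes.h and an existing LLVMContext; the chosen types and the wrapper function are illustrative:

  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include <cassert>

  static void checkEVTQueries(llvm::LLVMContext &Ctx) {
    // A scalable vector with a minimum of four i32 elements (nxv4i32).
    llvm::EVT VT =
        llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4, /*IsScalable=*/true);
    assert(VT.isScalableVector() && VT.isInteger());
    assert(VT.getVectorMinNumElements() == 4);
    assert(VT.getScalarSizeInBits() == 32);
    // An arbitrary-width integer with no simple MVT equivalent.
    llvm::EVT I24 = llvm::EVT::getIntegerVT(Ctx, 24);
    assert(!I24.isSimple() && I24.getFixedSizeInBits() == 24);
    (void)VT;
    (void)I24;
  }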
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
bool isScalableVT() const
Return true if the type is a scalable type.
bool isFixedLengthVector() const
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool is64BitVector() const
Return true if this is a 64-bit vector type.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
bool isZero() const
Returns true if value is all zero.
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
unsigned getBitWidth() const
Get the bit width of this value.
static LLVM_ABI KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute the known bits resulting from addition of LHS and RHS.
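A short sketch of the KnownBits helpers above, assuming llvm/Support/KnownBits.h; adding two fully known constants is expected to yield a fully known constant, and the wrapper function is hypothetical:

  #include "llvm/ADT/APInt.h"
  #include "llvm/Support/KnownBits.h"
  #include <cassert>

  static void checkKnownBitsAdd() {
    llvm::KnownBits A = llvm::KnownBits::makeConstant(llvm::APInt(8, 5));
    llvm::KnownBits B = llvm::KnownBits::makeConstant(llvm::APInt(8, 7));
    llvm::KnownBits Sum = llvm::KnownBits::add(A, B);
    assert(Sum.isConstant() && Sum.getConstant() == 12);
    // Truncating the tracked value narrows the bit width accordingly.
    assert(Sum.trunc(4).getBitWidth() == 4);
  }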
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
APInt getSignedMinValue() const
Return the minimal signed value possible given these KnownBits.
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Constraint for a predicate of the form "cmp Pred Op, OtherOp", where Op is the value the constraint a...
A simple container for information about the supported runtime calls.
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
bool isBeforeLegalizeOps() const
bool isAfterLegalizeDAG() const
bool isCalledByLegalizer() const
bool isBeforeLegalize() const
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
Helper structure to keep track of SetCC information.