#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-lower"

STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");

    cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
    cl::desc("Enable AArch64 logical imm instruction "
    cl::desc("Combine extends of AArch64 masked "
             "gather intrinsics"),
    cl::desc("Combine ext and trunc to TBL"),
    cl::desc("Enable / disable SVE scalable vectors in Global ISel"),

                                     AArch64::X3, AArch64::X4, AArch64::X5,
                                     AArch64::X6, AArch64::X7};
                                     AArch64::Q3, AArch64::Q4, AArch64::Q5,
                                     AArch64::Q6, AArch64::Q7};
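// X0-X7 and Q0-Q7 are the AAPCS64 integer and FP/SIMD argument registers; the
// arrays above (ending in X3-X7 and Q3-Q7) appear to list the GPR and FPR
// argument-register sequences that the calling-convention lowering walks.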
    return MVT::nxv8bf16;
  switch (EC.getKnownMinValue()) {
         "Expected scalable predicate vector type!");
         "Expected legal vector type!");
  switch (Op.getOpcode()) {
    switch (Op.getConstantOperandVal(0)) {
    case Intrinsic::aarch64_sve_ptrue:
    case Intrinsic::aarch64_sve_pnext:
    case Intrinsic::aarch64_sve_cmpeq:
    case Intrinsic::aarch64_sve_cmpne:
    case Intrinsic::aarch64_sve_cmpge:
    case Intrinsic::aarch64_sve_cmpgt:
    case Intrinsic::aarch64_sve_cmphs:
    case Intrinsic::aarch64_sve_cmphi:
    case Intrinsic::aarch64_sve_cmpeq_wide:
    case Intrinsic::aarch64_sve_cmpne_wide:
    case Intrinsic::aarch64_sve_cmpge_wide:
    case Intrinsic::aarch64_sve_cmpgt_wide:
    case Intrinsic::aarch64_sve_cmplt_wide:
    case Intrinsic::aarch64_sve_cmple_wide:
    case Intrinsic::aarch64_sve_cmphs_wide:
    case Intrinsic::aarch64_sve_cmphi_wide:
    case Intrinsic::aarch64_sve_cmplo_wide:
    case Intrinsic::aarch64_sve_cmpls_wide:
    case Intrinsic::aarch64_sve_fcmpeq:
    case Intrinsic::aarch64_sve_fcmpne:
    case Intrinsic::aarch64_sve_fcmpge:
    case Intrinsic::aarch64_sve_fcmpgt:
    case Intrinsic::aarch64_sve_fcmpuo:
    case Intrinsic::aarch64_sve_facgt:
    case Intrinsic::aarch64_sve_facge:
    case Intrinsic::aarch64_sve_whilege:
    case Intrinsic::aarch64_sve_whilegt:
    case Intrinsic::aarch64_sve_whilehi:
    case Intrinsic::aarch64_sve_whilehs:
    case Intrinsic::aarch64_sve_whilele:
    case Intrinsic::aarch64_sve_whilelo:
    case Intrinsic::aarch64_sve_whilels:
    case Intrinsic::aarch64_sve_whilelt:
    case Intrinsic::aarch64_sve_match:
    case Intrinsic::aarch64_sve_nmatch:
    case Intrinsic::aarch64_sve_whilege_x2:
    case Intrinsic::aarch64_sve_whilegt_x2:
    case Intrinsic::aarch64_sve_whilehi_x2:
    case Intrinsic::aarch64_sve_whilehs_x2:
    case Intrinsic::aarch64_sve_whilele_x2:
    case Intrinsic::aarch64_sve_whilelo_x2:
    case Intrinsic::aarch64_sve_whilels_x2:
    case Intrinsic::aarch64_sve_whilelt_x2:
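// All of the cases above appear to be SVE intrinsics whose result is a
// predicate (the compares, the WHILE* family, MATCH/NMATCH, PTRUE/PNEXT),
// grouped so the switch can handle every predicate-producing intrinsic with
// one shared path.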
static std::tuple<SDValue, SDValue>
  const auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
  if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
    AddrDisc = DAG->getRegister(AArch64::NoRegister, MVT::i64);
  return std::make_tuple(
  if (Subtarget->hasLS64()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasNEON()) {
    addDRType(MVT::v2f32);
    addDRType(MVT::v8i8);
    addDRType(MVT::v4i16);
    addDRType(MVT::v2i32);
    addDRType(MVT::v1i64);
    addDRType(MVT::v1f64);
    addDRType(MVT::v4f16);
    addDRType(MVT::v4bf16);
    addQRType(MVT::v4f32);
    addQRType(MVT::v2f64);
    addQRType(MVT::v16i8);
    addQRType(MVT::v8i16);
    addQRType(MVT::v4i32);
    addQRType(MVT::v2i64);
    addQRType(MVT::v8f16);
    addQRType(MVT::v8bf16);
  if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasCSSC()) {
  if (Subtarget->hasFullFP16()) {
  auto LegalizeNarrowFP = [this](MVT ScalarVT) {
  if (!Subtarget->hasFullFP16()) {
    LegalizeNarrowFP(MVT::f16);
  LegalizeNarrowFP(MVT::bf16);
  for (MVT Ty : {MVT::f32, MVT::f64})
  if (Subtarget->hasFullFP16())
  for (MVT Ty : {MVT::f32, MVT::f64})
  if (Subtarget->hasFullFP16())
  if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
  if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
#define LCALLNAMES(A, B, N)                                                    \
  setLibcallName(A##N##_RELAX, #B #N "_relax");                                \
  setLibcallName(A##N##_ACQ, #B #N "_acq");                                    \
  setLibcallName(A##N##_REL, #B #N "_rel");                                    \
  setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
#define LCALLNAME4(A, B)                                                       \
  LCALLNAMES(A, B, 1)                                                          \
  LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
#define LCALLNAME5(A, B)                                                       \
  LCALLNAMES(A, B, 1)                                                          \
  LCALLNAMES(A, B, 2)                                                          \
  LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
    LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
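// LCALLNAME4/LCALLNAME5 expand LCALLNAMES once per access size, so the single
// line LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas) registers
// __aarch64_cas1_relax, __aarch64_cas1_acq, __aarch64_cas1_rel and
// __aarch64_cas1_acq_rel, plus the matching 2-, 4-, 8- and 16-byte helpers;
// the LCALLNAME4 users (SWP, LDADD, LDSET, LDCLR, LDEOR) stop at 8 bytes.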
  if (Subtarget->hasLSE128()) {
  if (Subtarget->hasLSE2()) {
  if (WideVT.getScalarSizeInBits() > NarrowVT.getScalarSizeInBits()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasSME())
  for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
  if (Subtarget->hasFullFP16()) {
  for (auto VT : {MVT::v1i64, MVT::v2i64}) {
  for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
                  MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
  for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
  for (MVT VT : { MVT::v4f16, MVT::v2f32,
                  MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
    if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
  for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
                  MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
    if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
  for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
  if (Subtarget->hasFullFP16())
    for (MVT Ty : {MVT::v4f16, MVT::v8f16})
  for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
  if (Subtarget->hasFullFP16())
    for (MVT Ty : {MVT::v4f16, MVT::v8f16})
  for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
  for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
  if (VT.is128BitVector() || VT.is64BitVector()) {
  for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
  if (Subtarget->hasSME()) {
       {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
  for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
  if (Subtarget->hasSVE2() ||
      (Subtarget->hasSME() && Subtarget->isStreaming()))
  for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
  for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32})
       { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
         MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
       {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
    if (VT != MVT::nxv16i1) {
       {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
        MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
        MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
  for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                  MVT::nxv4f32, MVT::nxv2f64}) {
  for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
  if (Subtarget->hasSVEB16B16()) {
  if (!Subtarget->hasSVEB16B16()) {
  for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
                  MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
      addTypeForFixedLengthSVE(VT);
      addTypeForFixedLengthSVE(VT);
  for (auto VT : {MVT::v8i8, MVT::v4i16})
  for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
  for (auto VT : {MVT::v8f16, MVT::v4f32})
  for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
                  MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
  for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
  for (auto VT : {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1})
  for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64,
                  MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                  MVT::nxv4f32, MVT::nxv2f64, MVT::nxv2bf16, MVT::nxv4bf16,
                  MVT::nxv8bf16, MVT::v4f16, MVT::v8f16, MVT::v2f32,
                  MVT::v4f32, MVT::v1f64, MVT::v2f64, MVT::v8i8,
                  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
                  MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
  for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                  MVT::nxv4f32, MVT::nxv2f64, MVT::v4f16, MVT::v8f16,
                  MVT::v2f32, MVT::v4f32, MVT::v2f64})
       {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv2f32,
        MVT::nxv2f64, MVT::nxv4i8, MVT::nxv4i16, MVT::nxv4i32, MVT::nxv4f32})
  for (auto VT : {MVT::v2i8, MVT::v2i16, MVT::v2i32, MVT::v2i64, MVT::v2f32,
                  MVT::v2f64, MVT::v4i8, MVT::v4i16, MVT::v4i32, MVT::v4f32})
  if (Subtarget->hasSVE2()) {
  if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
  if (Subtarget->hasSVE()) {
  for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
    if ((libcallName != nullptr) && (libcallName[0] != '#')) {
void AArch64TargetLowering::addTypeForNEON(MVT VT) {
  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
      ((VT == MVT::v4bf16 || VT == MVT::v8bf16 || VT == MVT::v4f16 ||
        VT == MVT::v8f16) &&
       Subtarget->hasFullFP16()))
  if (VT != MVT::v8i8 && VT != MVT::v16i8)
  for (unsigned Opcode :
  for (unsigned Opcode :
  if (Subtarget->hasD128()) {
  if (!Subtarget->hasSVE())
  if (ResVT != MVT::nxv2i1 && ResVT != MVT::nxv4i1 && ResVT != MVT::nxv8i1 &&
      ResVT != MVT::nxv16i1 && ResVT != MVT::v2i1 && ResVT != MVT::v4i1 &&
      ResVT != MVT::v8i1 && ResVT != MVT::v16i1)
  if (OpVT != MVT::i32 && OpVT != MVT::i64)
  if (I->getIntrinsicID() != Intrinsic::experimental_vector_partial_reduce_add)
  auto Op1 = I->getOperand(1);
  return VT != MVT::nxv16i1 && VT != MVT::nxv8i1 && VT != MVT::nxv4i1 &&
         VT != MVT::nxv2i1 && VT != MVT::v16i1 && VT != MVT::v8i1 &&
         VT != MVT::v4i1 && VT != MVT::v2i1;
                                               unsigned SearchSize) const {
  if (VT == MVT::nxv8i16 || VT == MVT::v8i16)
    return SearchSize != 8;
  if (VT == MVT::nxv16i8 || VT == MVT::v16i8 || VT == MVT::v8i8)
    return SearchSize != 8 && SearchSize != 16;
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
  while (InnerVT != VT) {
  while (InnerVT != VT) {
void AArch64TargetLowering::addDRType(MVT VT) {
void AArch64TargetLowering::addQRType(MVT VT) {
  Imm = C->getZExtValue();
  return N->getOpcode() == Opc &&
                                       const APInt &Demanded,
  uint64_t OldImm = Imm, NewImm, Enc;
  if (Imm == 0 || Imm == Mask ||
  unsigned EltSize = Size;
      ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
  uint64_t Sum = RotatedImm + NonDemandedBits;
  bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
  uint64_t Ones = (Sum + Carry) & NonDemandedBits;
  NewImm = (Imm | Ones) & Mask;
  while (EltSize < Size) {
    NewImm |= NewImm << EltSize;
         "demanded bits should never be altered");
  assert(OldImm != NewImm &&
         "the new imm shouldn't be equal to the old imm");
  EVT VT = Op.getValueType();
  if (NewImm == 0 || NewImm == OrigMask) {
  EVT VT = Op.getValueType();
         "i32 or i64 is expected after legalization.");
  switch (Op.getOpcode()) {
    NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
    NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
    NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
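// The arithmetic above appears to implement the logical-immediate rewrite:
// bits outside Demanded may take any value, so RotatedImm/Sum/Carry pick a set
// of non-demanded bits (Ones) such that NewImm = (Imm | Ones) & Mask becomes
// encodable as an AArch64 logical immediate, the while (EltSize < Size) loop
// replicates that per-element pattern across the full register width, and the
// switch above then rebuilds the operation as an immediate-form
// ANDri/ORRri/EORri.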
  switch (Op.getOpcode()) {
    if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
      assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
             "Expected DUP implicit truncation");
      Known = Known.trunc(Op.getScalarValueSizeInBits());
        ~(Op->getConstantOperandAPInt(1) << Op->getConstantOperandAPInt(2))
    case Intrinsic::aarch64_ldaxr:
    case Intrinsic::aarch64_ldxr: {
      EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
    unsigned IntNo = Op.getConstantOperandVal(0);
    case Intrinsic::aarch64_neon_uaddlv: {
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
        unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
    case Intrinsic::aarch64_neon_umaxv:
    case Intrinsic::aarch64_neon_uminv: {
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
      } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
                                                  unsigned Depth) const {
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
    return std::min<uint64_t>(Tmp + Op.getConstantOperandVal(1), VTBits);
                                                  unsigned *Fast) const {
  if (ElementSizeBits % 8 == 0 && Alignment >= Align(ElementSizeBits / 8))
  if (Subtarget->requiresStrictAlign())
    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
                                                  unsigned *Fast) const {
  if (Subtarget->requiresStrictAlign())
    *Fast = !Subtarget->isMisaligned128StoreSlow() ||
#define MAKE_CASE(V)                                                           \
  Register DestReg = MI.getOperand(0).getReg();
  Register IfTrueReg = MI.getOperand(1).getReg();
  Register IfFalseReg = MI.getOperand(2).getReg();
  unsigned CondCode = MI.getOperand(3).getImm();
  bool NZCVKilled = MI.getOperand(4).isKill();
  MI.eraseFromParent();
         "SEH does not use catchret!");
  Register TargetReg = MI.getOperand(0).getReg();
      TII.probedStackAlloc(MBBI, TargetReg, false);
  MI.eraseFromParent();
  return NextInst->getParent();
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(3));
  MIB.add(MI.getOperand(4));
  MIB.add(MI.getOperand(5));
  MI.eraseFromParent();
  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(1));
  MI.eraseFromParent();
                                        bool Op0IsDef) const {
  for (unsigned I = 1; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  MI.eraseFromParent();
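// This helper appears to expand a pseudo by creating MIB with the real opcode
// and copying every operand of MI across unchanged (with operand 0 optionally
// treated as a def, per the Op0IsDef parameter above) before the pseudo is
// erased.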
  unsigned StartIdx = 0;
  bool HasTile = BaseReg != AArch64::ZA;
  bool HasZPROut = HasTile && MI.getOperand(0).isReg();
    MIB.add(MI.getOperand(StartIdx));
    MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm(),
    MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm());
  if (MI.getOperand(0).isReg() && !MI.getOperand(1).isImm()) {
    MIB.add(MI.getOperand(StartIdx));
  for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  MI.eraseFromParent();
  MIB.add(MI.getOperand(0));
  unsigned Mask = MI.getOperand(0).getImm();
  for (unsigned I = 0; I < 8; I++) {
    if (Mask & (1 << I))
  MI.eraseFromParent();
  if (TPIDR2.Uses > 0) {
         "Lazy ZA save is not yet supported on Windows");
  if (TPIDR2.Uses > 0) {
    Register SP = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), SP)
    auto Size = MI.getOperand(1).getReg();
    auto Dest = MI.getOperand(0).getReg();
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::MSUBXrrr), Dest)
         "Lazy ZA save is not yet supported on Windows");
    auto Size = MI.getOperand(1).getReg();
    auto Dest = MI.getOperand(0).getReg();
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::SUBXrx64), AArch64::SP)
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), Dest)
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
            MI.getOperand(0).getReg());
            MI.getOperand(0).getReg())
            MI.getOperand(0).getReg())
  if (SMEOrigInstr != -1) {
    switch (SMEMatrixType) {
  switch (MI.getOpcode()) {
  case AArch64::InitTPIDR2Obj:
  case AArch64::AllocateZABuffer:
  case AArch64::AllocateSMESaveBuffer:
  case AArch64::GetSMESaveSize:
  case AArch64::F128CSEL:
  case TargetOpcode::STATEPOINT:
    MI.addOperand(*MI.getMF(),
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
  case TargetOpcode::PATCHABLE_EVENT_CALL:
  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
  case AArch64::CATCHRET:
  case AArch64::PROBED_STACKALLOC_DYN:
  case AArch64::LD1_MXIPXX_H_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LDR_ZA_PSEUDO:
  case AArch64::LDR_TX_PSEUDO:
  case AArch64::STR_TX_PSEUDO:
  case AArch64::ZERO_M_PSEUDO:
  case AArch64::ZERO_T_PSEUDO:
  case AArch64::MOVT_TIZ_PSEUDO:
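// Each LD1_MXIPXX_{H,V}_PSEUDO_<size> maps to the corresponding horizontal or
// vertical SME tile load, with the base tile register chosen by element size
// (ZAB0 bytes, ZAH0 halfwords, ZAS0 words, ZAD0 doublewords, ZAQ0 quadwords);
// EmitTileLoad presumably adds the pseudo's tile-index immediate on top of
// that base, as the MIB.addReg(BaseReg + ...) code earlier suggests.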
    N = N->getOperand(0).getNode();
  auto Opnd0 = N->getOperand(0);
                         CondCode, CondCode2);
  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
             << " legal: " << (IsLegal ? "yes\n" : "no\n"));
      (isIntEqualitySetCC(CC) ||
  EVT VT = LHS.getValueType();
  if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {
    Chain = RHS.getValue(1);
  return DAG.getNode(Opcode, dl, {MVT::i32, MVT::Other}, {Chain, LHS, RHS});
  EVT VT = LHS.getValueType();
  if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {
      isIntEqualitySetCC(CC)) {
    return LHS.getValue(1);
  unsigned Opcode = 0;
  if (LHS.getValueType().isFloatingPoint()) {
    assert(LHS.getValueType() != MVT::f128);
    if ((LHS.getValueType() == MVT::f16 && !FullFP16) ||
        LHS.getValueType() == MVT::bf16) {
      APInt Imm = Const->getAPIntValue();
      if (Imm.isNegative() && Imm.sgt(-32)) {
      isIntEqualitySetCC(CC)) {
                               bool &MustBeFirst, bool WillNegate,
                               unsigned Depth = 0) {
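// The CanNegate/MustBeFirst logic below appears to decide whether a tree of
// AND/OR of comparisons can be emitted as one CMP followed by a chain of
// conditional compares (CCMP/FCCMP). Roughly, (a == 0 && b > 5) becomes
//   CMP  a, #0
//   CCMP b, #5, #nzcv, eq   ; only meaningful when the first compare was EQ
// with the final branch/select using "gt"; one side of the tree may have to
// be negated first, which is what CanNegate and MustBeFirst track.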
  MustBeFirst = false;
  bool IsOR = Opcode == ISD::OR;
  if (MustBeFirstL && MustBeFirstR)
  if (!CanNegateL && !CanNegateR)
    CanNegate = WillNegate && CanNegateL && CanNegateR;
    MustBeFirst = !CanNegate;
    MustBeFirst = MustBeFirstL || MustBeFirstR;
    bool isInteger = LHS.getValueType().isInteger();
      CC = getSetCCInverse(CC, LHS.getValueType());
      assert(LHS.getValueType().isFloatingPoint());
      Predicate = ExtraCC;
  bool IsOR = Opcode == ISD::OR;
  assert(ValidL && "Valid conjunction/disjunction tree");
  assert(ValidR && "Valid conjunction/disjunction tree");
    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
  bool NegateAfterAll;
      assert(CanNegateR && "at least one side must be negatable");
      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
      NegateAfterR = true;
      NegateR = CanNegateR;
      NegateAfterR = !CanNegateR;
    NegateAfterAll = !Negate;
    assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
    assert(!Negate && "Valid conjunction/disjunction tree");
    NegateAfterR = false;
    NegateAfterAll = false;
  bool DummyCanNegate;
  bool DummyMustBeFirst;
  auto isSupportedExtend = [&](SDValue V) {
    if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
      uint64_t Mask = MaskCst->getZExtValue();
      return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
  if (!Op.hasOneUse())
  if (isSupportedExtend(Op))
  unsigned Opc = Op.getOpcode();
    if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      uint64_t Shift = ShiftCst->getZExtValue();
      if (isSupportedExtend(Op.getOperand(0)))
        return (Shift <= 4) ? 2 : 1;
      EVT VT = Op.getValueType();
      if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
  EVT VT = RHS.getValueType();
      if ((VT == MVT::i32 && C != 0x80000000 &&
          (VT == MVT::i64 && C != 0x80000000ULL &&
      if ((VT == MVT::i32 && C != 0 &&
      if ((VT == MVT::i32 && C != INT32_MAX &&
      if ((VT == MVT::i32 && C != UINT32_MAX &&
  if (!isa<ConstantSDNode>(RHS) ||
  if (isIntEqualitySetCC(CC) && isa<ConstantSDNode>(RHS)) {
        cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
        LHS.getNode()->hasNUsesOfValue(1, 0)) {
      int16_t ValueofRHS = RHS->getAsZExtVal();
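// The C != 0 / C != INT32_MAX / C != UINT32_MAX / C != 0x80000000 guards above
// look like the usual immediate-adjustment trick: when the RHS constant is not
// a legal arithmetic immediate, the comparison is rewritten against C - 1 or
// C + 1 with a correspondingly adjusted condition code (e.g. "x < C" becomes
// "x <= C - 1"), provided the adjustment cannot wrap.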
static std::pair<SDValue, SDValue>
  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
         "Unsupported value type");
  switch (Op.getOpcode()) {
    if (Op.getValueType() == MVT::i32) {
      assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
    Overflow = Value.getValue(1);
  return std::make_pair(Value, Overflow);
    return LowerToScalableOp(Op, DAG);
  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
  if (!CFVal || !CTVal)
  return Cmp.getValue(1);
                                unsigned Opcode, bool IsSigned) {
  EVT VT0 = Op.getValue(0).getValueType();
  EVT VT1 = Op.getValue(1).getValueType();
  if (VT0 != MVT::i32 && VT0 != MVT::i64)
  unsigned IsWrite = Op.getConstantOperandVal(2);
  unsigned Locality = Op.getConstantOperandVal(3);
  unsigned IsData = Op.getConstantOperandVal(4);
  bool IsStream = !Locality;
  assert(Locality <= 3 && "Prefetch locality out-of-range");
    Locality = 3 - Locality;
  unsigned PrfOp = (IsWrite << 4) |
  if (LHSConstOp && RHSConst) {
    uint64_t NewMaskValue = LHSConstValue & ~(RHSConstant - 1);
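// Sketch of the PRFM operand packing suggested by the code above: IsWrite goes
// in bit 4 (PLD vs PST), IsData presumably selects data vs. instruction cache
// in bit 3, the cache level occupies bits 1-2 and the keep/stream policy bit 0.
// ISD::PREFETCH counts locality from 0 (none) to 3 (extremely local), so
// "Locality = 3 - Locality" converts it onto the L1/L2/L3 field, and IsStream
// marks the non-temporal (STRM) hint when the locality is 0.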
  EVT VT = Op.getValueType();
  if (VT == MVT::nxv2f32 || VT == MVT::nxv4f32)
  if (VT != MVT::nxv2f64)
    return LowerFixedLengthFPExtendToSVE(Op, DAG);
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
  if (VT == MVT::f64) {
    if (Op0VT == MVT::f32 || Op0VT == MVT::f16)
    if (Op0VT == MVT::bf16 && IsStrict) {
                         {Op0, Op.getOperand(0)});
    if (Op0VT == MVT::bf16)
  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
  EVT VT = Op.getValueType();
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
  bool Trunc = Op.getConstantOperandVal(IsStrict ? 2 : 1) == 1;
    constexpr EVT I32 = MVT::nxv4i32;
    if (SrcVT == MVT::nxv2f32 || SrcVT == MVT::nxv4f32) {
      if (Subtarget->hasBF16())
        return LowerToPredicatedOp(Op, DAG,
      Narrow = getSVESafeBitCast(I32, SrcVal, DAG);
    } else if (SrcVT == MVT::nxv2f64 &&
                           Pg, SrcVal, DAG.getUNDEF(MVT::nxv2f32));
      NewOps.push_back(Op.getOperand(IsStrict ? 2 : 1));
      return DAG.getNode(Op.getOpcode(), DL, VT, NewOps, Op->getFlags());
      EVT I1 = I32.changeElementType(MVT::i1);
      Narrow = DAG.getSelect(DL, I32, IsNaN, NaN, Narrow);
    return getSVESafeBitCast(VT, Narrow, DAG);
    return LowerFixedLengthFPRoundToSVE(Op, DAG);
      !((Subtarget->hasNEON() || Subtarget->hasSME()) &&
        Subtarget->hasBF16())) {
      Narrow = DAG.getSelect(dl, I32, IsNaN, NaN, Narrow);
      EVT I16 = I32.changeVectorElementType(MVT::i16);
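// The Narrow/IsNaN/NaN sequence above appears to be the software f32->bf16
// narrowing used when no native conversion is available: the f32 payload is
// manipulated as 32-bit integer lanes, NaN inputs are replaced by a canonical
// quiet NaN via the select, and the result is taken from the top 16 bits of
// each lane through the I16 view created from I32; the Trunc flag read earlier
// distinguishes the truncating round from the default rounding behaviour.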
4686 if (SrcVT != MVT::f128) {
4703 bool IsStrict =
Op->isStrictFPOpcode();
4704 EVT InVT =
Op.getOperand(IsStrict ? 1 : 0).getValueType();
4705 EVT VT =
Op.getValueType();
4711 return LowerToPredicatedOp(
Op, DAG, Opcode);
4716 return LowerFixedLengthFPToIntToSVE(
Op, DAG);
4727 {
Op.getOperand(0),
Op.getOperand(1)});
4728 return DAG.
getNode(
Op.getOpcode(), dl, {VT, MVT::Other},
4729 {Ext.getValue(1), Ext.getValue(0)});
4732 Op.getOpcode(), dl,
Op.getValueType(),
4738 if (VTSize < InVTSize) {
4743 {Op.getOperand(0), Op.getOperand(1)});
4753 if (VTSize > InVTSize) {
4760 {
Op.getOperand(0),
Op.getOperand(1)});
4761 return DAG.
getNode(
Op.getOpcode(), dl, {VT, MVT::Other},
4762 {Ext.getValue(1), Ext.getValue(0)});
4765 return DAG.
getNode(
Op.getOpcode(), dl, VT, Ext);
4774 Op.getOperand(IsStrict ? 1 : 0), DAG.
getConstant(0, dl, MVT::i64));
4777 return DAG.
getNode(
Op.getOpcode(), dl, {ScalarVT, MVT::Other},
4778 {Op.getOperand(0), Extract});
4779 return DAG.
getNode(
Op.getOpcode(), dl, ScalarVT, Extract);
4788 bool IsStrict =
Op->isStrictFPOpcode();
4789 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
4792 return LowerVectorFP_TO_INT(
Op, DAG);
4795 if ((SrcVal.
getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
4801 {
Op.getOperand(0), SrcVal});
4802 return DAG.
getNode(
Op.getOpcode(), dl, {Op.getValueType(), MVT::Other},
4803 {Ext.getValue(1), Ext.getValue(0)});
4806 Op.getOpcode(), dl,
Op.getValueType(),
4819AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(
SDValue Op,
4825 EVT DstVT =
Op.getValueType();
4826 EVT SatVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
4831 assert(SatWidth <= DstElementWidth &&
4832 "Saturation width cannot exceed result width");
4845 if ((SrcElementVT == MVT::f16 &&
4846 (!Subtarget->hasFullFP16() || DstElementWidth > 16)) ||
4847 SrcElementVT == MVT::bf16) {
4857 SrcElementVT = MVT::f32;
4858 SrcElementWidth = 32;
4859 }
else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
4860 SrcElementVT != MVT::f16 && SrcElementVT != MVT::bf16)
4865 if (SatWidth == 64 && SrcElementWidth < 64) {
4869 SrcElementVT = MVT::f64;
4870 SrcElementWidth = 64;
4873 if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth) {
4888 if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
4895 SrcVal2 ? DAG.
getNode(
Op.getOpcode(),
DL, IntVT, SrcVal2,
4931 return LowerVectorFP_TO_INT_SAT(
Op, DAG);
4933 EVT DstVT =
Op.getValueType();
4934 EVT SatVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
4937 assert(SatWidth <= DstWidth &&
"Saturation width cannot exceed result width");
4940 if ((SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) || SrcVT == MVT::bf16) {
4943 }
else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16 &&
4949 if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
4950 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
4951 DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
4952 return DAG.
getNode(
Op.getOpcode(),
DL, DstVT, SrcVal,
4958 if (DstWidth < SatWidth)
4982 EVT VT =
Op.getValueType();
5005 bool IsStrict =
Op->isStrictFPOpcode();
5006 EVT VT =
Op.getValueType();
5009 EVT InVT =
In.getValueType();
5010 unsigned Opc =
Op.getOpcode();
5018 In = DAG.
getNode(CastOpc, dl, CastVT, In);
5019 return DAG.
getNode(Opc, dl, VT, In);
5024 return LowerToPredicatedOp(
Op, DAG, Opcode);
5029 return LowerFixedLengthIntToFPToSVE(
Op, DAG);
5036 {Op.getOperand(0), In});
5038 {
Op.getValueType(), MVT::Other},
5049 if (VTSize < InVTSize) {
5054 In = DAG.
getNode(Opc, dl, {CastVT, MVT::Other},
5055 {
Op.getOperand(0), In});
5057 {
In.getValue(1),
In.getValue(0),
5065 if (VTSize > InVTSize) {
5068 In = DAG.
getNode(CastOpc, dl, CastVT, In);
5070 return DAG.
getNode(Opc, dl, {VT, MVT::Other}, {
Op.getOperand(0), In});
5071 return DAG.
getNode(Opc, dl, VT, In);
5082 return DAG.
getNode(
Op.getOpcode(), dl, {ScalarVT, MVT::Other},
5083 {Op.getOperand(0), Extract});
5084 return DAG.
getNode(
Op.getOpcode(), dl, ScalarVT, Extract);
5092 if (
Op.getValueType().isVector())
5093 return LowerVectorINT_TO_FP(
Op, DAG);
5095 bool IsStrict =
Op->isStrictFPOpcode();
5096 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
5101 auto IntToFpViaPromotion = [&](
EVT PromoteVT) {
5105 {Op.getOperand(0), SrcVal});
5107 {
Op.getValueType(), MVT::Other},
5112 DAG.
getNode(
Op.getOpcode(), dl, PromoteVT, SrcVal),
5116 if (
Op.getValueType() == MVT::bf16) {
5117 unsigned MaxWidth = IsSigned
5121 if (MaxWidth <= 24) {
5122 return IntToFpViaPromotion(MVT::f32);
5126 if (MaxWidth <= 53) {
5127 return IntToFpViaPromotion(MVT::f64);
5178 IsStrict ? DAG.
getNode(
Op.getOpcode(),
DL, {MVT::f64, MVT::Other},
5179 {Op.getOperand(0), ToRound})
5180 : DAG.
getNode(
Op.getOpcode(),
DL, MVT::f64, ToRound);
5207 {
Op.getValueType(), MVT::Other},
5216 if (
Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
5217 return IntToFpViaPromotion(MVT::f32);
5226 if (
Op.getValueType() != MVT::f128)
5245 Entry.IsSExt =
false;
5246 Entry.IsZExt =
false;
5247 Args.push_back(Entry);
5250 : RTLIB::SINCOS_STRET_F32;
5261 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
5262 return CallResult.first;
5269 EVT OpVT =
Op.getValueType();
5270 EVT ArgVT =
Op.getOperand(0).getValueType();
5273 return LowerFixedLengthBitcastToSVE(
Op, DAG);
5281 "Expected int->fp bitcast!");
5294 return getSVESafeBitCast(OpVT, ExtResult, DAG);
5305 return getSVESafeBitCast(OpVT,
Op.getOperand(0), DAG);
5308 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
5312 if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
5315 assert(ArgVT == MVT::i16);
5325static std::optional<uint64_t>
5329 return std::nullopt;
5334 return std::nullopt;
5336 return C->getZExtValue();
5341 EVT VT =
N.getValueType();
5346 for (
const SDValue &Elt :
N->op_values()) {
5349 unsigned HalfSize = EltSize / 2;
5351 if (!
isIntN(HalfSize,
C->getSExtValue()))
5354 if (!
isUIntN(HalfSize,
C->getZExtValue()))
5366 EVT VT =
N.getValueType();
5388 unsigned Opcode =
N.getOpcode();
5399 unsigned Opcode =
N.getOpcode();
5420 {Chain, DAG.
getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
5506 Chain, DAG.
getConstant(Intrinsic::aarch64_set_fpcr,
DL, MVT::i64), FPCR};
5539 if (IsN0SExt && IsN1SExt)
5545 if (IsN0ZExt && IsN1ZExt)
5552 if (IsN0ZExt || IsN1ZExt) {
5562 if (IsN0SExt || IsN1SExt) {
5570 if (!IsN1SExt && !IsN1ZExt)
5592 EVT VT =
Op.getValueType();
5601 "unexpected type for custom-lowering ISD::MUL");
5615 if (VT == MVT::v1i64) {
5616 if (Subtarget->hasSVE())
5633 if (Subtarget->hasSVE())
5649 "unexpected types for extended operands to VMULL");
5672 if (VT == MVT::nxv1i1 &&
Pattern == AArch64SVEPredPattern::all)
5679 bool IsSigned,
bool IsEqual) {
5680 if (!isa<ConstantSDNode>(
Op.getOperand(1)) ||
5681 !isa<ConstantSDNode>(
Op.getOperand(2)))
5685 APInt X =
Op.getConstantOperandAPInt(1);
5686 APInt Y =
Op.getConstantOperandAPInt(2);
5691 if (IsSigned ?
Y.isMaxSignedValue() :
Y.isMaxValue())
5695 APInt NumActiveElems =
5696 IsSigned ?
Y.ssub_ov(
X, Overflow) :
Y.usub_ov(
X, Overflow);
5703 NumActiveElems = IsSigned ? NumActiveElems.
sadd_ov(One, Overflow)
5704 : NumActiveElems.
uadd_ov(One, Overflow);
5709 std::optional<unsigned> PredPattern =
5711 unsigned MinSVEVectorSize = std::max(
5713 unsigned ElementSize = 128 /
Op.getValueType().getVectorMinNumElements();
5714 if (PredPattern != std::nullopt &&
5715 NumActiveElems.
getZExtValue() <= (MinSVEVectorSize / ElementSize))
5716 return getPTrue(DAG, dl,
Op.getValueType(), *PredPattern);
5725 EVT InVT =
Op.getValueType();
5729 "Expected a predicate-to-predicate bitcast");
5733 "Only expect to cast between legal scalable predicate types!");
5743 Op.getConstantOperandVal(0) == Intrinsic::aarch64_sve_convert_to_svbool &&
5744 Op.getOperand(1).getValueType().bitsGT(VT))
5745 Op =
Op.getOperand(1);
5776 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
5778 RetTy, Callee, std::move(Args));
5779 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
5825 SDValue TileSlice =
N->getOperand(2);
5828 int32_t ConstAddend = 0;
5834 ConstAddend = cast<ConstantSDNode>(VecNum.
getOperand(1))->getSExtValue();
5836 }
else if (
auto ImmNode = dyn_cast<ConstantSDNode>(VecNum)) {
5837 ConstAddend = ImmNode->getSExtValue();
5841 int32_t ImmAddend = ConstAddend % 16;
5842 if (int32_t
C = (ConstAddend - ImmAddend)) {
5844 VarAddend = VarAddend
5865 {
N.getOperand(0), TileSlice,
Base,
5874 auto Op1 =
Op.getOperand(1);
5875 auto Op2 =
Op.getOperand(2);
5876 auto Mask =
Op.getOperand(3);
5879 EVT Op2VT = Op2.getValueType();
5880 EVT ResVT =
Op.getValueType();
5884 "Expected 8-bit or 16-bit characters.");
5926 ID, Mask, Op1, Op2);
  unsigned IntNo = Op.getConstantOperandVal(1);
  case Intrinsic::aarch64_prefetch: {
    unsigned IsWrite = Op.getConstantOperandVal(3);
    unsigned Locality = Op.getConstantOperandVal(4);
    unsigned IsStream = Op.getConstantOperandVal(5);
    unsigned IsData = Op.getConstantOperandVal(6);
    unsigned PrfOp = (IsWrite << 4) |
  case Intrinsic::aarch64_sme_str:
  case Intrinsic::aarch64_sme_ldr: {
  case Intrinsic::aarch64_sme_za_enable:
  case Intrinsic::aarch64_sme_za_disable:
  unsigned IntNo = Op.getConstantOperandVal(1);
  case Intrinsic::aarch64_mops_memset_tag: {
    auto Node = cast<MemIntrinsicSDNode>(Op.getNode());
    auto Alignment = Node->getMemOperand()->getAlign();
    bool IsVol = Node->isVolatile();
    auto DstPtrInfo = Node->getPointerInfo();
    SDValue MS = SDI.EmitMOPS(AArch64::MOPSMemorySetTaggingPseudo, DAG, DL,
                              Chain, Dst, Val, Size, Alignment, IsVol,
  unsigned IntNo = Op.getConstantOperandVal(0);
  case Intrinsic::thread_pointer: {
  case Intrinsic::aarch64_neon_abs: {
    EVT Ty = Op.getValueType();
    if (Ty == MVT::i64) {
  case Intrinsic::aarch64_neon_pmull64: {
    std::optional<uint64_t> LHSLane =
    std::optional<uint64_t> RHSLane =
    assert((!LHSLane || *LHSLane < 2) && "Expect lane to be None or 0 or 1");
    assert((!RHSLane || *RHSLane < 2) && "Expect lane to be None or 0 or 1");
    auto TryVectorizeOperand = [](SDValue N, std::optional<uint64_t> NLane,
                                  std::optional<uint64_t> OtherLane,
      if (NLane && *NLane == 1)
      if (OtherLane && *OtherLane == 1) {
      if (NLane && *NLane == 0)
      assert(N.getValueType() == MVT::i64 &&
             "Intrinsic aarch64_neon_pmull64 requires i64 parameters");
    LHS = TryVectorizeOperand(LHS, LHSLane, RHSLane, dl, DAG);
    RHS = TryVectorizeOperand(RHS, RHSLane, LHSLane, dl, DAG);
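// TryVectorizeOperand appears to pick the cheapest way to feed each i64 input
// of pmull64 to PMULL/PMULL2: an operand already sitting in lane 1 of a v2i64
// stays there (enabling PMULL2), an operand gets duplicated into lane 1 when
// its partner is in lane 1, and a lane-0 extract is kept in the vector domain
// rather than round-tripping through a GPR.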
6091 case Intrinsic::aarch64_neon_smax:
6093 Op.getOperand(1),
Op.getOperand(2));
6094 case Intrinsic::aarch64_neon_umax:
6096 Op.getOperand(1),
Op.getOperand(2));
6097 case Intrinsic::aarch64_neon_smin:
6099 Op.getOperand(1),
Op.getOperand(2));
6100 case Intrinsic::aarch64_neon_umin:
6102 Op.getOperand(1),
Op.getOperand(2));
6103 case Intrinsic::aarch64_neon_scalar_sqxtn:
6104 case Intrinsic::aarch64_neon_scalar_sqxtun:
6105 case Intrinsic::aarch64_neon_scalar_uqxtn: {
6106 assert(
Op.getValueType() == MVT::i32 ||
Op.getValueType() == MVT::f32);
6107 if (
Op.getValueType() == MVT::i32)
6112 Op.getOperand(1))));
6115 case Intrinsic::aarch64_neon_sqxtn:
6118 case Intrinsic::aarch64_neon_sqxtun:
6121 case Intrinsic::aarch64_neon_uqxtn:
6124 case Intrinsic::aarch64_neon_sqshrn:
6125 if (
Op.getValueType().isVector())
6128 Op.getOperand(1).getValueType(),
6129 Op.getOperand(1),
Op.getOperand(2)));
6131 case Intrinsic::aarch64_neon_sqshrun:
6132 if (
Op.getValueType().isVector())
6135 Op.getOperand(1).getValueType(),
6136 Op.getOperand(1),
Op.getOperand(2)));
6138 case Intrinsic::aarch64_neon_uqshrn:
6139 if (
Op.getValueType().isVector())
6142 Op.getOperand(1).getValueType(),
6143 Op.getOperand(1),
Op.getOperand(2)));
6145 case Intrinsic::aarch64_neon_sqrshrn:
6146 if (
Op.getValueType().isVector())
6151 Op.getOperand(1),
Op.getOperand(2)));
6153 case Intrinsic::aarch64_neon_sqrshrun:
6154 if (
Op.getValueType().isVector())
6159 Op.getOperand(1),
Op.getOperand(2)));
6161 case Intrinsic::aarch64_neon_uqrshrn:
6162 if (
Op.getValueType().isVector())
6168 case Intrinsic::aarch64_sve_whilelo:
6171 case Intrinsic::aarch64_sve_whilelt:
6174 case Intrinsic::aarch64_sve_whilels:
6177 case Intrinsic::aarch64_sve_whilele:
6180 case Intrinsic::aarch64_sve_sunpkhi:
6183 case Intrinsic::aarch64_sve_sunpklo:
6186 case Intrinsic::aarch64_sve_uunpkhi:
6189 case Intrinsic::aarch64_sve_uunpklo:
6192 case Intrinsic::aarch64_sve_clasta_n:
6194 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6195 case Intrinsic::aarch64_sve_clastb_n:
6197 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6198 case Intrinsic::aarch64_sve_lasta:
6200 Op.getOperand(1),
Op.getOperand(2));
6201 case Intrinsic::aarch64_sve_lastb:
6203 Op.getOperand(1),
Op.getOperand(2));
6204 case Intrinsic::aarch64_sve_rev:
6207 case Intrinsic::aarch64_sve_tbl:
6209 Op.getOperand(1),
Op.getOperand(2));
6210 case Intrinsic::aarch64_sve_trn1:
6212 Op.getOperand(1),
Op.getOperand(2));
6213 case Intrinsic::aarch64_sve_trn2:
6215 Op.getOperand(1),
Op.getOperand(2));
6216 case Intrinsic::aarch64_sve_uzp1:
6218 Op.getOperand(1),
Op.getOperand(2));
6219 case Intrinsic::aarch64_sve_uzp2:
6221 Op.getOperand(1),
Op.getOperand(2));
6222 case Intrinsic::aarch64_sve_zip1:
6224 Op.getOperand(1),
Op.getOperand(2));
6225 case Intrinsic::aarch64_sve_zip2:
6227 Op.getOperand(1),
Op.getOperand(2));
6228 case Intrinsic::aarch64_sve_splice:
6230 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6231 case Intrinsic::aarch64_sve_ptrue:
6232 return getPTrue(DAG, dl,
Op.getValueType(),
Op.getConstantOperandVal(1));
6233 case Intrinsic::aarch64_sve_clz:
6235 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6236 case Intrinsic::aarch64_sme_cntsb:
6239 case Intrinsic::aarch64_sme_cntsh: {
6244 case Intrinsic::aarch64_sme_cntsw: {
6250 case Intrinsic::aarch64_sme_cntsd: {
6256 case Intrinsic::aarch64_sve_cnt: {
6259 if (
Data.getValueType().isFloatingPoint())
6262 Op.getOperand(2),
Data,
Op.getOperand(1));
6264 case Intrinsic::aarch64_sve_dupq_lane:
6265 return LowerDUPQLane(
Op, DAG);
6266 case Intrinsic::aarch64_sve_convert_from_svbool:
6267 if (
Op.getValueType() == MVT::aarch64svcount)
6270 case Intrinsic::aarch64_sve_convert_to_svbool:
6271 if (
Op.getOperand(1).getValueType() == MVT::aarch64svcount)
6274 case Intrinsic::aarch64_sve_fneg:
6276 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6277 case Intrinsic::aarch64_sve_frintp:
6279 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6280 case Intrinsic::aarch64_sve_frintm:
6282 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6283 case Intrinsic::aarch64_sve_frinti:
6285 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6286 case Intrinsic::aarch64_sve_frintx:
6288 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6289 case Intrinsic::aarch64_sve_frinta:
6291 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6292 case Intrinsic::aarch64_sve_frintn:
6294 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6295 case Intrinsic::aarch64_sve_frintz:
6297 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6298 case Intrinsic::aarch64_sve_ucvtf:
6300 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6302 case Intrinsic::aarch64_sve_scvtf:
6304 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6306 case Intrinsic::aarch64_sve_fcvtzu:
6308 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6310 case Intrinsic::aarch64_sve_fcvtzs:
6312 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6314 case Intrinsic::aarch64_sve_fsqrt:
6316 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6317 case Intrinsic::aarch64_sve_frecpx:
6319 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6320 case Intrinsic::aarch64_sve_frecpe_x:
6323 case Intrinsic::aarch64_sve_frecps_x:
6325 Op.getOperand(1),
Op.getOperand(2));
6326 case Intrinsic::aarch64_sve_frsqrte_x:
6329 case Intrinsic::aarch64_sve_frsqrts_x:
6331 Op.getOperand(1),
Op.getOperand(2));
6332 case Intrinsic::aarch64_sve_fabs:
6334 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6335 case Intrinsic::aarch64_sve_abs:
6337 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6338 case Intrinsic::aarch64_sve_neg:
6340 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6341 case Intrinsic::aarch64_sve_insr: {
6344 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
6348 Op.getOperand(1), Scalar);
6350 case Intrinsic::aarch64_sve_rbit:
6352 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6354 case Intrinsic::aarch64_sve_revb:
6356 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6357 case Intrinsic::aarch64_sve_revh:
6359 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6360 case Intrinsic::aarch64_sve_revw:
6362 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6363 case Intrinsic::aarch64_sve_revd:
6365 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6366 case Intrinsic::aarch64_sve_sxtb:
6369 Op.getOperand(2),
Op.getOperand(3),
6370 DAG.
getValueType(
Op.getValueType().changeVectorElementType(MVT::i8)),
6372 case Intrinsic::aarch64_sve_sxth:
6375 Op.getOperand(2),
Op.getOperand(3),
6376 DAG.
getValueType(
Op.getValueType().changeVectorElementType(MVT::i16)),
6378 case Intrinsic::aarch64_sve_sxtw:
6381 Op.getOperand(2),
Op.getOperand(3),
6382 DAG.
getValueType(
Op.getValueType().changeVectorElementType(MVT::i32)),
6384 case Intrinsic::aarch64_sve_uxtb:
6387 Op.getOperand(2),
Op.getOperand(3),
6388 DAG.
getValueType(
Op.getValueType().changeVectorElementType(MVT::i8)),
6390 case Intrinsic::aarch64_sve_uxth:
6393 Op.getOperand(2),
Op.getOperand(3),
6394 DAG.
getValueType(
Op.getValueType().changeVectorElementType(MVT::i16)),
6396 case Intrinsic::aarch64_sve_uxtw:
6399 Op.getOperand(2),
Op.getOperand(3),
6400 DAG.
getValueType(
Op.getValueType().changeVectorElementType(MVT::i32)),
6402 case Intrinsic::localaddress: {
6405 unsigned Reg =
RegInfo->getLocalAddressRegister(MF);
6407 Op.getSimpleValueType());
6410 case Intrinsic::eh_recoverfp: {
6415 SDValue IncomingFPOp =
Op.getOperand(2);
6417 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->
getGlobal() :
nullptr);
6420 "llvm.eh.recoverfp must take a function as the first argument");
6421 return IncomingFPOp;
6424 case Intrinsic::aarch64_neon_vsri:
6425 case Intrinsic::aarch64_neon_vsli:
6426 case Intrinsic::aarch64_sve_sri:
6427 case Intrinsic::aarch64_sve_sli: {
6428 EVT Ty =
Op.getValueType();
6435 bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri ||
6436 IntNo == Intrinsic::aarch64_sve_sri;
6438 return DAG.
getNode(Opcode, dl, Ty,
Op.getOperand(1),
Op.getOperand(2),
6442 case Intrinsic::aarch64_neon_srhadd:
6443 case Intrinsic::aarch64_neon_urhadd:
6444 case Intrinsic::aarch64_neon_shadd:
6445 case Intrinsic::aarch64_neon_uhadd: {
6446 bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
6447 IntNo == Intrinsic::aarch64_neon_shadd);
6448 bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
6449 IntNo == Intrinsic::aarch64_neon_urhadd);
6450 unsigned Opcode = IsSignedAdd
6453 return DAG.
getNode(Opcode, dl,
Op.getValueType(),
Op.getOperand(1),
6456 case Intrinsic::aarch64_neon_saddlp:
6457 case Intrinsic::aarch64_neon_uaddlp: {
6458 unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
6461 return DAG.
getNode(Opcode, dl,
Op.getValueType(),
Op.getOperand(1));
6463 case Intrinsic::aarch64_neon_sdot:
6464 case Intrinsic::aarch64_neon_udot:
6465 case Intrinsic::aarch64_sve_sdot:
6466 case Intrinsic::aarch64_sve_udot: {
6467 unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
6468 IntNo == Intrinsic::aarch64_sve_udot)
6471 return DAG.
getNode(Opcode, dl,
Op.getValueType(),
Op.getOperand(1),
6472 Op.getOperand(2),
Op.getOperand(3));
6474 case Intrinsic::aarch64_neon_usdot:
6475 case Intrinsic::aarch64_sve_usdot: {
6477 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6479 case Intrinsic::get_active_lane_mask: {
6483 EVT VT =
Op.getValueType();
6496 Op.getOperand(1),
Op.getOperand(2));
6501 case Intrinsic::aarch64_neon_saddlv:
6502 case Intrinsic::aarch64_neon_uaddlv: {
6503 EVT OpVT =
Op.getOperand(1).getValueType();
6504 EVT ResVT =
Op.getValueType();
6506 ((ResVT == MVT::i32 && (OpVT == MVT::v8i8 || OpVT == MVT::v16i8 ||
6507 OpVT == MVT::v8i16 || OpVT == MVT::v4i16)) ||
6508 (ResVT == MVT::i64 && (OpVT == MVT::v4i32 || OpVT == MVT::v2i32))) &&
6509 "Unexpected aarch64_neon_u/saddlv type");
6515 dl, ResVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64,
Op.getOperand(1));
6519 return EXTRACT_VEC_ELT;
6521 case Intrinsic::experimental_cttz_elts: {
6538 case Intrinsic::experimental_vector_match: {
bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal->getOperand(0))) {
    unsigned NumExtMaskedLoads = 0;
    for (auto *U : Ld->getMask()->users())
      if (isa<MaskedLoadSDNode>(U))
        NumExtMaskedLoads++;
    if (NumExtMaskedLoads <= 1)
  std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
      {std::make_tuple(false, false, false),
      {std::make_tuple(false, false, true),
      {std::make_tuple(false, true, false),
      {std::make_tuple(false, true, true),
      {std::make_tuple(true, false, false),
      {std::make_tuple(true, false, true),
      {std::make_tuple(true, true, false),
      {std::make_tuple(true, true, true),
  auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
  return AddrModes.find(Key)->second;
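// The gather/scatter opcode is keyed on the three address-mode properties, so
// for example std::make_tuple(/*IsScaled=*/true, /*IsSigned=*/false,
// /*NeedsExtend=*/true) selects the scaled, unsigned-extended form. The
// AddrModes.find(Key)->second lookup assumes every combination is present,
// which the eight initializers above guarantee.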
  EVT VT = Op.getValueType();
  EVT IndexVT = Index.getValueType();
         "Cannot lower when not using SVE for fixed vectors!");
      Index.getValueType().getVectorElementType() == MVT::i64 ||
      Mask.getValueType().getVectorElementType() == MVT::i64)
  EVT IndexVT = Index.getValueType();
         "Cannot lower when not using SVE for fixed vectors!");
      Index.getValueType().getVectorElementType() == MVT::i64 ||
      Mask.getValueType().getVectorElementType() == MVT::i64)
  if (PromotedVT != VT)
  assert(LoadNode && "Expected custom lowering of a masked load node");
  EVT VT = Op->getValueType(0);
    return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
  assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
                            {Undef, Undef, Undef, Undef});
  return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
                      ST->getBasePtr(), ST->getMemOperand());
  assert(StoreNode && "Can only custom lower store nodes");
    return LowerFixedLengthVectorStoreToSVE(Op, DAG);
      MemVT == MVT::v4i8) {
        {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
  } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
    return LowerStore128(Op, DAG);
  } else if (MemVT == MVT::i64x8) {
6947 EVT PtrVT =
Base.getValueType();
6948 for (
unsigned i = 0; i < 8; i++) {
6969 bool IsStoreRelease =
6972 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
6973 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
6985 std::swap(StoreValue.first, StoreValue.second);
6988 {StoreNode->getChain(), StoreValue.first, StoreValue.second,
6989 StoreNode->getBasePtr()},
6998 assert(LoadNode &&
"Expected custom lowering of a load node");
7004 EVT PtrVT =
Base.getValueType();
7005 for (
unsigned i = 0; i < 8; i++) {
7019 EVT VT =
Op->getValueType(0);
7020 assert((VT == MVT::v4i16 || VT == MVT::v4i32) &&
"Expected v4i16 or v4i32");
7026 if (Subtarget->requiresStrictAlign() && LoadNode->
getAlign() <
Align(4))
7046 if (VT == MVT::v4i32)
7058 EVT MaskVT =
Mask.getValueType();
7061 const bool HasPassthru = !Passthru.
isUndef();
7065 assert(VecVT.
isVector() &&
"Input to VECTOR_COMPRESS must be vector.");
7074 if (MinElmts != 2 && MinElmts != 4)
7078 if (IsFixedLength) {
7088 DAG.
getUNDEF(ScalableMaskVT), Mask,
7093 DAG.
getUNDEF(ScalableVecVT), Passthru,
7097 MaskVT =
Mask.getValueType();
7106 if (ContainerVT != VecVT) {
7113 DAG.
getConstant(Intrinsic::aarch64_sve_compact,
DL, MVT::i64), Mask, Vec);
7119 DAG.
getConstant(Intrinsic::aarch64_sve_cntp,
DL, MVT::i64), Mask, Mask);
7123 DAG.
getConstant(Intrinsic::aarch64_sve_whilelo,
DL, MVT::i64),
7131 if (IsFixedLength) {
7141 if (ContainerVT != VecVT) {
7143 Compressed = DAG.
getBitcast(VecVT, Compressed);
7151 MVT VT =
Op.getSimpleValueType();
7190 if (
auto *ShiftNo = dyn_cast<ConstantSDNode>(Shifts)) {
7192 MVT VT =
Op.getSimpleValueType();
7195 unsigned int NewShiftNo =
7210 EVT XScalarTy =
X.getValueType();
7215 switch (
Op.getSimpleValueType().SimpleTy) {
7224 ExpVT = MVT::nxv4i32;
7228 ExpVT = MVT::nxv2i64;
7239 AArch64SVEPredPattern::all);
7242 DAG.
getConstant(Intrinsic::aarch64_sve_fscale,
DL, MVT::i64),
7246 if (
X.getValueType() != XScalarTy)
7257 "ADJUST_TRAMPOLINE operation is only supported on Linux.");
7259 return Op.getOperand(0);
7281 Entry.Ty = IntPtrTy;
7283 Args.push_back(Entry);
7285 if (
auto *FI = dyn_cast<FrameIndexSDNode>(Trmp.
getNode())) {
7293 Args.push_back(Entry);
7295 Args.push_back(Entry);
7297 Args.push_back(Entry);
7301 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
7305 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
7306 return CallResult.second;
  switch (Op.getOpcode()) {
    return LowerBITCAST(Op, DAG);
    return LowerGlobalAddress(Op, DAG);
    return LowerGlobalTLSAddress(Op, DAG);
    return LowerPtrAuthGlobalAddress(Op, DAG);
    return LowerADJUST_TRAMPOLINE(Op, DAG);
    return LowerINIT_TRAMPOLINE(Op, DAG);
    return LowerSETCC(Op, DAG);
    return LowerSETCCCARRY(Op, DAG);
    return LowerBR_CC(Op, DAG);
    return LowerSELECT(Op, DAG);
    return LowerSELECT_CC(Op, DAG);
    return LowerJumpTable(Op, DAG);
    return LowerBR_JT(Op, DAG);
    return LowerBRIND(Op, DAG);
    return LowerConstantPool(Op, DAG);
    return LowerBlockAddress(Op, DAG);
    return LowerVASTART(Op, DAG);
    return LowerVACOPY(Op, DAG);
    return LowerVAARG(Op, DAG);
    return LowerFP_ROUND(Op, DAG);
    return LowerFP_EXTEND(Op, DAG);
    return LowerFRAMEADDR(Op, DAG);
    return LowerSPONENTRY(Op, DAG);
    return LowerRETURNADDR(Op, DAG);
    return LowerADDROFRETURNADDR(Op, DAG);
    return LowerCONCAT_VECTORS(Op, DAG);
    return LowerINSERT_VECTOR_ELT(Op, DAG);
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
    return LowerBUILD_VECTOR(Op, DAG);
    return LowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
    return LowerVECTOR_SHUFFLE(Op, DAG);
    return LowerSPLAT_VECTOR(Op, DAG);
    return LowerEXTRACT_SUBVECTOR(Op, DAG);
    return LowerINSERT_SUBVECTOR(Op, DAG);
    return LowerDIV(Op, DAG);
    return LowerMinMax(Op, DAG);
    return LowerVectorSRA_SRL_SHL(Op, DAG);
    return LowerShiftParts(Op, DAG);
    return LowerCTPOP_PARITY(Op, DAG);
    return LowerFCOPYSIGN(Op, DAG);
    return LowerVectorOR(Op, DAG);
    return LowerXOR(Op, DAG);
    return LowerINT_TO_FP(Op, DAG);
    return LowerFP_TO_INT(Op, DAG);
    return LowerFP_TO_INT_SAT(Op, DAG);
    return LowerFSINCOS(Op, DAG);
    return LowerGET_ROUNDING(Op, DAG);
    return LowerSET_ROUNDING(Op, DAG);
    return LowerGET_FPMODE(Op, DAG);
    return LowerSET_FPMODE(Op, DAG);
    return LowerRESET_FPMODE(Op, DAG);
    return LowerMUL(Op, DAG);
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
    return LowerINTRINSIC_VOID(Op, DAG);
    if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
      assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
      return LowerStore128(Op, DAG);
    return LowerSTORE(Op, DAG);
    return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
    return LowerMGATHER(Op, DAG);
    return LowerMSCATTER(Op, DAG);
    return LowerVECREDUCE_SEQ_FADD(Op, DAG);
    return LowerVECREDUCE(Op, DAG);
    return LowerATOMIC_LOAD_AND(Op, DAG);
    return LowerDYNAMIC_STACKALLOC(Op, DAG);
    return LowerVSCALE(Op, DAG);
    return LowerVECTOR_COMPRESS(Op, DAG);
    return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
    EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
        (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
    return LowerToPredicatedOp(Op, DAG,
    return LowerTRUNCATE(Op, DAG);
    return LowerMLOAD(Op, DAG);
      return LowerFixedLengthVectorLoadToSVE(Op, DAG);
    return LowerLOAD(Op, DAG);
    return LowerToScalableOp(Op, DAG);
    return LowerFixedLengthVectorSelectToSVE(Op, DAG);
    return LowerABS(Op, DAG);
    return LowerBitreverse(Op, DAG);
    return LowerCTTZ(Op, DAG);
    return LowerVECTOR_SPLICE(Op, DAG);
    return LowerVECTOR_DEINTERLEAVE(Op, DAG);
    return LowerVECTOR_INTERLEAVE(Op, DAG);
    if (Op.getValueType().isVector())
      return LowerVectorXRINT(Op, DAG);
    assert((Op.getOperand(0).getValueType() == MVT::f16 ||
            Op.getOperand(0).getValueType() == MVT::bf16) &&
           "Expected custom lowering of rounding operations only for f16");
    return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
    assert((Op.getOperand(1).getValueType() == MVT::f16 ||
            Op.getOperand(1).getValueType() == MVT::bf16) &&
           "Expected custom lowering of rounding operations only for f16");
                      {Op.getOperand(0), Op.getOperand(1)});
    return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
                       {Ext.getValue(1), Ext.getValue(0)});
    assert(Op.getOperand(2).getValueType() == MVT::i128 &&
           "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
    std::pair<SDValue, SDValue> Pair =
                       SysRegName, Pair.first, Pair.second);
    return LowerVECTOR_HISTOGRAM(Op, DAG);
                                                  EVT VT,
                                                  bool OverrideNEON) const {
  unsigned Opcode = N->getOpcode();
    unsigned IID = N->getConstantOperandVal(0);
    if (IID < Intrinsic::num_intrinsics)
    if (IID == Intrinsic::aarch64_neon_umull ||
        IID == Intrinsic::aarch64_neon_smull ||
                                                      bool IsVarArg) const {
SDValue AArch64TargetLowering::LowerFormalArguments(
  unsigned NumArgs = Ins.size();
  unsigned CurArgIdx = 0;
  for (unsigned i = 0; i != NumArgs; ++i) {
    if (Ins[i].isOrigArg()) {
      std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[i].getOrigArgIndex();
    if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
    else if (ActualMVT == MVT::i16)
    bool UseVarArgCC = false;
      UseVarArgCC = isVarArg;
    assert(!Res && "Call operand has unhandled type");
  bool IsLocallyStreaming =
      !Attrs.hasStreamingInterface() && Attrs.hasStreamingBody();
  unsigned ExtraArgLocs = 0;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (Ins[i].Flags.isByVal()) {
      int Size = Ins[i].Flags.getByValSize();
      unsigned NumRegs = (Size + 7) / 8;
7911 if (Ins[i].
Flags.isSwiftAsync())
7920 if (RegVT == MVT::i32)
7921 RC = &AArch64::GPR32RegClass;
7922 else if (RegVT == MVT::i64)
7923 RC = &AArch64::GPR64RegClass;
7924 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
7925 RC = &AArch64::FPR16RegClass;
7926 else if (RegVT == MVT::f32)
7927 RC = &AArch64::FPR32RegClass;
7929 RC = &AArch64::FPR64RegClass;
7931 RC = &AArch64::FPR128RegClass;
7935 RC = &AArch64::PPRRegClass;
7936 }
else if (RegVT == MVT::aarch64svcount) {
7938 RC = &AArch64::PPRRegClass;
7941 RC = &AArch64::ZPRRegClass;
7948 if (IsLocallyStreaming) {
7982 "Indirect arguments should be scalable on most subtargets");
8006 !
Ins[i].Flags.isInConsecutiveRegs())
8007 BEAlign = 8 - ArgSize;
8016 unsigned ObjOffset = ArgOffset + BEAlign;
8046 "Indirect arguments should be scalable on most subtargets");
8067 "Indirect arguments should be scalable on most subtargets");
8070 unsigned NumParts = 1;
8071 if (Ins[i].
Flags.isInConsecutiveRegs()) {
8072 while (!Ins[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
8081 while (NumParts > 0) {
8089 DL,
Ptr.getValueType(),
8090 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize));
8093 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize),
DL,
8094 Ptr.getValueType());
8109 if (Ins[i].isOrigArg()) {
8110 Argument *OrigArg =
F.getArg(Ins[i].getOrigArgIndex());
8112 if (!Ins[i].
Flags.isZExt()) {
8126 if (IsLocallyStreaming) {
8128 if (
Attrs.hasStreamingCompatibleInterface()) {
8129 PStateSM = getRuntimePStateSM(DAG, Chain,
DL, MVT::i64);
8142 for (
unsigned I=0;
I<InVals.
size(); ++
I) {
8159 saveVarArgRegisters(CCInfo, DAG,
DL, Chain);
    unsigned VarArgsOffset = CCInfo.getStackSize();

    CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,

  if (!CCInfo.isAllocated(AArch64::X8)) {

    for (unsigned I = 0, E = Ins.size(); I != E; ++I) {

          Ins[I].Flags.isInReg()) &&
          Ins[I].Flags.isSRet()) {

  unsigned StackArgSize = CCInfo.getStackSize();

  if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {

    StackArgSize = alignTo(StackArgSize, 16);

        DAG.getVTList(MVT::i64, MVT::Other), {Chain, SVL});

        {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});

        {Buffer.getValue(1), Buffer.getValue(0)});

        DAG.getVTList(MVT::i64, MVT::Other), Chain);

        DAG.getVTList(MVT::i64, MVT::Other), {Chain, BufferSize});

        {Chain, BufferSize, DAG.getConstant(1, DL, MVT::i64)});

    if (I.Flags.isSwiftSelf() || I.Flags.isSwiftError() ||
        I.Flags.isSwiftAsync()) {

          "Swift attributes can't be used with preserve_none",
void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,

  unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);

  if (GPRSaveSize != 0) {

    if (GPRSaveSize & 15)

    for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {

              MF, GPRIdx, (i - FirstVariadicGPR) * 8)

  if (Subtarget->hasFPARMv8() && !IsWin64) {

    const unsigned NumFPRArgRegs = FPRArgRegs.size();

    unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);

    if (FPRSaveSize != 0) {

      for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {

  if (!MemOps.empty()) {
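// LowerCallResult - Copy the values produced by a call out of the physical
// return registers described by RVLocs into the caller's virtual registers,
// honouring the "returned" (this-return) optimisation and any pending
// streaming-mode change.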
SDValue AArch64TargetLowering::LowerCallResult(
    SDValue ThisVal, bool RequiresSMChange) const {

  for (unsigned i = 0; i != RVLocs.size(); ++i) {

    if (i == 0 && isThisReturn) {

             "unexpected return calling convention register assignment");
  unsigned NumArgs = Outs.size();
  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Outs[i].VT;

    bool UseVarArgCC = false;

    if (IsCalleeWin64) {

      UseVarArgCC = !Outs[i].IsFixed;

    if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)

    else if (ActualMVT == MVT::i16)

    assert(!Res && "Call operand has unhandled type");
bool AArch64TargetLowering::isEligibleForTailCallOptimization(
    const CallLoweringInfo &CLI) const {

  bool IsVarArg = CLI.IsVarArg;

  if (CallerAttrs.requiresSMChange(CalleeAttrs) ||
      CallerAttrs.requiresLazySave(CalleeAttrs) ||
      CallerAttrs.requiresPreservingAllZAState(CalleeAttrs) ||
      CallerAttrs.hasStreamingBody())

  bool CCMatch = CallerCC == CalleeCC;

    if (i->hasByValAttr())

    if (i->hasInRegAttr())

      (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))

    const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);

    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);

    TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
    TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);

    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))

  CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, C);

  if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {

    if (!ArgLoc.isRegLoc())

           A.getValVT().isScalableVector() ||
           "Expected value to be scalable");
                                                    int ClobberedFI) const {

  int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;

    if (FI->getIndex() < 0) {

      int64_t InLastByte = InFirstByte;

      if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
          (FirstByte <= InFirstByte && InFirstByte <= LastByte))

bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
                                                   bool TailCallOpt) const {

  APInt RequredZero(SizeInBits, 0xFE);

  bool ZExtBool = (Bits.Zero & RequredZero) == RequredZero;
  switch (MI.getOpcode()) {
  case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
    RegClass = &AArch64::ZPR2StridedOrContiguousRegClass;
  case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
    RegClass = &AArch64::ZPR4StridedOrContiguousRegClass;

  for (unsigned I = 1; I < MI.getNumOperands(); ++I) {

    assert(MO.isReg() && "Unexpected operand to FORM_TRANSPOSED_REG_TUPLE");

    if (!Def || !Def->getParent()->isCopy())

    unsigned OpSubReg = CopySrc.getSubReg();

    if (!CopySrcOp || !CopySrcOp->isReg() || OpSubReg != SubReg ||
        MRI.getRegClass(CopySrcOp->getReg()) != RegClass)
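// AdjustInstrPostInstrSelection - Post-instruction-selection fixups: drop
// the implicit GPR operands from the MSRpstate pseudos, expand the
// FORM_TRANSPOSED_REG_TUPLE_X2/_X4 pseudos into a REG_SEQUENCE, and apply a
// few other target-specific adjustments.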
void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,

  if (MI.getOpcode() == AArch64::MSRpstatesvcrImm1 ||
      MI.getOpcode() == AArch64::MSRpstatePseudo) {
    for (unsigned I = MI.getNumOperands() - 1; I > 0; --I)

          MO.isReg() && MO.isImplicit() && MO.isDef() &&
          (AArch64::GPR32RegClass.contains(MO.getReg()) ||
           AArch64::GPR64RegClass.contains(MO.getReg())))
        MI.removeOperand(I);

    if (MI.getOperand(0).getImm() == AArch64SVCR::SVCRSM ||
        MI.getOperand(0).getImm() == AArch64SVCR::SVCRSMZA) {

  if (MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
      MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO) {

                      TII->get(TargetOpcode::REG_SEQUENCE),
                      MI.getOperand(0).getReg());

    for (unsigned I = 1; I < MI.getNumOperands(); ++I) {
      MIB.add(MI.getOperand(I));
      MIB.addImm(AArch64::zsub0 + (I - 1));

    MI.eraseFromParent();

      (MI.getOpcode() == AArch64::ADDXri ||
       MI.getOpcode() == AArch64::SUBXri)) {

  assert(PStateSM && "PStateSM should be defined");

  Args.push_back(Entry);

      Callee, std::move(Args));
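// LowerCall - Lower an outgoing call: decide tail-call eligibility, set up
// any SME streaming-mode transition and the TPIDR2 lazy-save protocol around
// the call, marshal the arguments into registers and stack slots, emit the
// call node with the correct preserved-register mask, and copy the results
// back out via LowerCallResult.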
AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,

  bool &IsTailCall = CLI.IsTailCall;

  bool IsVarArg = CLI.IsVarArg;

  bool IsThisReturn = false;

  bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
  bool IsSibCall = false;
  bool GuardWithBTI = false;

  if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
      !Subtarget->noBTIAtReturnTwice()) {

  unsigned NumArgs = Outs.size();

  for (unsigned i = 0; i != NumArgs; ++i) {

                       "currently not supported");

  RetCCInfo.AnalyzeCallResult(Ins, RetCC);

    if (!Loc.isRegLoc())

    return AArch64::ZPRRegClass.contains(Loc.getLocReg()) ||
           AArch64::PPRRegClass.contains(Loc.getLocReg());

  if (any_of(RVLocs, HasSVERegLoc) || any_of(ArgLocs, HasSVERegLoc))

    IsTailCall = isEligibleForTailCallOptimization(CLI);

    if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
                         "site marked musttail");

  if (IsTailCall && !IsSibCall) {

    NumBytes = alignTo(NumBytes, 16);

    FPDiff = NumReusableBytes - NumBytes;

    if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)

    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");

  else if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
    CalleeAttrs = SMEAttrs(ES->getSymbol());

  auto DescribeCallsite =

    if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
      R << ore::NV("Callee", ES->getSymbol());
    else if (CLI.CB && CLI.CB->getCalledFunction())
      R << ore::NV("Callee", CLI.CB->getCalledFunction()->getName());

      R << "unknown callee";
  bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs);
  bool RequiresSaveAllZA =
      CallerAttrs.requiresPreservingAllZAState(CalleeAttrs);
  if (RequiresLazySave) {

    Chain = DAG.getTruncStore(Chain, DL, NumZaSaveSlices, NumZaSaveSlicesAddr,

        DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),

      return DescribeCallsite(R) << " sets up a lazy save for ZA";

  } else if (RequiresSaveAllZA) {

                    "Cannot share state that may not exist");

  bool RequiresSMChange = CallerAttrs.requiresSMChange(CalleeAttrs);
  if (RequiresSMChange) {
    if (CallerAttrs.hasStreamingInterfaceOrBody())

    else if (CallerAttrs.hasNonStreamingInterface())

      PStateSM = getRuntimePStateSM(DAG, Chain, DL, MVT::i64);

      DescribeCallsite(R) << " requires a streaming mode transition";

  bool ShouldPreserveZT0 = CallerAttrs.requiresPreservingZT0(CalleeAttrs);

  if (ShouldPreserveZT0) {

  bool DisableZA = CallerAttrs.requiresDisablingZABeforeCall(CalleeAttrs);
  assert((!DisableZA || !RequiresLazySave) &&
         "Lazy-save should have PSTATE.SM=1 on entry to the function");

  if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {

    for (const auto &F : Forwards) {
9185 unsigned ExtraArgLocs = 0;
9186 for (
unsigned i = 0, e = Outs.
size(); i != e; ++i) {
9204 if (Outs[i].ArgVT == MVT::i1) {
9226 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
9243 "Indirect arguments should be scalable on most subtargets");
9247 unsigned NumParts = 1;
9248 if (Outs[i].
Flags.isInConsecutiveRegs()) {
9249 while (!Outs[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
9251 StoreSize *= NumParts;
9277 DL,
Ptr.getValueType(),
9278 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize));
9281 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize),
DL,
9282 Ptr.getValueType());
9297 if (i == 0 &&
Flags.isReturned() && !
Flags.isSwiftSelf() &&
9298 Outs[0].VT == MVT::i64) {
9300 "unexpected calling convention register assignment");
9301 assert(!
Ins.empty() && Ins[0].VT == MVT::i64 &&
9302 "unexpected use of 'returned'");
9303 IsThisReturn =
true;
9312 [=](
const std::pair<unsigned, SDValue> &Elt) {
9322 return ArgReg.Reg == VA.getLocReg();
9351 OpSize =
Flags.isByVal() ?
Flags.getByValSize() * 8
9353 OpSize = (OpSize + 7) / 8;
9355 !
Flags.isInConsecutiveRegs()) {
9357 BEAlign = 8 - OpSize;
9360 int32_t
Offset = LocMemOffset + BEAlign;
9374 Chain = addTokenForArgument(Chain, DAG, MF.
getFrameInfo(), FI);
9382 if (Outs[i].
Flags.isByVal()) {
9386 Chain,
DL, DstAddr, Arg, SizeNode,
9387 Outs[i].
Flags.getNonZeroByValAlign(),
9424 if (!MemOpChains.
empty())
9428 if (RequiresSMChange) {
9431 DAG.
getVTList(MVT::Other, MVT::Glue), Chain);
9444 for (
auto &RegToPass : RegsToPass) {
9446 RegToPass.second, InGlue);
9454 unsigned OpFlags = 0;
9455 if (
auto *
G = dyn_cast<GlobalAddressSDNode>(Callee)) {
9456 CalledGlobal =
G->getGlobal();
9466 }
else if (
auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
9470 const char *
Sym = S->getSymbol();
9483 if (IsTailCall && !IsSibCall) {
9490 std::vector<SDValue> Ops;
9492 Ops.push_back(Callee);
9499 "tail calls cannot be marked with clang.arc.attachedcall");
9506 Ops.insert(Ops.begin() + 1, GA);
9509 }
else if (GuardWithBTI) {
9523 "Invalid auth call key");
9527 std::tie(IntDisc, AddrDisc) =
9535 Ops.push_back(IntDisc);
9536 Ops.push_back(AddrDisc);
9541 for (
auto &RegToPass : RegsToPass)
9543 RegToPass.second.getValueType()));
9550 Mask =
TRI->getThisReturnPreservedMask(MF, CallConv);
9552 IsThisReturn =
false;
9553 Mask =
TRI->getCallPreservedMask(MF, CallConv);
9556 Mask =
TRI->getCallPreservedMask(MF, CallConv);
9559 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
9561 if (
TRI->isAnyArgRegReserved(MF))
9562 TRI->emitReservedArgRegCallError(MF);
9564 assert(Mask &&
"Missing call preserved mask for calling convention");
9568 Ops.push_back(InGlue);
9576 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
9586 Chain = DAG.
getNode(Opc,
DL, {MVT::Other, MVT::Glue}, Ops);
9597 DoesCalleeRestoreStack(CallConv, TailCallOpt) ?
alignTo(NumBytes, 16) : 0;
9605 Chain, InGlue, CallConv, IsVarArg, RVLocs,
DL, DAG, InVals, IsThisReturn,
9606 IsThisReturn ? OutVals[0] :
SDValue(), RequiresSMChange);
9611 if (RequiresSMChange) {
9612 assert(PStateSM &&
"Expected a PStateSM to be set");
9618 InGlue =
Result.getValue(1);
9621 DAG.
getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
9625 if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs))
9632 if (ShouldPreserveZT0)
9635 {Result, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
9637 if (RequiresLazySave) {
9641 TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
9646 DAG.
getConstant(Intrinsic::aarch64_sme_get_tpidr2,
DL, MVT::i32));
9658 RestoreRoutine, RegMask,
Result.getValue(1)});
9663 DAG.
getConstant(Intrinsic::aarch64_sme_set_tpidr2,
DL, MVT::i32),
9666 }
else if (RequiresSaveAllZA) {
9671 if (RequiresSMChange || RequiresLazySave || ShouldPreserveZT0 ||
9672 RequiresSaveAllZA) {
9673 for (
unsigned I = 0;
I < InVals.
size(); ++
I) {
9689 if (
O.Flags.isSwiftSelf() ||
O.Flags.isSwiftError() ||
9690 O.Flags.isSwiftAsync()) {
9694 "Swift attributes can't be used with preserve_none",
9704bool AArch64TargetLowering::CanLowerReturn(
9709 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
9731 for (
unsigned i = 0, realRVLocIdx = 0; i != RVLocs.
size();
9732 ++i, ++realRVLocIdx) {
9735 SDValue Arg = OutVals[realRVLocIdx];
9741 if (Outs[i].ArgVT == MVT::i1) {
9757 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
9766 llvm::find_if(RetVals, [=](
const std::pair<unsigned, SDValue> &Elt) {
9780 if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface()) {
9781 if (FuncAttrs.hasStreamingCompatibleInterface()) {
9783 assert(
Reg.isValid() &&
"PStateSM Register is invalid");
9795 for (
auto &RetVal : RetVals) {
9796 if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface() &&
9799 RetVal.second.getValueType(), RetVal.second);
9800 Chain = DAG.
getCopyToReg(Chain,
DL, RetVal.first, RetVal.second, Glue);
9803 DAG.
getRegister(RetVal.first, RetVal.second.getValueType()));
9814 unsigned RetValReg = AArch64::X0;
9816 RetValReg = AArch64::X8;
9827 if (AArch64::GPR64RegClass.
contains(*
I))
9829 else if (AArch64::FPR64RegClass.
contains(*
I))
9840 RetOps.push_back(Glue);
9849 getAddr(cast<ExternalSymbolSDNode>(Arm64ECRetDest), DAG, 0);
9852 RetOps.insert(RetOps.begin() + 1, Arm64ECRetDest);
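// The getTargetNode / getGOT / getAddrLarge / getAddr / getAddrTiny helpers
// below wrap global addresses, jump tables, constant pools, block addresses
// and external symbols in the target wrapper nodes appropriate for the GOT,
// large, small and tiny code models respectively.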
                                                  unsigned Flag) const {
                   N->getOffset(), Flag);

                                                  unsigned Flag) const {

                                                  unsigned Flag) const {
                   N->getOffset(), Flag);

                                                  unsigned Flag) const {

                                                  unsigned Flag) const {

template <class NodeTy>
                                      unsigned Flags) const {

template <class NodeTy>
                                      unsigned Flags) const {

template <class NodeTy>
                                      unsigned Flags) const {

template <class NodeTy>
                                      unsigned Flags) const {

         "unexpected offset in global node");

    return getGOT(GN, DAG, OpFlags);

    Result = getAddrLarge(GN, DAG, OpFlags);

    Result = getAddrTiny(GN, DAG, OpFlags);

    Result = getAddr(GN, DAG, OpFlags);
10017AArch64TargetLowering::LowerDarwinGlobalTLSAddress(
SDValue Op,
10020 "This function expects a Darwin target");
10025 const GlobalValue *GV = cast<GlobalAddressSDNode>(
Op)->getGlobal();
10035 PtrMemVT,
DL, Chain, DescAddr,
10189SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(
SDValue SymAddr,
10201 Chain = DAG.
getNode(Opcode,
DL, NodeTys, {Chain, SymAddr});
10208AArch64TargetLowering::LowerELFGlobalTLSAddress(
SDValue Op,
10228 "in local exec TLS model");
10244 return LowerELFTLSLocalExec(GV, ThreadBase,
DL, DAG);
10265 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
10272 GV,
DL, MVT::i64, 0,
10289 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
10297AArch64TargetLowering::LowerWindowsGlobalTLSAddress(
SDValue Op,
10336 Chain =
TLS.getValue(1);
10362 return LowerDarwinGlobalTLSAddress(
Op, DAG);
10364 return LowerELFGlobalTLSAddress(
Op, DAG);
10366 return LowerWindowsGlobalTLSAddress(
Op, DAG);
10403 const auto *TGN = cast<GlobalAddressSDNode>(TGA.
getNode());
10404 assert(TGN->getGlobal()->hasExternalWeakLinkage());
10410 if (TGN->getOffset() != 0)
10412 "unsupported non-zero offset in weak ptrauth global reference");
10419 {TGA, Key, Discriminator}),
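// LowerPtrAuthGlobalAddress - Lower a reference to a ptrauth-qualified
// global. A rough example of the IR form this handles (hypothetical names):
//   @signed_fp = constant ptr ptrauth (ptr @callee, i32 0, i64 1234)
// The key and the 16-bit constant discriminator are validated, any constant
// offset is folded into the target global, and a GOT load is emitted when
// the global cannot be materialised directly.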
10424AArch64TargetLowering::LowerPtrAuthGlobalAddress(
SDValue Op,
10427 uint64_t KeyC =
Op.getConstantOperandVal(1);
10428 SDValue AddrDiscriminator =
Op.getOperand(2);
10429 uint64_t DiscriminatorC =
Op.getConstantOperandVal(3);
10430 EVT VT =
Op.getValueType();
10438 if (!isUInt<16>(DiscriminatorC))
10440 "constant discriminator in ptrauth global out of range [0, 0xffff]");
10446 int64_t PtrOffsetC = 0;
10448 PtrOffsetC =
Ptr.getConstantOperandVal(1);
10449 Ptr =
Ptr.getOperand(0);
10451 const auto *PtrN = cast<GlobalAddressSDNode>(
Ptr.getNode());
10455 const unsigned OpFlags =
10459 "unsupported non-GOT op flags on ptrauth global reference");
10462 PtrOffsetC += PtrN->getOffset();
10465 assert(PtrN->getTargetFlags() == 0 &&
10466 "unsupported target flags on ptrauth global");
10471 ? AddrDiscriminator
10475 if (!NeedsGOTLoad) {
10479 {TPtr, Key, TAddrDiscriminator, Discriminator}),
10488 {TPtr, Key, TAddrDiscriminator, Discriminator}),
10503 cast<VTSDNode>(Val.
getOperand(1))->getVT().getFixedSizeInBits() -
10525 bool ProduceNonFlagSettingCondBr =
10531 if (
LHS.getValueType() == MVT::f128) {
10536 if (!
RHS.getNode()) {
10556 OFCC = getInvertedCondCode(OFCC);
10563 if (
LHS.getValueType().isInteger()) {
10565 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
10570 if (RHSC && RHSC->
getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
10577 isa<ConstantSDNode>(
LHS.getOperand(1)) &&
10593 isa<ConstantSDNode>(
LHS.getOperand(1)) &&
10610 DAG.
getConstant(SignBitPos, dl, MVT::i64), Dest);
10614 LHS.getOpcode() !=
ISD::AND && ProduceNonFlagSettingCondBr) {
10621 DAG.
getConstant(SignBitPos, dl, MVT::i64), Dest);
10630 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::bf16 ||
10631 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
10656 EVT VT =
Op.getValueType();
10684 return getSVESafeBitCast(VT,
Op, DAG);
10691 auto SetVecVal = [&](
int Idx = -1) {
10698 VecVal1 = BitCast(VecVT, In1, DAG);
10699 VecVal2 = BitCast(VecVT, In2, DAG);
10705 }
else if (VT == MVT::f64) {
10706 VecVT = MVT::v2i64;
10707 SetVecVal(AArch64::dsub);
10708 }
else if (VT == MVT::f32) {
10709 VecVT = MVT::v4i32;
10710 SetVecVal(AArch64::ssub);
10711 }
else if (VT == MVT::f16 || VT == MVT::bf16) {
10712 VecVT = MVT::v8i16;
10713 SetVecVal(AArch64::hsub);
10724 if (VT == MVT::f64 || VT == MVT::v2f64) {
10733 if (VT == MVT::f16 || VT == MVT::bf16)
10735 if (VT == MVT::f32)
10737 if (VT == MVT::f64)
10740 return BitCast(VT, BSP, DAG);
10746 Attribute::NoImplicitFloat))
10749 EVT VT =
Op.getValueType();
10763 if (VT == MVT::i32 && IsParity)
10774 if (VT == MVT::i32 || VT == MVT::i64) {
10775 if (VT == MVT::i32)
10781 if (VT == MVT::i32)
10788 }
else if (VT == MVT::i128) {
10799 assert(!IsParity &&
"ISD::PARITY of vector types not supported");
10801 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
10802 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
10803 "Unexpected type for custom ctpop lowering");
10811 EVT DT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
10815 if (VT == MVT::v2i64) {
10818 }
else if (VT == MVT::v2i32) {
10820 }
else if (VT == MVT::v4i32) {
10830 unsigned EltSize = 8;
10843 EVT VT =
Op.getValueType();
10856 EVT VT =
Op.getValueType();
10858 unsigned Opcode =
Op.getOpcode();
10902 EVT VT =
Op.getValueType();
10953 SmallVector<std::pair<SDValue, SDValue>, 16> &WorkList) {
10959 N =
N->getOperand(0);
10963 WorkList.push_back(std::make_pair(
N->getOperand(0),
N->getOperand(1)));
10969 if (
N->getOpcode() !=
ISD::OR || !
N->hasOneUse())
10983 EVT VT =
N->getValueType(0);
10993 unsigned NumXors = 0;
10998 std::tie(XOR0, XOR1) = WorkList[0];
11001 for (
unsigned I = 1;
I < WorkList.
size();
I++) {
11002 std::tie(XOR0, XOR1) = WorkList[
I];
11004 Cmp = DAG.
getNode(LogicOp,
DL, VT, Cmp, CmpChain);
11016 if (
Op.getValueType().isVector())
11017 return LowerVSETCC(
Op, DAG);
11019 bool IsStrict =
Op->isStrictFPOpcode();
11021 unsigned OpNo = IsStrict ? 1 : 0;
11024 Chain =
Op.getOperand(0);
11031 EVT VT =
Op.getValueType();
11037 if (
LHS.getValueType() == MVT::f128) {
11042 if (!
RHS.getNode()) {
11043 assert(
LHS.getValueType() ==
Op.getValueType() &&
11044 "Unexpected setcc expansion!");
11049 if (
LHS.getValueType().isInteger()) {
11065 assert(
LHS.getValueType() == MVT::bf16 ||
LHS.getValueType() == MVT::f16 ||
11066 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
11110 EVT VT =
LHS.getValueType();
11111 if (VT != MVT::i32 && VT != MVT::i64)
11119 LHS, RHS, InvCarry);
11121 EVT OpVT =
Op.getValueType();
11141 if (
LHS.getValueType() == MVT::f128) {
11146 if (!
RHS.getNode()) {
11153 if ((
LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
11154 LHS.getValueType() == MVT::bf16) {
11160 if (
LHS.getValueType().isInteger()) {
11162 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
11173 EVT VT =
LHS.getValueType();
11186 LHS.getValueType() ==
RHS.getValueType()) {
11187 EVT VT =
LHS.getValueType();
11193 Shift = DAG.
getNOT(dl, Shift, VT);
11206 }
else if (CTVal && CFVal && CTVal->
isOne() && CFVal->
isZero()) {
11226 }
else if (CTVal && CFVal) {
11234 if (TrueVal == ~FalseVal) {
11236 }
else if (FalseVal > std::numeric_limits<int64_t>::min() &&
11237 TrueVal == -FalseVal) {
11248 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
11251 if (TrueVal32 > FalseVal32) {
11260 if ((TrueVal64 == FalseVal64 + 1) || (TrueVal64 + 1 == FalseVal64)) {
11263 if (TrueVal > FalseVal) {
11294 if (CTVal && CTVal == RHSVal && AArch64CC ==
AArch64CC::EQ)
11296 else if (CFVal && CFVal == RHSVal && AArch64CC ==
AArch64CC::NE)
11299 assert (CTVal && CFVal &&
"Expected constant operands for CSNEG.");
11313 return DAG.
getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
11317 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::f32 ||
11318 LHS.getValueType() == MVT::f64);
11332 if (RHSVal && RHSVal->
isZero()) {
11340 CFVal && CFVal->
isZero() &&
11363 EVT Ty =
Op.getValueType();
11364 auto Idx =
Op.getConstantOperandAPInt(2);
11365 int64_t IdxVal =
Idx.getSExtValue();
11367 "Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
11376 std::optional<unsigned> PredPattern;
11394 if (IdxVal >= 0 && (IdxVal *
BlockSize / 8) < 256)
11408 return LowerSELECT_CC(
CC, LHS, RHS, TVal, FVal,
DL, DAG);
11418 EVT Ty =
Op.getValueType();
11419 if (Ty == MVT::aarch64svcount) {
11466 CC = cast<CondCodeSDNode>(CCVal.
getOperand(2))->get();
11475 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
11482 SDValue Res = LowerSELECT_CC(
CC, LHS, RHS, TVal, FVal,
DL, DAG);
11484 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
11500 return getAddrLarge(JT, DAG);
11502 return getAddrTiny(JT, DAG);
11503 return getAddr(JT, DAG);
11513 int JTI = cast<JumpTableSDNode>(
JT.getNode())->getIndex();
11521 "aarch64-jump-table-hardening")) {
11529 "jump table hardening only supported on MachO/ELF");
11560 std::optional<uint16_t> BADisc =
11572 {Dest,
Key, Disc, AddrDisc, Chain});
11583 return getGOT(CP, DAG);
11586 return getAddrLarge(CP, DAG);
11588 return getAddrTiny(CP, DAG);
11590 return getAddr(CP, DAG);
11598 if (std::optional<uint16_t> BADisc =
11613 {TargetBA,
Key, AddrDisc, Disc});
11621 return getAddrLarge(BAN, DAG);
11623 return getAddrTiny(BAN, DAG);
11625 return getAddr(BAN, DAG);
11637 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
11668 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
11686 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
11758 return LowerWin64_VASTART(
Op, DAG);
11760 return LowerDarwin_VASTART(
Op, DAG);
11762 return LowerAAPCS_VASTART(
Op, DAG);
11771 unsigned VaListSize =
11775 const Value *DestSV = cast<SrcValueSDNode>(
Op.getOperand(3))->getValue();
11776 const Value *SrcSV = cast<SrcValueSDNode>(
Op.getOperand(4))->getValue();
11780 Align(PtrSize),
false,
false,
nullptr,
11787 "automatic va_arg instruction only works on Darwin");
11789 const Value *
V = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
11790 EVT VT =
Op.getValueType();
11805 "currently not supported");
11822 ArgSize = std::max(ArgSize, MinSlotSize);
11823 bool NeedFPTrunc =
false;
11826 NeedFPTrunc =
true;
11860 EVT VT =
Op.getValueType();
11862 unsigned Depth =
Op.getConstantOperandVal(0);
11886#define GET_REGISTER_MATCHER
11887#include "AArch64GenAsmMatcher.inc"
11894 if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
11896 unsigned DwarfRegNum =
MRI->getDwarfRegNum(Reg,
false);
11898 !
MRI->isReservedReg(MF, Reg))
11911 EVT VT =
Op.getValueType();
11927 EVT VT =
Op.getValueType();
11929 unsigned Depth =
Op.getConstantOperandVal(0);
11932 SDValue FrameAddr = LowerFRAMEADDR(
Op, DAG);
11949 if (Subtarget->hasPAuth()) {
11977 bool OptForSize)
const {
11978 bool IsLegal =
false;
11987 const APInt ImmInt = Imm.bitcastToAPInt();
11988 if (VT == MVT::f64)
11990 else if (VT == MVT::f32)
11992 else if (VT == MVT::f16 || VT == MVT::bf16)
12002 if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
12011 "Should be able to build any value with at most 4 moves");
12012 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 4 : 2));
12013 IsLegal =
Insn.size() <= Limit;
12017 <<
" imm value: "; Imm.dump(););
12029 if ((ST->hasNEON() &&
12030 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
12031 VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
12032 VT == MVT::v4f32)) ||
12034 (VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
12041 constexpr unsigned AccurateBits = 8;
12043 ExtraSteps = DesiredBits <= AccurateBits
12048 return DAG.
getNode(Opcode,
SDLoc(Operand), VT, Operand);
12058 EVT VT =
Op.getValueType();
12065AArch64TargetLowering::getSqrtResultForDenormInput(
SDValue Op,
12074 bool Reciprocal)
const {
12078 DAG, ExtraSteps)) {
12086 for (
int i = ExtraSteps; i > 0; --i) {
12104 int &ExtraSteps)
const {
12107 DAG, ExtraSteps)) {
12115 for (
int i = ExtraSteps; i > 0; --i) {
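// Inline assembly support. LowerXConstraint picks a register class letter
// for the 'X' constraint, and the helpers that follow map the AArch64
// predicate constraints ("Uph", "Upl", "Upa") and the reduced-GPR
// constraints ("Uci", "Ucj") onto the corresponding register classes.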
const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {

  if (!Subtarget->hasFPARMv8())

static std::optional<std::pair<unsigned, const TargetRegisterClass *>>

      Constraint[1] != 'p')
    return std::nullopt;

  Constraint = Constraint.substr(2, Constraint.size() - 3);
  bool IsPredicateAsCount = Constraint.starts_with("n");
  if (IsPredicateAsCount)

    return std::nullopt;

  if (IsPredicateAsCount)
    return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);

  return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);

static std::optional<PredicateConstraint>

          .Case("Uph", PredicateConstraint::Uph)
          .Case("Upl", PredicateConstraint::Upl)
          .Case("Upa", PredicateConstraint::Upa)

  if (VT != MVT::aarch64svcount &&

  switch (Constraint) {
  case PredicateConstraint::Uph:
    return VT == MVT::aarch64svcount ? &AArch64::PNR_p8to15RegClass
                                     : &AArch64::PPR_p8to15RegClass;
  case PredicateConstraint::Upl:
    return VT == MVT::aarch64svcount ? &AArch64::PNR_3bRegClass
                                     : &AArch64::PPR_3bRegClass;
  case PredicateConstraint::Upa:
    return VT == MVT::aarch64svcount ? &AArch64::PNRRegClass
                                     : &AArch64::PPRRegClass;

static std::optional<ReducedGprConstraint>

          .Case("Uci", ReducedGprConstraint::Uci)
          .Case("Ucj", ReducedGprConstraint::Ucj)

  switch (Constraint) {
  case ReducedGprConstraint::Uci:
    return &AArch64::MatrixIndexGPR32_8_11RegClass;
  case ReducedGprConstraint::Ucj:
    return &AArch64::MatrixIndexGPR32_12_15RegClass;
12299SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
12301 const AsmOperandInfo &OpInfo,
SelectionDAG &DAG)
const {
12306 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
12307 OpInfo.ConstraintVT.getSizeInBits() < 8)
12322 if (OpInfo.ConstraintVT.getSizeInBits() <= 32)
12333AArch64TargetLowering::getConstraintType(
StringRef Constraint)
const {
12334 if (Constraint.
size() == 1) {
12335 switch (Constraint[0]) {
12372AArch64TargetLowering::getSingleConstraintMatchWeight(
12373 AsmOperandInfo &
info,
const char *constraint)
const {
12375 Value *CallOperandVal =
info.CallOperandVal;
12378 if (!CallOperandVal)
12382 switch (*constraint) {
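// getRegForInlineAsmConstraint - Map inline-asm register constraints
// (single letters such as "r" and "w", the predicate and reduced-GPR
// constraints above, and explicit names like "{cc}", "{za}", "{zt0}" and
// "{vN}") onto concrete registers or register classes, rejecting FP/SIMD
// and SVE classes when the subtarget lacks the feature. For example, a
// hypothetical use such as
//   asm("fmov %d0, %d1" : "=w"(out) : "w"(in));
// resolves the "w" constraint through the FPR register classes below.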
12404std::pair<unsigned, const TargetRegisterClass *>
12405AArch64TargetLowering::getRegForInlineAsmConstraint(
12407 if (Constraint.
size() == 1) {
12408 switch (Constraint[0]) {
12411 return std::make_pair(0U,
nullptr);
12413 return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
12415 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
12416 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
12418 if (!Subtarget->hasFPARMv8())
12422 return std::make_pair(0U, &AArch64::ZPRRegClass);
12423 return std::make_pair(0U,
nullptr);
12425 if (VT == MVT::Other)
12429 return std::make_pair(0U, &AArch64::FPR16RegClass);
12431 return std::make_pair(0U, &AArch64::FPR32RegClass);
12433 return std::make_pair(0U, &AArch64::FPR64RegClass);
12435 return std::make_pair(0U, &AArch64::FPR128RegClass);
12441 if (!Subtarget->hasFPARMv8())
12444 return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
12446 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
12449 if (!Subtarget->hasFPARMv8())
12452 return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
12460 return std::make_pair(0U, RegClass);
12464 return std::make_pair(0U, RegClass);
12466 if (
StringRef(
"{cc}").equals_insensitive(Constraint) ||
12468 return std::make_pair(
unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
12470 if (Constraint ==
"{za}") {
12471 return std::make_pair(
unsigned(AArch64::ZA), &AArch64::MPRRegClass);
12474 if (Constraint ==
"{zt0}") {
12475 return std::make_pair(
unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
12480 std::pair<unsigned, const TargetRegisterClass *> Res;
12485 unsigned Size = Constraint.
size();
12486 if ((
Size == 4 ||
Size == 5) && Constraint[0] ==
'{' &&
12487 tolower(Constraint[1]) ==
'v' && Constraint[
Size - 1] ==
'}') {
12490 if (!
Failed && RegNo >= 0 && RegNo <= 31) {
12495 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
12496 Res.second = &AArch64::FPR64RegClass;
12498 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
12499 Res.second = &AArch64::FPR128RegClass;
12505 if (Res.second && !Subtarget->hasFPARMv8() &&
12506 !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
12507 !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
12508 return std::make_pair(0U,
nullptr);
12515 bool AllowUnknown)
const {
12516 if (Subtarget->hasLS64() && Ty->
isIntegerTy(512))
12517 return EVT(MVT::i64x8);
12524void AArch64TargetLowering::LowerAsmOperandForConstraint(
12530 if (Constraint.
size() != 1)
12533 char ConstraintLetter = Constraint[0];
12534 switch (ConstraintLetter) {
12545 if (
Op.getValueType() == MVT::i64)
12546 Result = DAG.
getRegister(AArch64::XZR, MVT::i64);
12548 Result = DAG.
getRegister(AArch64::WZR, MVT::i32);
12570 switch (ConstraintLetter) {
12578 if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
12583 if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
12584 CVal =
C->getSExtValue();
12611 if (!isUInt<32>(CVal))
12615 if ((CVal & 0xFFFF) == CVal)
12617 if ((CVal & 0xFFFF0000ULL) == CVal)
12620 if ((NCVal & 0xFFFFULL) == NCVal)
12622 if ((NCVal & 0xFFFF0000ULL) == NCVal)
12629 if ((CVal & 0xFFFFULL) == CVal)
12631 if ((CVal & 0xFFFF0000ULL) == CVal)
12633 if ((CVal & 0xFFFF00000000ULL) == CVal)
12635 if ((CVal & 0xFFFF000000000000ULL) == CVal)
12638 if ((NCVal & 0xFFFFULL) == NCVal)
12640 if ((NCVal & 0xFFFF0000ULL) == NCVal)
12642 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
12644 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
12658 Ops.push_back(Result);
12685 EVT EltType = V.getValueType().getVectorElementType();
12695 EVT VT =
Op.getValueType();
12697 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
12701 if (VT != MVT::v16i8 && VT != MVT::v8i8)
12705 assert((NumElts == 8 || NumElts == 16) &&
12706 "Need to have exactly 8 or 16 elements in vector.");
12712 for (
unsigned i = 0; i < NumElts; ++i) {
12717 SDValue OperandSourceVec = V.getOperand(0);
12719 SourceVec = OperandSourceVec;
12720 else if (SourceVec != OperandSourceVec)
12726 SDValue MaskSource = V.getOperand(1);
12728 if (!isa<ConstantSDNode>(MaskSource.
getOperand(1)))
12733 }
else if (!AndMaskConstants.
empty()) {
12747 if (!isa<ConstantSDNode>(MaskIdx) ||
12748 !cast<ConstantSDNode>(MaskIdx)->getConstantIntValue()->equalsInt(i))
12753 if (!MaskSourceVec) {
12757 }
else if (MaskSourceVec != MaskSource->
getOperand(0)) {
12771 if (!AndMaskConstants.
empty())
12777 DAG.
getConstant(Intrinsic::aarch64_neon_tbl1, dl, MVT::i32), SourceVec,
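// ReconstructShuffle - Turn a BUILD_VECTOR whose operands are all
// extract_vector_elt of a small number of source vectors into a
// VECTOR_SHUFFLE (or a TBL3/TBL4 sequence when three or four sources are
// involved), which lowers to much better code than element-by-element
// inserts.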
12786 LLVM_DEBUG(
dbgs() <<
"AArch64TargetLowering::ReconstructShuffle\n");
12788 EVT VT =
Op.getValueType();
12790 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
12793 struct ShuffleSourceInfo {
12808 ShuffleSourceInfo(
SDValue Vec)
12809 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
12810 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
12812 bool operator ==(
SDValue OtherVec) {
return Vec == OtherVec; }
12818 for (
unsigned i = 0; i < NumElts; ++i) {
12823 !isa<ConstantSDNode>(V.getOperand(1)) ||
12824 V.getOperand(0).getValueType().isScalableVector()) {
12826 dbgs() <<
"Reshuffle failed: "
12827 "a shuffle can only come from building a vector from "
12828 "various elements of other fixed-width vectors, provided "
12829 "their indices are constant\n");
12834 SDValue SourceVec = V.getOperand(0);
12835 auto Source =
find(Sources, SourceVec);
12836 if (Source == Sources.
end())
12837 Source = Sources.
insert(Sources.
end(), ShuffleSourceInfo(SourceVec));
12840 unsigned EltNo = V.getConstantOperandVal(1);
12841 Source->MinElt = std::min(Source->MinElt, EltNo);
12842 Source->MaxElt = std::max(Source->MaxElt, EltNo);
12847 if ((Sources.
size() == 3 || Sources.
size() == 4) && NumElts > 4) {
12852 for (
unsigned I = 0;
I < NumElts; ++
I) {
12855 for (
unsigned OF = 0; OF < OutputFactor; OF++)
12856 Mask.push_back(-1);
12862 unsigned Lane = V.getConstantOperandVal(1);
12863 for (
unsigned S = 0; S < Sources.
size(); S++) {
12864 if (V.getOperand(0) == Sources[S].Vec) {
12865 unsigned InputSize = Sources[S].Vec.getScalarValueSizeInBits();
12866 unsigned InputBase = 16 * S + Lane * InputSize / 8;
12867 for (
unsigned OF = 0; OF < OutputFactor; OF++)
12868 Mask.push_back(InputBase + OF);
12878 ? Intrinsic::aarch64_neon_tbl3
12879 : Intrinsic::aarch64_neon_tbl4,
12881 for (
unsigned i = 0; i < Sources.
size(); i++) {
12882 SDValue Src = Sources[i].Vec;
12883 EVT SrcVT = Src.getValueType();
12886 "Expected a legally typed vector");
12894 for (
unsigned i = 0; i < Mask.size(); i++)
12896 assert((Mask.size() == 8 || Mask.size() == 16) &&
12897 "Expected a v8i8 or v16i8 Mask");
12899 DAG.
getBuildVector(Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, dl, TBLMask));
12903 Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
12907 if (Sources.
size() > 2) {
12908 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: currently only do something "
12909 <<
"sensible when at most two source vectors are "
12917 for (
auto &Source : Sources) {
12918 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
12919 if (SrcEltTy.
bitsLT(SmallestEltTy)) {
12920 SmallestEltTy = SrcEltTy;
12923 unsigned ResMultiplier =
12932 for (
auto &Src : Sources) {
12933 EVT SrcVT = Src.ShuffleVec.getValueType();
12946 assert(2 * SrcVTSize == VTSize);
12951 DAG.
getUNDEF(Src.ShuffleVec.getValueType()));
12957 dbgs() <<
"Reshuffle failed: result vector too small to extract\n");
12961 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
12963 dbgs() <<
"Reshuffle failed: span too large for a VEXT to cope\n");
12967 if (Src.MinElt >= NumSrcElts) {
12972 Src.WindowBase = -NumSrcElts;
12973 }
else if (Src.MaxElt < NumSrcElts) {
12990 dbgs() <<
"Reshuffle failed: don't know how to lower AArch64ISD::EXT "
12991 "for SVE vectors.");
12998 Src.WindowBase = -Src.MinElt;
13005 for (
auto &Src : Sources) {
13006 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
13007 if (SrcEltTy == SmallestEltTy)
13018 Src.WindowBase *= Src.WindowScale;
13023 for (
auto Src : Sources)
13024 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
13032 if (Entry.isUndef())
13035 auto Src =
find(Sources, Entry.getOperand(0));
13036 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
13041 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
13044 int LanesDefined = BitsDefined / BitsPerShuffleLane;
13048 int *LaneMask = &Mask[i * ResMultiplier];
13050 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
13051 ExtractBase += NumElts * (Src - Sources.
begin());
13052 for (
int j = 0; j < LanesDefined; ++j)
13053 LaneMask[j] = ExtractBase + j;
13058 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: illegal shuffle mask\n");
13063 for (
unsigned i = 0; i < Sources.
size(); ++i)
13076 dbgs() <<
"Reshuffle, creating node: "; V.dump(););
13095 unsigned ExpectedElt = Imm;
13096 for (
unsigned i = 1; i < NumElts; ++i) {
13100 if (ExpectedElt == NumElts)
13105 if (ExpectedElt !=
static_cast<unsigned>(M[i]))
13116 if (V.getValueType() != MVT::v16i8)
13118 assert(V.getNumOperands() == 16 &&
"Expected 16 operands on the BUILDVECTOR");
13120 for (
unsigned X = 0;
X < 4;
X++) {
13123 SDValue BaseExt = V.getOperand(
X * 4);
13127 !isa<ConstantSDNode>(BaseExt.
getOperand(1)) ||
13132 for (
unsigned Y = 1;
Y < 4;
Y++) {
13135 Ext.getOperand(0) !=
Base ||
13136 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
13137 Ext.getConstantOperandVal(1) !=
Y)
13148 V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
13149 V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
13151 if (V.getValueType() == MVT::v4i32)
13167 unsigned &DupLaneOp) {
13169 "Only possible block sizes for wide DUP are: 16, 32, 64");
13188 for (
size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
13189 for (
size_t I = 0;
I < NumEltsPerBlock;
I++) {
13190 int Elt = M[BlockIndex * NumEltsPerBlock +
I];
13194 if ((
unsigned)Elt >= SingleVecNumElements)
13196 if (BlockElts[
I] < 0)
13197 BlockElts[
I] = Elt;
13198 else if (BlockElts[
I] != Elt)
13207 auto FirstRealEltIter =
find_if(BlockElts, [](
int Elt) {
return Elt >= 0; });
13208 assert(FirstRealEltIter != BlockElts.
end() &&
13209 "Shuffle with all-undefs must have been caught by previous cases, "
13211 if (FirstRealEltIter == BlockElts.
end()) {
13217 size_t FirstRealIndex = FirstRealEltIter - BlockElts.
begin();
13219 if ((
unsigned)*FirstRealEltIter < FirstRealIndex)
13222 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
13225 if (Elt0 % NumEltsPerBlock != 0)
13229 for (
size_t I = 0;
I < NumEltsPerBlock;
I++)
13230 if (BlockElts[
I] >= 0 && (
unsigned)BlockElts[
I] != Elt0 +
I)
13233 DupLaneOp = Elt0 / NumEltsPerBlock;
13242 const int *FirstRealElt =
find_if(M, [](
int Elt) {
return Elt >= 0; });
13247 APInt ExpectedElt =
APInt(MaskBits, *FirstRealElt + 1,
false,
13251 bool FoundWrongElt = std::any_of(FirstRealElt + 1, M.end(), [&](
int Elt) {
13252 return Elt != ExpectedElt++ && Elt != -1;
13284 if (NumElts % 2 != 0)
13286 WhichResult = (M[0] == 0 ? 0 : 1);
13287 unsigned Idx = WhichResult * NumElts / 2;
13288 for (
unsigned i = 0; i != NumElts; i += 2) {
13289 if ((M[i] >= 0 && (
unsigned)M[i] !=
Idx) ||
13303 WhichResult = (M[0] == 0 ? 0 : 1);
13304 for (
unsigned j = 0; j != 2; ++j) {
13305 unsigned Idx = WhichResult;
13306 for (
unsigned i = 0; i != Half; ++i) {
13307 int MIdx = M[i + j * Half];
13308 if (MIdx >= 0 && (
unsigned)MIdx !=
Idx)
13322 if (NumElts % 2 != 0)
13324 WhichResult = (M[0] == 0 ? 0 : 1);
13325 for (
unsigned i = 0; i < NumElts; i += 2) {
13326 if ((M[i] >= 0 && (
unsigned)M[i] != i + WhichResult) ||
13327 (M[i + 1] >= 0 && (
unsigned)M[i + 1] != i + WhichResult))
13334 bool &DstIsLeft,
int &Anomaly) {
13335 if (M.size() !=
static_cast<size_t>(NumInputElements))
13338 int NumLHSMatch = 0, NumRHSMatch = 0;
13339 int LastLHSMismatch = -1, LastRHSMismatch = -1;
13341 for (
int i = 0; i < NumInputElements; ++i) {
13351 LastLHSMismatch = i;
13353 if (M[i] == i + NumInputElements)
13356 LastRHSMismatch = i;
13359 if (NumLHSMatch == NumInputElements - 1) {
13361 Anomaly = LastLHSMismatch;
13363 }
else if (NumRHSMatch == NumInputElements - 1) {
13365 Anomaly = LastRHSMismatch;
13378 for (
int I = 0, E = NumElts / 2;
I != E;
I++) {
13383 int Offset = NumElts / 2;
13384 for (
int I = NumElts / 2, E = NumElts;
I != E;
I++) {
13385 if (Mask[
I] !=
I + SplitLHS *
Offset)
13394 EVT VT =
Op.getValueType();
13429 unsigned OpNum = (PFEntry >> 26) & 0x0F;
13430 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
13431 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
13453 if (LHSID == (1 * 9 + 2) * 9 + 3)
13455 assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 &&
"Illegal OP_COPY!");
13459 if (OpNum == OP_MOVLANE) {
13461 auto getPFIDLane = [](
unsigned ID,
int Elt) ->
int {
13462 assert(Elt < 4 &&
"Expected Perfect Lanes to be less than 4");
13468 return (
ID % 9 == 8) ? -1 :
ID % 9;
13477 assert(RHSID < 8 &&
"Expected a lane index for RHSID!");
13478 unsigned ExtLane = 0;
13484 int MaskElt = getPFIDLane(
ID, (RHSID & 0x01) << 1) >> 1;
13486 MaskElt = (getPFIDLane(
ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
13487 assert(MaskElt >= 0 &&
"Didn't expect an undef movlane index!");
13488 ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
13489 Input = MaskElt < 2 ? V1 : V2;
13495 "Expected 16 or 32 bit shuffle elemements");
13500 int MaskElt = getPFIDLane(
ID, RHSID);
13501 assert(MaskElt >= 0 &&
"Didn't expect an undef movlane index!");
13502 ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
13503 Input = MaskElt < 4 ? V1 : V2;
13505 if (VT == MVT::v4i16) {
13548 if (EltTy == MVT::i8)
13550 else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
13552 else if (EltTy == MVT::i32 || EltTy == MVT::f32)
13554 else if (EltTy == MVT::i64 || EltTy == MVT::f64)
13562 return DAG.
getNode(Opcode, dl, VT, OpLHS, Lane);
13593 EVT EltVT =
Op.getValueType().getVectorElementType();
  bool IsUndefOrZero = V2.isUndef() || isZerosVector(V2.getNode());
  MVT IndexVT = MVT::v8i8;
  unsigned IndexLen = 8;
  if (Op.getValueSizeInBits() == 128) {
    IndexVT = MVT::v16i8;

  for (int Val : ShuffleMask) {
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      unsigned Offset = Byte + Val * BytesPerElt;

      if (IsUndefOrZero && Offset >= IndexLen)

  if (IsUndefOrZero) {

        DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,

    if (IndexLen == 8) {

          DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,

          DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
13661 if (EltType == MVT::i8)
13663 if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
13665 if (EltType == MVT::i32 || EltType == MVT::f32)
13667 if (EltType == MVT::i64 || EltType == MVT::f64)
13676 auto getScaledOffsetDup = [](
SDValue BitCast,
int &LaneC,
MVT &CastVT) {
13687 unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
13689 if (ExtIdxInBits % CastedEltBitWidth != 0)
13697 LaneC += ExtIdxInBits / CastedEltBitWidth;
13704 unsigned SrcVecNumElts =
13711 if (getScaledOffsetDup(V, Lane, CastVT)) {
13712 V = DAG.
getBitcast(CastVT, V.getOperand(0).getOperand(0));
13714 V.getOperand(0).getValueType().is128BitVector()) {
13717 Lane += V.getConstantOperandVal(1);
13718 V = V.getOperand(0);
13744 EVT VT =
Op.getValueType();
13754 if (ElementSize > 32 || ElementSize == 1)
13784 EVT VT =
Op.getValueType();
13798 for (
unsigned I = 0;
I < 16;
I++) {
13799 if (ShuffleMask[
I] < 16)
13803 dyn_cast<ConstantSDNode>(Mask2->
getOperand(ShuffleMask[
I] - 16));
13806 TBLMaskParts[
I] = DAG.
getConstant(
C->getSExtValue() + 32, dl, MVT::i32);
13823AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(
SDValue Op,
13826 EVT VT =
Op.getValueType();
13830 "Unexpected extension factor.");
13843 EVT VT =
Op.getValueType();
13848 return LowerFixedLengthVECTOR_SHUFFLEToSVE(
Op, DAG);
13861 "Unexpected VECTOR_SHUFFLE mask size!");
13887 for (
unsigned LaneSize : {64U, 32U, 16U}) {
13899 V1 =
constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
13907 if (
isREVMask(ShuffleMask, EltSize, NumElts, 64))
13909 if (
isREVMask(ShuffleMask, EltSize, NumElts, 32))
13911 if (
isREVMask(ShuffleMask, EltSize, NumElts, 16))
13914 if (((NumElts == 8 && EltSize == 16) || (NumElts == 16 && EltSize == 8)) &&
13921 bool ReverseEXT =
false;
13923 if (
isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
13935 unsigned WhichResult;
13936 if (
isZIPMask(ShuffleMask, NumElts, WhichResult)) {
13940 if (
isUZPMask(ShuffleMask, NumElts, WhichResult)) {
13944 if (
isTRNMask(ShuffleMask, NumElts, WhichResult)) {
13968 if (
isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
13973 int SrcLane = ShuffleMask[Anomaly];
13974 if (SrcLane >= NumInputElements) {
13976 SrcLane -= NumElts;
13983 ScalarVT = MVT::i32;
13996 if (NumElts == 4) {
13997 unsigned PFIndexes[4];
13998 for (
unsigned i = 0; i != 4; ++i) {
13999 if (ShuffleMask[i] < 0)
14002 PFIndexes[i] = ShuffleMask[i];
14006 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
14007 PFIndexes[2] * 9 + PFIndexes[3];
14017 "Expected larger vector element sizes to be handled already");
14019 for (
int M : ShuffleMask)
14021 M >=
static_cast<int>(NumElts) ? 0 : 0xffffffff, dl, MVT::i32));
14035 EVT VT =
Op.getValueType();
14038 return LowerToScalableOp(
Op, DAG);
14041 "Unexpected vector type!");
14044 if (isa<ConstantSDNode>(
Op.getOperand(0)))
14056 if (VT == MVT::nxv1i1)
14068 EVT VT =
Op.getValueType();
14080 auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
14081 if (CIdx && (CIdx->getZExtValue() <= 3)) {
14111 APInt &UndefBits) {
14113 APInt SplatBits, SplatUndef;
14114 unsigned SplatBitSize;
14116 if (BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14119 for (
unsigned i = 0; i < NumSplats; ++i) {
14120 CnstBits <<= SplatBitSize;
14121 UndefBits <<= SplatBitSize;
14123 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.
getSizeInBits());
14134 const APInt &Bits) {
14135 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14137 EVT VT =
Op.getValueType();
14156 const SDValue *LHS =
nullptr) {
14157 EVT VT =
Op.getValueType();
14162 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14165 bool isAdvSIMDModImm =
false;
14185 if (isAdvSIMDModImm) {
14190 Mov = DAG.
getNode(NewOp, dl, MovTy,
14195 Mov = DAG.
getNode(NewOp, dl, MovTy,
14209 const SDValue *LHS =
nullptr) {
14210 EVT VT =
Op.getValueType();
14215 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14218 bool isAdvSIMDModImm =
false;
14230 if (isAdvSIMDModImm) {
14235 Mov = DAG.
getNode(NewOp, dl, MovTy,
14240 Mov = DAG.
getNode(NewOp, dl, MovTy,
14254 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14256 EVT VT =
Op.getValueType();
14258 bool isAdvSIMDModImm =
false;
14270 if (isAdvSIMDModImm) {
14284 const APInt &Bits) {
14285 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14287 EVT VT =
Op.getValueType();
14305 const APInt &Bits) {
14306 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14308 EVT VT =
Op.getValueType();
14311 bool isAdvSIMDModImm =
false;
14315 MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
14320 MovTy = MVT::v2f64;
14323 if (isAdvSIMDModImm) {
14347 for (
unsigned i = 1; i < NumElts; ++i)
14348 if (dyn_cast<ConstantSDNode>(Bvec->
getOperand(i)) != FirstElt)
14357 N =
N.getOperand(0);
14363 unsigned NumElts =
N.getValueType().getVectorMinNumElements();
14367 N =
N.getOperand(0);
14370 if (
N.getValueType().getVectorMinNumElements() < NumElts)
14381 N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
14382 return N.getValueType().getVectorMinNumElements() >= NumElts;
14389 unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
14390 if (MaxSVESize && MinSVESize == MaxSVESize) {
14392 unsigned PatNumElts =
14394 return PatNumElts == (NumElts * VScale);
14408 EVT VT =
N->getValueType(0);
14418 SDValue FirstOp =
N->getOperand(0);
14419 unsigned FirstOpc = FirstOp.
getOpcode();
14420 SDValue SecondOp =
N->getOperand(1);
14421 unsigned SecondOpc = SecondOp.
getOpcode();
14452 if (ShiftHasPredOp) {
14458 C2 =
C.getZExtValue();
14460 dyn_cast<ConstantSDNode>(Shift.
getOperand(1)))
14461 C2 = C2node->getZExtValue();
14475 assert(C1nodeImm && C1nodeShift);
14477 C1AsAPInt = C1AsAPInt.
zextOrTrunc(ElemSizeInBits);
14483 if (C2 > ElemSizeInBits)
14488 if (C1AsAPInt != RequiredC1)
14512 return LowerToScalableOp(
Op, DAG);
14518 EVT VT =
Op.getValueType();
14524 dyn_cast<BuildVectorSDNode>(
Op.getOperand(1).getNode());
14527 LHS =
Op.getOperand(1);
14528 BVN = dyn_cast<BuildVectorSDNode>(
Op.getOperand(0).getNode());
14545 UndefBits, &LHS)) ||
14561 EVT VT =
Op.getValueType();
14573 if (
auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
14575 CstLane->getAPIntValue().trunc(EltTy.
getSizeInBits()).getZExtValue(),
14577 }
else if (Lane.getNode()->isUndef()) {
14580 assert(Lane.getValueType() == MVT::i32 &&
14581 "Unexpected BUILD_VECTOR operand type");
14590 EVT VT =
Op.getValueType();
14592 "Expected a legal NEON vector");
14598 auto TryMOVIWithBits = [&](
APInt DefBits) {
14612 APInt NotDefBits = ~DefBits;
14622 if (
SDValue R = TryMOVIWithBits(DefBits))
14624 if (
SDValue R = TryMOVIWithBits(UndefBits))
14628 auto TryWithFNeg = [&](
APInt DefBits,
MVT FVT) {
14634 unsigned NumElts = VT.
getSizeInBits() / FVT.getScalarSizeInBits();
14635 for (
unsigned i = 0; i < NumElts; i++)
14636 NegBits |= Neg << (FVT.getScalarSizeInBits() * i);
14637 NegBits = DefBits ^ NegBits;
14641 if (
SDValue NewOp = TryMOVIWithBits(NegBits)) {
14652 if ((R = TryWithFNeg(DefBits, MVT::f32)) ||
14653 (R = TryWithFNeg(DefBits, MVT::f64)) ||
14654 (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
14661SDValue AArch64TargetLowering::LowerFixedLengthBuildVectorToSVE(
14663 EVT VT =
Op.getValueType();
14666 auto *BVN = cast<BuildVectorSDNode>(
Op);
14687 NumElems -
count_if(
Op->op_values(), IsExtractElt) > 4)
14694 return Op.isUndef() ? Undef
14695 : DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
14696 ContainerVT, Undef, Op, ZeroI64);
14700 while (Intermediates.
size() > 1) {
14703 for (
unsigned I = 0;
I < Intermediates.
size();
I += 2) {
14706 Intermediates[
I / 2] =
14711 Intermediates.
resize(Intermediates.
size() / 2);
14722 EVT VT =
Op.getValueType();
14725 cast<BuildVectorSDNode>(
Op)->isConstantSequence();
14727 return LowerFixedLengthBuildVectorToSVE(
Op, DAG);
14745 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
14746 if (Val.isZero() || (VT.
isInteger() && Val.isAllOnes()))
14750 if (
Const->isZero() && !
Const->isNegative())
14771 bool isOnlyLowElement =
true;
14772 bool usesOnlyOneValue =
true;
14773 bool usesOnlyOneConstantValue =
true;
14775 bool AllLanesExtractElt =
true;
14776 unsigned NumConstantLanes = 0;
14777 unsigned NumDifferentLanes = 0;
14778 unsigned NumUndefLanes = 0;
14782 unsigned ConsecutiveValCount = 0;
14784 for (
unsigned i = 0; i < NumElts; ++i) {
14787 AllLanesExtractElt =
false;
14793 isOnlyLowElement =
false;
14798 ++NumConstantLanes;
14799 if (!ConstantValue.
getNode())
14801 else if (ConstantValue != V)
14802 usesOnlyOneConstantValue =
false;
14805 if (!
Value.getNode())
14807 else if (V !=
Value) {
14808 usesOnlyOneValue =
false;
14809 ++NumDifferentLanes;
14812 if (PrevVal != V) {
14813 ConsecutiveValCount = 0;
14828 DifferentValueMap[
V] = ++ConsecutiveValCount;
14831 if (!
Value.getNode()) {
14833 dbgs() <<
"LowerBUILD_VECTOR: value undefined, creating undef node\n");
14841 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: only low element used, creating 1 "
14842 "SCALAR_TO_VECTOR node\n");
14846 if (AllLanesExtractElt) {
14852 for (
unsigned i = 0; i < NumElts; ++i) {
14855 if (!isa<ConstantSDNode>(
N->getOperand(1))) {
14878 uint64_t Val =
N->getConstantOperandVal(1);
14879 if (Val == 2 * i) {
14883 if (Val - 1 == 2 * i) {
14910 if (usesOnlyOneValue) {
14913 Value.getValueType() != VT) {
14915 dbgs() <<
"LowerBUILD_VECTOR: use DUP for non-constant splats\n");
14923 if (
Value.getValueSizeInBits() == 64) {
14925 dbgs() <<
"LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
14937 assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
14938 EltTy == MVT::f64) &&
"Unsupported floating-point vector type");
14940 dbgs() <<
"LowerBUILD_VECTOR: float constant splats, creating int "
14941 "BITCASTS, and try again\n");
14943 for (
unsigned i = 0; i < NumElts; ++i)
14947 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: trying to lower new vector: ";
14949 Val = LowerBUILD_VECTOR(Val, DAG);
14959 bool PreferDUPAndInsert =
14961 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
14962 NumDifferentLanes >= NumConstantLanes;
14968 if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
14972 APInt ConstantValueAPInt(1, 0);
14973 if (
auto *
C = dyn_cast<ConstantSDNode>(ConstantValue))
14974 ConstantValueAPInt =
C->getAPIntValue().zextOrTrunc(BitSize);
14976 !ConstantValueAPInt.isAllOnes()) {
14984 for (
unsigned i = 0; i < NumElts; ++i) {
14998 dbgs() <<
"LowerBUILD_VECTOR: all elements are constant, use default "
15010 if (NumElts >= 4) {
15018 if (PreferDUPAndInsert) {
15023 for (
unsigned I = 0;
I < NumElts; ++
I)
15034 if (DifferentValueMap.
size() == 2 && NumUndefLanes == 0) {
15046 bool canUseVECTOR_CONCAT =
true;
15047 for (
auto Pair : DifferentValueMap) {
15049 if (Pair.second != NumElts / 2)
15050 canUseVECTOR_CONCAT =
false;
15063 if (canUseVECTOR_CONCAT) {
15086 if (NumElts >= 8) {
15089 SDValue FirstLaneVal =
Op.getOperand(0);
15090 for (
unsigned i = 0; i < NumElts; ++i) {
15092 if (FirstLaneVal == Val)
15116 dbgs() <<
"LowerBUILD_VECTOR: alternatives failed, creating sequence "
15117 "of INSERT_VECTOR_ELT\n");
15134 LLVM_DEBUG(
dbgs() <<
"Creating node for op0, it is not undefined:\n");
15140 dbgs() <<
"Creating nodes for the other vector elements:\n";
15142 for (; i < NumElts; ++i) {
15153 dbgs() <<
"LowerBUILD_VECTOR: use default expansion, failed to find "
15154 "better alternative\n");
15162 return LowerFixedLengthConcatVectorsToSVE(
Op, DAG);
15164 assert(
Op.getValueType().isScalableVector() &&
15166 "Expected legal scalable vector type!");
15171 "Unexpected number of operands in CONCAT_VECTORS");
15173 if (NumOperands == 2)
15178 while (ConcatOps.size() > 1) {
15179 for (
unsigned I = 0, E = ConcatOps.size();
I != E;
I += 2) {
15187 ConcatOps.resize(ConcatOps.size() / 2);
15189 return ConcatOps[0];
15201 return LowerFixedLengthInsertVectorElt(
Op, DAG);
15203 EVT VT =
Op.getOperand(0).getValueType();
15217 ExtendedValue,
Op.getOperand(2));
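// LowerEXTRACT_VECTOR_ELT - Custom lowering for element extracts: narrow
// scalable predicate-like types are widened first, fixed-length SVE vectors
// go through LowerFixedLengthExtractVectorElt, and only NEON-sized vector
// types are accepted on the plain NEON path.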
15230AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(
SDValue Op,
15233 EVT VT =
Op.getOperand(0).getValueType();
15242 MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
15244 Extend,
Op.getOperand(1));
15249 return LowerFixedLengthExtractVectorElt(
Op, DAG);
15257 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
15258 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
15259 VT == MVT::v8f16 || VT == MVT::v8bf16)
15262 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
15263 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
15274 if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
15284 EVT VT =
Op.getValueType();
15286 "Only cases that extract a fixed length vector are supported!");
15287 EVT InVT =
Op.getOperand(0).getValueType();
15295 unsigned Idx =
Op.getConstantOperandVal(1);
15314 if (PackedVT != InVT) {
15337 assert(
Op.getValueType().isScalableVector() &&
15338 "Only expect to lower inserts into scalable vectors!");
15340 EVT InVT =
Op.getOperand(1).getValueType();
15341 unsigned Idx =
Op.getConstantOperandVal(2);
15346 EVT VT =
Op.getValueType();
15362 if (
Idx < (NumElts / 2))
15388 Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
15389 Vec1 = getSVESafeBitCast(NarrowVT, Vec1, DAG);
15406 "Invalid subvector index!");
15412 return getSVESafeBitCast(VT, Narrow, DAG);
15420 std::optional<unsigned> PredPattern =
15442 !isa<ConstantSDNode>(
Op->getOperand(0)))
15445 SplatVal =
Op->getConstantOperandVal(0);
15446 if (
Op.getValueType().getVectorElementType() != MVT::i64)
15447 SplatVal = (int32_t)SplatVal;
15455 SplatVal = -SplatVal;
15463 EVT VT =
Op.getValueType();
15467 return LowerFixedLengthVectorIntDivideToSVE(
Op, DAG);
15487 if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
15488 return LowerToPredicatedOp(
Op, DAG, PredOpcode);
15493 if (VT == MVT::nxv16i8)
15494 WidenedVT = MVT::nxv8i16;
15495 else if (VT == MVT::nxv8i16)
15496 WidenedVT = MVT::nxv4i32;
15506 SDValue ResultLo = DAG.
getNode(
Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
15507 SDValue ResultHi = DAG.
getNode(
Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
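// shouldExpandBuildVectorWithShuffles - Target hook that decides whether a
// BUILD_VECTOR with the given number of defined values should be expanded
// using shuffles rather than a sequence of element inserts.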
15513bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
15514 EVT VT,
unsigned DefinedValues)
const {
15534 unsigned DummyUnsigned;
15542 isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
15543 isTRNMask(M, NumElts, DummyUnsigned) ||
15544 isUZPMask(M, NumElts, DummyUnsigned) ||
15545 isZIPMask(M, NumElts, DummyUnsigned) ||
15549 isINSMask(M, NumElts, DummyBool, DummyInt) ||
15565 Op =
Op.getOperand(0);
15567 APInt SplatBits, SplatUndef;
15568 unsigned SplatBitSize;
15570 if (!BVN || !BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
15571 HasAnyUndefs, ElementBits) ||
15572 SplatBitSize > ElementBits)
15583 assert(VT.
isVector() &&
"vector shift count is not a vector type");
15587 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
15594 assert(VT.
isVector() &&
"vector shift count is not a vector type");
15598 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
15603 EVT VT = Op.getValueType();
15608 EVT OpVT = Op.getOperand(0).getValueType();
15620 return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
15630 unsigned &ShiftValue,
15643 ShiftValue = ShiftOp1->getZExtValue();
15652 "ResVT must be truncated or same type as the shift.");
15655 if (ShiftValue > ExtraBits && !Add->getFlags().hasNoUnsignedWrap())
15662 uint64_t AddValue = AddOp1->getZExtValue();
15663 if (AddValue != 1ULL << (ShiftValue - 1))
15666 RShOperand = Add->getOperand(0);
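// Illustrative sketch, not part of this file: the checks above recognise
// srl(add(X, 1 << (Shift - 1)), Shift) as a rounding right shift, which the
// caller can then select as a single rounding-shift instruction. A scalar
// model of the identity being matched, assuming the add does not wrap
// (mirroring the hasNoUnsignedWrap guard above):
#include <cassert>
#include <cstdint>
static uint64_t roundingShiftRight(uint64_t X, unsigned Shift) {
  assert(Shift >= 1 && Shift < 64 && "shift amount out of range for this sketch");
  // Adding half of the divisor before shifting rounds to nearest, ties up.
  return (X + (1ULL << (Shift - 1))) >> Shift;
}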
15672 EVT VT = Op.getValueType();
15676 if (!Op.getOperand(1).getValueType().isVector())
15680 switch (Op.getOpcode()) {
15686 if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
15692 Op.getOperand(0), Op.getOperand(1));
15696 (Subtarget->hasSVE2() ||
15697 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
15699 unsigned ShiftValue;
15710 return LowerToPredicatedOp(Op, DAG, Opc);
15714 if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
15717 return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
15724 unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
15725 : Intrinsic::aarch64_neon_ushl;
15733 return NegShiftLeft;
15742 EVT SrcVT = LHS.getValueType();
15744 "function only supposed to emit natural comparisons");
15748 unsigned SplatBitSize = 0;
15753 SplatBitSize, HasAnyUndefs);
15755 bool IsZero = IsCnst && SplatValue == 0;
15758 bool IsMinusOne = IsCnst && SplatValue.isAllOnes();
15770 return DAG.getNOT(dl, Fcmeq, VT);
15814 return DAG.getNOT(dl, Cmeq, VT);
15853 if (Op.getValueType().isScalableVector())
15858 return LowerFixedLengthVectorSetccToSVE(Op, DAG);
15863 EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
15866 if (LHS.getValueType().getVectorElementType().isInteger()) {
15877 bool OneNaN = false;
15896 if ((!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) ||
15897 LHS.getValueType().getVectorElementType() == MVT::bf16) {
15898 if (LHS.getValueType().getVectorNumElements() == 4) {
15903 CmpVT = MVT::v4i32;
15908 assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
15909 LHS.getValueType().getVectorElementType() != MVT::bf16 ||
15910 LHS.getValueType().getVectorElementType() != MVT::f128);
15921 if (!Cmp.getNode())
15951 unsigned ScalarOpcode;
15969 "Expected power-of-2 length vector");
15977 if (ElemVT == MVT::i1) {
15979 if (NumElems > 16) {
15982 EVT HalfVT = Lo.getValueType();
15993 unsigned ExtendedWidth = 64;
15996 ExtendedWidth = 128;
16001 unsigned ExtendOp =
16009 NumElems == 2 && ExtendedWidth == 128) {
16010 Extended = DAG.getBitcast(MVT::v4i32, Extended);
16011 ExtendedVT = MVT::i32;
16013 switch (ScalarOpcode) {
16034 VecVT = Lo.getValueType();
16050 for (unsigned Shift = NumElems / 2; Shift > 0; Shift /= 2) {
16055 Scalar = DAG.getNode(ScalarOpcode, DL, ScalarVT, Scalar, Shifted);
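// Illustrative sketch, not part of this file: for i1 vectors the reduction
// above moves the (extended) lanes into a scalar and combines them with a
// log2(NumElems) sequence of shift-and-op steps. A simplified standalone model
// with one lane per bit of a uint64_t (the real code extends each lane first):
#include <cstdint>
enum class LaneOp { And, Or, Xor };
static uint64_t reduceBoolLanes(uint64_t Lanes, unsigned NumElems, LaneOp Op) {
  // NumElems must be a power of two, matching the "Expected power-of-2 length
  // vector" assertion earlier in this function. Bits above NumElems may hold
  // garbage; they can never travel far enough down to reach bit 0.
  for (unsigned Shift = NumElems / 2; Shift > 0; Shift /= 2) {
    uint64_t Shifted = Lanes >> Shift;
    switch (Op) {
    case LaneOp::And: Lanes &= Shifted; break;
    case LaneOp::Or:  Lanes |= Shifted; break;
    case LaneOp::Xor: Lanes ^= Shifted; break;
    }
  }
  return Lanes & 1; // bit 0 now holds the reduced value
}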
16069 EVT SrcVT = Src.getValueType();
16082 return LowerPredReductionToSVE(Op, DAG);
16084 switch (Op.getOpcode()) {
16118 switch (Op.getOpcode()) {
16123 Op.getValueType(), dl, DAG);
16143 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
16148 MVT VT = Op.getSimpleValueType();
16149 assert(VT != MVT::i128 && "Handled elsewhere, code replicated.");
16154 Op.getOperand(0), Op.getOperand(1), RHS,
16159 AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
16168 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
16169 EVT VT = Node->getValueType(0);
16172 "no-stack-arg-probe")) {
16180 SDValue Ops[2] = {SP, Chain};
16200 Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
16220 SDValue Ops[2] = {SP, Chain};
16225 AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(SDValue Op,
16233 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
16235 EVT VT = Node->getValueType(0);
16247 SDValue Ops[2] = {SP, Chain};
16252 AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
16257 return LowerWindowsDYNAMIC_STACKALLOC(Op, DAG);
16259 return LowerInlineDYNAMIC_STACKALLOC(Op, DAG);
16265 unsigned NewOp) const {
16266 if (Subtarget->hasSVE2())
16267 return LowerToPredicatedOp(Op, DAG, NewOp);
16275 EVT VT = Op.getValueType();
16276 assert(VT != MVT::i64 && "Expected illegal VSCALE node");
16279 APInt MulImm = Op.getConstantOperandAPInt(0);
16285 template <unsigned NumVecs>
16295 for (unsigned I = 0; I < NumVecs; ++I)
16304 Info.align.reset();
16315 unsigned Intrinsic) const {
16316 auto &DL = I.getDataLayout();
16317 switch (Intrinsic) {
16318 case Intrinsic::aarch64_sve_st2:
16319 return setInfoSVEStN<2>(*this, DL, Info, I);
16320 case Intrinsic::aarch64_sve_st3:
16321 return setInfoSVEStN<3>(*this, DL, Info, I);
16322 case Intrinsic::aarch64_sve_st4:
16323 return setInfoSVEStN<4>(*this, DL, Info, I);
16324 case Intrinsic::aarch64_neon_ld2:
16325 case Intrinsic::aarch64_neon_ld3:
16326 case Intrinsic::aarch64_neon_ld4:
16327 case Intrinsic::aarch64_neon_ld1x2:
16328 case Intrinsic::aarch64_neon_ld1x3:
16329 case Intrinsic::aarch64_neon_ld1x4: {
16331 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
16333 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16335 Info.align.reset();
16340 case Intrinsic::aarch64_neon_ld2lane:
16341 case Intrinsic::aarch64_neon_ld3lane:
16342 case Intrinsic::aarch64_neon_ld4lane:
16343 case Intrinsic::aarch64_neon_ld2r:
16344 case Intrinsic::aarch64_neon_ld3r:
16345 case Intrinsic::aarch64_neon_ld4r: {
16349 auto *StructTy = cast<StructType>(RetTy);
16350 unsigned NumElts = StructTy->getNumElements();
16351 Type *VecTy = StructTy->getElementType(0);
16354 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16356 Info.align.reset();
16361 case Intrinsic::aarch64_neon_st2:
16362 case Intrinsic::aarch64_neon_st3:
16363 case Intrinsic::aarch64_neon_st4:
16364 case Intrinsic::aarch64_neon_st1x2:
16365 case Intrinsic::aarch64_neon_st1x3:
16366 case Intrinsic::aarch64_neon_st1x4: {
16368 unsigned NumElts = 0;
16369 for (const Value *Arg : I.args()) {
16370 Type *ArgTy = Arg->getType();
16373 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
16376 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16378 Info.align.reset();
16383 case Intrinsic::aarch64_neon_st2lane:
16384 case Intrinsic::aarch64_neon_st3lane:
16385 case Intrinsic::aarch64_neon_st4lane: {
16387 unsigned NumElts = 0;
16389 Type *VecTy = I.getArgOperand(0)->getType();
16392 for (const Value *Arg : I.args()) {
16393 Type *ArgTy = Arg->getType();
16400 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16402 Info.align.reset();
16407 case Intrinsic::aarch64_ldaxr:
16408 case Intrinsic::aarch64_ldxr: {
16409 Type *ValTy = I.getParamElementType(0);
16412 Info.ptrVal = I.getArgOperand(0);
16414 Info.align = DL.getABITypeAlign(ValTy);
16418 case Intrinsic::aarch64_stlxr:
16419 case Intrinsic::aarch64_stxr: {
16420 Type *ValTy = I.getParamElementType(1);
16423 Info.ptrVal = I.getArgOperand(1);
16425 Info.align = DL.getABITypeAlign(ValTy);
16429 case Intrinsic::aarch64_ldaxp:
16430 case Intrinsic::aarch64_ldxp:
16432 Info.memVT = MVT::i128;
16433 Info.ptrVal = I.getArgOperand(0);
16438 case Intrinsic::aarch64_stlxp:
16439 case Intrinsic::aarch64_stxp:
16441 Info.memVT = MVT::i128;
16442 Info.ptrVal = I.getArgOperand(2);
16447 case Intrinsic::aarch64_sve_ldnt1: {
16448 Type *ElTy = cast<VectorType>(I.getType())->getElementType();
16451 Info.ptrVal = I.getArgOperand(1);
16453 Info.align = DL.getABITypeAlign(ElTy);
16457 case Intrinsic::aarch64_sve_stnt1: {
16459 cast<VectorType>(I.getArgOperand(0)->getType())->getElementType();
16462 Info.ptrVal = I.getArgOperand(2);
16464 Info.align = DL.getABITypeAlign(ElTy);
16468 case Intrinsic::aarch64_mops_memset_tag: {
16469 Value *Dst = I.getArgOperand(0);
16470 Value *Val = I.getArgOperand(1);
16475 Info.align = I.getParamAlign(0).valueOrOne();
16501 MemSDNode *Mem = dyn_cast<MemSDNode>(Load);
16506 Base.getOperand(1).hasOneUse() &&
16513 uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
16515 if (ShiftAmount == Log2_32(LoadBytes))
16525 if ((VT == MVT::i64 || VT == MVT::i32) && Extend->use_size()) {
16544 return NumBits1 > NumBits2;
16551 return NumBits1 > NumBits2;
16558 if (I->getOpcode() != Instruction::FMul)
16561 if (!I->hasOneUse())
16566 if (!(User->getOpcode() == Instruction::FSub ||
16567 User->getOpcode() == Instruction::FAdd))
16588 return NumBits1 == 32 && NumBits2 == 64;
16595 return NumBits1 == 32 && NumBits2 == 64;
16613 bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
16614 if (isa<FPExtInst>(Ext))
16618 if (Ext->getType()->isVectorTy())
16621 for (const Use &U : Ext->uses()) {
16626 const Instruction *Instr = cast<Instruction>(U.getUser());
16629 switch (Instr->getOpcode()) {
16630 case Instruction::Shl:
16631 if (!isa<ConstantInt>(Instr->getOperand(1)))
16634 case Instruction::GetElementPtr: {
16636 auto &DL = Ext->getDataLayout();
16637 std::advance(GTI, U.getOperandNo()-1);
16650 if (ShiftAmt == 0 || ShiftAmt > 4)
16654 case Instruction::Trunc:
16657 if (Instr->getType() == Ext->getOperand(0)->getType())
16671 unsigned NumElts, bool IsLittleEndian,
16673 if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth > 64)
16676 assert(DstWidth % SrcWidth == 0 &&
16677 "TBL lowering is not supported for a conversion instruction with this "
16678 "source and destination element type.");
16680 unsigned Factor = DstWidth / SrcWidth;
16681 unsigned MaskLen = NumElts * Factor;
16684 Mask.resize(MaskLen, NumElts);
16686 unsigned SrcIndex = 0;
16687 for (unsigned I = IsLittleEndian ? 0 : Factor - 1; I < MaskLen; I += Factor)
16688 Mask[I] = SrcIndex++;
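// Illustrative sketch, not part of this file: the mask built above implements
// a zero-extend through TBL. Each destination element spans Factor
// source-sized chunks; one chunk per destination element reads the next source
// element and the remaining chunks read index NumElts, which the callers pair
// with a zero vector. Standalone restatement of the mask construction:
#include <vector>
static std::vector<int> tblZExtMask(unsigned NumElts, unsigned SrcWidth,
                                    unsigned DstWidth, bool IsLittleEndian) {
  unsigned Factor = DstWidth / SrcWidth;        // e.g. i8 -> i32 gives 4
  unsigned MaskLen = NumElts * Factor;
  std::vector<int> Mask(MaskLen, (int)NumElts); // NumElts selects a zero lane
  unsigned SrcIndex = 0;
  for (unsigned I = IsLittleEndian ? 0 : Factor - 1; I < MaskLen; I += Factor)
    Mask[I] = SrcIndex++;
  return Mask;
}
// For NumElts = 8 and an i8 -> i32 extend on a little-endian target this gives
// { 0,8,8,8, 1,8,8,8, 2,8,8,8, ... }: one payload byte per 32-bit lane.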
16696 bool IsLittleEndian) {
16697 auto *SrcTy = cast<FixedVectorType>(Op->getType());
16698 unsigned NumElts = SrcTy->getNumElements();
16699 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16700 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16710 if (DstTy != ZExtTy)
16711 Result = Builder.CreateZExt(Result, ZExtTy);
16717 bool IsLittleEndian) {
16718 auto *SrcTy = cast<FixedVectorType>(Op->getType());
16719 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16720 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16724 !IsLittleEndian, Mask))
16736 int NumElements = cast<FixedVectorType>(TI->getType())->getNumElements();
16738 auto *DstTy = cast<FixedVectorType>(TI->getType());
16739 assert(SrcTy->getElementType()->isIntegerTy() &&
16740 "Non-integer type source vector element is not supported");
16741 assert(DstTy->getElementType()->isIntegerTy(8) &&
16742 "Unsupported destination vector element type");
16743 unsigned SrcElemTySz =
16744 cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16745 unsigned DstElemTySz =
16746 cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16747 assert((SrcElemTySz % DstElemTySz == 0) &&
16748 "Cannot lower truncate to tbl instructions for a source element size "
16749 "that is not divisible by the destination element size");
16750 unsigned TruncFactor = SrcElemTySz / DstElemTySz;
16751 assert((SrcElemTySz == 16 || SrcElemTySz == 32 || SrcElemTySz == 64) &&
16752 "Unsupported source vector element type size");
16760 for (int Itr = 0; Itr < 16; Itr++) {
16761 if (Itr < NumElements)
16763 IsLittleEndian ? Itr * TruncFactor
16764 : Itr * TruncFactor + (TruncFactor - 1)));
16769 int MaxTblSz = 128 * 4;
16770 int MaxSrcSz = SrcElemTySz * NumElements;
16772 (MaxTblSz > MaxSrcSz) ? NumElements : (MaxTblSz / SrcElemTySz);
16773 assert(ElemsPerTbl <= 16 &&
16774 "Maximum elements selected using TBL instruction cannot exceed 16!");
16776 int ShuffleCount = 128 / SrcElemTySz;
16778 for (int i = 0; i < ShuffleCount; ++i)
16785 while (ShuffleLanes.back() < NumElements) {
16789 if (Parts.size() == 4) {
16792 Builder.CreateIntrinsic(Intrinsic::aarch64_neon_tbl4, VecTy, Parts));
16796 for (int i = 0; i < ShuffleCount; ++i)
16797 ShuffleLanes[i] += ShuffleCount;
16801 "Lowering trunc for vectors requiring different TBL instructions is "
16805 if (!Parts.empty()) {
16807 switch (Parts.size()) {
16809 TblID = Intrinsic::aarch64_neon_tbl1;
16812 TblID = Intrinsic::aarch64_neon_tbl2;
16815 TblID = Intrinsic::aarch64_neon_tbl3;
16826 "more than 2 tbl instructions!");
16829 if (ElemsPerTbl < 16) {
16831 std::iota(FinalMask.begin(), FinalMask.end(), 0);
16836 if (ElemsPerTbl < 16) {
16837 std::iota(FinalMask.begin(), FinalMask.begin() + ElemsPerTbl, 0);
16838 std::iota(FinalMask.begin() + ElemsPerTbl, FinalMask.end(), 16);
16840 std::iota(FinalMask.begin(), FinalMask.end(), 0);
16862 if (!L || L->getHeader() != I->getParent() || F->hasMinSize() ||
16866 auto *SrcTy = dyn_cast<FixedVectorType>(I->getOperand(0)->getType());
16867 auto *DstTy = dyn_cast<FixedVectorType>(I->getType());
16868 if (!SrcTy || !DstTy)
16874 auto *ZExt = dyn_cast<ZExtInst>(I);
16875 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
16876 auto DstWidth = DstTy->getElementType()->getScalarSizeInBits();
16877 if (DstWidth % 8 != 0)
16880 auto *TruncDstType =
16884 auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits();
16888 if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits())
16891 DstTy = TruncDstType;
16897 if (SrcWidth * 4 <= DstWidth && I->hasOneUser()) {
16898 auto *SingleUser = cast<Instruction>(*I->user_begin());
16903 if (DstTy->getScalarSizeInBits() >= 64)
16908 Builder, ZExt->getOperand(0), cast<FixedVectorType>(ZExt->getType()),
16912 ZExt->replaceAllUsesWith(Result);
16913 ZExt->eraseFromParent();
16917 auto *UIToFP = dyn_cast<UIToFPInst>(I);
16918 if (UIToFP && ((SrcTy->getElementType()->isIntegerTy(8) &&
16919 DstTy->getElementType()->isFloatTy()) ||
16920 (SrcTy->getElementType()->isIntegerTy(16) &&
16921 DstTy->getElementType()->isDoubleTy()))) {
16926 assert(ZExt && "Cannot fail for the i8 to float conversion");
16928 I->replaceAllUsesWith(UI);
16929 I->eraseFromParent();
16933 auto *SIToFP = dyn_cast<SIToFPInst>(I);
16934 if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
16935 DstTy->getElementType()->isFloatTy()) {
16940 assert(Shuffle && "Cannot fail for the i8 to float conversion");
16942 auto *AShr = Builder.CreateAShr(Cast, 24, "", true);
16944 I->replaceAllUsesWith(SI);
16945 I->eraseFromParent();
16951 auto *FPToUI = dyn_cast<FPToUIInst>(I);
16953 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
16954 SrcTy->getElementType()->isFloatTy() &&
16955 DstTy->getElementType()->isIntegerTy(8)) {
16957 auto *WideConv = Builder.CreateFPToUI(FPToUI->getOperand(0),
16959 auto *TruncI = Builder.CreateTrunc(WideConv, DstTy);
16960 I->replaceAllUsesWith(TruncI);
16961 I->eraseFromParent();
16970 auto *TI = dyn_cast<TruncInst>(I);
16971 if (TI && DstTy->getElementType()->isIntegerTy(8) &&
16972 ((SrcTy->getElementType()->isIntegerTy(32) ||
16973 SrcTy->getElementType()->isIntegerTy(64)) &&
16974 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
16983 Align &RequiredAligment) const {
16988 RequiredAligment = Align(1);
16990 return NumBits == 32 || NumBits == 64;
16997 unsigned VecSize = 128;
17000 if (UseScalable && isa<FixedVectorType>(VecTy))
17002 return std::max<unsigned>(1, (MinElts * ElSize + 127) / VecSize);
17007 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
17017 unsigned MinElts = EC.getKnownMinValue();
17019 UseScalable = false;
17026 if (isa<ScalableVectorType>(VecTy) &&
17035 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
17038 if (EC.isScalable()) {
17039 UseScalable = true;
17040 return isPowerOf2_32(MinElts) && (MinElts * ElSize) % 128 == 0;
17043 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
17045 unsigned MinSVEVectorSize =
17047 if (VecSize % MinSVEVectorSize == 0 ||
17050 UseScalable = true;
17057 return Subtarget->isNeonAvailable() && (VecSize == 64 || VecSize % 128 == 0);
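// Illustrative sketch, not part of this file: the two queries above reduce to
// arithmetic on the vector's minimum size. A standalone restatement for the
// fixed-length NEON case, assuming the 128-bit granule used above:
#include <algorithm>
static unsigned numInterleavedAccesses(unsigned MinElts, unsigned ElSizeBits) {
  const unsigned VecSizeBits = 128; // one ldN/stN register group per 128 bits
  return std::max(1u, (MinElts * ElSizeBits + 127) / VecSizeBits);
}
static bool isLegalNeonInterleavedType(unsigned ElSizeBits, unsigned VecSizeBits,
                                       bool NeonAvailable) {
  if (ElSizeBits != 8 && ElSizeBits != 16 && ElSizeBits != 32 && ElSizeBits != 64)
    return false;
  // Whole 64-bit D registers or multiples of 128-bit Q registers only.
  return NeonAvailable && (VecSizeBits == 64 || VecSizeBits % 128 == 0);
}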
17089 bool Scalable, Type *LDVTy,
17091 assert(Factor >= 2 && Factor <= 4 && "Invalid interleave factor");
17092 static const Intrinsic::ID SVELoads[3] = {Intrinsic::aarch64_sve_ld2_sret,
17093 Intrinsic::aarch64_sve_ld3_sret,
17094 Intrinsic::aarch64_sve_ld4_sret};
17095 static const Intrinsic::ID NEONLoads[3] = {Intrinsic::aarch64_neon_ld2,
17096 Intrinsic::aarch64_neon_ld3,
17097 Intrinsic::aarch64_neon_ld4};
17106 bool Scalable, Type *STVTy,
17108 assert(Factor >= 2 && Factor <= 4 && "Invalid interleave factor");
17109 static const Intrinsic::ID SVEStores[3] = {Intrinsic::aarch64_sve_st2,
17110 Intrinsic::aarch64_sve_st3,
17111 Intrinsic::aarch64_sve_st4};
17112 static const Intrinsic::ID NEONStores[3] = {Intrinsic::aarch64_neon_st2,
17113 Intrinsic::aarch64_neon_st3,
17114 Intrinsic::aarch64_neon_st4};
17137 "Invalid interleave factor");
17138 assert(!Shuffles.empty() && "Empty shufflevector input");
17140 "Unmatched number of shufflevectors and indices");
17158 SI->getType()->getScalarSizeInBits() * 4 ==
17159 SI->user_back()->getType()->getScalarSizeInBits();
17165 auto *FVTy = cast<FixedVectorType>(VTy);
17169 Type *EltTy = FVTy->getElementType();
17177 FVTy->getNumElements() / NumLoads);
17189 LDVTy->getElementCount());
17192 UseScalable, LDVTy, PtrTy);
17199 Value *PTrue = nullptr;
17201 std::optional<unsigned> PgPattern =
17206 PgPattern = AArch64SVEPredPattern::all;
17210 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
17214 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
17220 FVTy->getNumElements() * Factor);
17224 LdN = Builder.CreateCall(LdNFunc, {PTrue, BaseAddr}, "ldN");
17226 LdN = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
17229 for (unsigned i = 0; i < Shuffles.size(); i++) {
17231 unsigned Index = Indices[i];
17244 FVTy->getNumElements()));
17246 SubVecs[SVI].push_back(SubVec);
17255 auto &SubVec = SubVecs[SVI];
17258 SVI->replaceAllUsesWith(WideVec);
17264 template <typename Iter>
17266 int MaxLookupDist = 20;
17267 unsigned IdxWidth = DL.getIndexSizeInBits(0);
17268 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
17269 const Value *PtrA1 =
17270 Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
17272 while (++It != End) {
17273 if (It->isDebugOrPseudoInst())
17275 if (MaxLookupDist-- == 0)
17277 if (const auto *SI = dyn_cast<StoreInst>(&*It)) {
17278 const Value *PtrB1 =
17279 SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(
17281 if (PtrA1 == PtrB1 &&
17282 (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.sextOrTrunc(IdxWidth))
17319 unsigned Factor) const {
17322 "Invalid interleave factor");
17324 auto *VecTy = cast<FixedVectorType>(SVI->getType());
17325 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
17327 unsigned LaneLen = VecTy->getNumElements() / Factor;
17328 Type *EltTy = VecTy->getElementType();
17349 Type *IntTy = DL.getIntPtrType(EltTy);
17350 unsigned NumOpElts =
17351 cast<FixedVectorType>(Op0->getType())->getNumElements();
17363 LaneLen /= NumStores;
17370 Value *BaseAddr = SI->getPointerOperand();
17384 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
17392 Type *PtrTy = SI->getPointerOperandType();
17394 STVTy->getElementCount());
17397 UseScalable, STVTy, PtrTy);
17399 Value *PTrue = nullptr;
17401 std::optional<unsigned> PgPattern =
17406 DL.getTypeSizeInBits(SubVecTy))
17407 PgPattern = AArch64SVEPredPattern::all;
17411 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
17415 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
17420 for (unsigned i = 0; i < Factor; i++) {
17422 unsigned IdxI = StoreCount * LaneLen * Factor + i;
17423 if (Mask[IdxI] >= 0) {
17427 unsigned StartMask = 0;
17428 for (unsigned j = 1; j < LaneLen; j++) {
17429 unsigned IdxJ = StoreCount * LaneLen * Factor + j * Factor + i;
17430 if (Mask[IdxJ] >= 0) {
17431 StartMask = Mask[IdxJ] - j;
17457 if (StoreCount > 0)
17459 BaseAddr, LaneLen * Factor);
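// Illustrative sketch, not part of this file: when the shufflevector mask has
// undef lanes, the store lowering above still needs a starting source element
// for each of the Factor interleaved slices; it takes the first defined mask
// entry in the slice and subtracts its lane position. Standalone restatement:
#include <vector>
static int interleavedSliceStart(const std::vector<int> &Mask,
                                 unsigned StoreCount, unsigned LaneLen,
                                 unsigned Factor, unsigned I) {
  unsigned IdxI = StoreCount * LaneLen * Factor + I;
  if (Mask[IdxI] >= 0)
    return Mask[IdxI];
  for (unsigned J = 1; J < LaneLen; ++J) {
    unsigned IdxJ = StoreCount * LaneLen * Factor + J * Factor + I;
    if (Mask[IdxJ] >= 0)
      return Mask[IdxJ] - (int)J;
  }
  return 0; // every lane of this slice is undef; any start index will do
}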
17472 auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));
17473 auto *Extr2 = dyn_cast<ExtractValueInst>(*(++DI->user_begin()));
17474 if (!Extr1 || !Extr2)
17477 DeinterleavedValues.resize(2);
17479 DeinterleavedValues[0x1 & (Extr1->getIndices()[0])] = Extr1;
17480 DeinterleavedValues[0x1 & (Extr2->getIndices()[0])] = Extr2;
17481 if (!DeinterleavedValues[0] || !DeinterleavedValues[1])
17485 if (!match(DeinterleavedValues[0], m_ExtractValue<0>((m_Specific(DI)))) ||
17486 !match(DeinterleavedValues[1], m_ExtractValue<1>((m_Specific(DI))))) {
17491 DeInterleaveDeadInsts.insert(DeInterleaveDeadInsts.end(),
17492 DeinterleavedValues.begin(),
17493 DeinterleavedValues.end());
17519 auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));
17520 auto *Extr2 = dyn_cast<ExtractValueInst>(*(++DI->user_begin()));
17521 if (!Extr1 || !Extr2)
17524 if (!Extr1->hasOneUse() || !Extr2->hasOneUse())
17526 auto *DI1 = *(Extr1->user_begin());
17527 auto *DI2 = *(Extr2->user_begin());
17529 if (!DI1->hasNUses(2) || !DI2->hasNUses(2))
17532 auto *A = dyn_cast<ExtractValueInst>(*(DI1->user_begin()));
17533 auto *C = dyn_cast<ExtractValueInst>(*(++DI1->user_begin()));
17534 auto *B = dyn_cast<ExtractValueInst>(*(DI2->user_begin()));
17535 auto *D = dyn_cast<ExtractValueInst>(*(++DI2->user_begin()));
17538 if (!A || !B || !C || !D)
17541 DeinterleavedValues.resize(4);
17543 DeinterleavedValues[0x3 &
17544 ((A->getIndices()[0] * 2) + Extr1->getIndices()[0])] = A;
17545 DeinterleavedValues[0x3 &
17546 ((B->getIndices()[0] * 2) + Extr2->getIndices()[0])] = B;
17547 DeinterleavedValues[0x3 &
17548 ((C->getIndices()[0] * 2) + Extr1->getIndices()[0])] = C;
17549 DeinterleavedValues[0x3 &
17550 ((D->getIndices()[0] * 2) + Extr2->getIndices()[0])] = D;
17551 if (!DeinterleavedValues[0] || !DeinterleavedValues[1] ||
17552 !DeinterleavedValues[2] || !DeinterleavedValues[3])
17570 DeInterleaveDeadInsts.insert(DeInterleaveDeadInsts.end(),
17571 DeinterleavedValues.begin(),
17572 DeinterleavedValues.end());
17573 DeInterleaveDeadInsts.push_back(cast<Instruction>(DI1));
17574 DeInterleaveDeadInsts.push_back(cast<Instruction>(Extr1));
17575 DeInterleaveDeadInsts.push_back(cast<Instruction>(DI2));
17576 DeInterleaveDeadInsts.push_back(cast<Instruction>(Extr2));
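// Illustrative sketch, not part of this file: a factor-4 deinterleave is
// matched above as two layers of two-way deinterleaves, so ld4 result slot K
// is recovered from the second-level extract index (0 or 1, from A/B/C/D) and
// the first-level extract index (0 or 1, from Extr1/Extr2) as
// K = SecondLevel * 2 + FirstLevel, exactly the 0x3 & (... * 2 + ...) above.
static unsigned deinterleave4Slot(unsigned SecondLevelIdx, unsigned FirstLevelIdx) {
  return 0x3 & (SecondLevelIdx * 2 + FirstLevelIdx);
}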
17603 unsigned Factor = DeinterleavedValues.size();
17604 assert((Factor == 2 || Factor == 4) &&
17605 "Currently supported Factor is 2 or 4 only");
17625 UseScalable, LdTy, PtrTy);
17628 Value *Pred = nullptr;
17634 if (NumLoads > 1) {
17637 for (unsigned I = 0; I < NumLoads; ++I) {
17641 Value *LdN = nullptr;
17648 for (unsigned J = 0; J < Factor; ++J) {
17655 for (unsigned J = 0; J < Factor; ++J)
17660 Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
17662 Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
17664 for (unsigned I = 0; I < Factor; I++) {
17666 DeinterleavedValues[I]->replaceAllUsesWith(NewExtract);
17669 DeadInsts.insert(DeadInsts.end(), DeInterleaveDeadInsts.begin(),
17670 DeInterleaveDeadInsts.end());
17701 cast<Instruction>(cast<Instruction>(II)->getOperand(0)));
17703 cast<Instruction>(cast<Instruction>(II)->getOperand(1)));
17721 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
17730 unsigned Factor = InterleavedValues.size();
17731 assert((Factor == 2 || Factor == 4) &&
17732 "Currently supported Factor is 2 or 4 only");
17751 Type *PtrTy = SI->getPointerOperandType();
17753 UseScalable, StTy, PtrTy);
17757 Value *BaseAddr = SI->getPointerOperand();
17758 Value *Pred = nullptr;
17764 auto ExtractedValues = InterleavedValues;
17768 for (unsigned I = 0; I < NumStores; ++I) {
17770 if (NumStores > 1) {
17775 for (unsigned J = 0; J < Factor; J++) {
17776 InterleavedValues[J] =
17780 InterleavedValues[InterleavedValues.size() - 1] = Address;
17782 Builder.CreateCall(StNFunc, InterleavedValues);
17785 InterleaveDeadInsts.end());
17791 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
17792 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
17793 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
17797 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
17798 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
17799 if (Op.isAligned(AlignCheck))
17807 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
17808 AlignmentIsAcceptable(MVT::v16i8, Align(16)))
17810 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
17812 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
17814 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
17821 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
17822 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
17823 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
17827 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
17828 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
17829 if (Op.isAligned(AlignCheck))
17837 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
17838 AlignmentIsAcceptable(MVT::v2i64, Align(16)))
17840 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
17842 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
17844 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
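// Illustrative sketch, not part of this file: both overloads above walk the
// same decision ladder, picking the widest store unit that the remaining size
// and alignment allow. A standalone restatement returning a byte width instead
// of an MVT/LLT; the AllowMisaligned flag stands in for whatever the elided
// alignment fallback above checks and is an assumption of this sketch:
#include <cstdint>
static unsigned memopStoreBytes(uint64_t Size, uint64_t Alignment, bool IsMemset,
                                bool CanUseNEON, bool CanUseFP,
                                bool AllowMisaligned) {
  bool IsSmallMemset = IsMemset && Size < 32;
  auto AlignedTo = [&](uint64_t A) { return AllowMisaligned || Alignment >= A; };
  if (CanUseNEON && IsMemset && !IsSmallMemset && AlignedTo(16))
    return 16; // q-register stores (v16i8 / v2i64 above)
  if (CanUseFP && !IsSmallMemset && AlignedTo(16))
    return 16; // f128
  if (Size >= 8 && AlignedTo(8))
    return 8;  // i64
  if (Size >= 4 && AlignedTo(4))
    return 4;  // i32
  return 1;
}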
17851 if (Immed == std::numeric_limits<int64_t>::min()) {
17853 << ": avoid UB for INT64_MIN\n");
17857 Immed = std::abs(Immed);
17858 bool IsLegal = ((Immed >> 12) == 0 ||
17859 ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
17861 << " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
17867 if (!Subtarget->hasSVE2())
17875 return isInt<6>(Imm / 16);
17886 return std::abs(Imm / 8) <= 16;
17889 return std::abs(Imm / 4) <= 16;
17892 return std::abs(Imm / 2) <= 16;
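// Illustrative sketch, not part of this file: the ADD/SUB immediate legality
// computed above (source lines 17858-17859) is the usual AArch64 rule, a
// 12-bit unsigned value that may optionally be shifted left by 12.
// Standalone restatement:
#include <cstdint>
#include <cstdlib>
static bool isLegalAddImm(int64_t Immed) {
  if (Immed == INT64_MIN) // std::abs would overflow; rejected above as well
    return false;
  uint64_t V = (uint64_t)std::abs(Immed);
  return (V >> 12) == 0 || ((V & 0xfff) == 0 && (V >> 24) == 0);
}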
17919 if (Insn.size() > 1)
17956 if (AM.Scale == 1) {
17959 } else if (AM.Scale == 2) {
17972 if (isa<ScalableVectorType>(Ty)) {
17977 uint64_t VecNumBytes = DL.getTypeSizeInBits(Ty).getKnownMinValue() / 8;
17984 DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
18000 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
18001 NumBytes = NumBits / 8;
18014 int64_t MaxOffset) const {
18015 int64_t HighPart = MinOffset & ~0xfffULL;
18038 return Subtarget->hasFullFP16();
18071 static const MCPhysReg ScratchRegs[] = {
18072 AArch64::X16, AArch64::X17, AArch64::LR, 0
18074 return ScratchRegs;
18078 static const MCPhysReg RCRegs[] = {AArch64::FPCR};
18087 "Expected shift op");
18089 SDValue ShiftLHS = N->getOperand(0);
18090 EVT VT = N->getValueType(0);
18103 isa<ConstantSDNode>(ShiftLHS.getOperand(1))) {
18108 if (auto *SRLC = dyn_cast<ConstantSDNode>(AndLHS.getOperand(1))) {
18110 if (auto *SHLC = dyn_cast<ConstantSDNode>(N->getOperand(1)))
18111 return SRLC->getZExtValue() == SHLC->getZExtValue();
18123 (N->getOperand(0).getOpcode() == ISD::SHL ||
18124 N->getOperand(0).getOpcode() == ISD::SRL) &&
18125 "Expected XOR(SHIFT) pattern");
18128 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
18129 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
18130 if (XorC && ShiftC) {
18131 unsigned MaskIdx, MaskLen;
18132 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
18133 unsigned ShiftAmt = ShiftC->getZExtValue();
18134 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
18135 if (N->getOperand(0).getOpcode() == ISD::SHL)
18136 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
18137 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
18147 N->getOperand(0).getOpcode() == ISD::SRL) ||
18149 N->getOperand(0).getOpcode() == ISD::SHL)) &&
18150 "Expected shift-shift mask");
18152 if (!N->getOperand(0)->hasOneUse())
18156 EVT VT = N->getValueType(0);
18157 if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
18158 auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
18159 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18160 return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
18165 if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
18166 if (auto C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
18167 unsigned ShlAmt = C2->getZExtValue();
18168 if (auto ShouldADD = *N->user_begin();
18169 ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
18170 if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->user_begin())) {
18171 unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
18172 if ((1ULL << ShlAmt) == ByteVT &&
18184 unsigned BinOpcode, EVT VT) const {
18196 int64_t Val = Imm.getSExtValue();
18200 if ((int64_t)Val < 0)
18203 Val &= (1LL << 32) - 1;
18211 unsigned Index) const {
18224 EVT VT = N->getValueType(0);
18225 if (!Subtarget->hasNEON() || !VT.isVector())
18237 auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
18239 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
18263 if (N->getValueType(0) != MVT::i32)
18266 SDValue VecReduceOp0 = N->getOperand(0);
18267 unsigned Opcode = VecReduceOp0.getOpcode();
18274 if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
18275 ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
18278 SDValue SUB = ABS->getOperand(0);
18279 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
18280 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
18282 if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
18283 SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
18287 bool IsZExt = false;
18295 SDValue EXT0 = SUB->getOperand(0);
18296 SDValue EXT1 = SUB->getOperand(1);
18313 UABDHigh8Op0, UABDHigh8Op1);
18324 UABDLo8Op0, UABDLo8Op1);
18343 if (!ST->isNeonAvailable())
18346 if (!ST->hasDotProd())
18357 unsigned DotOpcode;
18361 if (A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
18363 auto OpCodeA = A.getOpcode();
18367 auto OpCodeB = B.getOpcode();
18371 if (OpCodeA == OpCodeB) {
18376 if (!ST->hasMatMulInt8())
18390 EVT Op0VT = A.getOperand(0).getValueType();
18393 if (!IsValidElementCount || !IsValidSize)
18402 B = B.getOperand(0);
18405 unsigned NumOfVecReduce;
18407 if (IsMultipleOf16) {
18409 TargetType = MVT::v4i32;
18412 TargetType = MVT::v2i32;
18415 if (NumOfVecReduce == 1) {
18418 A.getOperand(0), B);
18425 for (; I < VecReduce16Num; I += 1) {
18444 if (VecReduce8Num == 0)
18445 return VecReduceAdd16;
18468 auto DetectAddExtract = [&](SDValue A) {
18472 EVT VT = A.getValueType();
18500 if (SDValue R = DetectAddExtract(A))
18503 if (A.getOperand(0).getOpcode() == ISD::ADD && A.getOperand(0).hasOneUse())
18507 if (A.getOperand(1).getOpcode() == ISD::ADD && A.getOperand(1).hasOneUse())
18520 EVT VT = A.getValueType();
18521 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
18532 if (ExtVT0 != ExtVT1 ||
18575 AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
18582 EVT VT = N->getValueType(0);
18591 if ((VT != MVT::i32 && VT != MVT::i64) ||
18597 if (Divisor == 2 ||
18605 AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
18612 EVT VT = N->getValueType(0);
18620 if ((VT != MVT::i32 && VT != MVT::i64) ||
18662 case Intrinsic::aarch64_sve_cntb:
18664 case Intrinsic::aarch64_sve_cnth:
18666 case Intrinsic::aarch64_sve_cntw:
18668 case Intrinsic::aarch64_sve_cntd:
18697 return TypeNode->getVT();
18707 if (Mask == UCHAR_MAX)
18709 else if (Mask == USHRT_MAX)
18711 else if (Mask == UINT_MAX)
18733 unsigned ExtendOpcode = Extend.getOpcode();
18749 if (PreExtendType == MVT::Other ||
18754 bool SeenZExtOrSExt = !IsAnyExt;
18762 unsigned Opc = Op.getOpcode();
18769 if (SeenZExtOrSExt && OpcIsSExt != IsSExt)
18772 IsSExt = OpcIsSExt;
18773 SeenZExtOrSExt = true;
18780 EVT PreExtendLegalType =
18786 PreExtendLegalType));
18794 cast<ShuffleVectorSDNode>(BV)->getMask());
18796 unsigned ExtOpc = !SeenZExtOrSExt
18799 return DAG.getNode(ExtOpc, DL, VT, NBV);
18806 EVT VT = Mul->getValueType(0);
18807 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
18818 return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0),
18819 Op1 ? Op1 : Mul->getOperand(1));
18825 EVT VT = N->getValueType(0);
18826 if (VT != MVT::v2i64 && VT != MVT::v1i64 && VT != MVT::v2i32 &&
18827 VT != MVT::v4i32 && VT != MVT::v4i16 && VT != MVT::v8i16)
18829 if (N->getOperand(0).getOpcode() != ISD::AND ||
18830 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
18843 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
18844 V3 != (HalfSize - 1))
18862 EVT VT = N->getValueType(0);
18868 N->getOperand(0).getOperand(0).getValueType() !=
18869 N->getOperand(1).getOperand(0).getValueType())
18873 N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
18876 SDValue N0 = N->getOperand(0).getOperand(0);
18877 SDValue N1 = N->getOperand(1).getOperand(0);
18882 if ((S2 == MVT::i32 && S1 == MVT::i8) ||
18883 (S2 == MVT::i64 && (S1 == MVT::i8 || S1 == MVT::i16))) {
18916 EVT VT = N->getValueType(0);
18920 unsigned AddSubOpc;
18922 auto IsAddSubWith1 = [&](SDValue V) -> bool {
18923 AddSubOpc = V->getOpcode();
18925 SDValue Opnd = V->getOperand(1);
18926 MulOper = V->getOperand(0);
18929 if (auto C = dyn_cast<ConstantSDNode>(Opnd))
18935 if (IsAddSubWith1(N0)) {
18937 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
18940 if (IsAddSubWith1(N1)) {
18942 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
18946 if (!isa<ConstantSDNode>(N1))
18950 const APInt &ConstValue = C->getAPIntValue();
18957 if (ConstValue.sge(1) && ConstValue.sle(16))
18972 unsigned TrailingZeroes = ConstValue.countr_zero();
18973 if (TrailingZeroes) {
18981 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ADD ||
18982 N->user_begin()->getOpcode() == ISD::SUB))
18987 APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
18990 auto Shl = [&](SDValue N0, unsigned N1) {
19021 for (unsigned i = 1; i < BitWidth / 2; i++) {
19041 unsigned TrailingZeroes = CVMinus1.countr_zero();
19042 APInt SCVMinus1 = CVMinus1.ashr(TrailingZeroes) - 1;
19058 unsigned TrailingZeroes = CVMinus1.countr_zero();
19059 APInt CVPlus1 = CVMinus1.ashr(TrailingZeroes) + 1;
19079 APInt SCVMinus1 = ShiftedConstValue - 1;
19080 APInt SCVPlus1 = ShiftedConstValue + 1;
19081 APInt CVPlus1 = ConstValue + 1;
19085 return Shl(Add(Shl(N0, ShiftAmt), N0), TrailingZeroes);
19088 return Sub(Shl(N0, ShiftAmt), N0);
19090 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
19091 return Sub(Shl(N0, ShiftAmt), Shl(N0, TrailingZeroes));
19093 if (Subtarget->hasALULSLFast() &&
19094 isPowPlusPlusConst(ConstValue, CVM, CVN)) {
19095 APInt CVMMinus1 = CVM - 1;
19096 APInt CVNMinus1 = CVN - 1;
19097 unsigned ShiftM1 = CVMMinus1.logBase2();
19098 unsigned ShiftN1 = CVNMinus1.logBase2();
19100 if (ShiftM1 <= 4 && ShiftN1 <= 4) {
19102 return Add(Shl(MVal, ShiftN1), MVal);
19105 if (Subtarget->hasALULSLFast() &&
19106 isPowPlusPlusOneConst(ConstValue, CVM, CVN)) {
19110 if (ShiftM <= 4 && ShiftN <= 4) {
19116 if (Subtarget->hasALULSLFast() &&
19117 isPowMinusMinusOneConst(ConstValue, CVM, CVN)) {
19121 if (ShiftM <= 4 && ShiftN <= 4) {
19130 APInt SCVPlus1 = -ShiftedConstValue + 1;
19131 APInt CVNegPlus1 = -ConstValue + 1;
19132 APInt CVNegMinus1 = -ConstValue - 1;
19135 return Sub(N0, Shl(N0, ShiftAmt));
19137 ShiftAmt = CVNegMinus1.logBase2();
19138 return Negate(Add(Shl(N0, ShiftAmt), N0));
19140 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
19141 return Sub(Shl(N0, TrailingZeroes), Shl(N0, ShiftAmt));
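// Illustrative sketch, not part of this file: the Shl/Add/Sub/Negate helpers
// above rewrite multiplication by "almost power of two" constants into
// shift-and-add/sub sequences. With T trailing zeroes stripped first, the
// identities being used include:
//   C ==  (2^N + 1) << T : X*C == ((X << N) + X) << T
//   C ==   2^N - 1       : X*C == (X << N) - X
//   C ==  (2^N - 1) << T : X*C == (X << (N+T)) - (X << T)
//   C == -(2^N - 1)      : X*C == X - (X << N)
//   C == -(2^N + 1)      : X*C == -((X << N) + X)
// A standalone check of the first identity (wrap-around arithmetic on both
// sides, so the equality also holds on overflow):
#include <cassert>
#include <cstdint>
static uint64_t mulByPow2Plus1Shifted(uint64_t X, unsigned N, unsigned T) {
  assert(N < 64 && T < 64 && "shift amounts must stay in range");
  uint64_t C = ((1ULL << N) + 1) << T;
  uint64_t ViaShifts = ((X << N) + X) << T; // the Shl(Add(Shl(X,N),X),T) form
  assert(ViaShifts == X * C && "decomposition must equal a plain multiply");
  return ViaShifts;
}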
19161 EVT VT = N->getValueType(0);
19163 N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
19164 VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
19172 dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
19174 if (!BV->isConstant())
19179 EVT IntVT = BV->getValueType(0);
19186 N->getOperand(0)->getOperand(0), MaskConst);
19200 if (N->isStrictFPOpcode())
19211 return !VT.isVector() && VT != MVT::bf16 && VT != MVT::f128;
19214 SDValue SrcVal = N->getOperand(0);
19216 EVT DestTy = N->getValueType(0);
19223 if (DestTy.bitsGT(SrcTy)) {
19232 if (SrcVecTy == MVT::nxv2i32 || DestVecTy == MVT::nxv2i32)
19238 DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
19255 EVT VT = N->getValueType(0);
19256 if (VT != MVT::f32 && VT != MVT::f64)
19260 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
19270 !cast<LoadSDNode>(N0)->isVolatile()) {
19300 if (!N->getValueType(0).isSimple())
19304 if (!Op.getValueType().isSimple() || Op.getOpcode() != ISD::FMUL)
19307 if (!Op.getValueType().is64BitVector() && !Op.getValueType().is128BitVector())
19311 if (!isa<BuildVectorSDNode>(ConstVec))
19314 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
19316 if (FloatBits != 32 && FloatBits != 64 &&
19317 (FloatBits != 16 || !Subtarget->hasFullFP16()))
19320 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
19322 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
19326 if (IntBits > FloatBits)
19331 int32_t Bits = IntBits == 64 ? 64 : 32;
19333 if (C == -1 || C == 0 || C > Bits)
19336 EVT ResTy = Op.getValueType().changeVectorElementTypeToInteger();
19342 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
19350 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
19351 : Intrinsic::aarch64_neon_vcvtfp2fxu;
19357 if (IntBits < FloatBits)
19365 EVT VT = N->getValueType(0);
19391 for (int i = 1; i >= 0; --i) {
19392 for (int j = 1; j >= 0; --j) {
19429 uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
19430 for (int i = 1; i >= 0; --i)
19431 for (int j = 1; j >= 0; --j) {
19442 if (!BVN0 || !BVN1)
19445 bool FoundMatch = true;
19449 if (!CN0 || !CN1 ||
19451 FoundMatch = false;
19474 EVT VT = N->getValueType(0);
19522 auto *Op1 = dyn_cast<ConstantSDNode>(Cmp1.getOperand(1));
19523 if (Op1 && Op1->getAPIntValue().isNegative() &&
19524 Op1->getAPIntValue().sgt(-32)) {
19531 NZCVOp, Condition, Cmp0);
19534 Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
19545 EVT VT = N->getValueType(0);
19566 MaskForTy = 0xffull;
19569 MaskForTy = 0xffffull;
19572 MaskForTy = 0xffffffffull;
19580 if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
19581 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
19591 Op = Op->getOperand(0);
19601 unsigned Opc = Src->getOpcode();
19605 SDValue UnpkOp = Src->getOperand(0);
19618 auto MaskAndTypeMatch = [ExtVal](EVT VT) -> bool {
19619 return ((ExtVal == 0xFF && VT == MVT::i8) ||
19620 (ExtVal == 0xFFFF && VT == MVT::i16) ||
19621 (ExtVal == 0xFFFFFFFF && VT == MVT::i32));
19627 if (MaskAndTypeMatch(EltTy))
19632 auto MaskedLoadOp = dyn_cast<MaskedLoadSDNode>(UnpkOp);
19633 if (MaskedLoadOp && (MaskedLoadOp->getExtensionType() == ISD::ZEXTLOAD ||
19660 return N->getOperand(1);
19662 return N->getOperand(0);
19669 if (!Src.hasOneUse())
19680 MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
19697 MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
19717 EVT VT = N->getValueType(0);
19723 for (auto U : N->users())
19754 EVT VT = N->getValueType(0);
19794 DefBits = ~(DefBits | ZeroSplat);
19801 UndefBits = ~(UndefBits | ZeroSplat);
19803 UndefBits, &LHS)) ||
19817 EVT VT = N->getValueType(0);
19820 if (!N->getFlags().hasAllowReassociation())
19827 unsigned Opc = A.getConstantOperandVal(0);
19828 if (Opc != Intrinsic::aarch64_neon_vcmla_rot0 &&
19829 Opc != Intrinsic::aarch64_neon_vcmla_rot90 &&
19830 Opc != Intrinsic::aarch64_neon_vcmla_rot180 &&
19831 Opc != Intrinsic::aarch64_neon_vcmla_rot270)
19836 A.getOperand(2), A.getOperand(3));
19852 return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
19854 return VT == MVT::i64;
19866 (N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
19867 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
19868 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
19869 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
19870 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
19871 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
19872 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
19873 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
19875 N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask)))
19938 if (VS.getConstantOperandVal(0) != NumEls)
19957 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
19959 EVT VT = N->getValueType(0);
19988 Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
19992 if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
20007 {N0->getOperand(0), Extract1, Extract2});
20021 EVT VT = N->getValueType(0);
20022 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
20047 (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
20049 MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
20051 for (size_t i = 0; i < Mask.size(); ++i)
20079 NScalarSize = N->getValueType(0).getScalarSizeInBits();
20081 if (N001ConstVal == N101ConstVal && N001ConstVal > NScalarSize) {
20086 DAG.getConstant(N001ConstVal - NScalarSize, dl, MVT::i32);
20093 if (N->getOperand(0).getValueType() == MVT::v4i8 ||
20094 N->getOperand(0).getValueType() == MVT::v2i16 ||
20095 N->getOperand(0).getValueType() == MVT::v2i8) {
20096 EVT SrcVT = N->getOperand(0).getValueType();
20100 if (N->getNumOperands() % 2 == 0 &&
20102 if (V.getValueType() != SrcVT)
20106 LoadSDNode *LD = dyn_cast<LoadSDNode>(V);
20107 return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
20108 LD->getExtensionType() == ISD::NON_EXTLOAD;
20110 EVT FVT = SrcVT == MVT::v2i8 ? MVT::f16 : MVT::f32;
20114 for (unsigned i = 0; i < N->getNumOperands(); i++) {
20121 LD->getBasePtr(), LD->getMemOperand());
20142 auto isBitwiseVectorNegate = [](SDValue V) {
20143 return V->getOpcode() == ISD::XOR &&
20169 if (N->getNumOperands() == 2 && N0Opc == N1Opc && VT.is128BitVector() &&
20180 return DAG.getNode(N0Opc, dl, VT, Concat0, Concat1);
20184 auto IsRSHRN = [](SDValue Shr) {
20188 EVT VT = Op.getValueType();
20189 unsigned ShtAmt = Shr.getConstantOperandVal(1);
20196 Op.getOperand(1).getConstantOperandVal(0)
20197 << Op.getOperand(1).getConstantOperandVal(1));
20199 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
20201 Op.getOperand(1).getConstantOperandVal(0));
20205 if (Imm != 1ULL << (ShtAmt - 1))
20211 if (N->getNumOperands() == 2 && IsRSHRN(N0) &&
20219 X.getValueType().getDoubleNumVectorElementsVT(*DCI.DAG.getContext());
20261 MVT RHSTy = RHS.getValueType().getSimpleVT();
20267 dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
20283 EVT VT = N->getValueType(0);
20294 if (isa<ConstantSDNode>(V.getOperand(0)))
20305 SDValue SubVec = N->getOperand(1);
20306 uint64_t IdxVal = N->getConstantOperandVal(2);
20317 if (IdxVal == 0 && Vec.isUndef())
20323 (IdxVal != 0 && IdxVal != NumSubElts))
20368 EVT ResTy = N->getValueType(0);
20379 VecResTy = MVT::v4f32;
20381 VecResTy = MVT::v2f64;
20406 MVT VT = N.getSimpleValueType();
20408 N.getConstantOperandVal(1) == 0)
20409 N = N.getOperand(0);
20411 switch (N.getOpcode()) {
20436 if (N.getValueType().is64BitVector()) {
20448 N = N.getOperand(0);
20451 if (N.getOperand(0).getValueType().isScalableVector())
20453 return N.getConstantOperandAPInt(1) ==
20454 N.getOperand(0).getValueType().getVectorNumElements() / 2;
20519 if (!TValue || !FValue)
20523 if (!TValue->isOne()) {