#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-lower"

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::desc("Enable AArch64 logical imm instruction "
cl::desc("Combine extends of AArch64 masked "
         "gather intrinsics"),
switch (EC.getKnownMinValue()) {
       "Expected scalable predicate vector type!");
       "Expected legal vector type!");
if (Subtarget->hasLS64()) {
if (Subtarget->hasFPARMv8()) {
if (Subtarget->hasNEON()) {
if (Subtarget->hasBF16())
if (Subtarget->hasBF16())
if (Subtarget->hasSVE() || Subtarget->hasSME()) {
if (Subtarget->hasBF16()) {
if (useSVEForFixedLengthVectorVT(VT))
if (useSVEForFixedLengthVectorVT(VT))
if (Subtarget->hasFullFP16())
if (!Subtarget->hasFullFP16()) {
if (Subtarget->hasFullFP16())
if (Subtarget->hasFullFP16())
if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
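// Without LSE atomics, outline atomics lower atomic operations to calls to
// the __aarch64_* helper routines provided by compiler-rt/libgcc. The macros
// below register one libcall name per access size and memory ordering.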
#define LCALLNAMES(A, B, N)                        \
  setLibcallName(A##N##_RELAX, #B #N "_relax");    \
  setLibcallName(A##N##_ACQ, #B #N "_acq");        \
  setLibcallName(A##N##_REL, #B #N "_rel");        \
  setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
#define LCALLNAME4(A, B)                           \
  LCALLNAMES(A, B, 1)                              \
  LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
#define LCALLNAME5(A, B)                           \
  LCALLNAMES(A, B, 1)                              \
  LCALLNAMES(A, B, 2)                              \
  LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
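// For example, LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp) below
// registers "__aarch64_swpN_M" for N in {1, 2, 4, 8} and M in
// {relax, acq, rel, acq_rel}; LCALLNAME5 additionally covers the 16-byte CAS.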
LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
if (Subtarget->hasLSE2()) {
if (Subtarget->hasPerfMon())
if (Subtarget->hasNEON()) {
if (Subtarget->hasFullFP16()) {
if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
if (Subtarget->hasFullFP16())
if (Subtarget->hasSME()) {
if (Subtarget->hasSVE()) {
if (useSVEForFixedLengthVectorVT(VT))
  addTypeForFixedLengthSVE(VT);
if (useSVEForFixedLengthVectorVT(VT))
  addTypeForFixedLengthSVE(VT);
if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
void AArch64TargetLowering::addTypeForNEON(MVT VT) {
for (unsigned Opcode :
for (unsigned Opcode :
if (!Subtarget->hasSVE())
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
while (InnerVT != VT) {
while (InnerVT != VT) {
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
const APInt &Demanded,
unsigned EltSize = Size;
    ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
uint64_t Sum = RotatedImm + NonDemandedBits;
bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
uint64_t Ones = (Sum + Carry) & NonDemandedBits;
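// The arithmetic above copies, for each run of non-demanded bits, the value
// of the demanded bit just below the run: RotatedImm flags runs preceded by a
// demanded zero, and the Sum/Carry addition clears exactly those runs, so
// Ones keeps only the runs preceded by a demanded one. Minimizing 0/1
// transitions this way makes the result more likely to be encodable as a
// logical immediate.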
while (EltSize < Size) {
  NewImm |= NewImm << EltSize;
       "demanded bits should never be altered");
assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
EVT VT = Op.getValueType();
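// An all-zeros or all-ones NewImm is left as a generic node so the
// target-independent DAG combiner can fold the operation away entirely.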
if (NewImm == 0 || NewImm == OrigMask) {
EVT VT = Op.getValueType();
assert((Size == 32 || Size == 64) &&
       "i32 or i64 is expected after legalization.");
switch (Op.getOpcode()) {
  NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
  NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
  NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
switch (Op.getOpcode()) {
if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
  assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
         "Expected DUP implicit truncation");
  Known = Known.trunc(Op.getScalarValueSizeInBits());
    ~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2));
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
  EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
case Intrinsic::aarch64_neon_umaxv:
case Intrinsic::aarch64_neon_uminv: {
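  // UMAXV/UMINV zero-extend their scalar result, so every bit above the
  // source element width is known to be zero.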
  MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
if (Subtarget->requiresStrictAlign())
*Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
if (Subtarget->requiresStrictAlign())
*Fast = !Subtarget->isMisaligned128StoreSlow() ||
#define MAKE_CASE(V) \
Register DestReg = MI.getOperand(0).getReg();
Register IfTrueReg = MI.getOperand(1).getReg();
Register IfFalseReg = MI.getOperand(2).getReg();
unsigned CondCode = MI.getOperand(3).getImm();
bool NZCVKilled = MI.getOperand(4).isKill();
MI.eraseFromParent();
        BB->getParent()->getFunction().getPersonalityFn())) &&
       "SEH does not use catchret!");
MIB.add(MI.getOperand(1)); // Slice index register.
MIB.add(MI.getOperand(2)); // Slice index offset.
MIB.add(MI.getOperand(3)); // Predicate.
MIB.add(MI.getOperand(4)); // Base address.
MIB.add(MI.getOperand(5)); // Offset.
MI.eraseFromParent();
MIB.add(MI.getOperand(0)); // Vector select register.
MIB.add(MI.getOperand(1)); // Vector select offset.
MIB.add(MI.getOperand(2)); // Base pointer.
MIB.add(MI.getOperand(1)); // Offset, same as the vector select offset.
MI.eraseFromParent();
MIB.addReg(BaseReg + MI.getOperand(0).getImm());
MIB.add(MI.getOperand(1));
MIB.add(MI.getOperand(2));
MIB.add(MI.getOperand(3));
MIB.add(MI.getOperand(4));
MI.eraseFromParent();
MIB.add(MI.getOperand(0));
unsigned Mask = MI.getOperand(0).getImm();
for (unsigned I = 0; I < 8; I++) {
  if (Mask & (1 << I))
MI.eraseFromParent();
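// Each set bit of the 8-bit mask above selects one of the eight 64-bit ZA
// tiles to be zeroed, mirroring the tile-list encoding of the SME ZERO
// instruction.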
switch (MI.getOpcode()) {
case AArch64::F128CSEL:
case TargetOpcode::STATEPOINT:
  MI.addOperand(*MI.getMF(),
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
case AArch64::LD1_MXIPXX_H_PSEUDO_B:
case AArch64::LD1_MXIPXX_H_PSEUDO_H:
case AArch64::LD1_MXIPXX_H_PSEUDO_S:
case AArch64::LD1_MXIPXX_H_PSEUDO_D:
case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
case AArch64::LD1_MXIPXX_V_PSEUDO_B:
case AArch64::LD1_MXIPXX_V_PSEUDO_H:
case AArch64::LD1_MXIPXX_V_PSEUDO_S:
case AArch64::LD1_MXIPXX_V_PSEUDO_D:
case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
case AArch64::LDR_ZA_PSEUDO:
case AArch64::INSERT_MXIPZ_H_PSEUDO_B:
case AArch64::INSERT_MXIPZ_H_PSEUDO_H:
case AArch64::INSERT_MXIPZ_H_PSEUDO_S:
case AArch64::INSERT_MXIPZ_H_PSEUDO_D:
case AArch64::INSERT_MXIPZ_H_PSEUDO_Q:
case AArch64::INSERT_MXIPZ_V_PSEUDO_B:
case AArch64::INSERT_MXIPZ_V_PSEUDO_H:
case AArch64::INSERT_MXIPZ_V_PSEUDO_S:
case AArch64::INSERT_MXIPZ_V_PSEUDO_D:
case AArch64::INSERT_MXIPZ_V_PSEUDO_Q:
case AArch64::ZERO_M_PSEUDO:
N = N->getOperand(0).getNode();
auto Opnd0 = N->getOperand(0);
auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
return (CINT && CINT->isZero()) || (CFP && CFP->isZero());
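// Operand 0 is accepted as zero whether it is an integer constant or a
// floating-point zero (of either sign).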
bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
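// ADD/SUB immediates are 12 bits, optionally shifted left by 12, hence the
// two ranges accepted above: bits [11:0], or bits [23:12] with the low 12
// bits clear.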
                  << " legal: " << (IsLegal ? "yes\n" : "no\n"));
EVT VT = LHS.getValueType();
Chain = RHS.getValue(1);
EVT VT = LHS.getValueType();
return LHS.getValue(1);
unsigned Opcode = 0;
if (LHS.getValueType().isFloatingPoint()) {
  if (LHS.getValueType() == MVT::f16 && !FullFP16) {
bool &MustBeFirst, bool WillNegate, unsigned Depth = 0) {
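// This predicate walks an AND/OR tree of comparisons to decide whether it
// can be emitted as a chain of conditional compares (CCMP). OR nodes are
// handled by negating both operands (De Morgan), which is what the
// CanNegate/MustBeFirst flags track.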
MustBeFirst = false;
bool IsOR = Opcode == ISD::OR;
if (MustBeFirstL && MustBeFirstR)
if (!CanNegateL && !CanNegateR)
CanNegate = WillNegate && CanNegateL && CanNegateR;
MustBeFirst = !CanNegate;
MustBeFirst = MustBeFirstL || MustBeFirstR;
assert(LHS.getValueType().isFloatingPoint());
bool IsOR = Opcode == ISD::OR;
assert(ValidL && "Valid conjunction/disjunction tree");
assert(ValidR && "Valid conjunction/disjunction tree");
assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
bool NegateAfterAll;
assert(CanNegateR && "at least one side must be negatable");
assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
NegateAfterR = true;
NegateR = CanNegateR;
NegateAfterR = !CanNegateR;
NegateAfterAll = !Negate;
assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
assert(!Negate && "Valid conjunction/disjunction tree");
NegateAfterR = false;
NegateAfterAll = false;
bool DummyCanNegate;
bool DummyMustBeFirst;
auto isSupportedExtend = [&](SDValue V) {
if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
  return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
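// AND with 0xFF/0xFFFF/0xFFFFFFFF is equivalent to a UXTB/UXTH/UXTW extend,
// which the extended-register forms of ADD/SUB (and thus CMP) fold for free.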
if (!Op.hasOneUse())
if (isSupportedExtend(Op))
unsigned Opc = Op.getOpcode();
if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
  if (isSupportedExtend(Op.getOperand(0)))
    return (Shift <= 4) ? 2 : 1;
  EVT VT = Op.getValueType();
EVT VT = RHS.getValueType();
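// A comparison against a constant that is not a legal arithmetic immediate
// can sometimes be rewritten against an adjacent constant that is, e.g.
// (x < C) becomes (x <= C - 1); the C != MIN/MAX guards below rule out
// wraparound.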
if ((VT == MVT::i32 && C != 0x80000000 &&
    (VT == MVT::i64 && C != 0x80000000ULL &&
if ((VT == MVT::i32 && C != INT32_MAX &&
if ((VT == MVT::i32 && C != UINT32_MAX &&
if (!isa<ConstantSDNode>(RHS) ||
    cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
    LHS.getNode()->hasNUsesOfValue(1, 0)) {
int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
                     RHS.getValueType()),
static std::pair<SDValue, SDValue>
       "Unsupported value type");
switch (Op.getOpcode()) {
Overflow = Value.getValue(1);
return std::make_pair(Value, Overflow);
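// Fixed-length vectors can be lowered by wrapping the operation in its
// scalable SVE equivalent when SVE fixed-length lowering is enabled.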
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
  return LowerToScalableOp(Op, DAG);