23#include "llvm/IR/IntrinsicsAArch64.h"
35#define DEBUG_TYPE "aarch64tti"
41 "sve-prefer-fixed-over-scalable-if-equal",
cl::Hidden);
59 "Penalty of calling a function that requires a change to PSTATE.SM"));
63 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
74 cl::desc(
"The cost of a histcnt instruction"));
78 cl::desc(
"The number of instructions to search for a redundant dmb"));
82 cl::desc(
"Threshold for forced unrolling of small loops in AArch64"));
85class TailFoldingOption {
100 bool NeedsDefault =
true;
104 void setNeedsDefault(
bool V) { NeedsDefault =
V; }
119 assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
120 "Initial bits should only include one of "
121 "(disabled|all|simple|default)");
122 Bits = NeedsDefault ? DefaultBits : InitialBits;
124 Bits &= ~DisableBits;
130 errs() <<
"invalid argument '" << Opt
131 <<
"' to -sve-tail-folding=; the option should be of the form\n"
132 " (disabled|all|default|simple)[+(reductions|recurrences"
133 "|reverse|noreductions|norecurrences|noreverse)]\n";
139 void operator=(
const std::string &Val) {
148 setNeedsDefault(
false);
151 StringRef(Val).split(TailFoldTypes,
'+', -1,
false);
153 unsigned StartIdx = 1;
154 if (TailFoldTypes[0] ==
"disabled")
155 setInitialBits(TailFoldingOpts::Disabled);
156 else if (TailFoldTypes[0] ==
"all")
157 setInitialBits(TailFoldingOpts::All);
158 else if (TailFoldTypes[0] ==
"default")
159 setNeedsDefault(
true);
160 else if (TailFoldTypes[0] ==
"simple")
161 setInitialBits(TailFoldingOpts::Simple);
164 setInitialBits(TailFoldingOpts::Disabled);
167 for (
unsigned I = StartIdx;
I < TailFoldTypes.
size();
I++) {
168 if (TailFoldTypes[
I] ==
"reductions")
169 setEnableBit(TailFoldingOpts::Reductions);
170 else if (TailFoldTypes[
I] ==
"recurrences")
171 setEnableBit(TailFoldingOpts::Recurrences);
172 else if (TailFoldTypes[
I] ==
"reverse")
173 setEnableBit(TailFoldingOpts::Reverse);
174 else if (TailFoldTypes[
I] ==
"noreductions")
175 setDisableBit(TailFoldingOpts::Reductions);
176 else if (TailFoldTypes[
I] ==
"norecurrences")
177 setDisableBit(TailFoldingOpts::Recurrences);
178 else if (TailFoldTypes[
I] ==
"noreverse")
179 setDisableBit(TailFoldingOpts::Reverse);
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
258 TTI->isMultiversionedFunction(
F) ?
"fmv-features" :
"target-features";
259 StringRef FeatureStr =
F.getFnAttribute(AttributeStr).getValueAsString();
260 FeatureStr.
split(Features,
",");
276 return F.hasFnAttribute(
"fmv-features");
280 AArch64::FeatureExecuteOnly,
320 FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
321 FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;
323 return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
341 auto FVTy = dyn_cast<FixedVectorType>(Ty);
343 FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
352 unsigned DefaultCallPenalty)
const {
377 if (
F ==
Call.getCaller())
383 return DefaultCallPenalty;
394 ST->isSVEorStreamingSVEAvailable() &&
395 !ST->disableMaximizeScalableBandwidth();
419 assert(Ty->isIntegerTy());
421 unsigned BitSize = Ty->getPrimitiveSizeInBits();
428 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
433 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
439 return std::max<InstructionCost>(1,
Cost);
446 assert(Ty->isIntegerTy());
448 unsigned BitSize = Ty->getPrimitiveSizeInBits();
454 unsigned ImmIdx = ~0U;
458 case Instruction::GetElementPtr:
463 case Instruction::Store:
466 case Instruction::Add:
467 case Instruction::Sub:
468 case Instruction::Mul:
469 case Instruction::UDiv:
470 case Instruction::SDiv:
471 case Instruction::URem:
472 case Instruction::SRem:
473 case Instruction::And:
474 case Instruction::Or:
475 case Instruction::Xor:
476 case Instruction::ICmp:
480 case Instruction::Shl:
481 case Instruction::LShr:
482 case Instruction::AShr:
486 case Instruction::Trunc:
487 case Instruction::ZExt:
488 case Instruction::SExt:
489 case Instruction::IntToPtr:
490 case Instruction::PtrToInt:
491 case Instruction::BitCast:
492 case Instruction::PHI:
493 case Instruction::Call:
494 case Instruction::Select:
495 case Instruction::Ret:
496 case Instruction::Load:
501 int NumConstants = (BitSize + 63) / 64;
514 assert(Ty->isIntegerTy());
516 unsigned BitSize = Ty->getPrimitiveSizeInBits();
525 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
531 case Intrinsic::sadd_with_overflow:
532 case Intrinsic::uadd_with_overflow:
533 case Intrinsic::ssub_with_overflow:
534 case Intrinsic::usub_with_overflow:
535 case Intrinsic::smul_with_overflow:
536 case Intrinsic::umul_with_overflow:
538 int NumConstants = (BitSize + 63) / 64;
545 case Intrinsic::experimental_stackmap:
546 if ((Idx < 2) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
549 case Intrinsic::experimental_patchpoint_void:
550 case Intrinsic::experimental_patchpoint:
551 if ((Idx < 4) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
554 case Intrinsic::experimental_gc_statepoint:
555 if ((Idx < 5) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
565 if (TyWidth == 32 || TyWidth == 64)
589 unsigned TotalHistCnts = 1;
599 unsigned EC = VTy->getElementCount().getKnownMinValue();
604 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
606 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
610 TotalHistCnts = EC / NaturalVectorWidth;
630 switch (ICA.
getID()) {
631 case Intrinsic::experimental_vector_histogram_add: {
638 case Intrinsic::umin:
639 case Intrinsic::umax:
640 case Intrinsic::smin:
641 case Intrinsic::smax: {
642 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
643 MVT::v8i16, MVT::v2i32, MVT::v4i32,
644 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
648 if (LT.second == MVT::v2i64)
654 case Intrinsic::scmp:
655 case Intrinsic::ucmp: {
657 {Intrinsic::scmp, MVT::i32, 3},
658 {Intrinsic::scmp, MVT::i64, 3},
659 {Intrinsic::scmp, MVT::v8i8, 3},
660 {Intrinsic::scmp, MVT::v16i8, 3},
661 {Intrinsic::scmp, MVT::v4i16, 3},
662 {Intrinsic::scmp, MVT::v8i16, 3},
663 {Intrinsic::scmp, MVT::v2i32, 3},
664 {Intrinsic::scmp, MVT::v4i32, 3},
665 {Intrinsic::scmp, MVT::v1i64, 3},
666 {Intrinsic::scmp, MVT::v2i64, 3},
672 return Entry->Cost * LT.first;
675 case Intrinsic::sadd_sat:
676 case Intrinsic::ssub_sat:
677 case Intrinsic::uadd_sat:
678 case Intrinsic::usub_sat: {
679 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
680 MVT::v8i16, MVT::v2i32, MVT::v4i32,
686 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
688 return LT.first * Instrs;
693 if (ST->isSVEAvailable() && VectorSize >= 128 &&
isPowerOf2_64(VectorSize))
694 return LT.first * Instrs;
698 case Intrinsic::abs: {
699 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
700 MVT::v8i16, MVT::v2i32, MVT::v4i32,
701 MVT::v2i64, MVT::nxv16i8, MVT::nxv8i16,
702 MVT::nxv4i32, MVT::nxv2i64};
708 case Intrinsic::bswap: {
709 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
710 MVT::v4i32, MVT::v2i64};
713 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
718 case Intrinsic::fmuladd: {
723 (EltTy->
isHalfTy() && ST->hasFullFP16()))
727 case Intrinsic::stepvector: {
736 Cost += AddCost * (LT.first - 1);
740 case Intrinsic::vector_extract:
741 case Intrinsic::vector_insert: {
754 bool IsExtract = ICA.
getID() == Intrinsic::vector_extract;
755 EVT SubVecVT = IsExtract ? getTLI()->getValueType(
DL, RetTy)
763 getTLI()->getTypeConversion(
C, SubVecVT);
765 getTLI()->getTypeConversion(
C, VecVT);
773 case Intrinsic::bitreverse: {
775 {Intrinsic::bitreverse, MVT::i32, 1},
776 {Intrinsic::bitreverse, MVT::i64, 1},
777 {Intrinsic::bitreverse, MVT::v8i8, 1},
778 {Intrinsic::bitreverse, MVT::v16i8, 1},
779 {Intrinsic::bitreverse, MVT::v4i16, 2},
780 {Intrinsic::bitreverse, MVT::v8i16, 2},
781 {Intrinsic::bitreverse, MVT::v2i32, 2},
782 {Intrinsic::bitreverse, MVT::v4i32, 2},
783 {Intrinsic::bitreverse, MVT::v1i64, 2},
784 {Intrinsic::bitreverse, MVT::v2i64, 2},
792 if (TLI->getValueType(
DL, RetTy,
true) == MVT::i8 ||
793 TLI->getValueType(
DL, RetTy,
true) == MVT::i16)
794 return LegalisationCost.first * Entry->Cost + 1;
796 return LegalisationCost.first * Entry->Cost;
800 case Intrinsic::ctpop: {
801 if (!ST->hasNEON()) {
833 RetTy->getScalarSizeInBits()
836 return LT.first * Entry->Cost + ExtraCost;
840 case Intrinsic::sadd_with_overflow:
841 case Intrinsic::uadd_with_overflow:
842 case Intrinsic::ssub_with_overflow:
843 case Intrinsic::usub_with_overflow:
844 case Intrinsic::smul_with_overflow:
845 case Intrinsic::umul_with_overflow: {
847 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
848 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
849 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
850 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
851 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
852 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
853 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
854 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
855 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
856 {Intrinsic::usub_with_overflow, MVT::i8, 3},
857 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
858 {Intrinsic::usub_with_overflow, MVT::i16, 3},
859 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
860 {Intrinsic::usub_with_overflow, MVT::i32, 1},
861 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
862 {Intrinsic::usub_with_overflow, MVT::i64, 1},
863 {Intrinsic::smul_with_overflow, MVT::i8, 5},
864 {Intrinsic::umul_with_overflow, MVT::i8, 4},
865 {Intrinsic::smul_with_overflow, MVT::i16, 5},
866 {Intrinsic::umul_with_overflow, MVT::i16, 4},
867 {Intrinsic::smul_with_overflow, MVT::i32, 2},
868 {Intrinsic::umul_with_overflow, MVT::i32, 2},
869 {Intrinsic::smul_with_overflow, MVT::i64, 3},
870 {Intrinsic::umul_with_overflow, MVT::i64, 3},
872 EVT MTy = TLI->getValueType(
DL, RetTy->getContainedType(0),
true);
879 case Intrinsic::fptosi_sat:
880 case Intrinsic::fptoui_sat: {
883 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
885 EVT MTy = TLI->getValueType(
DL, RetTy);
888 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
889 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
890 LT.second == MVT::v2f64)) {
892 (LT.second == MVT::f64 && MTy == MVT::i32) ||
893 (LT.second == MVT::f32 && MTy == MVT::i64)))
902 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
909 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
910 (LT.second == MVT::f16 && MTy == MVT::i64) ||
911 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
925 if ((LT.second.getScalarType() == MVT::f32 ||
926 LT.second.getScalarType() == MVT::f64 ||
927 LT.second.getScalarType() == MVT::f16) &&
931 if (LT.second.isVector())
936 LegalTy, {LegalTy, LegalTy});
940 LegalTy, {LegalTy, LegalTy});
942 return LT.first *
Cost +
943 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
949 RetTy = RetTy->getScalarType();
950 if (LT.second.isVector()) {
968 return LT.first *
Cost;
970 case Intrinsic::fshl:
971 case Intrinsic::fshr: {
980 if (RetTy->isIntegerTy() && ICA.
getArgs()[0] == ICA.
getArgs()[1] &&
981 (RetTy->getPrimitiveSizeInBits() == 32 ||
982 RetTy->getPrimitiveSizeInBits() == 64)) {
995 {Intrinsic::fshl, MVT::v4i32, 2},
996 {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
997 {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
998 {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
1004 return LegalisationCost.first * Entry->Cost;
1008 if (!RetTy->isIntegerTy())
1013 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
1014 RetTy->getScalarSizeInBits() < 64) ||
1015 (RetTy->getScalarSizeInBits() % 64 != 0);
1016 unsigned ExtraCost = HigherCost ? 1 : 0;
1017 if (RetTy->getScalarSizeInBits() == 32 ||
1018 RetTy->getScalarSizeInBits() == 64)
1021 else if (HigherCost)
1025 return TyL.first + ExtraCost;
1027 case Intrinsic::get_active_lane_mask: {
1029 EVT RetVT = getTLI()->getValueType(
DL, RetTy);
1031 if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
1034 if (RetTy->isScalableTy()) {
1035 if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
1045 if (ST->hasSVE2p1() || ST->hasSME2()) {
1060 return Cost + (SplitCost * (
Cost - 1));
1075 case Intrinsic::experimental_vector_match: {
1078 unsigned SearchSize = NeedleTy->getNumElements();
1079 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
1092 case Intrinsic::cttz: {
1094 if (LT.second == MVT::v8i8 || LT.second == MVT::v16i8)
1095 return LT.first * 2;
1096 if (LT.second == MVT::v4i16 || LT.second == MVT::v8i16 ||
1097 LT.second == MVT::v2i32 || LT.second == MVT::v4i32)
1098 return LT.first * 3;
1101 case Intrinsic::experimental_cttz_elts: {
1103 if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
1111 case Intrinsic::loop_dependence_raw_mask:
1112 case Intrinsic::loop_dependence_war_mask: {
1114 if (ST->hasSVE2() || ST->hasSME()) {
1115 EVT VecVT = getTLI()->getValueType(
DL, RetTy);
1116 unsigned EltSizeInBytes =
1126 case Intrinsic::experimental_vector_extract_last_active:
1127 if (ST->isSVEorStreamingSVEAvailable()) {
1133 case Intrinsic::pow: {
1136 EVT VT = getTLI()->getValueType(
DL, RetTy);
1138 bool HasLibcall = getTLI()->getLibcallImpl(LC) != RTLIB::Unsupported;
1153 bool Is025 = ExpF->getValueAPF().isExactlyValue(0.25);
1154 bool Is075 = ExpF->getValueAPF().isExactlyValue(0.75);
1164 return (Sqrt * 2) +
FMul;
1175 case Intrinsic::sqrt:
1176 case Intrinsic::fabs:
1177 case Intrinsic::ceil:
1178 case Intrinsic::floor:
1179 case Intrinsic::nearbyint:
1180 case Intrinsic::round:
1181 case Intrinsic::rint:
1182 case Intrinsic::roundeven:
1183 case Intrinsic::trunc:
1184 case Intrinsic::minnum:
1185 case Intrinsic::maxnum:
1186 case Intrinsic::minimum:
1187 case Intrinsic::maximum: {
1205 auto RequiredType =
II.getType();
1208 assert(PN &&
"Expected Phi Node!");
1211 if (!PN->hasOneUse())
1212 return std::nullopt;
1214 for (
Value *IncValPhi : PN->incoming_values()) {
1217 Reinterpret->getIntrinsicID() !=
1218 Intrinsic::aarch64_sve_convert_to_svbool ||
1219 RequiredType != Reinterpret->getArgOperand(0)->getType())
1220 return std::nullopt;
1228 for (
unsigned I = 0;
I < PN->getNumIncomingValues();
I++) {
1230 NPN->
addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(
I));
1303 return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
1308 return GoverningPredicateIdx;
1313 GoverningPredicateIdx = Index;
1331 return UndefIntrinsic;
1336 UndefIntrinsic = IID;
1358 return ResultLanes == InactiveLanesTakenFromOperand;
1363 return OperandIdxForInactiveLanes;
1367 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1368 ResultLanes = InactiveLanesTakenFromOperand;
1369 OperandIdxForInactiveLanes = Index;
1374 return ResultLanes == InactiveLanesAreNotDefined;
1378 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1379 ResultLanes = InactiveLanesAreNotDefined;
1384 return ResultLanes == InactiveLanesAreUnused;
1388 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1389 ResultLanes = InactiveLanesAreUnused;
1399 ResultIsZeroInitialized =
true;
1410 return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
1415 return OperandIdxWithNoActiveLanes;
1420 OperandIdxWithNoActiveLanes = Index;
1425 unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
1428 unsigned IROpcode = 0;
1430 enum PredicationStyle {
1432 InactiveLanesTakenFromOperand,
1433 InactiveLanesAreNotDefined,
1434 InactiveLanesAreUnused
1437 bool ResultIsZeroInitialized =
false;
1438 unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
1439 unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
1447 return !isa<ScalableVectorType>(V->getType());
1455 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1456 case Intrinsic::aarch64_sve_fcvt_f16f32:
1457 case Intrinsic::aarch64_sve_fcvt_f16f64:
1458 case Intrinsic::aarch64_sve_fcvt_f32f16:
1459 case Intrinsic::aarch64_sve_fcvt_f32f64:
1460 case Intrinsic::aarch64_sve_fcvt_f64f16:
1461 case Intrinsic::aarch64_sve_fcvt_f64f32:
1462 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1463 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1464 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1465 case Intrinsic::aarch64_sve_fcvtzs:
1466 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1467 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1468 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1469 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1470 case Intrinsic::aarch64_sve_fcvtzu:
1471 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1472 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1473 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1474 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1475 case Intrinsic::aarch64_sve_revb:
1476 case Intrinsic::aarch64_sve_revh:
1477 case Intrinsic::aarch64_sve_revw:
1478 case Intrinsic::aarch64_sve_revd:
1479 case Intrinsic::aarch64_sve_scvtf:
1480 case Intrinsic::aarch64_sve_scvtf_f16i32:
1481 case Intrinsic::aarch64_sve_scvtf_f16i64:
1482 case Intrinsic::aarch64_sve_scvtf_f32i64:
1483 case Intrinsic::aarch64_sve_scvtf_f64i32:
1484 case Intrinsic::aarch64_sve_ucvtf:
1485 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1486 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1487 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1488 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1491 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1492 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1493 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1494 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1497 case Intrinsic::aarch64_sve_fabd:
1499 case Intrinsic::aarch64_sve_fadd:
1502 case Intrinsic::aarch64_sve_fdiv:
1505 case Intrinsic::aarch64_sve_fmax:
1507 case Intrinsic::aarch64_sve_fmaxnm:
1509 case Intrinsic::aarch64_sve_fmin:
1511 case Intrinsic::aarch64_sve_fminnm:
1513 case Intrinsic::aarch64_sve_fmla:
1515 case Intrinsic::aarch64_sve_fmls:
1517 case Intrinsic::aarch64_sve_fmul:
1520 case Intrinsic::aarch64_sve_fmulx:
1522 case Intrinsic::aarch64_sve_fnmla:
1524 case Intrinsic::aarch64_sve_fnmls:
1526 case Intrinsic::aarch64_sve_fsub:
1529 case Intrinsic::aarch64_sve_add:
1532 case Intrinsic::aarch64_sve_mla:
1534 case Intrinsic::aarch64_sve_mls:
1536 case Intrinsic::aarch64_sve_mul:
1539 case Intrinsic::aarch64_sve_sabd:
1541 case Intrinsic::aarch64_sve_sdiv:
1544 case Intrinsic::aarch64_sve_smax:
1546 case Intrinsic::aarch64_sve_smin:
1548 case Intrinsic::aarch64_sve_smulh:
1550 case Intrinsic::aarch64_sve_sub:
1553 case Intrinsic::aarch64_sve_uabd:
1555 case Intrinsic::aarch64_sve_udiv:
1558 case Intrinsic::aarch64_sve_umax:
1560 case Intrinsic::aarch64_sve_umin:
1562 case Intrinsic::aarch64_sve_umulh:
1564 case Intrinsic::aarch64_sve_asr:
1567 case Intrinsic::aarch64_sve_lsl:
1570 case Intrinsic::aarch64_sve_lsr:
1573 case Intrinsic::aarch64_sve_and:
1576 case Intrinsic::aarch64_sve_bic:
1578 case Intrinsic::aarch64_sve_eor:
1581 case Intrinsic::aarch64_sve_orr:
1584 case Intrinsic::aarch64_sve_shsub:
1586 case Intrinsic::aarch64_sve_shsubr:
1588 case Intrinsic::aarch64_sve_sqrshl:
1590 case Intrinsic::aarch64_sve_sqshl:
1592 case Intrinsic::aarch64_sve_sqsub:
1594 case Intrinsic::aarch64_sve_srshl:
1596 case Intrinsic::aarch64_sve_uhsub:
1598 case Intrinsic::aarch64_sve_uhsubr:
1600 case Intrinsic::aarch64_sve_uqrshl:
1602 case Intrinsic::aarch64_sve_uqshl:
1604 case Intrinsic::aarch64_sve_uqsub:
1606 case Intrinsic::aarch64_sve_urshl:
1609 case Intrinsic::aarch64_sve_add_u:
1612 case Intrinsic::aarch64_sve_and_u:
1615 case Intrinsic::aarch64_sve_asr_u:
1618 case Intrinsic::aarch64_sve_eor_u:
1621 case Intrinsic::aarch64_sve_fadd_u:
1624 case Intrinsic::aarch64_sve_fdiv_u:
1627 case Intrinsic::aarch64_sve_fmul_u:
1630 case Intrinsic::aarch64_sve_fsub_u:
1633 case Intrinsic::aarch64_sve_lsl_u:
1636 case Intrinsic::aarch64_sve_lsr_u:
1639 case Intrinsic::aarch64_sve_mul_u:
1642 case Intrinsic::aarch64_sve_orr_u:
1645 case Intrinsic::aarch64_sve_sdiv_u:
1648 case Intrinsic::aarch64_sve_sub_u:
1651 case Intrinsic::aarch64_sve_udiv_u:
1655 case Intrinsic::aarch64_sve_addqv:
1656 case Intrinsic::aarch64_sve_and_z:
1657 case Intrinsic::aarch64_sve_bic_z:
1658 case Intrinsic::aarch64_sve_brka_z:
1659 case Intrinsic::aarch64_sve_brkb_z:
1660 case Intrinsic::aarch64_sve_brkn_z:
1661 case Intrinsic::aarch64_sve_brkpa_z:
1662 case Intrinsic::aarch64_sve_brkpb_z:
1663 case Intrinsic::aarch64_sve_cntp:
1664 case Intrinsic::aarch64_sve_compact:
1665 case Intrinsic::aarch64_sve_eor_z:
1666 case Intrinsic::aarch64_sve_eorv:
1667 case Intrinsic::aarch64_sve_eorqv:
1668 case Intrinsic::aarch64_sve_nand_z:
1669 case Intrinsic::aarch64_sve_nor_z:
1670 case Intrinsic::aarch64_sve_orn_z:
1671 case Intrinsic::aarch64_sve_orr_z:
1672 case Intrinsic::aarch64_sve_orv:
1673 case Intrinsic::aarch64_sve_orqv:
1674 case Intrinsic::aarch64_sve_pnext:
1675 case Intrinsic::aarch64_sve_rdffr_z:
1676 case Intrinsic::aarch64_sve_saddv:
1677 case Intrinsic::aarch64_sve_uaddv:
1678 case Intrinsic::aarch64_sve_umaxv:
1679 case Intrinsic::aarch64_sve_umaxqv:
1680 case Intrinsic::aarch64_sve_cmpeq:
1681 case Intrinsic::aarch64_sve_cmpeq_wide:
1682 case Intrinsic::aarch64_sve_cmpge:
1683 case Intrinsic::aarch64_sve_cmpge_wide:
1684 case Intrinsic::aarch64_sve_cmpgt:
1685 case Intrinsic::aarch64_sve_cmpgt_wide:
1686 case Intrinsic::aarch64_sve_cmphi:
1687 case Intrinsic::aarch64_sve_cmphi_wide:
1688 case Intrinsic::aarch64_sve_cmphs:
1689 case Intrinsic::aarch64_sve_cmphs_wide:
1690 case Intrinsic::aarch64_sve_cmple_wide:
1691 case Intrinsic::aarch64_sve_cmplo_wide:
1692 case Intrinsic::aarch64_sve_cmpls_wide:
1693 case Intrinsic::aarch64_sve_cmplt_wide:
1694 case Intrinsic::aarch64_sve_cmpne:
1695 case Intrinsic::aarch64_sve_cmpne_wide:
1696 case Intrinsic::aarch64_sve_facge:
1697 case Intrinsic::aarch64_sve_facgt:
1698 case Intrinsic::aarch64_sve_fcmpeq:
1699 case Intrinsic::aarch64_sve_fcmpge:
1700 case Intrinsic::aarch64_sve_fcmpgt:
1701 case Intrinsic::aarch64_sve_fcmpne:
1702 case Intrinsic::aarch64_sve_fcmpuo:
1703 case Intrinsic::aarch64_sve_ld1:
1704 case Intrinsic::aarch64_sve_ld1_gather:
1705 case Intrinsic::aarch64_sve_ld1_gather_index:
1706 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1707 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1708 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1709 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1710 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1711 case Intrinsic::aarch64_sve_ld1q_gather_index:
1712 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1713 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1714 case Intrinsic::aarch64_sve_ld1ro:
1715 case Intrinsic::aarch64_sve_ld1rq:
1716 case Intrinsic::aarch64_sve_ld1udq:
1717 case Intrinsic::aarch64_sve_ld1uwq:
1718 case Intrinsic::aarch64_sve_ld2_sret:
1719 case Intrinsic::aarch64_sve_ld2q_sret:
1720 case Intrinsic::aarch64_sve_ld3_sret:
1721 case Intrinsic::aarch64_sve_ld3q_sret:
1722 case Intrinsic::aarch64_sve_ld4_sret:
1723 case Intrinsic::aarch64_sve_ld4q_sret:
1724 case Intrinsic::aarch64_sve_ldff1:
1725 case Intrinsic::aarch64_sve_ldff1_gather:
1726 case Intrinsic::aarch64_sve_ldff1_gather_index:
1727 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1728 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1729 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1730 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1731 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1732 case Intrinsic::aarch64_sve_ldnf1:
1733 case Intrinsic::aarch64_sve_ldnt1:
1734 case Intrinsic::aarch64_sve_ldnt1_gather:
1735 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1736 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1737 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1740 case Intrinsic::aarch64_sve_prf:
1741 case Intrinsic::aarch64_sve_prfb_gather_index:
1742 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1743 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1744 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1745 case Intrinsic::aarch64_sve_prfd_gather_index:
1746 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1747 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1748 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1749 case Intrinsic::aarch64_sve_prfh_gather_index:
1750 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1751 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1752 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1753 case Intrinsic::aarch64_sve_prfw_gather_index:
1754 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1755 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1756 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1759 case Intrinsic::aarch64_sve_st1_scatter:
1760 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1761 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1762 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1763 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1764 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1765 case Intrinsic::aarch64_sve_st1dq:
1766 case Intrinsic::aarch64_sve_st1q_scatter_index:
1767 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1768 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1769 case Intrinsic::aarch64_sve_st1wq:
1770 case Intrinsic::aarch64_sve_stnt1:
1771 case Intrinsic::aarch64_sve_stnt1_scatter:
1772 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1773 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1774 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1776 case Intrinsic::aarch64_sve_st2:
1777 case Intrinsic::aarch64_sve_st2q:
1779 case Intrinsic::aarch64_sve_st3:
1780 case Intrinsic::aarch64_sve_st3q:
1782 case Intrinsic::aarch64_sve_st4:
1783 case Intrinsic::aarch64_sve_st4q:
1791 Value *UncastedPred;
1797 Pred = UncastedPred;
1803 if (OrigPredTy->getMinNumElements() <=
1805 ->getMinNumElements())
1806 Pred = UncastedPred;
1810 return C &&
C->isAllOnesValue();
1817 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
1818 Dup->getOperand(1) == Pg &&
isa<Constant>(Dup->getOperand(2)))
1826static std::optional<Instruction *>
1833 Value *Op1 =
II.getOperand(1);
1834 Value *Op2 =
II.getOperand(2);
1860 return std::nullopt;
1868 if (SimpleII == Inactive)
1878static std::optional<Instruction *>
1882 return std::nullopt;
1911 II.setCalledFunction(NewDecl);
1921 return std::nullopt;
1933static std::optional<Instruction *>
1937 return std::nullopt;
1939 auto IntrinsicID = BinOp->getIntrinsicID();
1940 switch (IntrinsicID) {
1941 case Intrinsic::aarch64_sve_and_z:
1942 case Intrinsic::aarch64_sve_bic_z:
1943 case Intrinsic::aarch64_sve_eor_z:
1944 case Intrinsic::aarch64_sve_nand_z:
1945 case Intrinsic::aarch64_sve_nor_z:
1946 case Intrinsic::aarch64_sve_orn_z:
1947 case Intrinsic::aarch64_sve_orr_z:
1950 return std::nullopt;
1953 auto BinOpPred = BinOp->getOperand(0);
1954 auto BinOpOp1 = BinOp->getOperand(1);
1955 auto BinOpOp2 = BinOp->getOperand(2);
1959 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
1960 return std::nullopt;
1962 auto PredOp = PredIntr->getOperand(0);
1964 if (PredOpTy !=
II.getType())
1965 return std::nullopt;
1969 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
1970 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1971 if (BinOpOp1 == BinOpOp2)
1972 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1975 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
1977 auto NarrowedBinOp =
1982static std::optional<Instruction *>
1989 return BinOpCombine;
1994 return std::nullopt;
1997 Value *Cursor =
II.getOperand(0), *EarliestReplacement =
nullptr;
2006 if (CursorVTy->getElementCount().getKnownMinValue() <
2007 IVTy->getElementCount().getKnownMinValue())
2011 if (Cursor->getType() == IVTy)
2012 EarliestReplacement = Cursor;
2017 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
2018 Intrinsic::aarch64_sve_convert_to_svbool ||
2019 IntrinsicCursor->getIntrinsicID() ==
2020 Intrinsic::aarch64_sve_convert_from_svbool))
2023 CandidatesForRemoval.
insert(CandidatesForRemoval.
begin(), IntrinsicCursor);
2024 Cursor = IntrinsicCursor->getOperand(0);
2029 if (!EarliestReplacement)
2030 return std::nullopt;
2038 auto *OpPredicate =
II.getOperand(0);
2055 II.getArgOperand(2));
2061 return std::nullopt;
2065 II.getArgOperand(0),
II.getArgOperand(2),
uint64_t(0));
2074 II.getArgOperand(0));
2084 return std::nullopt;
2089 if (!SplatValue || !SplatValue->isZero())
2090 return std::nullopt;
2095 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
2096 return std::nullopt;
2100 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
2101 return std::nullopt;
2104 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
2105 return std::nullopt;
2110 return std::nullopt;
2113 return std::nullopt;
2117 return std::nullopt;
2121 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
2122 return std::nullopt;
2124 unsigned NumElts = VecTy->getNumElements();
2125 unsigned PredicateBits = 0;
2128 for (
unsigned I = 0;
I < NumElts; ++
I) {
2131 return std::nullopt;
2133 PredicateBits |= 1 << (
I * (16 / NumElts));
2137 if (PredicateBits == 0) {
2139 PFalse->takeName(&
II);
2145 for (
unsigned I = 0;
I < 16; ++
I)
2146 if ((PredicateBits & (1 <<
I)) != 0)
2149 unsigned PredSize = Mask & -Mask;
2154 for (
unsigned I = 0;
I < 16;
I += PredSize)
2155 if ((PredicateBits & (1 <<
I)) == 0)
2156 return std::nullopt;
2158 auto *ConvertToSVBool =
2161 auto *ConvertFromSVBool =
2163 II.getType(), ConvertToSVBool);
2171 Value *Pg =
II.getArgOperand(0);
2172 Value *Vec =
II.getArgOperand(1);
2173 auto IntrinsicID =
II.getIntrinsicID();
2174 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
2186 auto OpC = OldBinOp->getOpcode();
2192 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(),
II.getIterator());
2198 if (IsAfter &&
C &&
C->isNullValue()) {
2202 Extract->insertBefore(
II.getIterator());
2203 Extract->takeName(&
II);
2209 return std::nullopt;
2211 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
2212 return std::nullopt;
2214 const auto PTruePattern =
2220 return std::nullopt;
2222 unsigned Idx = MinNumElts - 1;
2232 if (Idx >= PgVTy->getMinNumElements())
2233 return std::nullopt;
2238 Extract->insertBefore(
II.getIterator());
2239 Extract->takeName(&
II);
2252 Value *Pg =
II.getArgOperand(0);
2254 Value *Vec =
II.getArgOperand(2);
2257 if (!Ty->isIntegerTy())
2258 return std::nullopt;
2263 return std::nullopt;
2280 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
2295static std::optional<Instruction *>
2299 if (
Pattern == AArch64SVEPredPattern::all) {
2308 return MinNumElts && NumElts >= MinNumElts
2310 II, ConstantInt::get(
II.getType(), MinNumElts)))
2314static std::optional<Instruction *>
2317 if (!ST->isStreaming())
2318 return std::nullopt;
2330 Value *PgVal =
II.getArgOperand(0);
2331 Value *OpVal =
II.getArgOperand(1);
2335 if (PgVal == OpVal &&
2336 (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
2337 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
2352 return std::nullopt;
2356 if (Pg->
getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
2357 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
2371 if ((Pg ==
Op) && (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
2372 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
2373 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
2374 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
2375 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
2376 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
2377 (OpIID == Intrinsic::aarch64_sve_and_z) ||
2378 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
2379 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
2380 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
2381 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
2382 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
2383 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
2393 return std::nullopt;
2396template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
2397static std::optional<Instruction *>
2399 bool MergeIntoAddendOp) {
2401 Value *MulOp0, *MulOp1, *AddendOp, *
Mul;
2402 if (MergeIntoAddendOp) {
2403 AddendOp =
II.getOperand(1);
2404 Mul =
II.getOperand(2);
2406 AddendOp =
II.getOperand(2);
2407 Mul =
II.getOperand(1);
2412 return std::nullopt;
2414 if (!
Mul->hasOneUse())
2415 return std::nullopt;
2418 if (
II.getType()->isFPOrFPVectorTy()) {
2423 return std::nullopt;
2425 return std::nullopt;
2430 if (MergeIntoAddendOp)
2440static std::optional<Instruction *>
2442 Value *Pred =
II.getOperand(0);
2443 Value *PtrOp =
II.getOperand(1);
2444 Type *VecTy =
II.getType();
2448 Load->copyMetadata(
II);
2459static std::optional<Instruction *>
2461 Value *VecOp =
II.getOperand(0);
2462 Value *Pred =
II.getOperand(1);
2463 Value *PtrOp =
II.getOperand(2);
2467 Store->copyMetadata(
II);
2479 case Intrinsic::aarch64_sve_fmul_u:
2480 return Instruction::BinaryOps::FMul;
2481 case Intrinsic::aarch64_sve_fadd_u:
2482 return Instruction::BinaryOps::FAdd;
2483 case Intrinsic::aarch64_sve_fsub_u:
2484 return Instruction::BinaryOps::FSub;
2486 return Instruction::BinaryOpsEnd;
2490static std::optional<Instruction *>
2493 if (
II.isStrictFP())
2494 return std::nullopt;
2496 auto *OpPredicate =
II.getOperand(0);
2498 if (BinOpCode == Instruction::BinaryOpsEnd ||
2500 return std::nullopt;
2502 BinOpCode,
II.getOperand(1),
II.getOperand(2),
II.getFastMathFlags());
2509 Intrinsic::aarch64_sve_mla>(
2513 Intrinsic::aarch64_sve_mad>(
2516 return std::nullopt;
2519static std::optional<Instruction *>
2523 Intrinsic::aarch64_sve_fmla>(IC,
II,
2528 Intrinsic::aarch64_sve_fmad>(IC,
II,
2533 Intrinsic::aarch64_sve_fmla>(IC,
II,
2536 return std::nullopt;
2539static std::optional<Instruction *>
2543 Intrinsic::aarch64_sve_fmla>(IC,
II,
2548 Intrinsic::aarch64_sve_fmad>(IC,
II,
2553 Intrinsic::aarch64_sve_fmla_u>(
2559static std::optional<Instruction *>
2563 Intrinsic::aarch64_sve_fmls>(IC,
II,
2568 Intrinsic::aarch64_sve_fnmsb>(
2573 Intrinsic::aarch64_sve_fmls>(IC,
II,
2576 return std::nullopt;
2579static std::optional<Instruction *>
2583 Intrinsic::aarch64_sve_fmls>(IC,
II,
2588 Intrinsic::aarch64_sve_fnmsb>(
2593 Intrinsic::aarch64_sve_fmls_u>(
2602 Intrinsic::aarch64_sve_mls>(
2605 return std::nullopt;
2610 Value *UnpackArg =
II.getArgOperand(0);
2612 bool IsSigned =
II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
2613 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
2626 return std::nullopt;
2630 auto *OpVal =
II.getOperand(0);
2631 auto *OpIndices =
II.getOperand(1);
2638 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
2639 return std::nullopt;
2654 Type *RetTy =
II.getType();
2655 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
2656 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
2660 if ((
match(
II.getArgOperand(0),
2667 if (TyA ==
B->getType() &&
2672 TyA->getMinNumElements());
2678 return std::nullopt;
2686 if (
match(
II.getArgOperand(0),
2691 II, (
II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ?
A :
B));
2693 return std::nullopt;
2696static std::optional<Instruction *>
2698 Value *Mask =
II.getOperand(0);
2699 Value *BasePtr =
II.getOperand(1);
2700 Value *Index =
II.getOperand(2);
2711 BasePtr->getPointerAlignment(
II.getDataLayout());
2714 BasePtr, IndexBase);
2721 return std::nullopt;
2724static std::optional<Instruction *>
2726 Value *Val =
II.getOperand(0);
2727 Value *Mask =
II.getOperand(1);
2728 Value *BasePtr =
II.getOperand(2);
2729 Value *Index =
II.getOperand(3);
2739 BasePtr->getPointerAlignment(
II.getDataLayout());
2742 BasePtr, IndexBase);
2748 return std::nullopt;
2754 Value *Pred =
II.getOperand(0);
2755 Value *Vec =
II.getOperand(1);
2756 Value *DivVec =
II.getOperand(2);
2760 if (!SplatConstantInt)
2761 return std::nullopt;
2765 if (DivisorValue == -1)
2766 return std::nullopt;
2767 if (DivisorValue == 1)
2773 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2780 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2782 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2786 return std::nullopt;
2790 size_t VecSize = Vec.
size();
2795 size_t HalfVecSize = VecSize / 2;
2799 if (*
LHS !=
nullptr && *
RHS !=
nullptr) {
2807 if (*
LHS ==
nullptr && *
RHS !=
nullptr)
2825 return std::nullopt;
2832 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2833 CurrentInsertElt = InsertElt->getOperand(0);
2839 return std::nullopt;
2843 for (
size_t I = 0;
I < Elts.
size();
I++) {
2844 if (Elts[
I] ==
nullptr)
2849 if (InsertEltChain ==
nullptr)
2850 return std::nullopt;
2856 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.
size();
2857 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2858 IIScalableTy->getMinNumElements() /
2863 auto *WideShuffleMaskTy =
2874 auto NarrowBitcast =
2887 return std::nullopt;
2892 Value *Pred =
II.getOperand(0);
2893 Value *Vec =
II.getOperand(1);
2894 Value *Shift =
II.getOperand(2);
2897 Value *AbsPred, *MergedValue;
2903 return std::nullopt;
2911 return std::nullopt;
2916 return std::nullopt;
2919 {
II.getType()}, {Pred, Vec, Shift});
2926 Value *Vec =
II.getOperand(0);
2931 return std::nullopt;
2937 auto *NI =
II.getNextNode();
2940 return !
I->mayReadOrWriteMemory() && !
I->mayHaveSideEffects();
2942 while (LookaheadThreshold-- && CanSkipOver(NI)) {
2943 auto *NIBB = NI->getParent();
2944 NI = NI->getNextNode();
2946 if (
auto *SuccBB = NIBB->getUniqueSuccessor())
2947 NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();
2953 if (NextII &&
II.isIdenticalTo(NextII))
2956 return std::nullopt;
2964 {II.getType(), II.getOperand(0)->getType()},
2965 {II.getOperand(0), II.getOperand(1)}));
2972 return std::nullopt;
2978 Value *Passthru =
II.getOperand(0);
2986 auto *Mask = ConstantInt::get(Ty, MaskValue);
2992 return std::nullopt;
2995static std::optional<Instruction *>
3002 return std::nullopt;
3005std::optional<Instruction *>
3016 case Intrinsic::aarch64_dmb:
3018 case Intrinsic::aarch64_neon_fmaxnm:
3019 case Intrinsic::aarch64_neon_fminnm:
3021 case Intrinsic::aarch64_sve_convert_from_svbool:
3023 case Intrinsic::aarch64_sve_dup:
3025 case Intrinsic::aarch64_sve_dup_x:
3027 case Intrinsic::aarch64_sve_cmpne:
3028 case Intrinsic::aarch64_sve_cmpne_wide:
3030 case Intrinsic::aarch64_sve_rdffr:
3032 case Intrinsic::aarch64_sve_lasta:
3033 case Intrinsic::aarch64_sve_lastb:
3035 case Intrinsic::aarch64_sve_clasta_n:
3036 case Intrinsic::aarch64_sve_clastb_n:
3038 case Intrinsic::aarch64_sve_cntd:
3040 case Intrinsic::aarch64_sve_cntw:
3042 case Intrinsic::aarch64_sve_cnth:
3044 case Intrinsic::aarch64_sve_cntb:
3046 case Intrinsic::aarch64_sme_cntsd:
3048 case Intrinsic::aarch64_sve_ptest_any:
3049 case Intrinsic::aarch64_sve_ptest_first:
3050 case Intrinsic::aarch64_sve_ptest_last:
3052 case Intrinsic::aarch64_sve_fadd:
3054 case Intrinsic::aarch64_sve_fadd_u:
3056 case Intrinsic::aarch64_sve_fmul_u:
3058 case Intrinsic::aarch64_sve_fsub:
3060 case Intrinsic::aarch64_sve_fsub_u:
3062 case Intrinsic::aarch64_sve_add:
3064 case Intrinsic::aarch64_sve_add_u:
3066 Intrinsic::aarch64_sve_mla_u>(
3068 case Intrinsic::aarch64_sve_sub:
3070 case Intrinsic::aarch64_sve_sub_u:
3072 Intrinsic::aarch64_sve_mls_u>(
3074 case Intrinsic::aarch64_sve_tbl:
3076 case Intrinsic::aarch64_sve_uunpkhi:
3077 case Intrinsic::aarch64_sve_uunpklo:
3078 case Intrinsic::aarch64_sve_sunpkhi:
3079 case Intrinsic::aarch64_sve_sunpklo:
3081 case Intrinsic::aarch64_sve_uzp1:
3083 case Intrinsic::aarch64_sve_zip1:
3084 case Intrinsic::aarch64_sve_zip2:
3086 case Intrinsic::aarch64_sve_ld1_gather_index:
3088 case Intrinsic::aarch64_sve_st1_scatter_index:
3090 case Intrinsic::aarch64_sve_ld1:
3092 case Intrinsic::aarch64_sve_st1:
3094 case Intrinsic::aarch64_sve_sdiv:
3096 case Intrinsic::aarch64_sve_sel:
3098 case Intrinsic::aarch64_sve_srshl:
3100 case Intrinsic::aarch64_sve_dupq_lane:
3102 case Intrinsic::aarch64_sve_insr:
3104 case Intrinsic::aarch64_sve_whilelo:
3106 case Intrinsic::aarch64_sve_ptrue:
3108 case Intrinsic::aarch64_sve_uxtb:
3110 case Intrinsic::aarch64_sve_uxth:
3112 case Intrinsic::aarch64_sve_uxtw:
3114 case Intrinsic::aarch64_sme_in_streaming_mode:
3118 return std::nullopt;
3125 SimplifyAndSetOp)
const {
3126 switch (
II.getIntrinsicID()) {
3129 case Intrinsic::aarch64_neon_fcvtxn:
3130 case Intrinsic::aarch64_neon_rshrn:
3131 case Intrinsic::aarch64_neon_sqrshrn:
3132 case Intrinsic::aarch64_neon_sqrshrun:
3133 case Intrinsic::aarch64_neon_sqshrn:
3134 case Intrinsic::aarch64_neon_sqshrun:
3135 case Intrinsic::aarch64_neon_sqxtn:
3136 case Intrinsic::aarch64_neon_sqxtun:
3137 case Intrinsic::aarch64_neon_uqrshrn:
3138 case Intrinsic::aarch64_neon_uqshrn:
3139 case Intrinsic::aarch64_neon_uqxtn:
3140 SimplifyAndSetOp(&
II, 0, OrigDemandedElts, UndefElts);
3144 return std::nullopt;
3148 return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3158 if (ST->useSVEForFixedLengthVectors() &&
3161 std::max(ST->getMinSVEVectorSizeInBits(), 128u));
3162 else if (ST->isNeonAvailable())
3167 if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3176bool AArch64TTIImpl::isSingleExtWideningInstruction(
3178 Type *SrcOverrideTy)
const {
3193 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3196 Type *SrcTy = SrcOverrideTy;
3198 case Instruction::Add:
3199 case Instruction::Sub: {
3208 if (Opcode == Instruction::Sub)
3232 assert(SrcTy &&
"Expected some SrcTy");
3234 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
3240 DstTyL.first * DstTyL.second.getVectorMinNumElements();
3242 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
3246 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
3249Type *AArch64TTIImpl::isBinExtWideningInstruction(
unsigned Opcode,
Type *DstTy,
3251 Type *SrcOverrideTy)
const {
3252 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
3253 Opcode != Instruction::Mul)
3263 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3266 auto getScalarSizeWithOverride = [&](
const Value *
V) {
3272 ->getScalarSizeInBits();
3275 unsigned MaxEltSize = 0;
3278 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3279 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3280 MaxEltSize = std::max(EltSize0, EltSize1);
3283 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3284 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3287 if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
3289 MaxEltSize = DstEltSize / 2;
3290 }
else if (Opcode == Instruction::Mul &&
3303 getScalarSizeWithOverride(
isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);
3307 if (MaxEltSize * 2 > DstEltSize)
3325 if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(
DL, Src)) ||
3326 (Src->isScalableTy() && !ST->hasSVE2()))
3336 if (AddUser && AddUser->getOpcode() == Instruction::Add)
3340 if (!Shr || Shr->getOpcode() != Instruction::LShr)
3344 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
3345 Src->getScalarSizeInBits() !=
3369 int ISD = TLI->InstructionOpcodeToISD(Opcode);
3373 if (
I &&
I->hasOneUser()) {
3376 if (
Type *ExtTy = isBinExtWideningInstruction(
3377 SingleUser->getOpcode(), Dst, Operands,
3378 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3391 if (isSingleExtWideningInstruction(
3392 SingleUser->getOpcode(), Dst, Operands,
3393 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3397 if (SingleUser->getOpcode() == Instruction::Add) {
3398 if (
I == SingleUser->getOperand(1) ||
3400 cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
3415 EVT SrcTy = TLI->getValueType(
DL, Src);
3416 EVT DstTy = TLI->getValueType(
DL, Dst);
3418 if (!SrcTy.isSimple() || !DstTy.
isSimple())
3423 if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
3452 EVT WiderTy = SrcTy.
bitsGT(DstTy) ? SrcTy : DstTy;
3455 ST->useSVEForFixedLengthVectors(WiderTy)) {
3456 std::pair<InstructionCost, MVT> LT =
3458 unsigned NumElements =
3474 const unsigned int SVE_EXT_COST = 1;
3475 const unsigned int SVE_FCVT_COST = 1;
3476 const unsigned int SVE_UNPACK_ONCE = 4;
3477 const unsigned int SVE_UNPACK_TWICE = 16;
3606 SVE_EXT_COST + SVE_FCVT_COST},
3611 SVE_EXT_COST + SVE_FCVT_COST},
3618 SVE_EXT_COST + SVE_FCVT_COST},
3622 SVE_EXT_COST + SVE_FCVT_COST},
3628 SVE_EXT_COST + SVE_FCVT_COST},
3631 SVE_EXT_COST + SVE_FCVT_COST},
3636 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3638 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3648 SVE_EXT_COST + SVE_FCVT_COST},
3653 SVE_EXT_COST + SVE_FCVT_COST},
3666 SVE_EXT_COST + SVE_FCVT_COST},
3670 SVE_EXT_COST + SVE_FCVT_COST},
3682 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3684 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3686 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3688 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3692 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3694 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3710 SVE_EXT_COST + SVE_FCVT_COST},
3715 SVE_EXT_COST + SVE_FCVT_COST},
3726 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3728 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3730 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3732 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3734 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3736 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3740 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3742 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3744 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3746 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3971 if (ST->hasFullFP16())
3983 Src->getScalarType(), CCH,
CostKind) +
3991 ST->isSVEorStreamingSVEAvailable() &&
3992 TLI->getTypeAction(Src->getContext(), SrcTy) ==
3994 TLI->getTypeAction(Dst->getContext(), DstTy) ==
4003 Opcode, LegalTy, Src, CCH,
CostKind,
I);
4006 return Part1 + Part2;
4013 ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))
4025 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
4038 CostKind, Index,
nullptr,
nullptr);
4042 auto DstVT = TLI->getValueType(
DL, Dst);
4043 auto SrcVT = TLI->getValueType(
DL, Src);
4048 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
4054 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
4064 case Instruction::SExt:
4069 case Instruction::ZExt:
4070 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
4083 return Opcode == Instruction::PHI ? 0 : 1;
4092 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4101 if (!LT.second.isVector())
4106 if (LT.second.isFixedLengthVector()) {
4107 unsigned Width = LT.second.getVectorNumElements();
4108 Index = Index % Width;
4123 if (ST->hasFastLD1Single())
4135 : ST->getVectorInsertExtractBaseCost() + 1;
4159 auto ExtractCanFuseWithFmul = [&]() {
4166 auto IsAllowedScalarTy = [&](
const Type *
T) {
4167 return T->isFloatTy() ||
T->isDoubleTy() ||
4168 (
T->isHalfTy() && ST->hasFullFP16());
4172 auto IsUserFMulScalarTy = [](
const Value *EEUser) {
4175 return BO && BO->getOpcode() == BinaryOperator::FMul &&
4176 !BO->getType()->isVectorTy();
4181 auto IsExtractLaneEquivalentToZero = [&](
unsigned Idx,
unsigned EltSz) {
4185 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
4194 DenseMap<User *, unsigned> UserToExtractIdx;
4195 for (
auto *U :
Scalar->users()) {
4196 if (!IsUserFMulScalarTy(U))
4200 UserToExtractIdx[
U];
4202 if (UserToExtractIdx.
empty())
4204 for (
auto &[S, U, L] : ScalarUserAndIdx) {
4205 for (
auto *U : S->users()) {
4206 if (UserToExtractIdx.
contains(U)) {
4208 auto *Op0 =
FMul->getOperand(0);
4209 auto *Op1 =
FMul->getOperand(1);
4210 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
4211 UserToExtractIdx[
U] =
L;
4217 for (
auto &[U, L] : UserToExtractIdx) {
4229 return !EE->users().empty() &&
all_of(EE->users(), [&](
const User *U) {
4230 if (!IsUserFMulScalarTy(U))
4235 const auto *BO = cast<BinaryOperator>(U);
4236 const auto *OtherEE = dyn_cast<ExtractElementInst>(
4237 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
4239 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
4242 return IsExtractLaneEquivalentToZero(
4243 cast<ConstantInt>(OtherEE->getIndexOperand())
4246 OtherEE->getType()->getScalarSizeInBits());
4254 if (Opcode == Instruction::ExtractElement && (
I || Scalar) &&
4255 ExtractCanFuseWithFmul())
4260 :
ST->getVectorInsertExtractBaseCost();
4269 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
4272 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr,
4278 Value *Scalar,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4280 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr, Scalar,
4281 ScalarUserAndIdx, VIC);
4288 return getVectorInstrCostHelper(
I.getOpcode(), Val,
CostKind, Index, &
I,
4295 unsigned Index)
const {
4307 : ST->getVectorInsertExtractBaseCost() + 1;
4316 if (Ty->getElementType()->isFloatingPointTy())
4319 unsigned VecInstCost =
4321 return DemandedElts.
popcount() * (Insert + Extract) * VecInstCost;
4328 if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
4329 return std::nullopt;
4330 if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
4331 return std::nullopt;
4333 if (CanUseSVE && ST->hasSVEB16B16() && ST->isNonStreamingSVEorSME2Available())
4334 return std::nullopt;
4341 Cost += InstCost(PromotedTy);
4364 Op2Info, Args, CxtI);
4368 int ISD = TLI->InstructionOpcodeToISD(Opcode);
4375 Ty,
CostKind, Op1Info, Op2Info,
true,
4378 [&](
Type *PromotedTy) {
4382 return *PromotedCost;
4385 if (Ty->getScalarType()->isFP128Ty())
4393 if (
Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {
4409 if (LT.second == MVT::v2i64) {
4489 auto VT = TLI->getValueType(
DL, Ty);
4490 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
4494 : (3 * AsrCost + AddCost);
4496 return MulCost + AsrCost + 2 * AddCost;
4498 }
else if (VT.isVector()) {
4508 if (Ty->isScalableTy() && ST->hasSVE())
4509 Cost += 2 * AsrCost;
4514 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
4518 }
else if (LT.second == MVT::v2i64) {
4519 return VT.getVectorNumElements() *
4526 if (Ty->isScalableTy() && ST->hasSVE())
4527 return MulCost + 2 * AddCost + 2 * AsrCost;
4528 return 2 * MulCost + AddCost + AsrCost + UsraCost;
4533 LT.second.isFixedLengthVector()) {
4543 return ExtractCost + InsertCost +
4551 auto VT = TLI->getValueType(
DL, Ty);
4567 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
4568 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
4569 LT.second == MVT::nxv16i8;
4570 bool Is128bit = LT.second.is128BitVector();
4582 (HasMULH ? 0 : ShrCost) +
4583 AddCost * 2 + ShrCost;
4584 return DivCost + (
ISD ==
ISD::UREM ? MulCost + AddCost : 0);
4591 if (!VT.isVector() && VT.getSizeInBits() > 64)
4595 Opcode, Ty,
CostKind, Op1Info, Op2Info);
4597 if (TLI->isOperationLegalOrCustom(
ISD, LT.second) && ST->hasSVE()) {
4601 Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
4611 if (
nullptr != Entry)
4616 if (LT.second.getScalarType() == MVT::i8)
4618 else if (LT.second.getScalarType() == MVT::i16)
4630 Opcode, Ty->getScalarType(),
CostKind, Op1Info, Op2Info);
4631 return (4 + DivCost) * VTy->getNumElements();
4637 -1,
nullptr,
nullptr);
4660 if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
4661 (Ty->isHalfTy() && ST->hasFullFP16())) &&
4670 if (!Ty->getScalarType()->isFP128Ty())
4677 if (!Ty->getScalarType()->isFP128Ty())
4678 return 2 * LT.first;
4685 if (!Ty->isVectorTy())
4701 int MaxMergeDistance = 64;
4705 return NumVectorInstToHideOverhead;
4715 unsigned Opcode1,
unsigned Opcode2)
const {
4718 if (!
Sched.hasInstrSchedModel())
4722 Sched.getSchedClassDesc(
TII->get(Opcode1).getSchedClass());
4724 Sched.getSchedClassDesc(
TII->get(Opcode2).getSchedClass());
4730 "Cannot handle variant scheduling classes without an MI");
4746 const int AmortizationCost = 20;
4754 VecPred = CurrentPred;
4762 static const auto ValidMinMaxTys = {
4763 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
4764 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
4765 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
4769 (ST->hasFullFP16() &&
4775 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4776 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4777 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4778 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4779 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4780 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4781 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4782 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4783 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4784 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4785 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
4787 EVT SelCondTy = TLI->getValueType(
DL, CondTy);
4788 EVT SelValTy = TLI->getValueType(
DL, ValTy);
4797 if (Opcode == Instruction::FCmp) {
4799 ValTy,
CostKind, Op1Info, Op2Info,
false,
4801 false, [&](
Type *PromotedTy) {
4813 return *PromotedCost;
4817 if (LT.second.getScalarType() != MVT::f64 &&
4818 LT.second.getScalarType() != MVT::f32 &&
4819 LT.second.getScalarType() != MVT::f16)
4824 unsigned Factor = 1;
4825 if (!CondTy->isVectorTy() &&
4839 AArch64::FCMEQv4f32))
4851 TLI->isTypeLegal(TLI->getValueType(
DL, ValTy)) &&
4870 Op1Info, Op2Info,
I);
4876 if (ST->requiresStrictAlign()) {
4881 Options.AllowOverlappingLoads =
true;
4882 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
4887 Options.LoadSizes = {8, 4, 2, 1};
4888 Options.AllowedTailExpansions = {3, 5, 6};
4893 return ST->hasSVE();
4899 switch (MICA.
getID()) {
4900 case Intrinsic::masked_scatter:
4901 case Intrinsic::masked_gather:
4903 case Intrinsic::masked_load:
4904 case Intrinsic::masked_expandload:
4905 case Intrinsic::masked_store:
4919 if (!LT.first.isValid())
4924 if (VT->getElementType()->isIntegerTy(1))
4935 if (MICA.
getID() == Intrinsic::masked_expandload) {
4951 if (LT.first > 1 && LT.second.getScalarSizeInBits() > 8)
4952 return MemOpCost * 2;
4961 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4962 "Should be called on only load or stores.");
4964 case Instruction::Load:
4967 return ST->getGatherOverhead();
4969 case Instruction::Store:
4972 return ST->getScatterOverhead();
4983 unsigned Opcode = (MICA.
getID() == Intrinsic::masked_gather ||
4984 MICA.
getID() == Intrinsic::vp_gather)
4986 : Instruction::Store;
4996 if (!LT.first.isValid())
5000 if (!LT.second.isVector() ||
5002 VT->getElementType()->isIntegerTy(1))
5012 ElementCount LegalVF = LT.second.getVectorElementCount();
5015 {TTI::OK_AnyValue, TTI::OP_None},
I);
5031 EVT VT = TLI->getValueType(
DL, Ty,
true);
5033 if (VT == MVT::Other ||
5039 if (!LT.first.isValid())
5049 (VTy->getElementType()->isIntegerTy(1) &&
5050 !VTy->getElementCount().isKnownMultipleOf(
5061 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
5062 LT.second.is128BitVector() && Alignment <
Align(16)) {
5068 const int AmortizationCost = 6;
5070 return LT.first * 2 * AmortizationCost;
5074 if (Ty->isPtrOrPtrVectorTy())
5079 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
5081 if (VT == MVT::v4i8)
5088 if (!
isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
5103 while (!TypeWorklist.
empty()) {
5125 bool UseMaskForCond,
bool UseMaskForGaps)
const {
5126 assert(Factor >= 2 &&
"Invalid interleave factor");
5141 if (!VecTy->
isScalableTy() && (UseMaskForCond || UseMaskForGaps))
5144 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
5145 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
5148 VecVTy->getElementCount().divideCoefficientBy(Factor));
5154 if (MinElts % Factor == 0 &&
5155 TLI->isLegalInterleavedAccessType(SubVecTy,
DL, UseScalable))
5156 return Factor * TLI->getNumInterleavedAccesses(SubVecTy,
DL, UseScalable);
5161 UseMaskForCond, UseMaskForGaps);
5168 for (
auto *
I : Tys) {
5169 if (!
I->isVectorTy())
5180 Align Alignment)
const {
5187 return (ST->isSVEAvailable() && ST->hasSVE2p2()) ||
5188 (ST->isSVEorStreamingSVEAvailable() && ST->hasSME2p2());
5192 return ST->getMaxInterleaveFactor();
5202 enum { MaxStridedLoads = 7 };
5204 int StridedLoads = 0;
5207 for (
const auto BB : L->blocks()) {
5208 for (
auto &
I : *BB) {
5214 if (L->isLoopInvariant(PtrValue))
5219 if (!LSCEVAddRec || !LSCEVAddRec->
isAffine())
5228 if (StridedLoads > MaxStridedLoads / 2)
5229 return StridedLoads;
5232 return StridedLoads;
5235 int StridedLoads = countStridedLoads(L, SE);
5237 <<
" strided loads\n");
5253 unsigned *FinalSize) {
5257 for (
auto *BB : L->getBlocks()) {
5258 for (
auto &
I : *BB) {
5264 if (!Cost.isValid())
5268 if (LoopCost > Budget)
5290 if (MaxTC > 0 && MaxTC <= 32)
5301 if (Blocks.
size() != 2)
5323 if (!L->isInnermost() || L->getNumBlocks() > 8)
5327 if (!L->getExitBlock())
5333 bool HasParellelizableReductions =
5334 L->getNumBlocks() == 1 &&
5335 any_of(L->getHeader()->phis(),
5337 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5340 if (HasParellelizableReductions &&
5362 if (HasParellelizableReductions) {
5373 if (Header == Latch) {
5376 unsigned Width = 10;
5382 unsigned MaxInstsPerLine = 16;
5384 unsigned BestUC = 1;
5385 unsigned SizeWithBestUC = BestUC *
Size;
5387 unsigned SizeWithUC = UC *
Size;
5388 if (SizeWithUC > 48)
5390 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5391 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5393 SizeWithBestUC = BestUC *
Size;
5403 for (
auto *BB : L->blocks()) {
5404 for (
auto &
I : *BB) {
5414 for (
auto *U :
I.users())
5416 LoadedValuesPlus.
insert(U);
5423 return LoadedValuesPlus.
contains(
SI->getOperand(0));
5449 auto *I = dyn_cast<Instruction>(V);
5450 return I && DependsOnLoopLoad(I, Depth + 1);
5457 DependsOnLoopLoad(
I, 0)) {
5473 if (L->getLoopDepth() > 1)
5484 for (
auto *BB : L->getBlocks()) {
5485 for (
auto &
I : *BB) {
5489 if (IsVectorized &&
I.getType()->isVectorTy())
5506 if (ST->isAppleMLike())
5508 else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
5530 !ST->getSchedModel().isOutOfOrder()) {
5553 bool CanCreate)
const {
5557 case Intrinsic::aarch64_neon_st1x2:
5558 case Intrinsic::aarch64_neon_st1x3:
5559 case Intrinsic::aarch64_neon_st1x4:
5560 case Intrinsic::aarch64_neon_st2:
5561 case Intrinsic::aarch64_neon_st3:
5562 case Intrinsic::aarch64_neon_st4: {
5565 if (!CanCreate || !ST)
5567 unsigned NumElts = Inst->
arg_size() - 1;
5568 if (ST->getNumElements() != NumElts)
5570 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5576 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5578 Res = Builder.CreateInsertValue(Res, L, i);
5582 case Intrinsic::aarch64_neon_ld1x2:
5583 case Intrinsic::aarch64_neon_ld1x3:
5584 case Intrinsic::aarch64_neon_ld1x4:
5585 case Intrinsic::aarch64_neon_ld2:
5586 case Intrinsic::aarch64_neon_ld3:
5587 case Intrinsic::aarch64_neon_ld4:
5588 if (Inst->
getType() == ExpectedType)
5599 case Intrinsic::aarch64_neon_ld1x2:
5600 case Intrinsic::aarch64_neon_ld1x3:
5601 case Intrinsic::aarch64_neon_ld1x4:
5602 case Intrinsic::aarch64_neon_ld2:
5603 case Intrinsic::aarch64_neon_ld3:
5604 case Intrinsic::aarch64_neon_ld4:
5605 Info.ReadMem =
true;
5606 Info.WriteMem =
false;
5609 case Intrinsic::aarch64_neon_st1x2:
5610 case Intrinsic::aarch64_neon_st1x3:
5611 case Intrinsic::aarch64_neon_st1x4:
5612 case Intrinsic::aarch64_neon_st2:
5613 case Intrinsic::aarch64_neon_st3:
5614 case Intrinsic::aarch64_neon_st4:
5615 Info.ReadMem =
false;
5616 Info.WriteMem =
true;
5625 case Intrinsic::aarch64_neon_ld1x2:
5626 case Intrinsic::aarch64_neon_st1x2:
5627 Info.MatchingId = Intrinsic::aarch64_neon_ld1x2;
5629 case Intrinsic::aarch64_neon_ld1x3:
5630 case Intrinsic::aarch64_neon_st1x3:
5631 Info.MatchingId = Intrinsic::aarch64_neon_ld1x3;
5633 case Intrinsic::aarch64_neon_ld1x4:
5634 case Intrinsic::aarch64_neon_st1x4:
5635 Info.MatchingId = Intrinsic::aarch64_neon_ld1x4;
5637 case Intrinsic::aarch64_neon_ld2:
5638 case Intrinsic::aarch64_neon_st2:
5639 Info.MatchingId = Intrinsic::aarch64_neon_ld2;
5641 case Intrinsic::aarch64_neon_ld3:
5642 case Intrinsic::aarch64_neon_st3:
5643 Info.MatchingId = Intrinsic::aarch64_neon_ld3;
5645 case Intrinsic::aarch64_neon_ld4:
5646 case Intrinsic::aarch64_neon_st4:
5647 Info.MatchingId = Intrinsic::aarch64_neon_ld4;
5659 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
5660 bool Considerable =
false;
5661 AllowPromotionWithoutCommonHeader =
false;
5664 Type *ConsideredSExtType =
5666 if (
I.getType() != ConsideredSExtType)
5670 for (
const User *U :
I.users()) {
5672 Considerable =
true;
5676 if (GEPInst->getNumOperands() > 2) {
5677 AllowPromotionWithoutCommonHeader =
true;
5682 return Considerable;
5733 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5743 return LegalizationCost + 2;
5753 LegalizationCost *= LT.first - 1;
5756 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5765 return LegalizationCost + 2;
5773 std::optional<FastMathFlags> FMF,
5789 return BaseCost + FixedVTy->getNumElements();
5806 MVT MTy = LT.second;
5807 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5855 MTy.
isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5856 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5868 return (LT.first - 1) +
Log2_32(NElts);
5873 return (LT.first - 1) + Entry->Cost;
5885 if (LT.first != 1) {
5891 ExtraCost *= LT.first - 1;
5894 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5895 return Cost + ExtraCost;
5903 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *VecTy,
5905 EVT VecVT = TLI->getValueType(
DL, VecTy);
5906 EVT ResVT = TLI->getValueType(
DL, ResTy);
5916 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5918 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5920 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5922 return (LT.first - 1) * 2 + 2;
5933 EVT VecVT = TLI->getValueType(
DL, VecTy);
5934 EVT ResVT = TLI->getValueType(
DL, ResTy);
5937 RedOpcode == Instruction::Add) {
5943 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5945 return LT.first + 2;
5980 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5981 ? TLI->getPromotedVTForPredicate(
EVT(LT.second))
5995 if (LT.second.getScalarType() == MVT::i1) {
6004 assert(Entry &&
"Illegal Type for Splice");
6005 LegalizationCost += Entry->Cost;
6006 return LegalizationCost * LT.first;
6010 unsigned Opcode,
Type *InputTypeA,
Type *InputTypeB,
Type *AccumType,
6019 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub &&
6020 Opcode != Instruction::FAdd && Opcode != Instruction::FSub) ||
6027 assert(FMF &&
"Missing FastMathFlags for floating-point partial reduction");
6028 if (!FMF->allowReassoc() || !FMF->allowContract())
6032 "FastMathFlags only apply to floating-point partial reductions");
6036 (!BinOp || (OpBExtend !=
TTI::PR_None && InputTypeB)) &&
6037 "Unexpected values for OpBExtend or InputTypeB");
6041 if (BinOp && ((*BinOp != Instruction::Mul && *BinOp != Instruction::FMul) ||
6042 InputTypeA != InputTypeB))
6045 bool IsUSDot = OpBExtend !=
TTI::PR_None && OpAExtend != OpBExtend;
6048 if (IsUSDot && !ST->hasMatMulInt8() && !ST->hasDotProd())
6061 auto TC = TLI->getTypeConversion(AccumVectorType->
getContext(),
6070 if (TLI->getTypeAction(AccumVectorType->
getContext(), TC.second) !=
6076 std::pair<InstructionCost, MVT> AccumLT =
6078 std::pair<InstructionCost, MVT> InputLT =
6082 auto IsSupported = [&](
bool SVEPred,
bool NEONPred) ->
bool {
6083 return (ST->isSVEorStreamingSVEAvailable() && SVEPred) ||
6084 (AccumLT.second.isFixedLengthVector() &&
6085 AccumLT.second.getSizeInBits() <= 128 && ST->isNeonAvailable() &&
6089 bool IsSub = Opcode == Instruction::Sub || Opcode == Instruction::FSub;
6097 if (AccumLT.second.getScalarType() == MVT::i32 &&
6098 InputLT.second.getScalarType() == MVT::i8) {
6100 if (!IsUSDot && IsSupported(
true, ST->hasDotProd()))
6101 return Cost + INegCost;
6103 if (IsUSDot && IsSupported(ST->hasMatMulInt8(), ST->hasMatMulInt8()))
6104 return Cost + INegCost;
6109 if (IsUSDot && IsSupported(
false, ST->hasDotProd()))
6110 return Cost * 3 + INegCost;
6113 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
6115 if (AccumLT.second.getScalarType() == MVT::i64 &&
6116 InputLT.second.getScalarType() == MVT::i16)
6117 return Cost + INegCost;
6120 if (AccumLT.second.getScalarType() == MVT::i32 &&
6121 InputLT.second.getScalarType() == MVT::i16 &&
6122 (ST->hasSVE2p1() || ST->hasSME2()) && !IsSub)
6125 if (AccumLT.second.getScalarType() == MVT::i64 &&
6126 InputLT.second.getScalarType() == MVT::i8)
6132 return Cost + INegCost;
6135 if (AccumLT.second.getScalarType() == MVT::i16 &&
6136 InputLT.second.getScalarType() == MVT::i8 &&
6137 (ST->hasSVE2p3() || ST->hasSME2p3()) && !IsSub)
6143 if (Opcode == Instruction::FAdd && !IsSub &&
6144 IsSupported(ST->hasSME2() || ST->hasSVE2p1(), ST->hasF16F32DOT()) &&
6145 AccumLT.second.getScalarType() == MVT::f32 &&
6146 InputLT.second.getScalarType() == MVT::f16)
6150 if (Ratio == 2 && !IsUSDot) {
6151 MVT InVT = InputLT.second.getScalarType();
6154 if (IsSupported(ST->hasSVE2() || ST->hasSME(),
true) &&
6159 if (IsSupported(ST->hasSVE2(), ST->hasFP16FML()) && InVT == MVT::f16)
6163 if (IsSupported(ST->hasSVE2p1() || ST->hasSME2(),
false) &&
6164 InVT == MVT::bf16 && IsSub)
6174 if (IsSupported(ST->hasBF16(), ST->hasBF16()) && InVT == MVT::bf16)
6175 return Cost * 2 + FNegCost;
6179 AccumType, VF, OpAExtend, OpBExtend,
6191 "Expected the Mask to match the return size if given");
6193 "Expected the same scalar types");
6199 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
6200 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
6201 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
6209 return std::max<InstructionCost>(1, LT.first / 4);
6217 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
6219 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
6222 unsigned TpNumElts = Mask.size();
6223 unsigned LTNumElts = LT.second.getVectorNumElements();
6224 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
6226 LT.second.getVectorElementCount());
6228 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>,
InstructionCost>
6230 for (
unsigned N = 0;
N < NumVecs;
N++) {
6234 unsigned Source1 = -1U, Source2 = -1U;
6235 unsigned NumSources = 0;
6236 for (
unsigned E = 0; E < LTNumElts; E++) {
6237 int MaskElt = (
N * LTNumElts + E < TpNumElts) ? Mask[
N * LTNumElts + E]
6246 unsigned Source = MaskElt / LTNumElts;
6247 if (NumSources == 0) {
6250 }
else if (NumSources == 1 && Source != Source1) {
6253 }
else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
6259 if (Source == Source1)
6261 else if (Source == Source2)
6262 NMask.
push_back(MaskElt % LTNumElts + LTNumElts);
6271 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
6282 NTp, NTp, NMask,
CostKind, 0,
nullptr, Args,
6285 Result.first->second = NCost;
6299 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
6300 if (LT.second.getFixedSizeInBits() >= 128 &&
6302 LT.second.getVectorNumElements() / 2) {
6305 if (Index == (
int)LT.second.getVectorNumElements() / 2)
6319 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6322 return M.value() < 0 || M.value() == (int)M.index();
6328 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6329 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6338 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6339 ST->isSVEorStreamingSVEAvailable() &&
6344 if (ST->isSVEorStreamingSVEAvailable() &&
6358 if (IsLoad && LT.second.isVector() &&
6360 LT.second.getVectorElementCount()))
6366 if (Mask.size() == 4 &&
6368 (SrcTy->getScalarSizeInBits() == 16 ||
6369 SrcTy->getScalarSizeInBits() == 32) &&
6370 all_of(Mask, [](
int E) {
return E < 8; }))
6376 if (LT.second.isFixedLengthVector() &&
6377 LT.second.getVectorNumElements() == Mask.size() &&
6383 (
isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6384 isTRNMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6385 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6386 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6387 LT.second.getVectorNumElements(), 16) ||
6388 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6389 LT.second.getVectorNumElements(), 32) ||
6390 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6391 LT.second.getVectorNumElements(), 64) ||
6394 [&Mask](
int M) {
return M < 0 || M == Mask[0]; })))
6523 return LT.first * Entry->Cost;
6532 LT.second.getSizeInBits() <= 128 && SubTp) {
6534 if (SubLT.second.isVector()) {
6535 int NumElts = LT.second.getVectorNumElements();
6536 int NumSubElts = SubLT.second.getVectorNumElements();
6537 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6543 if (IsExtractSubvector)
6560 if (
getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
6579 return ST->useFixedOverScalableIfEqualCost();
6583 return ST->getEpilogueVectorizationMinVF();
6618 unsigned NumInsns = 0;
6620 NumInsns += BB->size();
6630 int64_t Scale,
unsigned AddrSpace)
const {
6658 if (
I->getOpcode() == Instruction::Or &&
6662 if (
I->getOpcode() == Instruction::Add ||
6663 I->getOpcode() == Instruction::Sub)
6688 return all_equal(Shuf->getShuffleMask());
6695 bool AllowSplat =
false) {
6700 auto areTypesHalfed = [](
Value *FullV,
Value *HalfV) {
6701 auto *FullTy = FullV->
getType();
6702 auto *HalfTy = HalfV->getType();
6704 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6707 auto extractHalf = [](
Value *FullV,
Value *HalfV) {
6710 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6714 Value *S1Op1 =
nullptr, *S2Op1 =
nullptr;
6728 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6729 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6743 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6744 (M2Start != 0 && M2Start != (NumElements / 2)))
6746 if (S1Op1 && S2Op1 && M1Start != M2Start)
6756 return Ext->getType()->getScalarSizeInBits() ==
6757 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6771 Value *VectorOperand =
nullptr;
6788 if (!
GEP ||
GEP->getNumOperands() != 2)
6792 Value *Offsets =
GEP->getOperand(1);
6795 if (
Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6801 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6802 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6803 Ops.push_back(&
GEP->getOperandUse(1));
6839 switch (
II->getIntrinsicID()) {
6840 case Intrinsic::aarch64_neon_smull:
6841 case Intrinsic::aarch64_neon_umull:
6844 Ops.push_back(&
II->getOperandUse(0));
6845 Ops.push_back(&
II->getOperandUse(1));
6850 case Intrinsic::fma:
6851 case Intrinsic::fmuladd:
6858 Ops.push_back(&
II->getOperandUse(0));
6860 Ops.push_back(&
II->getOperandUse(1));
6863 case Intrinsic::aarch64_neon_sqdmull:
6864 case Intrinsic::aarch64_neon_sqdmulh:
6865 case Intrinsic::aarch64_neon_sqrdmulh:
6868 Ops.push_back(&
II->getOperandUse(0));
6870 Ops.push_back(&
II->getOperandUse(1));
6871 return !
Ops.empty();
6872 case Intrinsic::aarch64_neon_fmlal:
6873 case Intrinsic::aarch64_neon_fmlal2:
6874 case Intrinsic::aarch64_neon_fmlsl:
6875 case Intrinsic::aarch64_neon_fmlsl2:
6878 Ops.push_back(&
II->getOperandUse(1));
6880 Ops.push_back(&
II->getOperandUse(2));
6881 return !
Ops.empty();
6882 case Intrinsic::aarch64_sve_ptest_first:
6883 case Intrinsic::aarch64_sve_ptest_last:
6885 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6886 Ops.push_back(&
II->getOperandUse(0));
6887 return !
Ops.empty();
6888 case Intrinsic::aarch64_sme_write_horiz:
6889 case Intrinsic::aarch64_sme_write_vert:
6890 case Intrinsic::aarch64_sme_writeq_horiz:
6891 case Intrinsic::aarch64_sme_writeq_vert: {
6893 if (!Idx || Idx->getOpcode() != Instruction::Add)
6895 Ops.push_back(&
II->getOperandUse(1));
6898 case Intrinsic::aarch64_sme_read_horiz:
6899 case Intrinsic::aarch64_sme_read_vert:
6900 case Intrinsic::aarch64_sme_readq_horiz:
6901 case Intrinsic::aarch64_sme_readq_vert:
6902 case Intrinsic::aarch64_sme_ld1b_vert:
6903 case Intrinsic::aarch64_sme_ld1h_vert:
6904 case Intrinsic::aarch64_sme_ld1w_vert:
6905 case Intrinsic::aarch64_sme_ld1d_vert:
6906 case Intrinsic::aarch64_sme_ld1q_vert:
6907 case Intrinsic::aarch64_sme_st1b_vert:
6908 case Intrinsic::aarch64_sme_st1h_vert:
6909 case Intrinsic::aarch64_sme_st1w_vert:
6910 case Intrinsic::aarch64_sme_st1d_vert:
6911 case Intrinsic::aarch64_sme_st1q_vert:
6912 case Intrinsic::aarch64_sme_ld1b_horiz:
6913 case Intrinsic::aarch64_sme_ld1h_horiz:
6914 case Intrinsic::aarch64_sme_ld1w_horiz:
6915 case Intrinsic::aarch64_sme_ld1d_horiz:
6916 case Intrinsic::aarch64_sme_ld1q_horiz:
6917 case Intrinsic::aarch64_sme_st1b_horiz:
6918 case Intrinsic::aarch64_sme_st1h_horiz:
6919 case Intrinsic::aarch64_sme_st1w_horiz:
6920 case Intrinsic::aarch64_sme_st1d_horiz:
6921 case Intrinsic::aarch64_sme_st1q_horiz: {
6923 if (!Idx || Idx->getOpcode() != Instruction::Add)
6925 Ops.push_back(&
II->getOperandUse(3));
6928 case Intrinsic::aarch64_neon_pmull:
6931 Ops.push_back(&
II->getOperandUse(0));
6932 Ops.push_back(&
II->getOperandUse(1));
6934 case Intrinsic::aarch64_neon_pmull64:
6936 II->getArgOperand(1)))
6938 Ops.push_back(&
II->getArgOperandUse(0));
6939 Ops.push_back(&
II->getArgOperandUse(1));
6941 case Intrinsic::masked_gather:
6944 Ops.push_back(&
II->getArgOperandUse(0));
6946 case Intrinsic::masked_scatter:
6949 Ops.push_back(&
II->getArgOperandUse(1));
6956 auto ShouldSinkCondition = [](
Value *
Cond,
6961 if (
II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6965 Ops.push_back(&
II->getOperandUse(0));
6969 switch (
I->getOpcode()) {
6970 case Instruction::GetElementPtr:
6971 case Instruction::Add:
6972 case Instruction::Sub:
6974 for (
unsigned Op = 0;
Op <
I->getNumOperands(); ++
Op) {
6976 Ops.push_back(&
I->getOperandUse(
Op));
6981 case Instruction::Select: {
6982 if (!ShouldSinkCondition(
I->getOperand(0),
Ops))
6985 Ops.push_back(&
I->getOperandUse(0));
6988 case Instruction::UncondBr:
6990 case Instruction::CondBr: {
6994 Ops.push_back(&
I->getOperandUse(0));
6997 case Instruction::FMul:
7002 Ops.push_back(&
I->getOperandUse(0));
7004 Ops.push_back(&
I->getOperandUse(1));
7014 case Instruction::Xor:
7017 if (
I->getType()->isVectorTy() && ST->isNeonAvailable()) {
7019 ST->isSVEorStreamingSVEAvailable() && (ST->hasSVE2() || ST->hasSME());
7024 case Instruction::And:
7025 case Instruction::Or:
7028 if (
I->getOpcode() == Instruction::Or &&
7033 if (!(
I->getType()->isVectorTy() && ST->hasNEON()) &&
7036 for (
auto &
Op :
I->operands()) {
7048 Ops.push_back(&Not);
7049 Ops.push_back(&InsertElt);
7059 if (!
I->getType()->isVectorTy())
7060 return !
Ops.empty();
7062 switch (
I->getOpcode()) {
7063 case Instruction::Sub:
7064 case Instruction::Add: {
7073 Ops.push_back(&Ext1->getOperandUse(0));
7074 Ops.push_back(&Ext2->getOperandUse(0));
7077 Ops.push_back(&
I->getOperandUse(0));
7078 Ops.push_back(&
I->getOperandUse(1));
7082 case Instruction::Or: {
7085 if (ST->hasNEON()) {
7099 if (
I->getParent() != MainAnd->
getParent() ||
7104 if (
I->getParent() != IA->getParent() ||
7105 I->getParent() != IB->getParent())
7110 Ops.push_back(&
I->getOperandUse(0));
7111 Ops.push_back(&
I->getOperandUse(1));
7120 case Instruction::Mul: {
7121 auto ShouldSinkSplatForIndexedVariant = [](
Value *V) {
7124 if (Ty->isScalableTy())
7128 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
7131 int NumZExts = 0, NumSExts = 0;
7132 for (
auto &
Op :
I->operands()) {
7139 auto *ExtOp = Ext->getOperand(0);
7140 if (
isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
7141 Ops.push_back(&Ext->getOperandUse(0));
7149 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
7150 I->getType()->getScalarSizeInBits())
7187 if (!ElementConstant || !ElementConstant->
isZero())
7190 unsigned Opcode = OperandInstr->
getOpcode();
7191 if (Opcode == Instruction::SExt)
7193 else if (Opcode == Instruction::ZExt)
7198 unsigned Bitwidth =
I->getType()->getScalarSizeInBits();
7208 Ops.push_back(&Insert->getOperandUse(1));
7214 if (!
Ops.empty() && (NumSExts == 2 || NumZExts == 2))
7218 if (!ShouldSinkSplatForIndexedVariant(
I))
7223 Ops.push_back(&
I->getOperandUse(0));
7225 Ops.push_back(&
I->getOperandUse(1));
7227 return !
Ops.empty();
7229 case Instruction::FMul: {
7231 if (
I->getType()->isScalableTy())
7232 return !
Ops.empty();
7236 return !
Ops.empty();
7240 Ops.push_back(&
I->getOperandUse(0));
7242 Ops.push_back(&
I->getOperandUse(1));
7243 return !
Ops.empty();
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
const HexagonInstrInfo * TII
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the architecture features are not...
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
bool preferTailFoldingOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
APInt getPriorityMask(const Function &F) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedExpandLoad(Type *DataTy, Align Alignment) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool noSignedZeros() const
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
bool isFixedLengthVector() const
MVT getVectorElementType() const
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
LLVM_ABI APInt getCpuSupportsMask(ArrayRef< StringRef > Features)
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
LLVM_ABI Libcall getPOW(EVT RetVT)
getPOW - Return the POW_* value for the given types, or UNKNOWN_LIBCALL if there is none.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ FSub
Subtraction of floats.
@ FAddChainWithSubs
A chain of fadds and fsubs.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y are an integer type, one is the current recurrence value, and the other is an arbitrary value.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...