#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64tti"

    "sve-prefer-fixed-over-scalable-if-equal", cl::Hidden);

    "Penalty of calling a function that requires a change to PSTATE.SM"));

    cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"));

    cl::desc("The cost of a histcnt instruction"));

    cl::desc("The number of instructions to search for a redundant dmb"));

    cl::desc("Threshold for forced unrolling of small loops in AArch64"));
class TailFoldingOption {
  bool NeedsDefault = true;

  void setNeedsDefault(bool V) { NeedsDefault = V; }

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
    Bits &= ~DisableBits;
    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              "  (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";
  void operator=(const std::string &Val) {
    setNeedsDefault(false);

    StringRef(Val).split(TailFoldTypes, '+', -1, false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);
    else
      setInitialBits(TailFoldingOpts::Disabled);
    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
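      // "no*" flags win over their positive counterparts: disable bits are
      // masked out of the final value last (Bits &= ~DisableBits above).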
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
      TTI->isMultiversionedFunction(F) ? "fmv-features" : "target-features";
  StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
  FeatureStr.split(Features, ",");

  return F.hasFnAttribute("fmv-features");
      AArch64::FeatureExecuteOnly,
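  // Features in the "inline inverse" set (e.g. execute-only above) are
  // flipped on both sides before the subset check, inverting the usual
  // direction: for those bits, the caller having the feature requires the
  // callee to have it too, rather than the other way round.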
  FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
  FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;

  return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;

  auto FVTy = dyn_cast<FixedVectorType>(Ty);
         FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
                                           unsigned DefaultCallPenalty) const {

  if (F == Call.getCaller())

  return DefaultCallPenalty;

         ST->isSVEorStreamingSVEAvailable() &&
         !ST->disableMaximizeScalableBandwidth();
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {

  return std::max<InstructionCost>(1, Cost);
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  unsigned ImmIdx = ~0U;
  case Instruction::GetElementPtr:
  case Instruction::Store:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    int NumConstants = (BitSize + 63) / 64;
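    // A BitSize-bit immediate is materialised in 64-bit chunks (MOVZ/MOVK
    // style), so its cost is proportional to ceil(BitSize / 64).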
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)

  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    int NumConstants = (BitSize + 63) / 64;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
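    // The checks above treat stackmap/patchpoint/statepoint immediates as
    // free when they are among the leading fixed operands (ID, shadow bytes,
    // etc.) or fit in a signed 64-bit value: such operands are recorded in
    // the side table rather than materialised as code.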
  if (TyWidth == 32 || TyWidth == 64)

  unsigned TotalHistCnts = 1;

    unsigned EC = VTy->getElementCount().getKnownMinValue();

    unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;

    if (EC == 2 || (LegalEltSize == 32 && EC == 4))

    TotalHistCnts = EC / NaturalVectorWidth;
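    // Wider vectors are costed as several "natural" width histogram
    // operations: the element count is divided by the widest count a single
    // HISTCNT can handle for the legalised element size.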
  switch (ICA.getID()) {
  case Intrinsic::experimental_vector_histogram_add: {
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8,    MVT::v16i8,  MVT::v4i16,
                                        MVT::v8i16,   MVT::v2i32,  MVT::v4i32,
                                        MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
    if (LT.second == MVT::v2i64)
  case Intrinsic::scmp:
  case Intrinsic::ucmp: {
        {Intrinsic::scmp, MVT::i32, 3},
        {Intrinsic::scmp, MVT::i64, 3},
        {Intrinsic::scmp, MVT::v8i8, 3},
        {Intrinsic::scmp, MVT::v16i8, 3},
        {Intrinsic::scmp, MVT::v4i16, 3},
        {Intrinsic::scmp, MVT::v8i16, 3},
        {Intrinsic::scmp, MVT::v2i32, 3},
        {Intrinsic::scmp, MVT::v4i32, 3},
        {Intrinsic::scmp, MVT::v1i64, 3},
        {Intrinsic::scmp, MVT::v2i64, 3},
      return Entry->Cost * LT.first;
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    static const auto ValidSatTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
      return LT.first * Instrs;

    if (ST->isSVEAvailable() && VectorSize >= 128 && isPowerOf2_64(VectorSize))
      return LT.first * Instrs;
  case Intrinsic::abs: {
    static const auto ValidAbsTys = {MVT::v8i8,    MVT::v16i8,   MVT::v4i16,
                                     MVT::v8i16,   MVT::v2i32,   MVT::v4i32,
                                     MVT::v2i64,   MVT::nxv16i8, MVT::nxv8i16,
                                     MVT::nxv4i32, MVT::nxv2i64};
  case Intrinsic::bswap: {
    static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
                                     MVT::v4i32, MVT::v2i64};
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
  case Intrinsic::fmuladd: {
        (EltTy->isHalfTy() && ST->hasFullFP16()))
  case Intrinsic::stepvector: {
      Cost += AddCost * (LT.first - 1);
  case Intrinsic::vector_extract:
  case Intrinsic::vector_insert: {
    bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
    EVT SubVecVT = IsExtract ? getTLI()->getValueType(DL, RetTy)
        getTLI()->getTypeConversion(C, SubVecVT);
        getTLI()->getTypeConversion(C, VecVT);
  case Intrinsic::bitreverse: {
        {Intrinsic::bitreverse, MVT::i32, 1},
        {Intrinsic::bitreverse, MVT::i64, 1},
        {Intrinsic::bitreverse, MVT::v8i8, 1},
        {Intrinsic::bitreverse, MVT::v16i8, 1},
        {Intrinsic::bitreverse, MVT::v4i16, 2},
        {Intrinsic::bitreverse, MVT::v8i16, 2},
        {Intrinsic::bitreverse, MVT::v2i32, 2},
        {Intrinsic::bitreverse, MVT::v4i32, 2},
        {Intrinsic::bitreverse, MVT::v1i64, 2},
        {Intrinsic::bitreverse, MVT::v2i64, 2},
    if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||
        TLI->getValueType(DL, RetTy, true) == MVT::i16)
      return LegalisationCost.first * Entry->Cost + 1;
    return LegalisationCost.first * Entry->Cost;
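    // i8/i16 bitreverse is performed in a 32-bit register (RBIT), so the
    // result must be shifted back down, hence the extra +1 above.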
  case Intrinsic::ctpop: {
    if (!ST->hasNEON()) {
                             RetTy->getScalarSizeInBits()
    return LT.first * Entry->Cost + ExtraCost;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
        {Intrinsic::sadd_with_overflow, MVT::i8, 3},
        {Intrinsic::uadd_with_overflow, MVT::i8, 3},
        {Intrinsic::sadd_with_overflow, MVT::i16, 3},
        {Intrinsic::uadd_with_overflow, MVT::i16, 3},
        {Intrinsic::sadd_with_overflow, MVT::i32, 1},
        {Intrinsic::uadd_with_overflow, MVT::i32, 1},
        {Intrinsic::sadd_with_overflow, MVT::i64, 1},
        {Intrinsic::uadd_with_overflow, MVT::i64, 1},
        {Intrinsic::ssub_with_overflow, MVT::i8, 3},
        {Intrinsic::usub_with_overflow, MVT::i8, 3},
        {Intrinsic::ssub_with_overflow, MVT::i16, 3},
        {Intrinsic::usub_with_overflow, MVT::i16, 3},
        {Intrinsic::ssub_with_overflow, MVT::i32, 1},
        {Intrinsic::usub_with_overflow, MVT::i32, 1},
        {Intrinsic::ssub_with_overflow, MVT::i64, 1},
        {Intrinsic::usub_with_overflow, MVT::i64, 1},
        {Intrinsic::smul_with_overflow, MVT::i8, 5},
        {Intrinsic::umul_with_overflow, MVT::i8, 4},
        {Intrinsic::smul_with_overflow, MVT::i16, 5},
        {Intrinsic::umul_with_overflow, MVT::i16, 4},
        {Intrinsic::smul_with_overflow, MVT::i32, 2},
        {Intrinsic::umul_with_overflow, MVT::i32, 2},
        {Intrinsic::smul_with_overflow, MVT::i64, 3},
        {Intrinsic::umul_with_overflow, MVT::i64, 3},
    EVT MTy = TLI->getValueType(DL, RetTy->getContainedType(0), true);
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
    EVT MTy = TLI->getValueType(DL, RetTy);
    if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
         LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
         LT.second == MVT::v2f64)) {
          (LT.second == MVT::f64 && MTy == MVT::i32) ||
          (LT.second == MVT::f32 && MTy == MVT::i64)))
    if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
    if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
        (LT.second == MVT::f16 && MTy == MVT::i64) ||
        ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
    if ((LT.second.getScalarType() == MVT::f32 ||
         LT.second.getScalarType() == MVT::f64 ||
         LT.second.getScalarType() == MVT::f16) &&
      if (LT.second.isVector())
                                        LegalTy, {LegalTy, LegalTy});
                                        LegalTy, {LegalTy, LegalTy});
      return LT.first * Cost +
             ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
    RetTy = RetTy->getScalarType();
    if (LT.second.isVector()) {
    return LT.first * Cost;
  case Intrinsic::fshl:
  case Intrinsic::fshr: {
    if (RetTy->isIntegerTy() && ICA.getArgs()[0] == ICA.getArgs()[1] &&
        (RetTy->getPrimitiveSizeInBits() == 32 ||
         RetTy->getPrimitiveSizeInBits() == 64)) {
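      // A funnel shift with both inputs equal is a rotate, which a legal 32-
      // or 64-bit scalar can do in a single instruction (ROR/EXTR).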
        {Intrinsic::fshl, MVT::v4i32, 2},
        {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
        {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
        {Intrinsic::fshl, MVT::v8i8, 2},  {Intrinsic::fshl, MVT::v4i16, 2}};
      return LegalisationCost.first * Entry->Cost;

    if (!RetTy->isIntegerTy())

    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
                       RetTy->getScalarSizeInBits() < 64) ||
                      (RetTy->getScalarSizeInBits() % 64 != 0);
    unsigned ExtraCost = HigherCost ? 1 : 0;
    if (RetTy->getScalarSizeInBits() == 32 ||
        RetTy->getScalarSizeInBits() == 64)
    else if (HigherCost)

    return TyL.first + ExtraCost;
  case Intrinsic::get_active_lane_mask: {
    EVT RetVT = getTLI()->getValueType(DL, RetTy);
    if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))

    if (RetTy->isScalableTy()) {
      if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
    if (ST->hasSVE2p1() || ST->hasSME2()) {
      return Cost + (SplitCost * (Cost - 1));
  case Intrinsic::experimental_vector_match: {
    unsigned SearchSize = NeedleTy->getNumElements();
    if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
  case Intrinsic::cttz: {
    if (LT.second == MVT::v8i8 || LT.second == MVT::v16i8)
      return LT.first * 2;
    if (LT.second == MVT::v4i16 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v2i32 || LT.second == MVT::v4i32)
      return LT.first * 3;
  case Intrinsic::experimental_cttz_elts: {
    if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
  case Intrinsic::loop_dependence_raw_mask:
  case Intrinsic::loop_dependence_war_mask: {
    if (ST->hasSVE2() || ST->hasSME()) {
      EVT VecVT = getTLI()->getValueType(DL, RetTy);
      unsigned EltSizeInBytes =
  case Intrinsic::experimental_vector_extract_last_active:
    if (ST->isSVEorStreamingSVEAvailable()) {
  case Intrinsic::pow: {
    EVT VT = getTLI()->getValueType(DL, RetTy);
    bool HasLibcall = getTLI()->getLibcallImpl(LC) != RTLIB::Unsupported;
      bool Is025 = ExpF->getValueAPF().isExactlyValue(0.25);
      bool Is075 = ExpF->getValueAPF().isExactlyValue(0.75);
        return (Sqrt * 2) + FMul;
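      // pow(x, 0.25) and pow(x, 0.75) expand via square roots:
      // x^0.25 = sqrt(sqrt(x)) and x^0.75 = sqrt(x) * sqrt(sqrt(x)), so the
      // cost is modelled as two sqrts plus an fmul.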
  case Intrinsic::sqrt:
  case Intrinsic::fabs:
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::rint:
  case Intrinsic::roundeven:
  case Intrinsic::trunc:
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum: {

  auto RequiredType = II.getType();

  assert(PN && "Expected Phi Node!");

  if (!PN->hasOneUse())
    return std::nullopt;
  for (Value *IncValPhi : PN->incoming_values()) {
        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())
      return std::nullopt;

  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
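  // Every incoming value was a convert_to_svbool of the same source type, so
  // the PHI is rebuilt over the unconverted values and a single conversion is
  // applied to its result instead of one per predecessor.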
  return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();

  return GoverningPredicateIdx;

  GoverningPredicateIdx = Index;

  return UndefIntrinsic;

  UndefIntrinsic = IID;

  return ResultLanes == InactiveLanesTakenFromOperand;

  return OperandIdxForInactiveLanes;

  assert(ResultLanes == Uninitialized && "Cannot set property twice!");
  ResultLanes = InactiveLanesTakenFromOperand;
  OperandIdxForInactiveLanes = Index;

  return ResultLanes == InactiveLanesAreNotDefined;

  assert(ResultLanes == Uninitialized && "Cannot set property twice!");
  ResultLanes = InactiveLanesAreNotDefined;

  return ResultLanes == InactiveLanesAreUnused;

  assert(ResultLanes == Uninitialized && "Cannot set property twice!");
  ResultLanes = InactiveLanesAreUnused;

  ResultIsZeroInitialized = true;

  return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();

  return OperandIdxWithNoActiveLanes;

  OperandIdxWithNoActiveLanes = Index;

  unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();

  unsigned IROpcode = 0;

  enum PredicationStyle {
    Uninitialized,
    InactiveLanesTakenFromOperand,
    InactiveLanesAreNotDefined,
    InactiveLanesAreUnused

  bool ResultIsZeroInitialized = false;
  unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
  unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
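  // Together these members form a declarative description of one SVE
  // intrinsic: where its governing predicate lives, the plain IR opcode it
  // corresponds to, and how its inactive lanes behave (copied from an
  // operand, undefined, or unused). The predication-aware combines later in
  // this file are driven entirely by these properties.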
  return !isa<ScalableVectorType>(V->getType());

  case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
  case Intrinsic::aarch64_sve_fcvt_f16f32:
  case Intrinsic::aarch64_sve_fcvt_f16f64:
  case Intrinsic::aarch64_sve_fcvt_f32f16:
  case Intrinsic::aarch64_sve_fcvt_f32f64:
  case Intrinsic::aarch64_sve_fcvt_f64f16:
  case Intrinsic::aarch64_sve_fcvt_f64f32:
  case Intrinsic::aarch64_sve_fcvtlt_f32f16:
  case Intrinsic::aarch64_sve_fcvtlt_f64f32:
  case Intrinsic::aarch64_sve_fcvtx_f32f64:
  case Intrinsic::aarch64_sve_fcvtzs:
  case Intrinsic::aarch64_sve_fcvtzs_i32f16:
  case Intrinsic::aarch64_sve_fcvtzs_i32f64:
  case Intrinsic::aarch64_sve_fcvtzs_i64f16:
  case Intrinsic::aarch64_sve_fcvtzs_i64f32:
  case Intrinsic::aarch64_sve_fcvtzu:
  case Intrinsic::aarch64_sve_fcvtzu_i32f16:
  case Intrinsic::aarch64_sve_fcvtzu_i32f64:
  case Intrinsic::aarch64_sve_fcvtzu_i64f16:
  case Intrinsic::aarch64_sve_fcvtzu_i64f32:
  case Intrinsic::aarch64_sve_revb:
  case Intrinsic::aarch64_sve_revh:
  case Intrinsic::aarch64_sve_revw:
  case Intrinsic::aarch64_sve_revd:
  case Intrinsic::aarch64_sve_scvtf:
  case Intrinsic::aarch64_sve_scvtf_f16i32:
  case Intrinsic::aarch64_sve_scvtf_f16i64:
  case Intrinsic::aarch64_sve_scvtf_f32i64:
  case Intrinsic::aarch64_sve_scvtf_f64i32:
  case Intrinsic::aarch64_sve_ucvtf:
  case Intrinsic::aarch64_sve_ucvtf_f16i32:
  case Intrinsic::aarch64_sve_ucvtf_f16i64:
  case Intrinsic::aarch64_sve_ucvtf_f32i64:
  case Intrinsic::aarch64_sve_ucvtf_f64i32:

  case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
  case Intrinsic::aarch64_sve_fcvtnt_f16f32:
  case Intrinsic::aarch64_sve_fcvtnt_f32f64:
  case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
  case Intrinsic::aarch64_sve_fabd:
  case Intrinsic::aarch64_sve_fadd:
  case Intrinsic::aarch64_sve_fdiv:
  case Intrinsic::aarch64_sve_fmax:
  case Intrinsic::aarch64_sve_fmaxnm:
  case Intrinsic::aarch64_sve_fmin:
  case Intrinsic::aarch64_sve_fminnm:
  case Intrinsic::aarch64_sve_fmla:
  case Intrinsic::aarch64_sve_fmls:
  case Intrinsic::aarch64_sve_fmul:
  case Intrinsic::aarch64_sve_fmulx:
  case Intrinsic::aarch64_sve_fnmla:
  case Intrinsic::aarch64_sve_fnmls:
  case Intrinsic::aarch64_sve_fsub:
  case Intrinsic::aarch64_sve_add:
  case Intrinsic::aarch64_sve_mla:
  case Intrinsic::aarch64_sve_mls:
  case Intrinsic::aarch64_sve_mul:
  case Intrinsic::aarch64_sve_sabd:
  case Intrinsic::aarch64_sve_sdiv:
  case Intrinsic::aarch64_sve_smax:
  case Intrinsic::aarch64_sve_smin:
  case Intrinsic::aarch64_sve_smulh:
  case Intrinsic::aarch64_sve_sub:
  case Intrinsic::aarch64_sve_uabd:
  case Intrinsic::aarch64_sve_udiv:
  case Intrinsic::aarch64_sve_umax:
  case Intrinsic::aarch64_sve_umin:
  case Intrinsic::aarch64_sve_umulh:
  case Intrinsic::aarch64_sve_asr:
  case Intrinsic::aarch64_sve_lsl:
  case Intrinsic::aarch64_sve_lsr:
  case Intrinsic::aarch64_sve_and:
  case Intrinsic::aarch64_sve_bic:
  case Intrinsic::aarch64_sve_eor:
  case Intrinsic::aarch64_sve_orr:
  case Intrinsic::aarch64_sve_shsub:
  case Intrinsic::aarch64_sve_shsubr:
  case Intrinsic::aarch64_sve_sqrshl:
  case Intrinsic::aarch64_sve_sqshl:
  case Intrinsic::aarch64_sve_sqsub:
  case Intrinsic::aarch64_sve_srshl:
  case Intrinsic::aarch64_sve_uhsub:
  case Intrinsic::aarch64_sve_uhsubr:
  case Intrinsic::aarch64_sve_uqrshl:
  case Intrinsic::aarch64_sve_uqshl:
  case Intrinsic::aarch64_sve_uqsub:
  case Intrinsic::aarch64_sve_urshl:
  case Intrinsic::aarch64_sve_add_u:
  case Intrinsic::aarch64_sve_and_u:
  case Intrinsic::aarch64_sve_asr_u:
  case Intrinsic::aarch64_sve_eor_u:
  case Intrinsic::aarch64_sve_fadd_u:
  case Intrinsic::aarch64_sve_fdiv_u:
  case Intrinsic::aarch64_sve_fmul_u:
  case Intrinsic::aarch64_sve_fsub_u:
  case Intrinsic::aarch64_sve_lsl_u:
  case Intrinsic::aarch64_sve_lsr_u:
  case Intrinsic::aarch64_sve_mul_u:
  case Intrinsic::aarch64_sve_orr_u:
  case Intrinsic::aarch64_sve_sdiv_u:
  case Intrinsic::aarch64_sve_sub_u:
  case Intrinsic::aarch64_sve_udiv_u:
  case Intrinsic::aarch64_sve_addqv:
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_brka_z:
  case Intrinsic::aarch64_sve_brkb_z:
  case Intrinsic::aarch64_sve_brkn_z:
  case Intrinsic::aarch64_sve_brkpa_z:
  case Intrinsic::aarch64_sve_brkpb_z:
  case Intrinsic::aarch64_sve_cntp:
  case Intrinsic::aarch64_sve_compact:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_eorv:
  case Intrinsic::aarch64_sve_eorqv:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:
  case Intrinsic::aarch64_sve_orv:
  case Intrinsic::aarch64_sve_orqv:
  case Intrinsic::aarch64_sve_pnext:
  case Intrinsic::aarch64_sve_rdffr_z:
  case Intrinsic::aarch64_sve_saddv:
  case Intrinsic::aarch64_sve_uaddv:
  case Intrinsic::aarch64_sve_umaxv:
  case Intrinsic::aarch64_sve_umaxqv:
  case Intrinsic::aarch64_sve_cmpeq:
  case Intrinsic::aarch64_sve_cmpeq_wide:
  case Intrinsic::aarch64_sve_cmpge:
  case Intrinsic::aarch64_sve_cmpge_wide:
  case Intrinsic::aarch64_sve_cmpgt:
  case Intrinsic::aarch64_sve_cmpgt_wide:
  case Intrinsic::aarch64_sve_cmphi:
  case Intrinsic::aarch64_sve_cmphi_wide:
  case Intrinsic::aarch64_sve_cmphs:
  case Intrinsic::aarch64_sve_cmphs_wide:
  case Intrinsic::aarch64_sve_cmple_wide:
  case Intrinsic::aarch64_sve_cmplo_wide:
  case Intrinsic::aarch64_sve_cmpls_wide:
  case Intrinsic::aarch64_sve_cmplt_wide:
  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpne_wide:
  case Intrinsic::aarch64_sve_facge:
  case Intrinsic::aarch64_sve_facgt:
  case Intrinsic::aarch64_sve_fcmpeq:
  case Intrinsic::aarch64_sve_fcmpge:
  case Intrinsic::aarch64_sve_fcmpgt:
  case Intrinsic::aarch64_sve_fcmpne:
  case Intrinsic::aarch64_sve_fcmpuo:
  case Intrinsic::aarch64_sve_ld1:
  case Intrinsic::aarch64_sve_ld1_gather:
  case Intrinsic::aarch64_sve_ld1_gather_index:
  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ld1q_gather_index:
  case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
  case Intrinsic::aarch64_sve_ld1ro:
  case Intrinsic::aarch64_sve_ld1rq:
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
  case Intrinsic::aarch64_sve_ldff1:
  case Intrinsic::aarch64_sve_ldff1_gather:
  case Intrinsic::aarch64_sve_ldff1_gather_index:
  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ldnf1:
  case Intrinsic::aarch64_sve_ldnt1:
  case Intrinsic::aarch64_sve_ldnt1_gather:
  case Intrinsic::aarch64_sve_ldnt1_gather_index:
  case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:

  case Intrinsic::aarch64_sve_prf:
  case Intrinsic::aarch64_sve_prfb_gather_index:
  case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfd_gather_index:
  case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfh_gather_index:
  case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfw_gather_index:
  case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:

  case Intrinsic::aarch64_sve_st1_scatter:
  case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
  case Intrinsic::aarch64_sve_st1dq:
  case Intrinsic::aarch64_sve_st1q_scatter_index:
  case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
  case Intrinsic::aarch64_sve_st1wq:
  case Intrinsic::aarch64_sve_stnt1:
  case Intrinsic::aarch64_sve_stnt1_scatter:
  case Intrinsic::aarch64_sve_stnt1_scatter_index:
  case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:

  case Intrinsic::aarch64_sve_st2:
  case Intrinsic::aarch64_sve_st2q:
  case Intrinsic::aarch64_sve_st3:
  case Intrinsic::aarch64_sve_st3q:
  case Intrinsic::aarch64_sve_st4:
  case Intrinsic::aarch64_sve_st4q:
  Value *UncastedPred;
    Pred = UncastedPred;

    if (OrigPredTy->getMinNumElements() <=
                ->getMinNumElements())
      Pred = UncastedPred;

  return C && C->isAllOnesValue();

  if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
      Dup->getOperand(1) == Pg && isa<Constant>(Dup->getOperand(2)))
static std::optional<Instruction *>
  Value *Op1 = II.getOperand(1);
  Value *Op2 = II.getOperand(2);

    return std::nullopt;

  if (SimpleII == Inactive)

static std::optional<Instruction *>
    return std::nullopt;

  II.setCalledFunction(NewDecl);

  return std::nullopt;
static std::optional<Instruction *>
    return std::nullopt;

  auto IntrinsicID = BinOp->getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:
    return std::nullopt;

  auto BinOpPred = BinOp->getOperand(0);
  auto BinOpOp1 = BinOp->getOperand(1);
  auto BinOpOp2 = BinOp->getOperand(2);

      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
    return std::nullopt;

  auto PredOp = PredIntr->getOperand(0);
  if (PredOpTy != II.getType())
    return std::nullopt;

      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
  if (BinOpOp1 == BinOpOp2)
    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));

  auto NarrowedBinOp =
static std::optional<Instruction *>
    return BinOpCombine;

    return std::nullopt;

  Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;
    if (CursorVTy->getElementCount().getKnownMinValue() <
        IVTy->getElementCount().getKnownMinValue())
    if (Cursor->getType() == IVTy)
      EarliestReplacement = Cursor;
    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_to_svbool ||
                              IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_from_svbool))
    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
    Cursor = IntrinsicCursor->getOperand(0);

  if (!EarliestReplacement)
    return std::nullopt;
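  // The loop above walks backwards through chains of convert_to_svbool /
  // convert_from_svbool round-trips, remembering the earliest value whose
  // type already matches this intrinsic; it stops as soon as a narrowing
  // conversion would lose predicate lanes.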
  auto *OpPredicate = II.getOperand(0);

                          II.getArgOperand(2));

    return std::nullopt;

      II.getArgOperand(0), II.getArgOperand(2), uint64_t(0));
      II.getArgOperand(0));

    return std::nullopt;

  if (!SplatValue || !SplatValue->isZero())
    return std::nullopt;

      DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
    return std::nullopt;

  if (!DupQLaneIdx || !DupQLaneIdx->isZero())
    return std::nullopt;

  if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
    return std::nullopt;

    return std::nullopt;
    return std::nullopt;
    return std::nullopt;

  if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
    return std::nullopt;
  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;

  for (unsigned I = 0; I < NumElts; ++I) {
      return std::nullopt;
    PredicateBits |= 1 << (I * (16 / NumElts));

  if (PredicateBits == 0) {
    PFalse->takeName(&II);

  for (unsigned I = 0; I < 16; ++I)
    if ((PredicateBits & (1 << I)) != 0)

  unsigned PredSize = Mask & -Mask;

  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
      return std::nullopt;
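  // Past this point the constant predicate is known to have a bit set at
  // every multiple of PredSize (the lowest set bit of the mask), i.e. it
  // matches what a single PTRUE with the corresponding element-size pattern
  // would produce.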
                                     {PredType}, {PTruePat});
      Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
  auto *ConvertFromSVBool =
      {II.getType()}, {ConvertToSVBool});

  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(1);
  auto IntrinsicID = II.getIntrinsicID();
  bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;

    auto OpC = OldBinOp->getOpcode();
        OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());

  if (IsAfter && C && C->isNullValue()) {
    Extract->insertBefore(II.getIterator());
    Extract->takeName(&II);

    return std::nullopt;
  if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
    return std::nullopt;

  unsigned Idx = MinNumElts - 1;

  if (Idx >= PgVTy->getMinNumElements())
    return std::nullopt;

  Extract->insertBefore(II.getIterator());
  Extract->takeName(&II);

  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(2);

  if (!Ty->isIntegerTy())
    return std::nullopt;

    return std::nullopt;

      II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
      {II.getType()}, {AllPat});
static std::optional<Instruction *>
  if (Pattern == AArch64SVEPredPattern::all) {

  return MinNumElts && NumElts >= MinNumElts
                 II, ConstantInt::get(II.getType(), MinNumElts)))

static std::optional<Instruction *>
  if (!ST->isStreaming())
    return std::nullopt;

  Value *PgVal = II.getArgOperand(0);
  Value *OpVal = II.getArgOperand(1);

  if (PgVal == OpVal &&
      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
       II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {

  return std::nullopt;
  if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&

  if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
      ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
       (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
       (OpIID == Intrinsic::aarch64_sve_and_z) ||
       (OpIID == Intrinsic::aarch64_sve_bic_z) ||
       (OpIID == Intrinsic::aarch64_sve_eor_z) ||
       (OpIID == Intrinsic::aarch64_sve_nand_z) ||
       (OpIID == Intrinsic::aarch64_sve_nor_z) ||
       (OpIID == Intrinsic::aarch64_sve_orn_z) ||
       (OpIID == Intrinsic::aarch64_sve_orr_z))) {

  return std::nullopt;
template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
                     bool MergeIntoAddendOp) {
  Value *MulOp0, *MulOp1, *AddendOp, *Mul;
  if (MergeIntoAddendOp) {
    AddendOp = II.getOperand(1);
    Mul = II.getOperand(2);
  } else {
    AddendOp = II.getOperand(2);
    Mul = II.getOperand(1);
  }

    return std::nullopt;

  if (!Mul->hasOneUse())
    return std::nullopt;

  if (II.getType()->isFPOrFPVectorTy()) {
      return std::nullopt;
    return std::nullopt;

  if (MergeIntoAddendOp)
static std::optional<Instruction *>
  Value *Pred = II.getOperand(0);
  Value *PtrOp = II.getOperand(1);
  Type *VecTy = II.getType();

  Load->copyMetadata(II);

static std::optional<Instruction *>
  Value *VecOp = II.getOperand(0);
  Value *Pred = II.getOperand(1);
  Value *PtrOp = II.getOperand(2);

  Store->copyMetadata(II);
  case Intrinsic::aarch64_sve_fmul_u:
    return Instruction::BinaryOps::FMul;
  case Intrinsic::aarch64_sve_fadd_u:
    return Instruction::BinaryOps::FAdd;
  case Intrinsic::aarch64_sve_fsub_u:
    return Instruction::BinaryOps::FSub;
  default:
    return Instruction::BinaryOpsEnd;

static std::optional<Instruction *>
  if (II.isStrictFP())
    return std::nullopt;

  auto *OpPredicate = II.getOperand(0);
  if (BinOpCode == Instruction::BinaryOpsEnd ||
    return std::nullopt;

      BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());

                                           Intrinsic::aarch64_sve_mla>(
                                           Intrinsic::aarch64_sve_mad>(
  return std::nullopt;
static std::optional<Instruction *>
                                            Intrinsic::aarch64_sve_fmla>(IC, II,
                                            Intrinsic::aarch64_sve_fmad>(IC, II,
                                            Intrinsic::aarch64_sve_fmla>(IC, II,
  return std::nullopt;

static std::optional<Instruction *>
                                            Intrinsic::aarch64_sve_fmla>(IC, II,
                                            Intrinsic::aarch64_sve_fmad>(IC, II,
                                            Intrinsic::aarch64_sve_fmla_u>(

static std::optional<Instruction *>
                                            Intrinsic::aarch64_sve_fmls>(IC, II,
                                            Intrinsic::aarch64_sve_fnmsb>(
                                            Intrinsic::aarch64_sve_fmls>(IC, II,
  return std::nullopt;

static std::optional<Instruction *>
                                            Intrinsic::aarch64_sve_fmls>(IC, II,
                                            Intrinsic::aarch64_sve_fnmsb>(
                                            Intrinsic::aarch64_sve_fmls_u>(
                                            Intrinsic::aarch64_sve_mls>(
  return std::nullopt;
  Value *UnpackArg = II.getArgOperand(0);
  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
                  II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;

  return std::nullopt;

  auto *OpVal = II.getOperand(0);
  auto *OpIndices = II.getOperand(1);
      SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
    return std::nullopt;

  Type *RetTy = II.getType();
  constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
  constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;

  if ((match(II.getArgOperand(0),
    if (TyA == B->getType() &&
                                 TyA->getMinNumElements());

  return std::nullopt;

  if (match(II.getArgOperand(0),
        II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));
  return std::nullopt;
static std::optional<Instruction *>
  Value *Mask = II.getOperand(0);
  Value *BasePtr = II.getOperand(1);
  Value *Index = II.getOperand(2);

        BasePtr->getPointerAlignment(II.getDataLayout());
                                 BasePtr, IndexBase);
  return std::nullopt;

static std::optional<Instruction *>
  Value *Val = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *BasePtr = II.getOperand(2);
  Value *Index = II.getOperand(3);

        BasePtr->getPointerAlignment(II.getDataLayout());
                                 BasePtr, IndexBase);
  return std::nullopt;
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *DivVec = II.getOperand(2);

  if (!SplatConstantInt)
    return std::nullopt;

  if (DivisorValue == -1)
    return std::nullopt;
  if (DivisorValue == 1)

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
        Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});

  return std::nullopt;
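  // The combine above replaces signed division by a power of two with an SVE
  // ASRD (arithmetic shift right for divide); a negative power-of-two divisor
  // additionally negates the shifted result via a predicated NEG.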
  size_t VecSize = Vec.size();
  size_t HalfVecSize = VecSize / 2;

    if (*LHS != nullptr && *RHS != nullptr) {
    if (*LHS == nullptr && *RHS != nullptr)

  return std::nullopt;

    Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
    CurrentInsertElt = InsertElt->getOperand(0);

    return std::nullopt;

  for (size_t I = 0; I < Elts.size(); I++) {
    if (Elts[I] == nullptr)

  if (InsertEltChain == nullptr)
    return std::nullopt;

  unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
  unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
                                 IIScalableTy->getMinNumElements() /

  auto *WideShuffleMaskTy =
  auto NarrowBitcast =

  return std::nullopt;
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *Shift = II.getOperand(2);

  Value *AbsPred, *MergedValue;
    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

      {II.getType()}, {Pred, Vec, Shift});

  Value *Vec = II.getOperand(0);

    return std::nullopt;

  auto *NI = II.getNextNode();
    return !I->mayReadOrWriteMemory() && !I->mayHaveSideEffects();
  while (LookaheadThreshold-- && CanSkipOver(NI)) {
    auto *NIBB = NI->getParent();
    NI = NI->getNextNode();
    if (auto *SuccBB = NIBB->getUniqueSuccessor())
      NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();

  if (NextII && II.isIdenticalTo(NextII))

  return std::nullopt;
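  // The scan above walks forward (bounded by the dmb lookahead threshold)
  // over side-effect-free instructions, following unique successors, and
  // drops this barrier when an identical DMB follows with no intervening
  // memory access.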
                      {II.getType(), II.getOperand(0)->getType()},
                      {II.getOperand(0), II.getOperand(1)}));

  return std::nullopt;

  Value *Passthru = II.getOperand(0);
  auto *Mask = ConstantInt::get(Ty, MaskValue);

  return std::nullopt;

static std::optional<Instruction *>
  return std::nullopt;
std::optional<Instruction *>
  case Intrinsic::aarch64_dmb:
  case Intrinsic::aarch64_neon_fmaxnm:
  case Intrinsic::aarch64_neon_fminnm:
  case Intrinsic::aarch64_sve_convert_from_svbool:
  case Intrinsic::aarch64_sve_dup:
  case Intrinsic::aarch64_sve_dup_x:
  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpne_wide:
  case Intrinsic::aarch64_sve_rdffr:
  case Intrinsic::aarch64_sve_lasta:
  case Intrinsic::aarch64_sve_lastb:
  case Intrinsic::aarch64_sve_clasta_n:
  case Intrinsic::aarch64_sve_clastb_n:
  case Intrinsic::aarch64_sve_cntd:
  case Intrinsic::aarch64_sve_cntw:
  case Intrinsic::aarch64_sve_cnth:
  case Intrinsic::aarch64_sve_cntb:
  case Intrinsic::aarch64_sme_cntsd:
  case Intrinsic::aarch64_sve_ptest_any:
  case Intrinsic::aarch64_sve_ptest_first:
  case Intrinsic::aarch64_sve_ptest_last:
  case Intrinsic::aarch64_sve_fadd:
  case Intrinsic::aarch64_sve_fadd_u:
  case Intrinsic::aarch64_sve_fmul_u:
  case Intrinsic::aarch64_sve_fsub:
  case Intrinsic::aarch64_sve_fsub_u:
  case Intrinsic::aarch64_sve_add:
  case Intrinsic::aarch64_sve_add_u:
                                             Intrinsic::aarch64_sve_mla_u>(
  case Intrinsic::aarch64_sve_sub:
  case Intrinsic::aarch64_sve_sub_u:
                                             Intrinsic::aarch64_sve_mls_u>(
  case Intrinsic::aarch64_sve_tbl:
  case Intrinsic::aarch64_sve_uunpkhi:
  case Intrinsic::aarch64_sve_uunpklo:
  case Intrinsic::aarch64_sve_sunpkhi:
  case Intrinsic::aarch64_sve_sunpklo:
  case Intrinsic::aarch64_sve_uzp1:
  case Intrinsic::aarch64_sve_zip1:
  case Intrinsic::aarch64_sve_zip2:
  case Intrinsic::aarch64_sve_ld1_gather_index:
  case Intrinsic::aarch64_sve_st1_scatter_index:
  case Intrinsic::aarch64_sve_ld1:
  case Intrinsic::aarch64_sve_st1:
  case Intrinsic::aarch64_sve_sdiv:
  case Intrinsic::aarch64_sve_sel:
  case Intrinsic::aarch64_sve_srshl:
  case Intrinsic::aarch64_sve_dupq_lane:
  case Intrinsic::aarch64_sve_insr:
  case Intrinsic::aarch64_sve_whilelo:
  case Intrinsic::aarch64_sve_ptrue:
  case Intrinsic::aarch64_sve_uxtb:
  case Intrinsic::aarch64_sve_uxth:
  case Intrinsic::aarch64_sve_uxtw:
  case Intrinsic::aarch64_sme_in_streaming_mode:

  return std::nullopt;

                                 SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
  case Intrinsic::aarch64_neon_fcvtxn:
  case Intrinsic::aarch64_neon_rshrn:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqxtn:
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);

  return std::nullopt;
  return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&

  if (ST->useSVEForFixedLengthVectors() &&
        std::max(ST->getMinSVEVectorSizeInBits(), 128u));
  else if (ST->isNeonAvailable())

  if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&

bool AArch64TTIImpl::isSingleExtWideningInstruction(
    Type *SrcOverrideTy) const {
      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  Type *SrcTy = SrcOverrideTy;
  case Instruction::Add:
  case Instruction::Sub: {
    if (Opcode == Instruction::Sub)

  assert(SrcTy && "Expected some SrcTy");
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
      DstTyL.first * DstTyL.second.getVectorMinNumElements();
      SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
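  // A "single extend" widening instruction (e.g. [us]addw) consumes one
  // already-extended operand: it only qualifies when legalisation keeps the
  // element counts equal and the source element is exactly half the
  // destination element width.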
Type *AArch64TTIImpl::isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,
                                                  Type *SrcOverrideTy) const {
  if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
      Opcode != Instruction::Mul)
      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  auto getScalarSizeWithOverride = [&](const Value *V) {
        ->getScalarSizeInBits();

  unsigned MaxEltSize = 0;
    unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
    unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
    MaxEltSize = std::max(EltSize0, EltSize1);
    unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
    unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
    if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
    MaxEltSize = DstEltSize / 2;
  } else if (Opcode == Instruction::Mul &&
        getScalarSizeWithOverride(isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);

  if (MaxEltSize * 2 > DstEltSize)
  if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(DL, Src)) ||
      (Src->isScalableTy() && !ST->hasSVE2()))

  if (AddUser && AddUser->getOpcode() == Instruction::Add)

  if (!Shr || Shr->getOpcode() != Instruction::LShr)

  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
      Src->getScalarSizeInBits() !=

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (I && I->hasOneUser()) {
    if (Type *ExtTy = isBinExtWideningInstruction(
            SingleUser->getOpcode(), Dst, Operands,
            Src != I->getOperand(0)->getType() ? Src : nullptr)) {

    if (isSingleExtWideningInstruction(
            SingleUser->getOpcode(), Dst, Operands,
            Src != I->getOperand(0)->getType() ? Src : nullptr)) {

      if (SingleUser->getOpcode() == Instruction::Add) {
        if (I == SingleUser->getOperand(1) ||
            cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode)
  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())

  if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&

  EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;
      ST->useSVEForFixedLengthVectors(WiderTy)) {
    std::pair<InstructionCost, MVT> LT =
    unsigned NumElements =

  const unsigned int SVE_EXT_COST = 1;
  const unsigned int SVE_FCVT_COST = 1;
  const unsigned int SVE_UNPACK_ONCE = 4;
  const unsigned int SVE_UNPACK_TWICE = 16;
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
       SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
       SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
       SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
  if (ST->hasFullFP16())

                             Src->getScalarType(), CCH, CostKind) +

      ST->isSVEorStreamingSVEAvailable() &&
      TLI->getTypeAction(Src->getContext(), SrcTy) ==
      TLI->getTypeAction(Dst->getContext(), DstTy) ==
        Opcode, LegalTy, Src, CCH, CostKind, I);
    return Part1 + Part2;

      ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))

  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
                                 CostKind, Index, nullptr, nullptr);
  auto DstVT = TLI->getValueType(DL, Dst);
  auto SrcVT = TLI->getValueType(DL, Src);

  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))

  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())

  case Instruction::SExt:
  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)

  return Opcode == Instruction::PHI ? 0 : 1;

    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
  if (!LT.second.isVector())

  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;

  if (ST->hasFastLD1Single())
                 : ST->getVectorInsertExtractBaseCost() + 1;
  auto ExtractCanFuseWithFmul = [&]() {
    auto IsAllowedScalarTy = [&](const Type *T) {
      return T->isFloatTy() || T->isDoubleTy() ||
             (T->isHalfTy() && ST->hasFullFP16());

    auto IsUserFMulScalarTy = [](const Value *EEUser) {
      return BO && BO->getOpcode() == BinaryOperator::FMul &&
             !BO->getType()->isVectorTy();

    auto IsExtractLaneEquivalentToZero = [&](unsigned Idx, unsigned EltSz) {
      return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);

      DenseMap<User *, unsigned> UserToExtractIdx;
      for (auto *U : Scalar->users()) {
        if (!IsUserFMulScalarTy(U))
        UserToExtractIdx[U];
      if (UserToExtractIdx.empty())
      for (auto &[S, U, L] : ScalarUserAndIdx) {
        for (auto *U : S->users()) {
          if (UserToExtractIdx.contains(U)) {
            auto *Op0 = FMul->getOperand(0);
            auto *Op1 = FMul->getOperand(1);
            if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
              UserToExtractIdx[U] = L;
      for (auto &[U, L] : UserToExtractIdx) {

      return !EE->users().empty() &&
             all_of(EE->users(), [&](const User *U) {
               if (!IsUserFMulScalarTy(U))

               const auto *BO = cast<BinaryOperator>(U);
               const auto *OtherEE = dyn_cast<ExtractElementInst>(
                   BO->getOperand(0) == EE ? BO->getOperand(1)
                                           : BO->getOperand(0));
               const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
               return IsExtractLaneEquivalentToZero(
                   cast<ConstantInt>(OtherEE->getIndexOperand())
                   OtherEE->getType()->getScalarSizeInBits());

  if (Opcode == Instruction::ExtractElement && (I || Scalar) &&
      ExtractCanFuseWithFmul())
             : ST->getVectorInsertExtractBaseCost();
  if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&

  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, nullptr,

                  Value *Scalar,
    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, nullptr, Scalar,
                                  ScalarUserAndIdx, VIC);

  return getVectorInstrCostHelper(I.getOpcode(), Val, CostKind, Index, &I,

                                  unsigned Index) const {
                 : ST->getVectorInsertExtractBaseCost() + 1;

  if (Ty->getElementType()->isFloatingPointTy())

  unsigned VecInstCost =
  return DemandedElts.popcount() * (Insert + Extract) * VecInstCost;
  if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
    return std::nullopt;
  if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
    return std::nullopt;

  if (CanUseSVE && ST->hasSVEB16B16() && ST->isNonStreamingSVEorSME2Available())
    return std::nullopt;

  Cost += InstCost(PromotedTy);

                                       Op2Info, Args, CxtI);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

          Ty, CostKind, Op1Info, Op2Info, true,
          [&](Type *PromotedTy) {
        return *PromotedCost;
  if (Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {

    auto VT = TLI->getValueType(DL, Ty);
    if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
                 : (3 * AsrCost + AddCost);
        return MulCost + AsrCost + 2 * AddCost;
    } else if (VT.isVector()) {
      if (Ty->isScalableTy() && ST->hasSVE())
        Cost += 2 * AsrCost;
              ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
      } else if (LT.second == MVT::v2i64) {
        return VT.getVectorNumElements() *
      if (Ty->isScalableTy() && ST->hasSVE())
        return MulCost + 2 * AddCost + 2 * AsrCost;
      return 2 * MulCost + AddCost + AsrCost + UsraCost;
        LT.second.isFixedLengthVector()) {
      return ExtractCost + InsertCost +

    auto VT = TLI->getValueType(DL, Ty);
    bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
                   LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
                   LT.second == MVT::nxv16i8;
    bool Is128bit = LT.second.is128BitVector();
                       (HasMULH ? 0 : ShrCost) + AddCost * 2 + ShrCost;
    return DivCost + (ISD == ISD::UREM ? MulCost + AddCost : 0);
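    // Division by a constant is costed as a multiply-high sequence: MULH
    // where available (otherwise a widening multiply plus extra shifts),
    // followed by add/shift fix-ups; UREM then pays one more multiply plus
    // an add/sub to reconstruct the remainder.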
  if (!VT.isVector() && VT.getSizeInBits() > 64)
        Opcode, Ty, CostKind, Op1Info, Op2Info);

    if (TLI->isOperationLegalOrCustom(ISD, LT.second) && ST->hasSVE()) {
        Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {

      if (nullptr != Entry)

    if (LT.second.getScalarType() == MVT::i8)
    else if (LT.second.getScalarType() == MVT::i16)

            Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info);
        return (4 + DivCost) * VTy->getNumElements();

                                 -1, nullptr, nullptr);

    if (LT.second == MVT::v2i64 && ST->hasSVE())

    if (LT.second != MVT::v2i64)

    if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
         (Ty->isHalfTy() && ST->hasFullFP16())) &&

    if (!Ty->getScalarType()->isFP128Ty())

    if (!Ty->getScalarType()->isFP128Ty())
      return 2 * LT.first;

    if (!Ty->isVectorTy())

  int MaxMergeDistance = 64;

    return NumVectorInstToHideOverhead;
                                        unsigned Opcode1, unsigned Opcode2) const {
  if (!Sched.hasInstrSchedModel())

      Sched.getSchedClassDesc(TII->get(Opcode1).getSchedClass());
      Sched.getSchedClassDesc(TII->get(Opcode2).getSchedClass());
         "Cannot handle variant scheduling classes without an MI");
  const int AmortizationCost = 20;

    VecPred = CurrentPred;

    static const auto ValidMinMaxTys = {
        MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
        MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
    static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
        (ST->hasFullFP16() &&

        {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
        {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
        {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
        {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
        {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
        {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
        {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
        {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
        {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
        {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
        {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
  if (Opcode == Instruction::FCmp) {
            ValTy, CostKind, Op1Info, Op2Info, false,
            false, [&](Type *PromotedTy) {
          return *PromotedCost;

    if (LT.second.getScalarType() != MVT::f64 &&
        LT.second.getScalarType() != MVT::f32 &&
        LT.second.getScalarType() != MVT::f16)

    unsigned Factor = 1;
    if (!CondTy->isVectorTy() &&
                                    AArch64::FCMEQv4f32))

        TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&
                              Op1Info, Op2Info, I);

  if (ST->requiresStrictAlign()) {

  Options.AllowOverlappingLoads = true;
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  Options.LoadSizes = {8, 4, 2, 1};
  Options.AllowedTailExpansions = {3, 5, 6};
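  // Memcmp expansion tries 8-, 4-, 2- then 1-byte loads; odd trailing sizes
  // of 3, 5 or 6 bytes may be covered with a pair of overlapping loads
  // rather than falling back to the libcall.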
  return ST->hasSVE();

  switch (MICA.getID()) {
  case Intrinsic::masked_scatter:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_load:
  case Intrinsic::masked_expandload:
  case Intrinsic::masked_store:

  if (!LT.first.isValid())

  if (VT->getElementType()->isIntegerTy(1))

  if (MICA.getID() == Intrinsic::masked_expandload) {

  if (LT.first > 1 && LT.second.getScalarSizeInBits() > 8)
    return MemOpCost * 2;

  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Should be called on only load or stores.");
  case Instruction::Load:
      return ST->getGatherOverhead();
  case Instruction::Store:
      return ST->getScatterOverhead();
  unsigned Opcode = (MICA.getID() == Intrinsic::masked_gather ||
                     MICA.getID() == Intrinsic::vp_gather)
                        : Instruction::Store;

  if (!LT.first.isValid())

  if (!LT.second.isVector() ||
      VT->getElementType()->isIntegerTy(1))

  ElementCount LegalVF = LT.second.getVectorElementCount();
      {TTI::OK_AnyValue, TTI::OP_None}, I);

  EVT VT = TLI->getValueType(DL, Ty, true);

  if (VT == MVT::Other)

  if (!LT.first.isValid())

      (VTy->getElementType()->isIntegerTy(1) &&
       !VTy->getElementCount().isKnownMultipleOf(

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && Alignment < Align(16)) {
    const int AmortizationCost = 6;
    return LT.first * 2 * AmortizationCost;
  if (Ty->isPtrOrPtrVectorTy())

  if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
    if (VT == MVT::v4i8)
    if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||

  while (!TypeWorklist.empty()) {

                                       bool UseMaskForCond,
                                       bool UseMaskForGaps) const {
  assert(Factor >= 2 && "Invalid interleave factor");

  if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))

  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
        VecVTy->getElementCount().divideCoefficientBy(Factor));

    if (MinElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
                                UseMaskForCond, UseMaskForGaps);

  for (auto *I : Tys) {
    if (!I->isVectorTy())

                                           Align Alignment) const {
  return (ST->isSVEAvailable() && ST->hasSVE2p2()) ||
         (ST->isSVEorStreamingSVEAvailable() && ST->hasSME2p2());

  return ST->getMaxInterleaveFactor();
// Count the strided loads in the loop, bailing out early once there are
// more than enough to matter for the unrolling decision below.
enum { MaxStridedLoads = 7 };
// ...
int StridedLoads = 0;
// ...
for (const auto BB : L->blocks()) {
  for (auto &I : *BB) {
    // ...
    if (L->isLoopInvariant(PtrValue))
    // ...
    if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
    // ...
    if (StridedLoads > MaxStridedLoads / 2)
      return StridedLoads;
  }
}
return StridedLoads;
// ...
int StridedLoads = countStridedLoads(L, SE);
// ...
           << " strided loads\n");
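// Standalone sketch of the counting pattern above: count strided loads but
// return as soon as the count passes MaxStridedLoads / 2, since the caller
// only needs to know which side of the threshold the loop is on.
#include <cstdio>
#include <vector>

enum { MaxStridedLoads = 7 };

static int countStrided(const std::vector<bool> &IsStridedLoad) {
  int StridedLoads = 0;
  for (bool Strided : IsStridedLoad) {
    if (!Strided)
      continue; // loop-invariant or non-affine pointers do not count
    if (++StridedLoads > MaxStridedLoads / 2)
      return StridedLoads; // early exit, already over the threshold
  }
  return StridedLoads;
}

int main() {
  std::vector<bool> Loads = {true, false, true, true, true, true};
  std::printf("%d\n", countStrided(Loads)); // prints 4 (stops past 7/2 == 3)
}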
                                      unsigned *FinalSize) {
  // ...
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      // ...
      if (!Cost.isValid())
      // ...
      if (LoopCost > Budget)
// ...
if (MaxTC > 0 && MaxTC <= 32)
if (Blocks.size() != 2)
// ...
if (!L->isInnermost() || L->getNumBlocks() > 8)
// ...
if (!L->getExitBlock())
// ...
bool HasParallelizableReductions =
    L->getNumBlocks() == 1 &&
    any_of(L->getHeader()->phis(), [&](PHINode &Phi) {
      // ...
      return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
    });
if (HasParallelizableReductions &&
// ...
if (HasParallelizableReductions) {
// ...
if (Header == Latch) {
  // ...
  unsigned Width = 10;
// ...
unsigned MaxInstsPerLine = 16;
// ...
unsigned BestUC = 1;
unsigned SizeWithBestUC = BestUC * Size;
// ...
unsigned SizeWithUC = UC * Size;
if (SizeWithUC > 48)
// ...
if ((SizeWithUC % MaxInstsPerLine) == 0 ||
    (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
  // ...
  SizeWithBestUC = BestUC * Size;
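// Standalone sketch of the unroll-count search above: among counts whose
// unrolled size stays within the 48-instruction budget, prefer the one that
// best fills 16-instruction lines (MaxInstsPerLine).
#include <cstdio>

static unsigned pickUnrollCount(unsigned Size) {
  const unsigned MaxInstsPerLine = 16;
  unsigned BestUC = 1;
  unsigned SizeWithBestUC = BestUC * Size;
  for (unsigned UC = 2; UC * Size <= 48; ++UC) {
    unsigned SizeWithUC = UC * Size;
    if ((SizeWithUC % MaxInstsPerLine) == 0 ||
        (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
      BestUC = UC;
      SizeWithBestUC = BestUC * Size;
    }
  }
  return BestUC;
}

int main() {
  // An 8-instruction body unrolls 6x: 48 instructions, three full lines.
  std::printf("%u\n", pickUnrollCount(8));
}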
for (auto *BB : L->blocks()) {
  for (auto &I : *BB) {
    // ...
    for (auto *U : I.users())
      // ...
      LoadedValuesPlus.insert(U);
// ...
return LoadedValuesPlus.contains(SI->getOperand(0));
// ...
auto *I = dyn_cast<Instruction>(V);
return I && DependsOnLoopLoad(I, Depth + 1);
// ...
    DependsOnLoopLoad(I, 0)) {
if (L->getLoopDepth() > 1)
// ...
for (auto *BB : L->getBlocks()) {
  for (auto &I : *BB) {
    // ...
    if (IsVectorized && I.getType()->isVectorTy())
// ...
if (ST->isAppleMLike())
// ...
else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
// ...
    !ST->getSchedModel().isOutOfOrder()) {
// --- getOrCreateResultFromMemIntrinsic ---
                                    bool CanCreate) const {
// ...
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4: {
  // Note: here ST is the expected StructType of the result, not the subtarget.
  if (!CanCreate || !ST)
  // ...
  unsigned NumElts = Inst->arg_size() - 1;
  if (ST->getNumElements() != NumElts)
  // ...
  for (unsigned i = 0, e = NumElts; i != e; ++i) {
  // ...
  for (unsigned i = 0, e = NumElts; i != e; ++i) {
    // ...
    Res = Builder.CreateInsertValue(Res, L, i);
  }
  // ...
}
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
  if (Inst->getType() == ExpectedType)
// ...
// --- getTgtMemIntrinsic ---
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
  Info.ReadMem = true;
  Info.WriteMem = false;
  // ...
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4:
  Info.ReadMem = false;
  Info.WriteMem = true;
  // ...
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_st2:
  Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
  // ...
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_st3:
  Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
  // ...
case Intrinsic::aarch64_neon_ld4:
case Intrinsic::aarch64_neon_st4:
  Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
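// Standalone sketch of the mapping above (the enum values here are
// arbitrary): paired ldN/stN intrinsics share a MatchingId so memory-
// intrinsic-aware passes such as EarlyCSE can recognize an stN followed by
// a matching ldN of the same elements.
#include <cstdio>

enum MemIntrinsicType {
  VECTOR_LDST_TWO_ELEMENTS = 2,
  VECTOR_LDST_THREE_ELEMENTS = 3,
  VECTOR_LDST_FOUR_ELEMENTS = 4,
};

static int matchingId(int Factor) {
  switch (Factor) {
  case 2:
    return VECTOR_LDST_TWO_ELEMENTS; // ld2 and st2
  case 3:
    return VECTOR_LDST_THREE_ELEMENTS; // ld3 and st3
  case 4:
    return VECTOR_LDST_FOUR_ELEMENTS; // ld4 and st4
  default:
    return 0; // no special matching
  }
}

int main() { std::printf("%d\n", matchingId(3)); }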
// --- shouldConsiderAddressTypePromotion ---
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  // ...
  Type *ConsideredSExtType =
  // ...
  if (I.getType() != ConsideredSExtType)
  // ...
  for (const User *U : I.users()) {
    // ...
    Considerable = true;
    // ...
    if (GEPInst->getNumOperands() > 2) {
      AllowPromotionWithoutCommonHeader = true;
      // ...
    }
  }
  return Considerable;
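// Standalone sketch of the decision above: a sign-extended index is worth
// promoting if any user is a GEP, and the common-header restriction is
// lifted when some GEP has more than two operands. Users are modelled here
// by their GEP operand counts (0 meaning "not a GEP").
#include <cstdio>
#include <vector>

struct Decision {
  bool Considerable;
  bool AllowWithoutCommonHeader;
};

static Decision considerSExtPromotion(const std::vector<unsigned> &GEPUsers) {
  Decision D = {false, false};
  for (unsigned NumOperands : GEPUsers) {
    if (NumOperands == 0)
      continue;
    D.Considerable = true;
    if (NumOperands > 2)
      D.AllowWithoutCommonHeader = true;
  }
  return D;
}

int main() {
  Decision D = considerSExtPromotion({2, 3});
  std::printf("%d %d\n", D.Considerable, D.AllowWithoutCommonHeader); // 1 1
}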
if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
// ...
return LegalizationCost + 2;
// ...
LegalizationCost *= LT.first - 1;
// ...
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// ...
return LegalizationCost + 2;
// ...
    std::optional<FastMathFlags> FMF,
// ...
return BaseCost + FixedVTy->getNumElements();
// ...
if (Opcode != Instruction::FAdd)
// ...
MVT MTy = LT.second;
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// ...
    MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
                       (EltTy->isHalfTy() && ST->hasFullFP16()))) {
// ...
return (LT.first - 1) + Log2_32(NElts);
// ...
return (LT.first - 1) + Entry->Cost;
// ...
if (LT.first != 1) {
  // ...
  ExtraCost *= LT.first - 1;
}
// ...
auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
return Cost + ExtraCost;
// --- getExtendedReductionCost ---
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *VecTy,
// ...
EVT VecVT = TLI->getValueType(DL, VecTy);
EVT ResVT = TLI->getValueType(DL, ResTy);
// ...
if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
     // ...
    ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
     // ...
    ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
     // ...
  return (LT.first - 1) * 2 + 2;
// ...
// --- getMulAccReductionCost ---
EVT VecVT = TLI->getValueType(DL, VecTy);
EVT ResVT = TLI->getValueType(DL, ResTy);
// ...
    RedOpcode == Instruction::Add) {
  // ...
  if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
      // ...
    return LT.first + 2;
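// Why "LT.first + 2" is enough for these multiply-accumulate reductions: a
// single [us]dot instruction reduces each group of four i8 lanes into an
// i32 lane. This standalone sketch emulates udot on one 8 x i8 register
// (two groups of four), which would otherwise need a widen/mul/add chain.
#include <cstdint>
#include <cstdio>

static void udot(uint32_t Acc[2], const uint8_t A[8], const uint8_t B[8]) {
  for (int Lane = 0; Lane < 2; ++Lane)
    for (int J = 0; J < 4; ++J)
      Acc[Lane] += uint32_t(A[4 * Lane + J]) * uint32_t(B[4 * Lane + J]);
}

int main() {
  uint32_t Acc[2] = {0, 0};
  uint8_t A[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint8_t B[8] = {1, 1, 1, 1, 2, 2, 2, 2};
  udot(Acc, A, B);
  std::printf("%u %u\n", Acc[0], Acc[1]); // prints 10 52
}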
EVT PromotedVT = LT.second.getScalarType() == MVT::i1
                     ? TLI->getPromotedVTForPredicate(EVT(LT.second))
// ...
if (LT.second.getScalarType() == MVT::i1) {
// ...
assert(Entry && "Illegal Type for Splice");
LegalizationCost += Entry->Cost;
return LegalizationCost * LT.first;
// --- getPartialReductionCost ---
    unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
// ...
if ((Opcode != Instruction::Add && Opcode != Instruction::Sub &&
     Opcode != Instruction::FAdd) ||
// ...
assert(FMF && "Missing FastMathFlags for floating-point partial reduction");
if (!FMF->allowReassoc() || !FMF->allowContract())
// ...
       "FastMathFlags only apply to floating-point partial reductions");
// ...
       (!BinOp || (OpBExtend != TTI::PR_None && InputTypeB)) &&
       "Unexpected values for OpBExtend or InputTypeB");
// ...
if (BinOp && ((*BinOp != Instruction::Mul && *BinOp != Instruction::FMul) ||
              InputTypeA != InputTypeB))
// ...
bool IsUSDot = OpBExtend != TTI::PR_None && OpAExtend != OpBExtend;
if (IsUSDot && !ST->hasMatMulInt8())
// ...
auto TC = TLI->getTypeConversion(AccumVectorType->getContext(),
// ...
if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) !=
// ...
std::pair<InstructionCost, MVT> AccumLT =
// ...
std::pair<InstructionCost, MVT> InputLT =
// ...
auto IsSupported = [&](bool SVEPred, bool NEONPred) -> bool {
  return (ST->isSVEorStreamingSVEAvailable() && SVEPred) ||
         (AccumLT.second.isFixedLengthVector() &&
          AccumLT.second.getSizeInBits() <= 128 && ST->isNeonAvailable() &&
// ...
bool IsSub = Opcode == Instruction::Sub;
// ...
if (AccumLT.second.getScalarType() == MVT::i32 &&
    InputLT.second.getScalarType() == MVT::i8 && !IsSub) {
  // ...
  if (!IsUSDot && IsSupported(/*SVEPred=*/true, ST->hasDotProd()))
  // ...
  if (IsUSDot && IsSupported(ST->hasMatMulInt8(), ST->hasMatMulInt8()))
// ...
if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot && !IsSub) {
  // ...
  if (AccumLT.second.getScalarType() == MVT::i64 &&
      InputLT.second.getScalarType() == MVT::i16)
  // ...
  if (AccumLT.second.getScalarType() == MVT::i32 &&
      InputLT.second.getScalarType() == MVT::i16 &&
      (ST->hasSVE2p1() || ST->hasSME2()))
  // ...
  if (AccumLT.second.getScalarType() == MVT::i64 &&
      InputLT.second.getScalarType() == MVT::i8)
  // ...
  if (AccumLT.second.getScalarType() == MVT::i16 &&
      InputLT.second.getScalarType() == MVT::i8 &&
      (ST->hasSVE2p3() || ST->hasSME2p3()))
// ...
if (Opcode == Instruction::FAdd && !IsSub &&
    IsSupported(ST->hasSME2() || ST->hasSVE2p1(), ST->hasF16F32DOT()) &&
    AccumLT.second.getScalarType() == MVT::f32 &&
    InputLT.second.getScalarType() == MVT::f16)
// ...
if (Ratio == 2 && !IsSub) {
  MVT InVT = InputLT.second.getScalarType();
  // ...
  if (IsSupported(ST->hasSVE2(), /*NEONPred=*/true) &&
  // ...
  if (IsSupported(ST->hasSVE2(), ST->hasFP16FML()) && InVT == MVT::f16)
  // ...
  if (IsSupported(ST->hasBF16(), ST->hasBF16()) && InVT == MVT::bf16)
// ...
    Opcode, InputTypeA, InputTypeB, AccumType, VF, OpAExtend, OpBExtend,
// ...
return ExpandCost.isValid() && IsSub ? ((8 * ExpandCost) / 10) : ExpandCost;
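// The usdot path above mixes extend kinds. Standalone sketch of the lane
// semantics being costed (a signed-by-unsigned dot product, the operation
// gated on hasMatMulInt8(), i.e. the +i8mm feature):
#include <cstdint>
#include <cstdio>

static int32_t usdotLane(int32_t Acc, const uint8_t U[4], const int8_t S[4]) {
  for (int J = 0; J < 4; ++J)
    Acc += int32_t(U[J]) * int32_t(S[J]); // unsigned times signed, i32 sum
  return Acc;
}

int main() {
  uint8_t U[4] = {255, 1, 2, 3};
  int8_t S[4] = {-1, 1, 1, 1};
  std::printf("%d\n", usdotLane(0, U, S)); // 255 * -1 + 6 = -249
}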
6131 "Expected the Mask to match the return size if given");
6133 "Expected the same scalar types");
6139 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
6140 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
6141 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
6149 return std::max<InstructionCost>(1, LT.first / 4);
6157 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
6159 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
6162 unsigned TpNumElts = Mask.size();
6163 unsigned LTNumElts = LT.second.getVectorNumElements();
6164 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
6166 LT.second.getVectorElementCount());
6168 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>,
InstructionCost>
6170 for (
unsigned N = 0;
N < NumVecs;
N++) {
6174 unsigned Source1 = -1U, Source2 = -1U;
6175 unsigned NumSources = 0;
6176 for (
unsigned E = 0; E < LTNumElts; E++) {
6177 int MaskElt = (
N * LTNumElts + E < TpNumElts) ? Mask[
N * LTNumElts + E]
6186 unsigned Source = MaskElt / LTNumElts;
6187 if (NumSources == 0) {
6190 }
else if (NumSources == 1 && Source != Source1) {
6193 }
else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
6199 if (Source == Source1)
6201 else if (Source == Source2)
6202 NMask.
push_back(MaskElt % LTNumElts + LTNumElts);
6211 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
6222 NTp, NTp, NMask,
CostKind, 0,
nullptr, Args,
6225 Result.first->second = NCost;
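// Standalone sketch of the chunking loop above: a shuffle wider than one
// legal register is split into LTNumElts-sized chunks, and a chunk that
// reads from at most two source registers becomes one legal two-input
// shuffle with a remapped mask (the Source1/Source2/NMask logic).
#include <cstdio>
#include <vector>

struct Chunk {
  int Source1 = -1, Source2 = -1; // source registers, -1 if unused
  std::vector<int> NMask;         // remapped two-input mask
  bool Legal = true;              // false if more than two sources needed
};

static Chunk splitChunk(const std::vector<int> &Mask, unsigned LTNumElts,
                        unsigned N) {
  Chunk C;
  for (unsigned E = 0; E < LTNumElts; ++E) {
    unsigned Idx = N * LTNumElts + E;
    int MaskElt = Idx < Mask.size() ? Mask[Idx] : -1;
    if (MaskElt < 0) {
      C.NMask.push_back(-1); // undef lane, any source will do
      continue;
    }
    int Source = MaskElt / int(LTNumElts);
    if (C.Source1 < 0 || Source == C.Source1) {
      C.Source1 = Source;
      C.NMask.push_back(MaskElt % int(LTNumElts));
    } else if (C.Source2 < 0 || Source == C.Source2) {
      C.Source2 = Source;
      C.NMask.push_back(MaskElt % int(LTNumElts) + int(LTNumElts));
    } else {
      C.Legal = false; // needs more than two inputs
    }
  }
  return C;
}

int main() {
  // <0,4,1,5,...> over 4-element registers: chunk 0 zips registers 0 and 1.
  std::vector<int> Mask = {0, 4, 1, 5, 2, 6, 3, 7};
  Chunk C = splitChunk(Mask, 4, 0);
  std::printf("sources %d %d mask", C.Source1, C.Source2);
  for (int M : C.NMask)
    std::printf(" %d", M);
  std::printf("\n"); // sources 0 1 mask 0 4 1 5
}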
if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
  if (LT.second.getFixedSizeInBits() >= 128 &&
      // ...
          LT.second.getVectorNumElements() / 2) {
    // ...
    if (Index == (int)LT.second.getVectorNumElements() / 2)
// ...
if (!Mask.empty() && LT.second.isFixedLengthVector() &&
    // ...
      return M.value() < 0 || M.value() == (int)M.index();
// ...
    !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
    SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
// ...
if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
    ST->isSVEorStreamingSVEAvailable() &&
// ...
if (ST->isSVEorStreamingSVEAvailable() &&
// ...
if (IsLoad && LT.second.isVector() &&
    // ...
        LT.second.getVectorElementCount()))
// ...
if (Mask.size() == 4 &&
    // ...
    (SrcTy->getScalarSizeInBits() == 16 ||
     SrcTy->getScalarSizeInBits() == 32) &&
    all_of(Mask, [](int E) { return E < 8; }))
// ...
if (LT.second.isFixedLengthVector() &&
    LT.second.getVectorNumElements() == Mask.size() &&
    // ...
    (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
     isTRNMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
     isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
     isREVMask(Mask, LT.second.getScalarSizeInBits(),
               LT.second.getVectorNumElements(), 16) ||
     isREVMask(Mask, LT.second.getScalarSizeInBits(),
               LT.second.getVectorNumElements(), 32) ||
     isREVMask(Mask, LT.second.getScalarSizeInBits(),
               LT.second.getVectorNumElements(), 64) ||
     // ...
     [&Mask](int M) { return M < 0 || M == Mask[0]; })))
// ...
return LT.first * Entry->Cost;
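// Standalone versions of two of the single-instruction mask predicates used
// above (simplified: undef lanes accepted): ZIP interleaves the low (zip1)
// or high (zip2) halves of two registers; UZP keeps the even (uzp1) or odd
// (uzp2) lanes of the concatenation.
#include <cstdio>
#include <vector>

static bool isZip(const std::vector<int> &M, unsigned NumElts, bool High) {
  unsigned Base = High ? NumElts / 2 : 0;
  for (unsigned I = 0; I < NumElts; ++I) {
    int Want = int(Base + I / 2 + (I % 2 ? NumElts : 0));
    if (M[I] >= 0 && M[I] != Want)
      return false;
  }
  return true;
}

static bool isUzp(const std::vector<int> &M, unsigned NumElts, bool Odd) {
  for (unsigned I = 0; I < NumElts; ++I)
    if (M[I] >= 0 && M[I] != int(2 * I + (Odd ? 1 : 0)))
      return false;
  return true;
}

int main() {
  std::vector<int> Zip1 = {0, 4, 1, 5}; // zip1 of two 4-element registers
  std::vector<int> Uzp1 = {0, 2, 4, 6}; // uzp1: even lanes
  std::printf("%d %d\n", isZip(Zip1, 4, false), isUzp(Uzp1, 4, false));
}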
    LT.second.getSizeInBits() <= 128 && SubTp) {
  // ...
  if (SubLT.second.isVector()) {
    int NumElts = LT.second.getVectorNumElements();
    int NumSubElts = SubLT.second.getVectorNumElements();
    if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
// ...
if (IsExtractSubvector)
// ...
if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
// ...
return ST->useFixedOverScalableIfEqualCost();
// ...
return ST->getEpilogueVectorizationMinVF();
// ...
unsigned NumInsns = 0;
// ...
NumInsns += BB->size();
// ...
// --- getScalingFactorCost ---
                    int64_t Scale, unsigned AddrSpace) const {
// ...
if (I->getOpcode() == Instruction::Or &&
// ...
if (I->getOpcode() == Instruction::Add ||
    I->getOpcode() == Instruction::Sub)
// ...
return all_equal(Shuf->getShuffleMask());
                                     bool AllowSplat = false) {
// ...
auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
  auto *FullTy = FullV->getType();
  auto *HalfTy = HalfV->getType();
  // ...
         2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
};
auto extractHalf = [](Value *FullV, Value *HalfV) {
  // ...
  return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
};
// ...
Value *S1Op1 = nullptr, *S2Op1 = nullptr;
// ...
if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
    (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
// ...
if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
    (M2Start != 0 && M2Start != (NumElements / 2)))
// ...
if (S1Op1 && S2Op1 && M1Start != M2Start)
// ...
return Ext->getType()->getScalarSizeInBits() ==
       2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
Value *VectorOperand = nullptr;
// ...
if (!GEP || GEP->getNumOperands() != 2)
// ...
Value *Offsets = GEP->getOperand(1);
// ...
if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
// ...
if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
    OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
  Ops.push_back(&GEP->getOperandUse(1));
// --- isProfitableToSinkOperands ---
switch (II->getIntrinsicID()) {
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull:
  // ...
  Ops.push_back(&II->getOperandUse(0));
  Ops.push_back(&II->getOperandUse(1));
  // ...
case Intrinsic::fma:
case Intrinsic::fmuladd:
  // ...
  Ops.push_back(&II->getOperandUse(0));
  // ...
  Ops.push_back(&II->getOperandUse(1));
  // ...
case Intrinsic::aarch64_neon_sqdmull:
case Intrinsic::aarch64_neon_sqdmulh:
case Intrinsic::aarch64_neon_sqrdmulh:
  // ...
  Ops.push_back(&II->getOperandUse(0));
  // ...
  Ops.push_back(&II->getOperandUse(1));
  return !Ops.empty();
case Intrinsic::aarch64_neon_fmlal:
case Intrinsic::aarch64_neon_fmlal2:
case Intrinsic::aarch64_neon_fmlsl:
case Intrinsic::aarch64_neon_fmlsl2:
  // ...
  Ops.push_back(&II->getOperandUse(1));
  // ...
  Ops.push_back(&II->getOperandUse(2));
  return !Ops.empty();
case Intrinsic::aarch64_sve_ptest_first:
case Intrinsic::aarch64_sve_ptest_last:
  // ...
  if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
    Ops.push_back(&II->getOperandUse(0));
  return !Ops.empty();
case Intrinsic::aarch64_sme_write_horiz:
case Intrinsic::aarch64_sme_write_vert:
case Intrinsic::aarch64_sme_writeq_horiz:
case Intrinsic::aarch64_sme_writeq_vert: {
  // ...
  if (!Idx || Idx->getOpcode() != Instruction::Add)
  // ...
  Ops.push_back(&II->getOperandUse(1));
  // ...
}
case Intrinsic::aarch64_sme_read_horiz:
case Intrinsic::aarch64_sme_read_vert:
case Intrinsic::aarch64_sme_readq_horiz:
case Intrinsic::aarch64_sme_readq_vert:
case Intrinsic::aarch64_sme_ld1b_vert:
case Intrinsic::aarch64_sme_ld1h_vert:
case Intrinsic::aarch64_sme_ld1w_vert:
case Intrinsic::aarch64_sme_ld1d_vert:
case Intrinsic::aarch64_sme_ld1q_vert:
case Intrinsic::aarch64_sme_st1b_vert:
case Intrinsic::aarch64_sme_st1h_vert:
case Intrinsic::aarch64_sme_st1w_vert:
case Intrinsic::aarch64_sme_st1d_vert:
case Intrinsic::aarch64_sme_st1q_vert:
case Intrinsic::aarch64_sme_ld1b_horiz:
case Intrinsic::aarch64_sme_ld1h_horiz:
case Intrinsic::aarch64_sme_ld1w_horiz:
case Intrinsic::aarch64_sme_ld1d_horiz:
case Intrinsic::aarch64_sme_ld1q_horiz:
case Intrinsic::aarch64_sme_st1b_horiz:
case Intrinsic::aarch64_sme_st1h_horiz:
case Intrinsic::aarch64_sme_st1w_horiz:
case Intrinsic::aarch64_sme_st1d_horiz:
case Intrinsic::aarch64_sme_st1q_horiz: {
  // ...
  if (!Idx || Idx->getOpcode() != Instruction::Add)
  // ...
  Ops.push_back(&II->getOperandUse(3));
  // ...
}
case Intrinsic::aarch64_neon_pmull:
  // ...
  Ops.push_back(&II->getOperandUse(0));
  Ops.push_back(&II->getOperandUse(1));
  // ...
case Intrinsic::aarch64_neon_pmull64:
  // ...
                              II->getArgOperand(1)))
  // ...
  Ops.push_back(&II->getArgOperandUse(0));
  Ops.push_back(&II->getArgOperandUse(1));
  // ...
case Intrinsic::masked_gather:
  // ...
  Ops.push_back(&II->getArgOperandUse(0));
  // ...
case Intrinsic::masked_scatter:
  // ...
  Ops.push_back(&II->getArgOperandUse(1));
  // ...
}
// ...
auto ShouldSinkCondition = [](Value *Cond,
// ...
  if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
  // ...
  Ops.push_back(&II->getOperandUse(0));
switch (I->getOpcode()) {
case Instruction::GetElementPtr:
case Instruction::Add:
case Instruction::Sub:
  // ...
  for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
    // ...
    Ops.push_back(&I->getOperandUse(Op));
  }
  // ...
case Instruction::Select: {
  if (!ShouldSinkCondition(I->getOperand(0), Ops))
  // ...
  Ops.push_back(&I->getOperandUse(0));
  // ...
}
case Instruction::UncondBr:
// ...
case Instruction::CondBr: {
  // ...
  Ops.push_back(&I->getOperandUse(0));
  // ...
}
case Instruction::FMul:
  // ...
  Ops.push_back(&I->getOperandUse(0));
  // ...
  Ops.push_back(&I->getOperandUse(1));
  // ...
case Instruction::Xor:
  // ...
  if (I->getType()->isVectorTy() && ST->isNeonAvailable()) {
  // ...
      ST->isSVEorStreamingSVEAvailable() && (ST->hasSVE2() || ST->hasSME());
  // ...
case Instruction::And:
case Instruction::Or:
  // ...
  if (I->getOpcode() == Instruction::Or &&
  // ...
  if (!(I->getType()->isVectorTy() && ST->hasNEON()) &&
  // ...
  for (auto &Op : I->operands()) {
    // ...
    Ops.push_back(&Not);
    Ops.push_back(&InsertElt);
  }
// ...
if (!I->getType()->isVectorTy())
  return !Ops.empty();
switch (I->getOpcode()) {
case Instruction::Sub:
case Instruction::Add: {
  // ...
  Ops.push_back(&Ext1->getOperandUse(0));
  Ops.push_back(&Ext2->getOperandUse(0));
  // ...
  Ops.push_back(&I->getOperandUse(0));
  Ops.push_back(&I->getOperandUse(1));
  // ...
}
case Instruction::Or: {
  // ...
  if (ST->hasNEON()) {
    // ...
    if (I->getParent() != MainAnd->getParent() ||
    // ...
    if (I->getParent() != IA->getParent() ||
        I->getParent() != IB->getParent())
    // ...
    Ops.push_back(&I->getOperandUse(0));
    Ops.push_back(&I->getOperandUse(1));
  }
  // ...
}
case Instruction::Mul: {
  auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
    // ...
    if (Ty->isScalableTy())
    // ...
    return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
  };
  int NumZExts = 0, NumSExts = 0;
  for (auto &Op : I->operands()) {
    // ...
    auto *ExtOp = Ext->getOperand(0);
    if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
      Ops.push_back(&Ext->getOperandUse(0));
    // ...
    if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
        I->getType()->getScalarSizeInBits())
    // ...
    if (!ElementConstant || !ElementConstant->isZero())
    // ...
    unsigned Opcode = OperandInstr->getOpcode();
    if (Opcode == Instruction::SExt)
    // ...
    else if (Opcode == Instruction::ZExt)
    // ...
    unsigned Bitwidth = I->getType()->getScalarSizeInBits();
    // ...
    Ops.push_back(&Insert->getOperandUse(1));
  }
  // ...
  if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
  // ...
  if (!ShouldSinkSplatForIndexedVariant(I))
  // ...
  Ops.push_back(&I->getOperandUse(0));
  // ...
  Ops.push_back(&I->getOperandUse(1));
  // ...
  return !Ops.empty();
}
case Instruction::FMul: {
  // ...
  if (I->getType()->isScalableTy())
    return !Ops.empty();
  // ...
  return !Ops.empty();
  // ...
  Ops.push_back(&I->getOperandUse(0));
  // ...
  Ops.push_back(&I->getOperandUse(1));
  return !Ops.empty();
}
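// Standalone sketch of the decision the Mul case above makes through
// ShouldSinkSplatForIndexedVariant: a splat operand is only worth sinking
// next to its user when the element type has a by-element (indexed)
// multiply form, i.e. 16- or 32-bit lanes on fixed-width vectors, so
// instruction selection can fold it into MUL (by element).
#include <cstdio>

struct OperandInfo {
  bool IsSplat;    // operand is a splat shuffle
  bool IsScalable; // scalable types index within 128-bit segments only
  unsigned EltBits;
};

static bool shouldSinkSplatForIndexedVariant(const OperandInfo &Op) {
  if (!Op.IsSplat || Op.IsScalable)
    return false;
  // Integer multiply-by-element exists only for 16- and 32-bit lanes.
  return Op.EltBits == 16 || Op.EltBits == 32;
}

int main() {
  OperandInfo Splat32 = {true, false, 32};
  OperandInfo Splat64 = {true, false, 64}; // no by-element form to exploit
  std::printf("%d %d\n", shouldSinkSplatForIndexedVariant(Splat32),
              shouldSinkSplatForIndexedVariant(Splat64));
}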
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
const HexagonInstrInfo * TII
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the architecture features are not...
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
bool preferTailFoldingOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
APInt getPriorityMask(const Function &F) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedExpandLoad(Type *DataTy, Align Alignment) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool noSignedZeros() const
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
LLVM_ABI APInt getCpuSupportsMask(ArrayRef< StringRef > Features)
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
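Most of the matchers above live in llvm::PatternMatch (the m_Not/m_ZeroInt variants near the top are their GlobalISel MIPatternMatch counterparts). A minimal sketch, not taken from the source, of how they compose; V, X and Y are placeholder values:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Recognise "(X & Y) == 0", capturing both AND operands.
static bool isMaskTestAgainstZero(Value *V, Value *&X, Value *&Y) {
  CmpPredicate Pred;
  // m_c_And accepts its operands in either order; m_Zero also matches
  // all-zero vector constants.
  return match(V, m_ICmp(Pred, m_c_And(m_Value(X), m_Value(Y)), m_Zero())) &&
         Pred == ICmpInst::ICMP_EQ;
}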
LLVM_ABI Libcall getPOW(EVT RetVT)
getPOW - Return the POW_* value for the given types, or UNKNOWN_LIBCALL if there is none.
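A hedged usage sketch (the RTLIB:: qualification and the POW_F64 value are assumed from LLVM's runtime-libcall support):

RTLIB::Libcall LC = RTLIB::getPOW(MVT::f64); // POW_F64 for double
if (LC == RTLIB::UNKNOWN_LIBCALL) {
  // No pow() libcall exists for this type; fall back to expansion.
}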
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
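A small sketch combining all_of with drop_begin (declared above); Mask is a placeholder ArrayRef<int>:

// Splat check: every element after the first equals the first.
bool IsSplat = all_of(drop_begin(Mask),
                      [&](int Elt) { return Elt == Mask[0]; });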
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
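A minimal sketch of the lookup, using the CostTblEntry typedef that appears later in this list; the table contents are illustrative only:

static const CostTblEntry ExampleTbl[] = {
    {ISD::AND, MVT::v16i8, 1},
    {ISD::MUL, MVT::v16i8, 4},
};
// Inside a cost query:
if (const auto *Entry = CostTableLookup(ExampleTbl, ISD::AND, MVT::v16i8))
  return Entry->Cost; // 1, from the first row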
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
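A hedged sketch using the signature above: classify an 8-element zip1 mask.

int Mask[] = {0, 8, 1, 9, 2, 10, 3, 11};
unsigned WhichResult, OperandOrder;
if (isZIPMask(Mask, /*NumElts=*/8, WhichResult, OperandOrder)) {
  // WhichResult selects zip1 (0) vs zip2 (1); OperandOrder records
  // whether the two source operands must be swapped.
}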
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
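Sketch: walk a shuffle mask with its indices, skipping poison lanes (PoisonMaskElem is declared later in this list); Mask is a placeholder:

for (auto [Idx, Elt] : enumerate(Mask))
  if (Elt != PoisonMaskElem && Elt != int(Idx))
    return false; // not the identity shuffle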
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128-bit segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
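A hedged sketch of the two loop-metadata queries above; TheLoop is a placeholder Loop*:

bool ForceVec =
    getBooleanLoopAttribute(TheLoop, "llvm.loop.vectorize.enable");
if (auto WidthMD =
        findStringMetadataForLoop(TheLoop, "llvm.loop.vectorize.width")) {
  // *WidthMD is the MDOperand holding the requested width, when present.
}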
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
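Sketch, pairing this helper with getLoadStoreType further down the list; I is a placeholder Instruction*:

if (const Value *Ptr = getLoadStorePointerOperand(I)) {
  Type *AccessTy = getLoadStoreType(I);
  // ... cost the access of AccessTy through Ptr ...
}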
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
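Sketch of the two splat queries above; V is a placeholder vector Value*:

if (Value *Splat = getSplatValue(V)) {
  // Every lane of V equals Splat.
}
bool SplatOrPoison = isSplatValue(V); // also true when some lanes are poison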
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, or -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
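A small sketch of these bit-math helpers (NextPowerOf2 appears further down the list); the values are illustrative:

uint64_t NumElts = 6;
uint64_t VL = isPowerOf2_64(NumElts) ? NumElts : NextPowerOf2(NumElts); // 8
unsigned Shift = Log2_32((uint32_t)VL); // 3, since (1u << 3) == 8
bool FitsImm = isInt<12>((int64_t)VL);  // true: 8 fits a signed 12-bit field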
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
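A hedged sketch combining computeKnownBits with the MaskedValueIsZero query above; V and DL are placeholders:

KnownBits Known(/*BitWidth=*/32);
computeKnownBits(V, Known, DL);
bool AlignedTo4 = Known.countMinTrailingZeros() >= 2;
// Equivalent mask-based form:
// MaskedValueIsZero(V, APInt::getLowBitsSet(32, 2), SimplifyQuery(DL));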
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
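Sketch: try to simplify before paying for an instruction; LHS, RHS and DL are placeholders:

if (Value *Folded =
        simplifyBinOp(Instruction::Add, LHS, RHS, SimplifyQuery(DL)))
  return Folded; // the add folds away, so there is nothing to cost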
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
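A hedged sketch (the AArch64SVEPredPattern::vl16 constant is assumed from AArch64BaseInfo.h):

unsigned N = getNumElementsFromSVEPredPattern(AArch64SVEPredPattern::vl16);
// N == 16; patterns without a fixed length, such as 'all', yield 0.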
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal, or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
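A minimal sketch using the signature above; PSE, AccessTy, Ptr, Lp and DT are placeholders:

std::optional<int64_t> Stride = getPtrStride(PSE, AccessTy, Ptr, Lp, DT);
bool Consecutive = Stride && *Stride == 1; // unit stride in AccessTy units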
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
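A hedged sketch of how these SVEIntrinsicInfo builders chain; the particular combination is illustrative, not taken from the source:

SVEIntrinsicInfo Info =
    SVEIntrinsicInfo::defaultMergingOp().setMatchingIROpcode(Instruction::Add);
if (Info.hasGoverningPredicate()) {
  unsigned PgIdx = Info.getGoverningPredicateOperandIdx();
  // ... simplify the call based on the predicate operand at PgIdx ...
}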
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
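A small sketch of the EVT queries above; Ctx is a placeholder LLVMContext:

EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 4); // v4i32, fixed length
if (VT.isSimple() && VT.isFixedLengthVector()) {
  unsigned NumElts = VT.getVectorNumElements(); // 4
  EVT EltVT = VT.getVectorElementType();        // i32
  bool Wider = VT.bitsGT(EVT(MVT::i64));        // true: 128 bits > 64 bits
}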
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...