23#include "llvm/IR/IntrinsicsAArch64.h"
35#define DEBUG_TYPE "aarch64tti"
41 "sve-prefer-fixed-over-scalable-if-equal",
cl::Hidden);
59 "Penalty of calling a function that requires a change to PSTATE.SM"));
63 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
74 cl::desc(
"The cost of a histcnt instruction"));
78 cl::desc(
"The number of instructions to search for a redundant dmb"));
82 cl::desc(
"Threshold for forced unrolling of small loops in AArch64"));
85class TailFoldingOption {
100 bool NeedsDefault =
true;
104 void setNeedsDefault(
bool V) { NeedsDefault =
V; }
119 assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
120 "Initial bits should only include one of "
121 "(disabled|all|simple|default)");
122 Bits = NeedsDefault ? DefaultBits : InitialBits;
124 Bits &= ~DisableBits;
130 errs() <<
"invalid argument '" << Opt
131 <<
"' to -sve-tail-folding=; the option should be of the form\n"
132 " (disabled|all|default|simple)[+(reductions|recurrences"
133 "|reverse|noreductions|norecurrences|noreverse)]\n";
139 void operator=(
const std::string &Val) {
148 setNeedsDefault(
false);
151 StringRef(Val).split(TailFoldTypes,
'+', -1,
false);
153 unsigned StartIdx = 1;
154 if (TailFoldTypes[0] ==
"disabled")
155 setInitialBits(TailFoldingOpts::Disabled);
156 else if (TailFoldTypes[0] ==
"all")
157 setInitialBits(TailFoldingOpts::All);
158 else if (TailFoldTypes[0] ==
"default")
159 setNeedsDefault(
true);
160 else if (TailFoldTypes[0] ==
"simple")
161 setInitialBits(TailFoldingOpts::Simple);
164 setInitialBits(TailFoldingOpts::Disabled);
167 for (
unsigned I = StartIdx;
I < TailFoldTypes.
size();
I++) {
168 if (TailFoldTypes[
I] ==
"reductions")
169 setEnableBit(TailFoldingOpts::Reductions);
170 else if (TailFoldTypes[
I] ==
"recurrences")
171 setEnableBit(TailFoldingOpts::Recurrences);
172 else if (TailFoldTypes[
I] ==
"reverse")
173 setEnableBit(TailFoldingOpts::Reverse);
174 else if (TailFoldTypes[
I] ==
"noreductions")
175 setDisableBit(TailFoldingOpts::Reductions);
176 else if (TailFoldTypes[
I] ==
"norecurrences")
177 setDisableBit(TailFoldingOpts::Recurrences);
178 else if (TailFoldTypes[
I] ==
"noreverse")
179 setDisableBit(TailFoldingOpts::Reverse);
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
258 TTI->isMultiversionedFunction(
F) ?
"fmv-features" :
"target-features";
259 StringRef FeatureStr =
F.getFnAttribute(AttributeStr).getValueAsString();
260 FeatureStr.
split(Features,
",");
276 return F.hasFnAttribute(
"fmv-features");
325 auto FVTy = dyn_cast<FixedVectorType>(Ty);
327 FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
336 unsigned DefaultCallPenalty)
const {
361 if (
F ==
Call.getCaller())
367 return DefaultCallPenalty;
378 ST->isSVEorStreamingSVEAvailable() &&
379 !ST->disableMaximizeScalableBandwidth();
403 assert(Ty->isIntegerTy());
405 unsigned BitSize = Ty->getPrimitiveSizeInBits();
412 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
417 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
423 return std::max<InstructionCost>(1,
Cost);
430 assert(Ty->isIntegerTy());
432 unsigned BitSize = Ty->getPrimitiveSizeInBits();
438 unsigned ImmIdx = ~0U;
442 case Instruction::GetElementPtr:
447 case Instruction::Store:
450 case Instruction::Add:
451 case Instruction::Sub:
452 case Instruction::Mul:
453 case Instruction::UDiv:
454 case Instruction::SDiv:
455 case Instruction::URem:
456 case Instruction::SRem:
457 case Instruction::And:
458 case Instruction::Or:
459 case Instruction::Xor:
460 case Instruction::ICmp:
464 case Instruction::Shl:
465 case Instruction::LShr:
466 case Instruction::AShr:
470 case Instruction::Trunc:
471 case Instruction::ZExt:
472 case Instruction::SExt:
473 case Instruction::IntToPtr:
474 case Instruction::PtrToInt:
475 case Instruction::BitCast:
476 case Instruction::PHI:
477 case Instruction::Call:
478 case Instruction::Select:
479 case Instruction::Ret:
480 case Instruction::Load:
485 int NumConstants = (BitSize + 63) / 64;
498 assert(Ty->isIntegerTy());
500 unsigned BitSize = Ty->getPrimitiveSizeInBits();
509 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
515 case Intrinsic::sadd_with_overflow:
516 case Intrinsic::uadd_with_overflow:
517 case Intrinsic::ssub_with_overflow:
518 case Intrinsic::usub_with_overflow:
519 case Intrinsic::smul_with_overflow:
520 case Intrinsic::umul_with_overflow:
522 int NumConstants = (BitSize + 63) / 64;
529 case Intrinsic::experimental_stackmap:
530 if ((Idx < 2) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
533 case Intrinsic::experimental_patchpoint_void:
534 case Intrinsic::experimental_patchpoint:
535 if ((Idx < 4) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
538 case Intrinsic::experimental_gc_statepoint:
539 if ((Idx < 5) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
549 if (TyWidth == 32 || TyWidth == 64)
558 return ST->getSchedModel().MispredictPenalty;
579 unsigned TotalHistCnts = 1;
589 unsigned EC = VTy->getElementCount().getKnownMinValue();
594 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
596 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
600 TotalHistCnts = EC / NaturalVectorWidth;
620 switch (ICA.
getID()) {
621 case Intrinsic::experimental_vector_histogram_add: {
628 case Intrinsic::clmul: {
633 if (LT.second == MVT::v8i8 || LT.second == MVT::v16i8)
637 if (TLI->getValueType(
DL, RetTy,
true) == MVT::i8) {
642 -1,
nullptr,
nullptr) *
645 -1,
nullptr,
nullptr);
649 if (LT.second.SimpleTy == MVT::nxv2i64)
650 if (ST->hasSVEAES() && (ST->isSVEAvailable() || ST->hasSSVE_AES()))
653 if (ST->hasSVE2() || ST->hasSME()) {
654 switch (LT.second.SimpleTy) {
669 if (LT.second.SimpleTy == MVT::nxv2i64)
673 switch (LT.second.SimpleTy) {
683 -1,
nullptr,
nullptr) *
686 -1,
nullptr,
nullptr));
695 return LT.first * 11;
697 return LT.first * 14;
704 case Intrinsic::umin:
705 case Intrinsic::umax:
706 case Intrinsic::smin:
707 case Intrinsic::smax: {
708 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
709 MVT::v8i16, MVT::v2i32, MVT::v4i32,
710 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
714 if (LT.second == MVT::v2i64)
720 case Intrinsic::scmp:
721 case Intrinsic::ucmp: {
723 {Intrinsic::scmp, MVT::i32, 3},
724 {Intrinsic::scmp, MVT::i64, 3},
725 {Intrinsic::scmp, MVT::v8i8, 3},
726 {Intrinsic::scmp, MVT::v16i8, 3},
727 {Intrinsic::scmp, MVT::v4i16, 3},
728 {Intrinsic::scmp, MVT::v8i16, 3},
729 {Intrinsic::scmp, MVT::v2i32, 3},
730 {Intrinsic::scmp, MVT::v4i32, 3},
731 {Intrinsic::scmp, MVT::v1i64, 3},
732 {Intrinsic::scmp, MVT::v2i64, 3},
738 return Entry->Cost * LT.first;
741 case Intrinsic::sadd_sat:
742 case Intrinsic::ssub_sat:
743 case Intrinsic::uadd_sat:
744 case Intrinsic::usub_sat: {
745 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
746 MVT::v8i16, MVT::v2i32, MVT::v4i32,
752 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
754 return LT.first * Instrs;
759 if (ST->isSVEAvailable() && VectorSize >= 128 &&
isPowerOf2_64(VectorSize))
760 return LT.first * Instrs;
764 case Intrinsic::abs: {
765 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
766 MVT::v8i16, MVT::v2i32, MVT::v4i32,
767 MVT::v2i64, MVT::nxv16i8, MVT::nxv8i16,
768 MVT::nxv4i32, MVT::nxv2i64};
774 case Intrinsic::bswap: {
775 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
776 MVT::v4i32, MVT::v2i64};
779 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
784 case Intrinsic::fmuladd: {
789 (EltTy->
isHalfTy() && ST->hasFullFP16()))
793 case Intrinsic::stepvector: {
802 Cost += AddCost * (LT.first - 1);
806 case Intrinsic::vector_extract:
807 case Intrinsic::vector_insert: {
820 bool IsExtract = ICA.
getID() == Intrinsic::vector_extract;
821 EVT SubVecVT = IsExtract ? getTLI()->getValueType(
DL, RetTy)
829 getTLI()->getTypeConversion(
C, SubVecVT);
831 getTLI()->getTypeConversion(
C, VecVT);
839 case Intrinsic::bitreverse: {
841 {Intrinsic::bitreverse, MVT::i32, 1},
842 {Intrinsic::bitreverse, MVT::i64, 1},
843 {Intrinsic::bitreverse, MVT::v8i8, 1},
844 {Intrinsic::bitreverse, MVT::v16i8, 1},
845 {Intrinsic::bitreverse, MVT::v4i16, 2},
846 {Intrinsic::bitreverse, MVT::v8i16, 2},
847 {Intrinsic::bitreverse, MVT::v2i32, 2},
848 {Intrinsic::bitreverse, MVT::v4i32, 2},
849 {Intrinsic::bitreverse, MVT::v1i64, 2},
850 {Intrinsic::bitreverse, MVT::v2i64, 2},
858 if (TLI->getValueType(
DL, RetTy,
true) == MVT::i8 ||
859 TLI->getValueType(
DL, RetTy,
true) == MVT::i16)
860 return LegalisationCost.first * Entry->Cost + 1;
862 return LegalisationCost.first * Entry->Cost;
866 case Intrinsic::ctpop: {
867 if (!ST->hasNEON()) {
899 RetTy->getScalarSizeInBits()
902 return LT.first * Entry->Cost + ExtraCost;
906 case Intrinsic::sadd_with_overflow:
907 case Intrinsic::uadd_with_overflow:
908 case Intrinsic::ssub_with_overflow:
909 case Intrinsic::usub_with_overflow:
910 case Intrinsic::smul_with_overflow:
911 case Intrinsic::umul_with_overflow: {
913 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
914 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
915 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
916 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
917 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
918 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
919 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
920 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
921 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
922 {Intrinsic::usub_with_overflow, MVT::i8, 3},
923 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
924 {Intrinsic::usub_with_overflow, MVT::i16, 3},
925 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
926 {Intrinsic::usub_with_overflow, MVT::i32, 1},
927 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
928 {Intrinsic::usub_with_overflow, MVT::i64, 1},
929 {Intrinsic::smul_with_overflow, MVT::i8, 5},
930 {Intrinsic::umul_with_overflow, MVT::i8, 4},
931 {Intrinsic::smul_with_overflow, MVT::i16, 5},
932 {Intrinsic::umul_with_overflow, MVT::i16, 4},
933 {Intrinsic::smul_with_overflow, MVT::i32, 2},
934 {Intrinsic::umul_with_overflow, MVT::i32, 2},
935 {Intrinsic::smul_with_overflow, MVT::i64, 3},
936 {Intrinsic::umul_with_overflow, MVT::i64, 3},
938 EVT MTy = TLI->getValueType(
DL, RetTy->getContainedType(0),
true);
945 case Intrinsic::fptosi_sat:
946 case Intrinsic::fptoui_sat: {
949 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
951 EVT MTy = TLI->getValueType(
DL, RetTy);
954 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
955 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
956 LT.second == MVT::v2f64)) {
958 (LT.second == MVT::f64 && MTy == MVT::i32) ||
959 (LT.second == MVT::f32 && MTy == MVT::i64)))
968 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
975 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
976 (LT.second == MVT::f16 && MTy == MVT::i64) ||
977 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
991 if ((LT.second.getScalarType() == MVT::f32 ||
992 LT.second.getScalarType() == MVT::f64 ||
993 LT.second.getScalarType() == MVT::f16) &&
997 if (LT.second.isVector())
1002 LegalTy, {LegalTy, LegalTy});
1006 LegalTy, {LegalTy, LegalTy});
1008 return LT.first *
Cost +
1009 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
1015 RetTy = RetTy->getScalarType();
1016 if (LT.second.isVector()) {
1034 return LT.first *
Cost;
1036 case Intrinsic::fshl:
1037 case Intrinsic::fshr: {
1046 if (RetTy->isIntegerTy() && ICA.
getArgs()[0] == ICA.
getArgs()[1] &&
1047 (RetTy->getPrimitiveSizeInBits() == 32 ||
1048 RetTy->getPrimitiveSizeInBits() == 64)) {
1061 {Intrinsic::fshl, MVT::v4i32, 2},
1062 {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
1063 {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
1064 {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
1070 return LegalisationCost.first * Entry->Cost;
1074 if (!RetTy->isIntegerTy())
1079 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
1080 RetTy->getScalarSizeInBits() < 64) ||
1081 (RetTy->getScalarSizeInBits() % 64 != 0);
1082 unsigned ExtraCost = HigherCost ? 1 : 0;
1083 if (RetTy->getScalarSizeInBits() == 32 ||
1084 RetTy->getScalarSizeInBits() == 64)
1087 else if (HigherCost)
1091 return TyL.first + ExtraCost;
1093 case Intrinsic::get_active_lane_mask: {
1095 EVT RetVT = getTLI()->getValueType(
DL, RetTy);
1097 if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
1100 if (RetTy->isScalableTy()) {
1101 if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
1111 if (ST->hasSVE2p1() || ST->hasSME2()) {
1126 return Cost + (SplitCost * (
Cost - 1));
1141 case Intrinsic::experimental_vector_match: {
1144 unsigned SearchSize = NeedleTy->getNumElements();
1145 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
1158 case Intrinsic::cttz: {
1160 if (LT.second == MVT::v8i8 || LT.second == MVT::v16i8)
1161 return LT.first * 2;
1162 if (LT.second == MVT::v4i16 || LT.second == MVT::v8i16 ||
1163 LT.second == MVT::v2i32 || LT.second == MVT::v4i32)
1164 return LT.first * 3;
1167 case Intrinsic::experimental_cttz_elts: {
1169 if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
1177 case Intrinsic::loop_dependence_raw_mask:
1178 case Intrinsic::loop_dependence_war_mask: {
1180 if (ST->hasSVE2() || ST->hasSME()) {
1181 EVT VecVT = getTLI()->getValueType(
DL, RetTy);
1182 unsigned EltSizeInBytes =
1192 case Intrinsic::experimental_vector_extract_last_active:
1193 if (ST->isSVEorStreamingSVEAvailable()) {
1199 case Intrinsic::pow: {
1202 EVT VT = getTLI()->getValueType(
DL, RetTy);
1204 bool HasLibcall = getTLI()->getLibcallImpl(LC) != RTLIB::Unsupported;
1219 bool Is025 = ExpF->getValueAPF().isExactlyValue(0.25);
1220 bool Is075 = ExpF->getValueAPF().isExactlyValue(0.75);
1230 return (Sqrt * 2) +
FMul;
1241 case Intrinsic::sqrt:
1242 case Intrinsic::fabs:
1243 case Intrinsic::ceil:
1244 case Intrinsic::floor:
1245 case Intrinsic::nearbyint:
1246 case Intrinsic::round:
1247 case Intrinsic::rint:
1248 case Intrinsic::roundeven:
1249 case Intrinsic::trunc:
1250 case Intrinsic::minnum:
1251 case Intrinsic::maxnum:
1252 case Intrinsic::minimum:
1253 case Intrinsic::maximum: {
1271 auto RequiredType =
II.getType();
1274 assert(PN &&
"Expected Phi Node!");
1277 if (!PN->hasOneUse())
1278 return std::nullopt;
1280 for (
Value *IncValPhi : PN->incoming_values()) {
1283 Reinterpret->getIntrinsicID() !=
1284 Intrinsic::aarch64_sve_convert_to_svbool ||
1285 RequiredType != Reinterpret->getArgOperand(0)->getType())
1286 return std::nullopt;
1294 for (
unsigned I = 0;
I < PN->getNumIncomingValues();
I++) {
1296 NPN->
addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(
I));
1369 return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
1374 return GoverningPredicateIdx;
1379 GoverningPredicateIdx = Index;
1397 return UndefIntrinsic;
1402 UndefIntrinsic = IID;
1424 return ResultLanes == InactiveLanesTakenFromOperand;
1429 return OperandIdxForInactiveLanes;
1433 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1434 ResultLanes = InactiveLanesTakenFromOperand;
1435 OperandIdxForInactiveLanes = Index;
1440 return ResultLanes == InactiveLanesAreNotDefined;
1444 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1445 ResultLanes = InactiveLanesAreNotDefined;
1450 return ResultLanes == InactiveLanesAreUnused;
1454 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1455 ResultLanes = InactiveLanesAreUnused;
1465 ResultIsZeroInitialized =
true;
1476 return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
1481 return OperandIdxWithNoActiveLanes;
1486 OperandIdxWithNoActiveLanes = Index;
1491 unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
1494 unsigned IROpcode = 0;
1496 enum PredicationStyle {
1498 InactiveLanesTakenFromOperand,
1499 InactiveLanesAreNotDefined,
1500 InactiveLanesAreUnused
1503 bool ResultIsZeroInitialized =
false;
1504 unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
1505 unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
1513 return !isa<ScalableVectorType>(V->getType());
1521 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1522 case Intrinsic::aarch64_sve_fcvt_f16f32:
1523 case Intrinsic::aarch64_sve_fcvt_f16f64:
1524 case Intrinsic::aarch64_sve_fcvt_f32f16:
1525 case Intrinsic::aarch64_sve_fcvt_f32f64:
1526 case Intrinsic::aarch64_sve_fcvt_f64f16:
1527 case Intrinsic::aarch64_sve_fcvt_f64f32:
1528 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1529 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1530 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1531 case Intrinsic::aarch64_sve_fcvtzs:
1532 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1533 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1534 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1535 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1536 case Intrinsic::aarch64_sve_fcvtzu:
1537 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1538 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1539 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1540 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1541 case Intrinsic::aarch64_sve_revb:
1542 case Intrinsic::aarch64_sve_revh:
1543 case Intrinsic::aarch64_sve_revw:
1544 case Intrinsic::aarch64_sve_revd:
1545 case Intrinsic::aarch64_sve_scvtf:
1546 case Intrinsic::aarch64_sve_scvtf_f16i32:
1547 case Intrinsic::aarch64_sve_scvtf_f16i64:
1548 case Intrinsic::aarch64_sve_scvtf_f32i64:
1549 case Intrinsic::aarch64_sve_scvtf_f64i32:
1550 case Intrinsic::aarch64_sve_ucvtf:
1551 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1552 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1553 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1554 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1557 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1558 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1559 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1560 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1563 case Intrinsic::aarch64_sve_fabd:
1565 case Intrinsic::aarch64_sve_fadd:
1568 case Intrinsic::aarch64_sve_fdiv:
1571 case Intrinsic::aarch64_sve_fmax:
1573 case Intrinsic::aarch64_sve_fmaxnm:
1575 case Intrinsic::aarch64_sve_fmin:
1577 case Intrinsic::aarch64_sve_fminnm:
1579 case Intrinsic::aarch64_sve_fmla:
1581 case Intrinsic::aarch64_sve_fmls:
1583 case Intrinsic::aarch64_sve_fmul:
1586 case Intrinsic::aarch64_sve_fmulx:
1588 case Intrinsic::aarch64_sve_fnmla:
1590 case Intrinsic::aarch64_sve_fnmls:
1592 case Intrinsic::aarch64_sve_fsub:
1595 case Intrinsic::aarch64_sve_add:
1598 case Intrinsic::aarch64_sve_mla:
1600 case Intrinsic::aarch64_sve_mls:
1602 case Intrinsic::aarch64_sve_mul:
1605 case Intrinsic::aarch64_sve_sabd:
1607 case Intrinsic::aarch64_sve_sdiv:
1610 case Intrinsic::aarch64_sve_smax:
1612 case Intrinsic::aarch64_sve_smin:
1614 case Intrinsic::aarch64_sve_smulh:
1616 case Intrinsic::aarch64_sve_sub:
1619 case Intrinsic::aarch64_sve_uabd:
1621 case Intrinsic::aarch64_sve_udiv:
1624 case Intrinsic::aarch64_sve_umax:
1626 case Intrinsic::aarch64_sve_umin:
1628 case Intrinsic::aarch64_sve_umulh:
1630 case Intrinsic::aarch64_sve_asr:
1633 case Intrinsic::aarch64_sve_lsl:
1636 case Intrinsic::aarch64_sve_lsr:
1639 case Intrinsic::aarch64_sve_and:
1642 case Intrinsic::aarch64_sve_bic:
1644 case Intrinsic::aarch64_sve_eor:
1647 case Intrinsic::aarch64_sve_orr:
1650 case Intrinsic::aarch64_sve_shsub:
1652 case Intrinsic::aarch64_sve_shsubr:
1654 case Intrinsic::aarch64_sve_sqrshl:
1656 case Intrinsic::aarch64_sve_sqshl:
1658 case Intrinsic::aarch64_sve_sqsub:
1660 case Intrinsic::aarch64_sve_srshl:
1662 case Intrinsic::aarch64_sve_uhsub:
1664 case Intrinsic::aarch64_sve_uhsubr:
1666 case Intrinsic::aarch64_sve_uqrshl:
1668 case Intrinsic::aarch64_sve_uqshl:
1670 case Intrinsic::aarch64_sve_uqsub:
1672 case Intrinsic::aarch64_sve_urshl:
1675 case Intrinsic::aarch64_sve_add_u:
1678 case Intrinsic::aarch64_sve_and_u:
1681 case Intrinsic::aarch64_sve_asr_u:
1684 case Intrinsic::aarch64_sve_eor_u:
1687 case Intrinsic::aarch64_sve_fadd_u:
1690 case Intrinsic::aarch64_sve_fdiv_u:
1693 case Intrinsic::aarch64_sve_fmul_u:
1696 case Intrinsic::aarch64_sve_fsub_u:
1699 case Intrinsic::aarch64_sve_lsl_u:
1702 case Intrinsic::aarch64_sve_lsr_u:
1705 case Intrinsic::aarch64_sve_mul_u:
1708 case Intrinsic::aarch64_sve_orr_u:
1711 case Intrinsic::aarch64_sve_sdiv_u:
1714 case Intrinsic::aarch64_sve_sub_u:
1717 case Intrinsic::aarch64_sve_udiv_u:
1721 case Intrinsic::aarch64_sve_addqv:
1722 case Intrinsic::aarch64_sve_and_z:
1723 case Intrinsic::aarch64_sve_bic_z:
1724 case Intrinsic::aarch64_sve_brka_z:
1725 case Intrinsic::aarch64_sve_brkb_z:
1726 case Intrinsic::aarch64_sve_brkn_z:
1727 case Intrinsic::aarch64_sve_brkpa_z:
1728 case Intrinsic::aarch64_sve_brkpb_z:
1729 case Intrinsic::aarch64_sve_cntp:
1730 case Intrinsic::aarch64_sve_compact:
1731 case Intrinsic::aarch64_sve_eor_z:
1732 case Intrinsic::aarch64_sve_eorv:
1733 case Intrinsic::aarch64_sve_eorqv:
1734 case Intrinsic::aarch64_sve_nand_z:
1735 case Intrinsic::aarch64_sve_nor_z:
1736 case Intrinsic::aarch64_sve_orn_z:
1737 case Intrinsic::aarch64_sve_orr_z:
1738 case Intrinsic::aarch64_sve_orv:
1739 case Intrinsic::aarch64_sve_orqv:
1740 case Intrinsic::aarch64_sve_pnext:
1741 case Intrinsic::aarch64_sve_rdffr_z:
1742 case Intrinsic::aarch64_sve_saddv:
1743 case Intrinsic::aarch64_sve_uaddv:
1744 case Intrinsic::aarch64_sve_umaxv:
1745 case Intrinsic::aarch64_sve_umaxqv:
1746 case Intrinsic::aarch64_sve_cmpeq:
1747 case Intrinsic::aarch64_sve_cmpeq_wide:
1748 case Intrinsic::aarch64_sve_cmpge:
1749 case Intrinsic::aarch64_sve_cmpge_wide:
1750 case Intrinsic::aarch64_sve_cmpgt:
1751 case Intrinsic::aarch64_sve_cmpgt_wide:
1752 case Intrinsic::aarch64_sve_cmphi:
1753 case Intrinsic::aarch64_sve_cmphi_wide:
1754 case Intrinsic::aarch64_sve_cmphs:
1755 case Intrinsic::aarch64_sve_cmphs_wide:
1756 case Intrinsic::aarch64_sve_cmple_wide:
1757 case Intrinsic::aarch64_sve_cmplo_wide:
1758 case Intrinsic::aarch64_sve_cmpls_wide:
1759 case Intrinsic::aarch64_sve_cmplt_wide:
1760 case Intrinsic::aarch64_sve_cmpne:
1761 case Intrinsic::aarch64_sve_cmpne_wide:
1762 case Intrinsic::aarch64_sve_facge:
1763 case Intrinsic::aarch64_sve_facgt:
1764 case Intrinsic::aarch64_sve_fcmpeq:
1765 case Intrinsic::aarch64_sve_fcmpge:
1766 case Intrinsic::aarch64_sve_fcmpgt:
1767 case Intrinsic::aarch64_sve_fcmpne:
1768 case Intrinsic::aarch64_sve_fcmpuo:
1769 case Intrinsic::aarch64_sve_ld1:
1770 case Intrinsic::aarch64_sve_ld1_gather:
1771 case Intrinsic::aarch64_sve_ld1_gather_index:
1772 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1773 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1774 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1775 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1776 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1777 case Intrinsic::aarch64_sve_ld1q_gather_index:
1778 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1779 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1780 case Intrinsic::aarch64_sve_ld1ro:
1781 case Intrinsic::aarch64_sve_ld1rq:
1782 case Intrinsic::aarch64_sve_ld1udq:
1783 case Intrinsic::aarch64_sve_ld1uwq:
1784 case Intrinsic::aarch64_sve_ld2_sret:
1785 case Intrinsic::aarch64_sve_ld2q_sret:
1786 case Intrinsic::aarch64_sve_ld3_sret:
1787 case Intrinsic::aarch64_sve_ld3q_sret:
1788 case Intrinsic::aarch64_sve_ld4_sret:
1789 case Intrinsic::aarch64_sve_ld4q_sret:
1790 case Intrinsic::aarch64_sve_ldff1:
1791 case Intrinsic::aarch64_sve_ldff1_gather:
1792 case Intrinsic::aarch64_sve_ldff1_gather_index:
1793 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1794 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1795 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1796 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1797 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1798 case Intrinsic::aarch64_sve_ldnf1:
1799 case Intrinsic::aarch64_sve_ldnt1:
1800 case Intrinsic::aarch64_sve_ldnt1_gather:
1801 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1802 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1803 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1806 case Intrinsic::aarch64_sve_prf:
1807 case Intrinsic::aarch64_sve_prfb_gather_index:
1808 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1809 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1810 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1811 case Intrinsic::aarch64_sve_prfd_gather_index:
1812 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1813 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1814 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1815 case Intrinsic::aarch64_sve_prfh_gather_index:
1816 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1817 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1818 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1819 case Intrinsic::aarch64_sve_prfw_gather_index:
1820 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1821 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1822 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1825 case Intrinsic::aarch64_sve_st1_scatter:
1826 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1827 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1828 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1829 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1830 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1831 case Intrinsic::aarch64_sve_st1dq:
1832 case Intrinsic::aarch64_sve_st1q_scatter_index:
1833 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1834 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1835 case Intrinsic::aarch64_sve_st1wq:
1836 case Intrinsic::aarch64_sve_stnt1:
1837 case Intrinsic::aarch64_sve_stnt1_scatter:
1838 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1839 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1840 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1842 case Intrinsic::aarch64_sve_st2:
1843 case Intrinsic::aarch64_sve_st2q:
1845 case Intrinsic::aarch64_sve_st3:
1846 case Intrinsic::aarch64_sve_st3q:
1848 case Intrinsic::aarch64_sve_st4:
1849 case Intrinsic::aarch64_sve_st4q:
1857 Value *UncastedPred;
1863 Pred = UncastedPred;
1869 if (OrigPredTy->getMinNumElements() <=
1871 ->getMinNumElements())
1872 Pred = UncastedPred;
1876 return C &&
C->isAllOnesValue();
1883 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
1884 Dup->getOperand(1) == Pg &&
isa<Constant>(Dup->getOperand(2)))
1892static std::optional<Instruction *>
1899 Value *Op1 =
II.getOperand(1);
1900 Value *Op2 =
II.getOperand(2);
1926 return std::nullopt;
1934 if (SimpleII == Inactive)
1944static std::optional<Instruction *>
1948 return std::nullopt;
1977 II.setCalledFunction(NewDecl);
1987 return std::nullopt;
1999static std::optional<Instruction *>
2003 return std::nullopt;
2005 auto IntrinsicID = BinOp->getIntrinsicID();
2006 switch (IntrinsicID) {
2007 case Intrinsic::aarch64_sve_and_z:
2008 case Intrinsic::aarch64_sve_bic_z:
2009 case Intrinsic::aarch64_sve_eor_z:
2010 case Intrinsic::aarch64_sve_nand_z:
2011 case Intrinsic::aarch64_sve_nor_z:
2012 case Intrinsic::aarch64_sve_orn_z:
2013 case Intrinsic::aarch64_sve_orr_z:
2016 return std::nullopt;
2019 auto BinOpPred = BinOp->getOperand(0);
2020 auto BinOpOp1 = BinOp->getOperand(1);
2021 auto BinOpOp2 = BinOp->getOperand(2);
2025 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
2026 return std::nullopt;
2028 auto PredOp = PredIntr->getOperand(0);
2030 if (PredOpTy !=
II.getType())
2031 return std::nullopt;
2035 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
2036 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
2037 if (BinOpOp1 == BinOpOp2)
2038 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
2041 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
2043 auto NarrowedBinOp =
2048static std::optional<Instruction *>
2055 return BinOpCombine;
2060 return std::nullopt;
2063 Value *Cursor =
II.getOperand(0), *EarliestReplacement =
nullptr;
2072 if (CursorVTy->getElementCount().getKnownMinValue() <
2073 IVTy->getElementCount().getKnownMinValue())
2077 if (Cursor->getType() == IVTy)
2078 EarliestReplacement = Cursor;
2083 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
2084 Intrinsic::aarch64_sve_convert_to_svbool ||
2085 IntrinsicCursor->getIntrinsicID() ==
2086 Intrinsic::aarch64_sve_convert_from_svbool))
2089 CandidatesForRemoval.
insert(CandidatesForRemoval.
begin(), IntrinsicCursor);
2090 Cursor = IntrinsicCursor->getOperand(0);
2095 if (!EarliestReplacement)
2096 return std::nullopt;
2104 auto *OpPredicate =
II.getOperand(0);
2121 II.getArgOperand(2));
2127 return std::nullopt;
2131 II.getArgOperand(0),
II.getArgOperand(2),
uint64_t(0));
2140 II.getArgOperand(0));
2150 return std::nullopt;
2155 if (!SplatValue || !SplatValue->isZero())
2156 return std::nullopt;
2161 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
2162 return std::nullopt;
2166 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
2167 return std::nullopt;
2170 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
2171 return std::nullopt;
2176 return std::nullopt;
2179 return std::nullopt;
2183 return std::nullopt;
2187 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
2188 return std::nullopt;
2190 unsigned NumElts = VecTy->getNumElements();
2191 unsigned PredicateBits = 0;
2194 for (
unsigned I = 0;
I < NumElts; ++
I) {
2197 return std::nullopt;
2199 PredicateBits |= 1 << (
I * (16 / NumElts));
2203 if (PredicateBits == 0) {
2205 PFalse->takeName(&
II);
2211 for (
unsigned I = 0;
I < 16; ++
I)
2212 if ((PredicateBits & (1 <<
I)) != 0)
2215 unsigned PredSize = Mask & -Mask;
2220 for (
unsigned I = 0;
I < 16;
I += PredSize)
2221 if ((PredicateBits & (1 <<
I)) == 0)
2222 return std::nullopt;
2224 auto *ConvertToSVBool =
2227 auto *ConvertFromSVBool =
2229 II.getType(), ConvertToSVBool);
2237 Value *Pg =
II.getArgOperand(0);
2238 Value *Vec =
II.getArgOperand(1);
2239 auto IntrinsicID =
II.getIntrinsicID();
2240 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
2252 auto OpC = OldBinOp->getOpcode();
2258 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(),
II.getIterator());
2264 if (IsAfter &&
C &&
C->isNullValue()) {
2268 Extract->insertBefore(
II.getIterator());
2269 Extract->takeName(&
II);
2275 return std::nullopt;
2277 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
2278 return std::nullopt;
2280 const auto PTruePattern =
2286 return std::nullopt;
2288 unsigned Idx = MinNumElts - 1;
2298 if (Idx >= PgVTy->getMinNumElements())
2299 return std::nullopt;
2304 Extract->insertBefore(
II.getIterator());
2305 Extract->takeName(&
II);
2318 Value *Pg =
II.getArgOperand(0);
2320 Value *Vec =
II.getArgOperand(2);
2323 if (!Ty->isIntegerTy())
2324 return std::nullopt;
2329 return std::nullopt;
2346 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
2361static std::optional<Instruction *>
2365 if (
Pattern == AArch64SVEPredPattern::all) {
2374 return MinNumElts && NumElts >= MinNumElts
2376 II, ConstantInt::get(
II.getType(), MinNumElts)))
2380static std::optional<Instruction *>
2383 if (!ST->isStreaming())
2384 return std::nullopt;
2396 Value *PgVal =
II.getArgOperand(0);
2397 Value *OpVal =
II.getArgOperand(1);
2401 if (PgVal == OpVal &&
2402 (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
2403 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
2418 return std::nullopt;
2422 if (Pg->
getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
2423 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
2437 if ((Pg ==
Op) && (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
2438 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
2439 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
2440 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
2441 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
2442 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
2443 (OpIID == Intrinsic::aarch64_sve_and_z) ||
2444 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
2445 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
2446 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
2447 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
2448 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
2449 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
2459 return std::nullopt;
2462template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
2463static std::optional<Instruction *>
2465 bool MergeIntoAddendOp) {
2467 Value *MulOp0, *MulOp1, *AddendOp, *
Mul;
2468 if (MergeIntoAddendOp) {
2469 AddendOp =
II.getOperand(1);
2470 Mul =
II.getOperand(2);
2472 AddendOp =
II.getOperand(2);
2473 Mul =
II.getOperand(1);
2478 return std::nullopt;
2480 if (!
Mul->hasOneUse())
2481 return std::nullopt;
2484 if (
II.getType()->isFPOrFPVectorTy()) {
2489 return std::nullopt;
2491 return std::nullopt;
2496 if (MergeIntoAddendOp)
2506static std::optional<Instruction *>
2508 Value *Pred =
II.getOperand(0);
2509 Value *PtrOp =
II.getOperand(1);
2510 Type *VecTy =
II.getType();
2514 Load->copyMetadata(
II);
2525static std::optional<Instruction *>
2527 Value *VecOp =
II.getOperand(0);
2528 Value *Pred =
II.getOperand(1);
2529 Value *PtrOp =
II.getOperand(2);
2533 Store->copyMetadata(
II);
2545 case Intrinsic::aarch64_sve_fmul_u:
2546 return Instruction::BinaryOps::FMul;
2547 case Intrinsic::aarch64_sve_fadd_u:
2548 return Instruction::BinaryOps::FAdd;
2549 case Intrinsic::aarch64_sve_fsub_u:
2550 return Instruction::BinaryOps::FSub;
2552 return Instruction::BinaryOpsEnd;
2556static std::optional<Instruction *>
2559 if (
II.isStrictFP())
2560 return std::nullopt;
2562 auto *OpPredicate =
II.getOperand(0);
2564 if (BinOpCode == Instruction::BinaryOpsEnd ||
2566 return std::nullopt;
2568 BinOpCode,
II.getOperand(1),
II.getOperand(2),
II.getFastMathFlags());
2572static std::optional<Instruction *>
2574 assert(
II.getIntrinsicID() == Intrinsic::aarch64_sve_mla_u &&
2575 "Expected MLA_U intrinsic");
2576 Value *Acc =
II.getArgOperand(1);
2577 Value *MulOp0 =
II.getArgOperand(2);
2578 Value *MulOp1 =
II.getArgOperand(3);
2593 II.setArgOperand(2, MulOp1);
2594 II.setArgOperand(3, MulOp0);
2598 return std::nullopt;
2601static std::optional<Instruction *>
2603 assert((
II.getIntrinsicID() == Intrinsic::aarch64_sve_sadalp ||
2604 II.getIntrinsicID() == Intrinsic::aarch64_sve_uadalp) &&
2605 "Expected SADALP or UADALP intrinsic");
2611 return std::nullopt;
2615 return std::nullopt;
2619 II.getIntrinsicID(), {II.getType()},
2620 {II.getArgOperand(0), Acc, II.getArgOperand(2)});
2630 Intrinsic::aarch64_sve_mla>(
2634 Intrinsic::aarch64_sve_mad>(
2637 return std::nullopt;
2640static std::optional<Instruction *>
2644 Intrinsic::aarch64_sve_fmla>(IC,
II,
2649 Intrinsic::aarch64_sve_fmad>(IC,
II,
2654 Intrinsic::aarch64_sve_fmla>(IC,
II,
2657 return std::nullopt;
2660static std::optional<Instruction *>
2664 Intrinsic::aarch64_sve_fmla>(IC,
II,
2669 Intrinsic::aarch64_sve_fmad>(IC,
II,
2674 Intrinsic::aarch64_sve_fmla_u>(
2680static std::optional<Instruction *>
2684 Intrinsic::aarch64_sve_fmls>(IC,
II,
2689 Intrinsic::aarch64_sve_fnmsb>(
2694 Intrinsic::aarch64_sve_fmls>(IC,
II,
2697 return std::nullopt;
2700static std::optional<Instruction *>
2704 Intrinsic::aarch64_sve_fmls>(IC,
II,
2709 Intrinsic::aarch64_sve_fnmsb>(
2714 Intrinsic::aarch64_sve_fmls_u>(
2723 Intrinsic::aarch64_sve_mls>(
2726 return std::nullopt;
2731 Value *UnpackArg =
II.getArgOperand(0);
2733 bool IsSigned =
II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
2734 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
2747 return std::nullopt;
2751 auto *OpVal =
II.getOperand(0);
2752 auto *OpIndices =
II.getOperand(1);
2759 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
2760 return std::nullopt;
2775 Type *RetTy =
II.getType();
2776 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
2777 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
2781 if ((
match(
II.getArgOperand(0),
2788 if (TyA ==
B->getType() &&
2793 TyA->getMinNumElements());
2799 return std::nullopt;
2807 if (
match(
II.getArgOperand(0),
2812 II, (
II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ?
A :
B));
2814 return std::nullopt;
2817static std::optional<Instruction *>
2819 Value *Mask =
II.getOperand(0);
2820 Value *BasePtr =
II.getOperand(1);
2821 Value *Index =
II.getOperand(2);
2832 BasePtr->getPointerAlignment(
II.getDataLayout());
2835 BasePtr, IndexBase);
2842 return std::nullopt;
2845static std::optional<Instruction *>
2847 Value *Val =
II.getOperand(0);
2848 Value *Mask =
II.getOperand(1);
2849 Value *BasePtr =
II.getOperand(2);
2850 Value *Index =
II.getOperand(3);
2860 BasePtr->getPointerAlignment(
II.getDataLayout());
2863 BasePtr, IndexBase);
2869 return std::nullopt;
2875 Value *Pred =
II.getOperand(0);
2876 Value *Vec =
II.getOperand(1);
2877 Value *DivVec =
II.getOperand(2);
2881 if (!SplatConstantInt)
2882 return std::nullopt;
2886 if (DivisorValue == -1)
2887 return std::nullopt;
2888 if (DivisorValue == 1)
2894 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2901 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2903 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2907 return std::nullopt;
2911 size_t VecSize = Vec.
size();
2916 size_t HalfVecSize = VecSize / 2;
2920 if (*
LHS !=
nullptr && *
RHS !=
nullptr) {
2928 if (*
LHS ==
nullptr && *
RHS !=
nullptr)
2946 return std::nullopt;
2953 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2954 CurrentInsertElt = InsertElt->getOperand(0);
2960 return std::nullopt;
2964 for (
size_t I = 0;
I < Elts.
size();
I++) {
2965 if (Elts[
I] ==
nullptr)
2970 if (InsertEltChain ==
nullptr)
2971 return std::nullopt;
2977 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.
size();
2978 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2979 IIScalableTy->getMinNumElements() /
2984 auto *WideShuffleMaskTy =
2995 auto NarrowBitcast =
3008 return std::nullopt;
3013 Value *Pred =
II.getOperand(0);
3014 Value *Vec =
II.getOperand(1);
3015 Value *Shift =
II.getOperand(2);
3018 Value *AbsPred, *MergedValue;
3024 return std::nullopt;
3032 return std::nullopt;
3037 return std::nullopt;
3040 {
II.getType()}, {Pred, Vec, Shift});
3047 Value *Vec =
II.getOperand(0);
3052 return std::nullopt;
3058 auto *NI =
II.getNextNode();
3061 return !
I->mayReadOrWriteMemory() && !
I->mayHaveSideEffects();
3063 while (LookaheadThreshold-- && CanSkipOver(NI)) {
3064 auto *NIBB = NI->getParent();
3065 NI = NI->getNextNode();
3067 if (
auto *SuccBB = NIBB->getUniqueSuccessor())
3068 NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();
3074 if (NextII &&
II.isIdenticalTo(NextII))
3077 return std::nullopt;
3085 {II.getType(), II.getOperand(0)->getType()},
3086 {II.getOperand(0), II.getOperand(1)}));
3093 if (PredPattern == AArch64SVEPredPattern::all ||
3094 PredPattern == AArch64SVEPredPattern::pow2)
3096 return std::nullopt;
3102 Value *Passthru =
II.getOperand(0);
3110 auto *Mask = ConstantInt::get(Ty, MaskValue);
3116 return std::nullopt;
3119static std::optional<Instruction *>
3126 return std::nullopt;
3129std::optional<Instruction *>
3140 case Intrinsic::aarch64_dmb:
3142 case Intrinsic::aarch64_neon_fmaxnm:
3143 case Intrinsic::aarch64_neon_fminnm:
3145 case Intrinsic::aarch64_sve_convert_from_svbool:
3147 case Intrinsic::aarch64_sve_dup:
3149 case Intrinsic::aarch64_sve_dup_x:
3151 case Intrinsic::aarch64_sve_cmpne:
3152 case Intrinsic::aarch64_sve_cmpne_wide:
3154 case Intrinsic::aarch64_sve_rdffr:
3156 case Intrinsic::aarch64_sve_lasta:
3157 case Intrinsic::aarch64_sve_lastb:
3159 case Intrinsic::aarch64_sve_clasta_n:
3160 case Intrinsic::aarch64_sve_clastb_n:
3162 case Intrinsic::aarch64_sve_cntd:
3164 case Intrinsic::aarch64_sve_cntw:
3166 case Intrinsic::aarch64_sve_cnth:
3168 case Intrinsic::aarch64_sve_cntb:
3170 case Intrinsic::aarch64_sme_cntsd:
3172 case Intrinsic::aarch64_sve_ptest_any:
3173 case Intrinsic::aarch64_sve_ptest_first:
3174 case Intrinsic::aarch64_sve_ptest_last:
3176 case Intrinsic::aarch64_sve_fadd:
3178 case Intrinsic::aarch64_sve_fadd_u:
3180 case Intrinsic::aarch64_sve_fmul_u:
3182 case Intrinsic::aarch64_sve_fsub:
3184 case Intrinsic::aarch64_sve_fsub_u:
3186 case Intrinsic::aarch64_sve_add:
3188 case Intrinsic::aarch64_sve_add_u:
3190 Intrinsic::aarch64_sve_mla_u>(
3192 case Intrinsic::aarch64_sve_mla_u:
3194 case Intrinsic::aarch64_sve_sadalp:
3195 case Intrinsic::aarch64_sve_uadalp:
3197 case Intrinsic::aarch64_sve_sub:
3199 case Intrinsic::aarch64_sve_sub_u:
3201 Intrinsic::aarch64_sve_mls_u>(
3203 case Intrinsic::aarch64_sve_tbl:
3205 case Intrinsic::aarch64_sve_uunpkhi:
3206 case Intrinsic::aarch64_sve_uunpklo:
3207 case Intrinsic::aarch64_sve_sunpkhi:
3208 case Intrinsic::aarch64_sve_sunpklo:
3210 case Intrinsic::aarch64_sve_uzp1:
3212 case Intrinsic::aarch64_sve_zip1:
3213 case Intrinsic::aarch64_sve_zip2:
3215 case Intrinsic::aarch64_sve_ld1_gather_index:
3217 case Intrinsic::aarch64_sve_st1_scatter_index:
3219 case Intrinsic::aarch64_sve_ld1:
3221 case Intrinsic::aarch64_sve_st1:
3223 case Intrinsic::aarch64_sve_sdiv:
3225 case Intrinsic::aarch64_sve_sel:
3227 case Intrinsic::aarch64_sve_srshl:
3229 case Intrinsic::aarch64_sve_dupq_lane:
3231 case Intrinsic::aarch64_sve_insr:
3233 case Intrinsic::aarch64_sve_whilelo:
3235 case Intrinsic::aarch64_sve_ptrue:
3237 case Intrinsic::aarch64_sve_uxtb:
3239 case Intrinsic::aarch64_sve_uxth:
3241 case Intrinsic::aarch64_sve_uxtw:
3243 case Intrinsic::aarch64_sme_in_streaming_mode:
3247 return std::nullopt;
3254 SimplifyAndSetOp)
const {
3255 switch (
II.getIntrinsicID()) {
3258 case Intrinsic::aarch64_neon_fcvtxn:
3259 case Intrinsic::aarch64_neon_rshrn:
3260 case Intrinsic::aarch64_neon_sqrshrn:
3261 case Intrinsic::aarch64_neon_sqrshrun:
3262 case Intrinsic::aarch64_neon_sqshrn:
3263 case Intrinsic::aarch64_neon_sqshrun:
3264 case Intrinsic::aarch64_neon_sqxtn:
3265 case Intrinsic::aarch64_neon_sqxtun:
3266 case Intrinsic::aarch64_neon_uqrshrn:
3267 case Intrinsic::aarch64_neon_uqshrn:
3268 case Intrinsic::aarch64_neon_uqxtn:
3269 SimplifyAndSetOp(&
II, 0, OrigDemandedElts, UndefElts);
3273 return std::nullopt;
3277 return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3287 if (ST->useSVEForFixedLengthVectors() &&
3290 std::max(ST->getMinSVEVectorSizeInBits(), 128u));
3291 else if (ST->isNeonAvailable())
3296 if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3305bool AArch64TTIImpl::isSingleExtWideningInstruction(
3307 Type *SrcOverrideTy)
const {
3322 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3325 Type *SrcTy = SrcOverrideTy;
3327 case Instruction::Add:
3328 case Instruction::Sub: {
3337 if (Opcode == Instruction::Sub)
3361 assert(SrcTy &&
"Expected some SrcTy");
3363 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
3369 DstTyL.first * DstTyL.second.getVectorMinNumElements();
3371 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
3375 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
3378Type *AArch64TTIImpl::isBinExtWideningInstruction(
unsigned Opcode,
Type *DstTy,
3380 Type *SrcOverrideTy)
const {
3381 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
3382 Opcode != Instruction::Mul)
3392 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3395 auto getScalarSizeWithOverride = [&](
const Value *
V) {
3401 ->getScalarSizeInBits();
3404 unsigned MaxEltSize = 0;
3407 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3408 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3409 MaxEltSize = std::max(EltSize0, EltSize1);
3412 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3413 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3416 if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
3418 MaxEltSize = DstEltSize / 2;
3419 }
else if (Opcode == Instruction::Mul &&
3432 getScalarSizeWithOverride(
isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);
3436 if (MaxEltSize * 2 > DstEltSize)
3454 if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(
DL, Src)) ||
3455 (Src->isScalableTy() && !ST->hasSVE2()))
3465 if (AddUser && AddUser->getOpcode() == Instruction::Add)
3469 if (!Shr || Shr->getOpcode() != Instruction::LShr)
3473 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
3474 Src->getScalarSizeInBits() !=
3498 int ISD = TLI->InstructionOpcodeToISD(Opcode);
3502 if (
I &&
I->hasOneUser()) {
3505 if (
Type *ExtTy = isBinExtWideningInstruction(
3506 SingleUser->getOpcode(), Dst, Operands,
3507 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3520 if (isSingleExtWideningInstruction(
3521 SingleUser->getOpcode(), Dst, Operands,
3522 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3526 if (SingleUser->getOpcode() == Instruction::Add) {
3527 if (
I == SingleUser->getOperand(1) ||
3529 cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
3544 EVT SrcTy = TLI->getValueType(
DL, Src);
3545 EVT DstTy = TLI->getValueType(
DL, Dst);
3547 if (!SrcTy.isSimple() || !DstTy.
isSimple())
3552 if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
3581 EVT WiderTy = SrcTy.
bitsGT(DstTy) ? SrcTy : DstTy;
3584 ST->useSVEForFixedLengthVectors(WiderTy)) {
3585 std::pair<InstructionCost, MVT> LT =
3587 unsigned NumElements =
3603 const unsigned int SVE_EXT_COST = 1;
3604 const unsigned int SVE_FCVT_COST = 1;
3605 const unsigned int SVE_UNPACK_ONCE = 4;
3606 const unsigned int SVE_UNPACK_TWICE = 16;
3735 SVE_EXT_COST + SVE_FCVT_COST},
3740 SVE_EXT_COST + SVE_FCVT_COST},
3747 SVE_EXT_COST + SVE_FCVT_COST},
3751 SVE_EXT_COST + SVE_FCVT_COST},
3757 SVE_EXT_COST + SVE_FCVT_COST},
3760 SVE_EXT_COST + SVE_FCVT_COST},
3765 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3767 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3777 SVE_EXT_COST + SVE_FCVT_COST},
3782 SVE_EXT_COST + SVE_FCVT_COST},
3795 SVE_EXT_COST + SVE_FCVT_COST},
3799 SVE_EXT_COST + SVE_FCVT_COST},
3811 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3813 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3815 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3817 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3821 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3823 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3839 SVE_EXT_COST + SVE_FCVT_COST},
3844 SVE_EXT_COST + SVE_FCVT_COST},
3855 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3857 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3859 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3861 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3863 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3865 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3869 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3871 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3873 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3875 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
4100 if (ST->hasFullFP16())
4112 Src->getScalarType(), CCH,
CostKind) +
4120 ST->isSVEorStreamingSVEAvailable() &&
4121 TLI->getTypeAction(Src->getContext(), SrcTy) ==
4123 TLI->getTypeAction(Dst->getContext(), DstTy) ==
4132 Opcode, LegalTy, Src, CCH,
CostKind,
I);
4135 return Part1 + Part2;
4142 ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))
4154 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
4167 CostKind, Index,
nullptr,
nullptr);
4171 auto DstVT = TLI->getValueType(
DL, Dst);
4172 auto SrcVT = TLI->getValueType(
DL, Src);
4177 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
4183 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
4193 case Instruction::SExt:
4198 case Instruction::ZExt:
4199 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
4212 return Opcode == Instruction::PHI ? 0 : 1;
4221 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4230 if (!LT.second.isVector())
4235 if (LT.second.isFixedLengthVector()) {
4236 unsigned Width = LT.second.getVectorNumElements();
4237 Index = Index % Width;
4252 if (ST->hasFastLD1Single())
4264 : ST->getVectorInsertExtractBaseCost() + 1;
4288 auto ExtractCanFuseWithFmul = [&]() {
4295 auto IsAllowedScalarTy = [&](
const Type *
T) {
4296 return T->isFloatTy() ||
T->isDoubleTy() ||
4297 (
T->isHalfTy() && ST->hasFullFP16());
4301 auto IsUserFMulScalarTy = [](
const Value *EEUser) {
4304 return BO && BO->getOpcode() == BinaryOperator::FMul &&
4305 !BO->getType()->isVectorTy();
4310 auto IsExtractLaneEquivalentToZero = [&](
unsigned Idx,
unsigned EltSz) {
4314 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
4323 DenseMap<User *, unsigned> UserToExtractIdx;
4324 for (
auto *U :
Scalar->users()) {
4325 if (!IsUserFMulScalarTy(U))
4329 UserToExtractIdx[
U];
4331 if (UserToExtractIdx.
empty())
4333 for (
auto &[S, U, L] : ScalarUserAndIdx) {
4334 for (
auto *U : S->users()) {
4335 if (UserToExtractIdx.
contains(U)) {
4337 auto *Op0 =
FMul->getOperand(0);
4338 auto *Op1 =
FMul->getOperand(1);
4339 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
4340 UserToExtractIdx[
U] =
L;
4346 for (
auto &[U, L] : UserToExtractIdx) {
4358 return !EE->users().empty() &&
all_of(EE->users(), [&](
const User *U) {
4359 if (!IsUserFMulScalarTy(U))
4364 const auto *BO = cast<BinaryOperator>(U);
4365 const auto *OtherEE = dyn_cast<ExtractElementInst>(
4366 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
4368 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
4371 return IsExtractLaneEquivalentToZero(
4372 cast<ConstantInt>(OtherEE->getIndexOperand())
4375 OtherEE->getType()->getScalarSizeInBits());
4383 if (Opcode == Instruction::ExtractElement && (
I || Scalar) &&
4384 ExtractCanFuseWithFmul())
4389 :
ST->getVectorInsertExtractBaseCost();
4398 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
4401 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr,
4407 Value *Scalar,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4409 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr, Scalar,
4410 ScalarUserAndIdx, VIC);
4417 return getVectorInstrCostHelper(
I.getOpcode(), Val,
CostKind, Index, &
I,
4424 unsigned Index)
const {
4436 : ST->getVectorInsertExtractBaseCost() + 1;
4445 if (Ty->getElementType()->isFloatingPointTy())
4448 unsigned VecInstCost =
4450 return DemandedElts.
popcount() * (Insert + Extract) * VecInstCost;
4457 if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
4458 return std::nullopt;
4459 if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
4460 return std::nullopt;
4462 if (CanUseSVE && ST->hasSVEB16B16() && ST->isNonStreamingSVEorSME2Available())
4463 return std::nullopt;
4470 Cost += InstCost(PromotedTy);
4493 Op2Info, Args, CxtI);
4497 int ISD = TLI->InstructionOpcodeToISD(Opcode);
4504 Ty,
CostKind, Op1Info, Op2Info,
true,
4507 [&](
Type *PromotedTy) {
4511 return *PromotedCost;
4514 if (Ty->getScalarType()->isFP128Ty())
4522 if (
Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {
4538 if (LT.second == MVT::v2i64) {
4618 auto VT = TLI->getValueType(
DL, Ty);
4619 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
4623 : (3 * AsrCost + AddCost);
4625 return MulCost + AsrCost + 2 * AddCost;
4627 }
else if (VT.isVector()) {
4637 if (Ty->isScalableTy() && ST->hasSVE())
4638 Cost += 2 * AsrCost;
4643 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
4647 }
else if (LT.second == MVT::v2i64) {
4648 return VT.getVectorNumElements() *
4655 if (Ty->isScalableTy() && ST->hasSVE())
4656 return MulCost + 2 * AddCost + 2 * AsrCost;
4657 return 2 * MulCost + AddCost + AsrCost + UsraCost;
4662 LT.second.isFixedLengthVector()) {
4672 return ExtractCost + InsertCost +
4680 auto VT = TLI->getValueType(
DL, Ty);
4696 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
4697 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
4698 LT.second == MVT::nxv16i8;
4699 bool Is128bit = LT.second.is128BitVector();
4711 (HasMULH ? 0 : ShrCost) +
4712 AddCost * 2 + ShrCost;
4713 return DivCost + (
ISD ==
ISD::UREM ? MulCost + AddCost : 0);
4720 if (!VT.isVector() && VT.getSizeInBits() > 64)
4724 Opcode, Ty,
CostKind, Op1Info, Op2Info);
4726 if (TLI->isOperationLegalOrCustom(
ISD, LT.second) && ST->hasSVE()) {
4730 Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
4740 if (
nullptr != Entry)
4745 if (LT.second.getScalarType() == MVT::i8)
4747 else if (LT.second.getScalarType() == MVT::i16)
4759 Opcode, Ty->getScalarType(),
CostKind, Op1Info, Op2Info);
4760 return (4 + DivCost) * VTy->getNumElements();
4766 -1,
nullptr,
nullptr);
4789 if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
4790 (Ty->isHalfTy() && ST->hasFullFP16())) &&
4799 if (!Ty->getScalarType()->isFP128Ty())
4806 if (!Ty->getScalarType()->isFP128Ty())
4807 return 2 * LT.first;
4814 if (!Ty->isVectorTy())
4830 int MaxMergeDistance = 64;
4834 return NumVectorInstToHideOverhead;
4844 unsigned Opcode1,
unsigned Opcode2)
const {
4847 if (!
Sched.hasInstrSchedModel())
4851 Sched.getSchedClassDesc(
TII->get(Opcode1).getSchedClass());
4853 Sched.getSchedClassDesc(
TII->get(Opcode2).getSchedClass());
4859 "Cannot handle variant scheduling classes without an MI");
4875 const int AmortizationCost = 20;
4883 VecPred = CurrentPred;
4891 static const auto ValidMinMaxTys = {
4892 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
4893 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
4894 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
4898 (ST->hasFullFP16() &&
4904 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4905 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4906 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4907 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4908 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4909 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4910 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4911 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4912 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4913 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4914 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
4916 EVT SelCondTy = TLI->getValueType(
DL, CondTy);
4917 EVT SelValTy = TLI->getValueType(
DL, ValTy);
4926 if (Opcode == Instruction::FCmp) {
4928 ValTy,
CostKind, Op1Info, Op2Info,
false,
4930 false, [&](
Type *PromotedTy) {
4942 return *PromotedCost;
4946 if (LT.second.getScalarType() != MVT::f64 &&
4947 LT.second.getScalarType() != MVT::f32 &&
4948 LT.second.getScalarType() != MVT::f16)
4953 unsigned Factor = 1;
4954 if (!CondTy->isVectorTy() &&
4968 AArch64::FCMEQv4f32))
4980 TLI->isTypeLegal(TLI->getValueType(
DL, ValTy)) &&
4999 Op1Info, Op2Info,
I);
5005 if (ST->requiresStrictAlign()) {
5010 Options.AllowOverlappingLoads =
true;
5011 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
5016 Options.LoadSizes = {8, 4, 2, 1};
5017 Options.AllowedTailExpansions = {3, 5, 6};
5022 return ST->hasSVE();
5028 switch (MICA.
getID()) {
5029 case Intrinsic::masked_scatter:
5030 case Intrinsic::masked_gather:
5032 case Intrinsic::masked_load:
5033 case Intrinsic::masked_expandload:
5034 case Intrinsic::masked_store:
5048 if (!LT.first.isValid())
5053 if (VT->getElementType()->isIntegerTy(1))
5064 if (MICA.
getID() == Intrinsic::masked_expandload) {
5080 if (LT.first > 1 && LT.second.getScalarSizeInBits() > 8)
5081 return MemOpCost * 2;
5090 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
5091 "Should be called on only load or stores.");
5093 case Instruction::Load:
5096 return ST->getGatherOverhead();
5098 case Instruction::Store:
5101 return ST->getScatterOverhead();
5112 unsigned Opcode = (MICA.
getID() == Intrinsic::masked_gather ||
5113 MICA.
getID() == Intrinsic::vp_gather)
5115 : Instruction::Store;
5125 if (!LT.first.isValid())
5129 if (!LT.second.isVector() ||
5131 VT->getElementType()->isIntegerTy(1))
5141 ElementCount LegalVF = LT.second.getVectorElementCount();
5144 {TTI::OK_AnyValue, TTI::OP_None},
I);
5160 EVT VT = TLI->getValueType(
DL, Ty,
true);
5162 if (VT == MVT::Other)
5167 if (!LT.first.isValid())
5177 (VTy->getElementType()->isIntegerTy(1) &&
5178 !VTy->getElementCount().isKnownMultipleOf(
5188 if (Opcode == Instruction::Store)
5192 if (ST->getFixedLoadLatency())
5193 return (LT.first - 1) + ST->getFixedLoadLatency();
5202 if (LT.second.isScalableVector() ||
5203 ST->useSVEForFixedLengthVectors(LT.second)) {
5204 Inst = AArch64::LDR_ZXI;
5205 }
else if (LT.second.isVector() || LT.second.isFloatingPoint()) {
5206 switch (LT.second.getSizeInBits()) {
5208 Inst = AArch64::LDRBui;
5211 Inst = AArch64::LDRHui;
5214 Inst = AArch64::LDRSui;
5217 Inst = AArch64::LDRDui;
5220 Inst = AArch64::LDRQui;
5226 switch (LT.second.getSizeInBits()) {
5228 Inst = AArch64::LDRBBui;
5231 Inst = AArch64::LDRHHui;
5234 Inst = AArch64::LDRWui;
5237 Inst = AArch64::LDRXui;
5245 unsigned SchedClass =
TII->get(Inst).getSchedClass();
5249 float NumLoads = (LT.first - 1).
getValue();
5250 return NumLoads *
Sched.getReciprocalThroughput(*ST, *SCD) +
5251 Sched.computeInstrLatency(*ST, *SCD);
5254 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
5255 LT.second.is128BitVector() && Alignment <
Align(16)) {
5261 const int AmortizationCost = 6;
5263 return LT.first * 2 * AmortizationCost;
5267 if (Ty->isPtrOrPtrVectorTy())
5272 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
5274 if (VT == MVT::v4i8)
5281 if (!
isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
5296 while (!TypeWorklist.
empty()) {
5318 bool UseMaskForCond,
bool UseMaskForGaps)
const {
5319 assert(Factor >= 2 &&
"Invalid interleave factor");
5334 if (!VecTy->
isScalableTy() && (UseMaskForCond || UseMaskForGaps))
5337 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
5338 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
5341 VecVTy->getElementCount().divideCoefficientBy(Factor));
5347 if (MinElts % Factor == 0 &&
5348 TLI->isLegalInterleavedAccessType(SubVecTy,
DL, UseScalable))
5349 return Factor * TLI->getNumInterleavedAccesses(SubVecTy,
DL, UseScalable);
5354 UseMaskForCond, UseMaskForGaps);
5361 for (
auto *
I : Tys) {
5362 if (!
I->isVectorTy())
5373 Align Alignment)
const {
5380 return (ST->isSVEAvailable() && ST->hasSVE2p2()) ||
5381 (ST->isSVEorStreamingSVEAvailable() && ST->hasSME2p2());
5386 bool HasUnorderedReductions)
const {
5389 return ST->getMaxInterleaveFactor();
5399 enum { MaxStridedLoads = 7 };
5401 int StridedLoads = 0;
5404 for (
const auto BB : L->blocks()) {
5405 for (
auto &
I : *BB) {
5411 if (L->isLoopInvariant(PtrValue))
5416 if (!LSCEVAddRec || !LSCEVAddRec->
isAffine())
5425 if (StridedLoads > MaxStridedLoads / 2)
5426 return StridedLoads;
5429 return StridedLoads;
5432 int StridedLoads = countStridedLoads(L, SE);
5434 <<
" strided loads\n");
5450 unsigned *FinalSize) {
5454 for (
auto *BB : L->getBlocks()) {
5455 for (
auto &
I : *BB) {
5461 if (!Cost.isValid())
5465 if (LoopCost > Budget)
5487 if (MaxTC > 0 && MaxTC <= 32)
5498 if (Blocks.
size() != 2)
5520 if (!L->isInnermost() || L->getNumBlocks() > 8)
5524 if (!L->getExitBlock())
5530 bool HasParellelizableReductions =
5531 L->getNumBlocks() == 1 &&
5532 any_of(L->getHeader()->phis(),
5534 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5537 if (HasParellelizableReductions &&
5559 if (HasParellelizableReductions) {
5570 if (Header == Latch) {
5573 unsigned Width = 10;
5579 unsigned MaxInstsPerLine = 16;
5581 unsigned BestUC = 1;
5582 unsigned SizeWithBestUC = BestUC *
Size;
5584 unsigned SizeWithUC = UC *
Size;
5585 if (SizeWithUC > 48)
5587 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5588 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5590 SizeWithBestUC = BestUC *
Size;
5600 for (
auto *BB : L->blocks()) {
5601 for (
auto &
I : *BB) {
5611 for (
auto *U :
I.users())
5613 LoadedValuesPlus.
insert(U);
5620 return LoadedValuesPlus.
contains(
SI->getOperand(0));
5646 auto *I = dyn_cast<Instruction>(V);
5647 return I && DependsOnLoopLoad(I, Depth + 1);
5654 DependsOnLoopLoad(
I, 0)) {
5670 if (L->getLoopDepth() > 1)
5681 for (
auto *BB : L->getBlocks()) {
5682 for (
auto &
I : *BB) {
5686 if (IsVectorized &&
I.getType()->isVectorTy())
5703 if (ST->isAppleMLike())
5705 else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
5727 !ST->getSchedModel().isOutOfOrder()) {
5750 bool CanCreate)
const {
5754 case Intrinsic::aarch64_neon_st1x2:
5755 case Intrinsic::aarch64_neon_st1x3:
5756 case Intrinsic::aarch64_neon_st1x4:
5757 case Intrinsic::aarch64_neon_st2:
5758 case Intrinsic::aarch64_neon_st3:
5759 case Intrinsic::aarch64_neon_st4: {
5762 if (!CanCreate || !ST)
5764 unsigned NumElts = Inst->
arg_size() - 1;
5765 if (ST->getNumElements() != NumElts)
5767 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5773 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5775 Res = Builder.CreateInsertValue(Res, L, i);
5779 case Intrinsic::aarch64_neon_ld1x2:
5780 case Intrinsic::aarch64_neon_ld1x3:
5781 case Intrinsic::aarch64_neon_ld1x4:
5782 case Intrinsic::aarch64_neon_ld2:
5783 case Intrinsic::aarch64_neon_ld3:
5784 case Intrinsic::aarch64_neon_ld4:
5785 if (Inst->
getType() == ExpectedType)
5796 case Intrinsic::aarch64_neon_ld1x2:
5797 case Intrinsic::aarch64_neon_ld1x3:
5798 case Intrinsic::aarch64_neon_ld1x4:
5799 case Intrinsic::aarch64_neon_ld2:
5800 case Intrinsic::aarch64_neon_ld3:
5801 case Intrinsic::aarch64_neon_ld4:
5802 Info.ReadMem =
true;
5803 Info.WriteMem =
false;
5806 case Intrinsic::aarch64_neon_st1x2:
5807 case Intrinsic::aarch64_neon_st1x3:
5808 case Intrinsic::aarch64_neon_st1x4:
5809 case Intrinsic::aarch64_neon_st2:
5810 case Intrinsic::aarch64_neon_st3:
5811 case Intrinsic::aarch64_neon_st4:
5812 Info.ReadMem =
false;
5813 Info.WriteMem =
true;
5822 case Intrinsic::aarch64_neon_ld1x2:
5823 case Intrinsic::aarch64_neon_st1x2:
5824 Info.MatchingId = Intrinsic::aarch64_neon_ld1x2;
5826 case Intrinsic::aarch64_neon_ld1x3:
5827 case Intrinsic::aarch64_neon_st1x3:
5828 Info.MatchingId = Intrinsic::aarch64_neon_ld1x3;
5830 case Intrinsic::aarch64_neon_ld1x4:
5831 case Intrinsic::aarch64_neon_st1x4:
5832 Info.MatchingId = Intrinsic::aarch64_neon_ld1x4;
5834 case Intrinsic::aarch64_neon_ld2:
5835 case Intrinsic::aarch64_neon_st2:
5836 Info.MatchingId = Intrinsic::aarch64_neon_ld2;
5838 case Intrinsic::aarch64_neon_ld3:
5839 case Intrinsic::aarch64_neon_st3:
5840 Info.MatchingId = Intrinsic::aarch64_neon_ld3;
5842 case Intrinsic::aarch64_neon_ld4:
5843 case Intrinsic::aarch64_neon_st4:
5844 Info.MatchingId = Intrinsic::aarch64_neon_ld4;
5856 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
5857 bool Considerable =
false;
5858 AllowPromotionWithoutCommonHeader =
false;
5861 Type *ConsideredSExtType =
5863 if (
I.getType() != ConsideredSExtType)
5867 for (
const User *U :
I.users()) {
5869 Considerable =
true;
5873 if (GEPInst->getNumOperands() > 2) {
5874 AllowPromotionWithoutCommonHeader =
true;
5879 return Considerable;
5930 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5940 return LegalizationCost + 2;
5950 LegalizationCost *= LT.first - 1;
5953 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5962 return LegalizationCost + 2;
5970 std::optional<FastMathFlags> FMF,
5986 return BaseCost + FixedVTy->getNumElements();
6003 MVT MTy = LT.second;
6004 int ISD = TLI->InstructionOpcodeToISD(Opcode);
6052 MTy.
isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
6053 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
6065 return (LT.first - 1) +
Log2_32(NElts);
6070 return (LT.first - 1) + Entry->Cost;
6082 if (LT.first != 1) {
6088 ExtraCost *= LT.first - 1;
6091 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
6092 return Cost + ExtraCost;
6100 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *VecTy,
6102 EVT VecVT = TLI->getValueType(
DL, VecTy);
6103 EVT ResVT = TLI->getValueType(
DL, ResTy);
6113 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
6115 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
6117 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
6119 return (LT.first - 1) * 2 + 2;
6130 EVT VecVT = TLI->getValueType(
DL, VecTy);
6131 EVT ResVT = TLI->getValueType(
DL, ResTy);
6134 RedOpcode == Instruction::Add) {
6140 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
6142 return LT.first + 2;
6177 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
6178 ? TLI->getPromotedVTForPredicate(
EVT(LT.second))
6192 if (LT.second.getScalarType() == MVT::i1) {
6201 assert(Entry &&
"Illegal Type for Splice");
6202 LegalizationCost += Entry->Cost;
6203 return LegalizationCost * LT.first;
6207 unsigned Opcode,
Type *InputTypeA,
Type *InputTypeB,
Type *AccumType,
6216 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub &&
6217 Opcode != Instruction::FAdd && Opcode != Instruction::FSub) ||
6224 assert(FMF &&
"Missing FastMathFlags for floating-point partial reduction");
6225 if (!FMF->allowReassoc() || !FMF->allowContract())
6229 "FastMathFlags only apply to floating-point partial reductions");
6233 (!BinOp || (OpBExtend !=
TTI::PR_None && InputTypeB)) &&
6234 "Unexpected values for OpBExtend or InputTypeB");
6238 if (BinOp && ((*BinOp != Instruction::Mul && *BinOp != Instruction::FMul) ||
6239 InputTypeA != InputTypeB))
6242 bool IsUSDot = OpBExtend !=
TTI::PR_None && OpAExtend != OpBExtend;
6245 if (IsUSDot && !ST->hasMatMulInt8() && !ST->hasDotProd())
6258 auto TC = TLI->getTypeConversion(AccumVectorType->
getContext(),
6267 if (TLI->getTypeAction(AccumVectorType->
getContext(), TC.second) !=
6273 std::pair<InstructionCost, MVT> AccumLT =
6275 std::pair<InstructionCost, MVT> InputLT =
6279 auto IsSupported = [&](
bool SVEPred,
bool NEONPred) ->
bool {
6280 return (ST->isSVEorStreamingSVEAvailable() && SVEPred) ||
6281 (AccumLT.second.isFixedLengthVector() &&
6282 AccumLT.second.getSizeInBits() <= 128 && ST->isNeonAvailable() &&
6286 bool IsSub = Opcode == Instruction::Sub || Opcode == Instruction::FSub;
6294 if (AccumLT.second.getScalarType() == MVT::i32 &&
6295 InputLT.second.getScalarType() == MVT::i8) {
6297 if (!IsUSDot && IsSupported(
true, ST->hasDotProd()))
6298 return Cost + INegCost;
6300 if (IsUSDot && IsSupported(ST->hasMatMulInt8(), ST->hasMatMulInt8()))
6301 return Cost + INegCost;
6306 if (IsUSDot && IsSupported(
false, ST->hasDotProd()))
6307 return Cost * 3 + INegCost;
6310 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
6312 if (AccumLT.second.getScalarType() == MVT::i64 &&
6313 InputLT.second.getScalarType() == MVT::i16)
6314 return Cost + INegCost;
6317 if (AccumLT.second.getScalarType() == MVT::i32 &&
6318 InputLT.second.getScalarType() == MVT::i16 &&
6319 (ST->hasSVE2p1() || ST->hasSME2()) && !IsSub)
6322 if (AccumLT.second.getScalarType() == MVT::i64 &&
6323 InputLT.second.getScalarType() == MVT::i8)
6329 return Cost + INegCost;
6332 if (AccumLT.second.getScalarType() == MVT::i16 &&
6333 InputLT.second.getScalarType() == MVT::i8 &&
6334 (ST->hasSVE2p3() || ST->hasSME2p3()) && !IsSub)
6340 if (Opcode == Instruction::FAdd && !IsSub &&
6341 IsSupported(ST->hasSME2() || ST->hasSVE2p1(), ST->hasF16F32DOT()) &&
6342 AccumLT.second.getScalarType() == MVT::f32 &&
6343 InputLT.second.getScalarType() == MVT::f16)
6347 if (Ratio == 2 && !IsUSDot) {
6348 MVT InVT = InputLT.second.getScalarType();
6351 if (IsSupported(ST->hasSVE2() || ST->hasSME(),
true) &&
6356 if (IsSupported(ST->hasSVE2(), ST->hasFP16FML()) && InVT == MVT::f16)
6360 if (IsSupported(ST->hasSVE2p1() || ST->hasSME2(),
false) &&
6361 InVT == MVT::bf16 && IsSub)
6371 if (IsSupported(ST->hasBF16(), ST->hasBF16()) && InVT == MVT::bf16)
6372 return Cost * 2 + FNegCost;
6376 AccumType, VF, OpAExtend, OpBExtend,
6388 "Expected the Mask to match the return size if given");
6390 "Expected the same scalar types");
6396 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
6397 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
6398 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
6406 return std::max<InstructionCost>(1, LT.first / 4);
6414 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
6416 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
6419 unsigned TpNumElts = Mask.size();
6420 unsigned LTNumElts = LT.second.getVectorNumElements();
6421 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
6423 LT.second.getVectorElementCount());
6425 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>,
InstructionCost>
6427 for (
unsigned N = 0;
N < NumVecs;
N++) {
6431 unsigned Source1 = -1U, Source2 = -1U;
6432 unsigned NumSources = 0;
6433 for (
unsigned E = 0; E < LTNumElts; E++) {
6434 int MaskElt = (
N * LTNumElts + E < TpNumElts) ? Mask[
N * LTNumElts + E]
6443 unsigned Source = MaskElt / LTNumElts;
6444 if (NumSources == 0) {
6447 }
else if (NumSources == 1 && Source != Source1) {
6450 }
else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
6456 if (Source == Source1)
6458 else if (Source == Source2)
6459 NMask.
push_back(MaskElt % LTNumElts + LTNumElts);
6468 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
6479 NTp, NTp, NMask,
CostKind, 0,
nullptr, Args,
6482 Result.first->second = NCost;
6496 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
6497 if (LT.second.getFixedSizeInBits() >= 128 &&
6499 LT.second.getVectorNumElements() / 2) {
6502 if (Index == (
int)LT.second.getVectorNumElements() / 2)
6516 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6519 return M.value() < 0 || M.value() == (int)M.index();
6525 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6526 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6535 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6536 ST->isSVEorStreamingSVEAvailable() &&
6541 if (ST->isSVEorStreamingSVEAvailable() &&
6555 if (IsLoad && LT.second.isVector() &&
6557 LT.second.getVectorElementCount()))
6563 if (Mask.size() == 4 &&
6565 (SrcTy->getScalarSizeInBits() == 16 ||
6566 SrcTy->getScalarSizeInBits() == 32) &&
6567 all_of(Mask, [](
int E) {
return E < 8; }))
6573 if (LT.second.isFixedLengthVector() &&
6574 LT.second.getVectorNumElements() == Mask.size() &&
6580 (
isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6581 isTRNMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6582 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6583 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6584 LT.second.getVectorNumElements(), 16) ||
6585 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6586 LT.second.getVectorNumElements(), 32) ||
6587 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6588 LT.second.getVectorNumElements(), 64) ||
6591 [&Mask](
int M) {
return M < 0 || M == Mask[0]; })))
6720 return LT.first * Entry->Cost;
6729 LT.second.getSizeInBits() <= 128 && SubTp) {
6731 if (SubLT.second.isVector()) {
6732 int NumElts = LT.second.getVectorNumElements();
6733 int NumSubElts = SubLT.second.getVectorNumElements();
6734 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6740 if (IsExtractSubvector)
6757 if (
getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
6776 return ST->useFixedOverScalableIfEqualCost();
6780 return ST->getEpilogueVectorizationMinVF();
6815 unsigned NumInsns = 0;
6817 NumInsns += BB->size();
6827 int64_t Scale,
unsigned AddrSpace)
const {
6855 if (
I->getOpcode() == Instruction::Or &&
6859 if (
I->getOpcode() == Instruction::Add ||
6860 I->getOpcode() == Instruction::Sub)
6885 return all_equal(Shuf->getShuffleMask());
6892 bool AllowSplat =
false) {
6897 auto areTypesHalfed = [](
Value *FullV,
Value *HalfV) {
6898 auto *FullTy = FullV->
getType();
6899 auto *HalfTy = HalfV->getType();
6901 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6904 auto extractHalf = [](
Value *FullV,
Value *HalfV) {
6907 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6911 Value *S1Op1 =
nullptr, *S2Op1 =
nullptr;
6925 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6926 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6940 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6941 (M2Start != 0 && M2Start != (NumElements / 2)))
6943 if (S1Op1 && S2Op1 && M1Start != M2Start)
6953 return Ext->getType()->getScalarSizeInBits() ==
6954 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6968 Value *VectorOperand =
nullptr;
6985 if (!
GEP ||
GEP->getNumOperands() != 2)
6989 Value *Offsets =
GEP->getOperand(1);
6992 if (
Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6998 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6999 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
7000 Ops.push_back(&
GEP->getOperandUse(1));
7036 switch (
II->getIntrinsicID()) {
7037 case Intrinsic::aarch64_neon_smull:
7038 case Intrinsic::aarch64_neon_umull:
7041 Ops.push_back(&
II->getOperandUse(0));
7042 Ops.push_back(&
II->getOperandUse(1));
7047 case Intrinsic::fma:
7048 case Intrinsic::fmuladd:
7055 Ops.push_back(&
II->getOperandUse(0));
7057 Ops.push_back(&
II->getOperandUse(1));
7060 case Intrinsic::aarch64_neon_sqdmull:
7061 case Intrinsic::aarch64_neon_sqdmulh:
7062 case Intrinsic::aarch64_neon_sqrdmulh:
7065 Ops.push_back(&
II->getOperandUse(0));
7067 Ops.push_back(&
II->getOperandUse(1));
7068 return !
Ops.empty();
7069 case Intrinsic::aarch64_neon_fmlal:
7070 case Intrinsic::aarch64_neon_fmlal2:
7071 case Intrinsic::aarch64_neon_fmlsl:
7072 case Intrinsic::aarch64_neon_fmlsl2:
7075 Ops.push_back(&
II->getOperandUse(1));
7077 Ops.push_back(&
II->getOperandUse(2));
7078 return !
Ops.empty();
7079 case Intrinsic::aarch64_sve_ptest_first:
7080 case Intrinsic::aarch64_sve_ptest_last:
7082 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
7083 Ops.push_back(&
II->getOperandUse(0));
7084 return !
Ops.empty();
7085 case Intrinsic::aarch64_sme_write_horiz:
7086 case Intrinsic::aarch64_sme_write_vert:
7087 case Intrinsic::aarch64_sme_writeq_horiz:
7088 case Intrinsic::aarch64_sme_writeq_vert: {
7090 if (!Idx || Idx->getOpcode() != Instruction::Add)
7092 Ops.push_back(&
II->getOperandUse(1));
7095 case Intrinsic::aarch64_sme_read_horiz:
7096 case Intrinsic::aarch64_sme_read_vert:
7097 case Intrinsic::aarch64_sme_readq_horiz:
7098 case Intrinsic::aarch64_sme_readq_vert:
7099 case Intrinsic::aarch64_sme_ld1b_vert:
7100 case Intrinsic::aarch64_sme_ld1h_vert:
7101 case Intrinsic::aarch64_sme_ld1w_vert:
7102 case Intrinsic::aarch64_sme_ld1d_vert:
7103 case Intrinsic::aarch64_sme_ld1q_vert:
7104 case Intrinsic::aarch64_sme_st1b_vert:
7105 case Intrinsic::aarch64_sme_st1h_vert:
7106 case Intrinsic::aarch64_sme_st1w_vert:
7107 case Intrinsic::aarch64_sme_st1d_vert:
7108 case Intrinsic::aarch64_sme_st1q_vert:
7109 case Intrinsic::aarch64_sme_ld1b_horiz:
7110 case Intrinsic::aarch64_sme_ld1h_horiz:
7111 case Intrinsic::aarch64_sme_ld1w_horiz:
7112 case Intrinsic::aarch64_sme_ld1d_horiz:
7113 case Intrinsic::aarch64_sme_ld1q_horiz:
7114 case Intrinsic::aarch64_sme_st1b_horiz:
7115 case Intrinsic::aarch64_sme_st1h_horiz:
7116 case Intrinsic::aarch64_sme_st1w_horiz:
7117 case Intrinsic::aarch64_sme_st1d_horiz:
7118 case Intrinsic::aarch64_sme_st1q_horiz: {
7120 if (!Idx || Idx->getOpcode() != Instruction::Add)
7122 Ops.push_back(&
II->getOperandUse(3));
7125 case Intrinsic::aarch64_neon_pmull:
7128 Ops.push_back(&
II->getOperandUse(0));
7129 Ops.push_back(&
II->getOperandUse(1));
7131 case Intrinsic::aarch64_neon_pmull64:
7133 II->getArgOperand(1)))
7135 Ops.push_back(&
II->getArgOperandUse(0));
7136 Ops.push_back(&
II->getArgOperandUse(1));
7138 case Intrinsic::masked_gather:
7141 Ops.push_back(&
II->getArgOperandUse(0));
7143 case Intrinsic::masked_scatter:
7146 Ops.push_back(&
II->getArgOperandUse(1));
7153 auto ShouldSinkCondition = [](
Value *
Cond,
7158 if (
II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
7162 Ops.push_back(&
II->getOperandUse(0));
7166 switch (
I->getOpcode()) {
7167 case Instruction::GetElementPtr:
7168 case Instruction::Add:
7169 case Instruction::Sub:
7171 for (
unsigned Op = 0;
Op <
I->getNumOperands(); ++
Op) {
7173 Ops.push_back(&
I->getOperandUse(
Op));
7178 case Instruction::Select: {
7179 if (!ShouldSinkCondition(
I->getOperand(0),
Ops))
7182 Ops.push_back(&
I->getOperandUse(0));
7185 case Instruction::UncondBr:
7187 case Instruction::CondBr: {
7191 Ops.push_back(&
I->getOperandUse(0));
7194 case Instruction::FMul:
7199 Ops.push_back(&
I->getOperandUse(0));
7201 Ops.push_back(&
I->getOperandUse(1));
7211 case Instruction::Xor:
7214 if (
I->getType()->isVectorTy() && ST->isNeonAvailable()) {
7216 ST->isSVEorStreamingSVEAvailable() && (ST->hasSVE2() || ST->hasSME());
7221 case Instruction::And:
7222 case Instruction::Or:
7225 if (
I->getOpcode() == Instruction::Or &&
7230 if (!(
I->getType()->isVectorTy() && ST->hasNEON()) &&
7233 for (
auto &
Op :
I->operands()) {
7245 Ops.push_back(&Not);
7246 Ops.push_back(&InsertElt);
7256 if (!
I->getType()->isVectorTy())
7257 return !
Ops.empty();
7259 switch (
I->getOpcode()) {
7260 case Instruction::Sub:
7261 case Instruction::Add: {
7270 Ops.push_back(&Ext1->getOperandUse(0));
7271 Ops.push_back(&Ext2->getOperandUse(0));
7274 Ops.push_back(&
I->getOperandUse(0));
7275 Ops.push_back(&
I->getOperandUse(1));
7279 case Instruction::Or: {
7282 if (ST->hasNEON()) {
7296 if (
I->getParent() != MainAnd->
getParent() ||
7301 if (
I->getParent() != IA->getParent() ||
7302 I->getParent() != IB->getParent())
7307 Ops.push_back(&
I->getOperandUse(0));
7308 Ops.push_back(&
I->getOperandUse(1));
7317 case Instruction::Mul: {
7318 auto ShouldSinkSplatForIndexedVariant = [](
Value *V) {
7321 if (Ty->isScalableTy())
7325 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
7328 int NumZExts = 0, NumSExts = 0;
7329 for (
auto &
Op :
I->operands()) {
7336 auto *ExtOp = Ext->getOperand(0);
7337 if (
isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
7338 Ops.push_back(&Ext->getOperandUse(0));
7346 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
7347 I->getType()->getScalarSizeInBits())
7384 if (!ElementConstant || !ElementConstant->
isZero())
7387 unsigned Opcode = OperandInstr->
getOpcode();
7388 if (Opcode == Instruction::SExt)
7390 else if (Opcode == Instruction::ZExt)
7395 unsigned Bitwidth =
I->getType()->getScalarSizeInBits();
7405 Ops.push_back(&Insert->getOperandUse(1));
7411 if (!
Ops.empty() && (NumSExts == 2 || NumZExts == 2))
7415 if (!ShouldSinkSplatForIndexedVariant(
I))
7420 Ops.push_back(&
I->getOperandUse(0));
7422 Ops.push_back(&
I->getOperandUse(1));
7424 return !
Ops.empty();
7426 case Instruction::FMul: {
7428 if (
I->getType()->isScalableTy())
7429 return !
Ops.empty();
7433 return !
Ops.empty();
7437 Ops.push_back(&
I->getOperandUse(0));
7439 Ops.push_back(&
I->getOperandUse(1));
7440 return !
Ops.empty();
static bool isAllActivePredicate(const SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
const HexagonInstrInfo * TII
This file provides the interface for the instcombine pass implementation.
static constexpr Value * getValue(Ty &ValueOrUse)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the architecture features are not...
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
bool preferTailFoldingOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
APInt getPriorityMask(const Function &F) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedExpandLoad(Type *DataTy, Align Alignment) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
InstructionCost getBranchMispredictPenalty() const override
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
unsigned getMaxInterleaveFactor(ElementCount VF, bool HasUnorderedReductions) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool noSignedZeros() const
bool allowContract() const
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
LLVM_ABI Value * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={}, function_ref< void(CallInst *)> SetFn=[](CallInst *) {})
Variant to create a possibly constant-folded intrinsic.
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
bool isFixedLengthVector() const
MVT getVectorElementType() const
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
LLVM_ABI APInt getCpuSupportsMask(ArrayRef< StringRef > Features)
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
LLVM_ABI Libcall getPOW(EVT RetVT)
getPOW - Return the POW_* value for the given types, or UNKNOWN_LIBCALL if there is none.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ FSub
Subtraction of floats.
@ FAddChainWithSubs
A chain of fadds and fsubs.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool ShouldCheckWrap=true, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.