23#include "llvm/IR/IntrinsicsAArch64.h"
35#define DEBUG_TYPE "aarch64tti"
41 "sve-prefer-fixed-over-scalable-if-equal",
cl::Hidden);
59 "Penalty of calling a function that requires a change to PSTATE.SM"));
63 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
74 cl::desc(
"The cost of a histcnt instruction"));
78 cl::desc(
"The number of instructions to search for a redundant dmb"));
82 cl::desc(
"Threshold for forced unrolling of small loops in AArch64"));
85class TailFoldingOption {
100 bool NeedsDefault =
true;
104 void setNeedsDefault(
bool V) { NeedsDefault =
V; }
119 assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
120 "Initial bits should only include one of "
121 "(disabled|all|simple|default)");
122 Bits = NeedsDefault ? DefaultBits : InitialBits;
124 Bits &= ~DisableBits;
130 errs() <<
"invalid argument '" << Opt
131 <<
"' to -sve-tail-folding=; the option should be of the form\n"
132 " (disabled|all|default|simple)[+(reductions|recurrences"
133 "|reverse|noreductions|norecurrences|noreverse)]\n";
139 void operator=(
const std::string &Val) {
148 setNeedsDefault(
false);
151 StringRef(Val).split(TailFoldTypes,
'+', -1,
false);
153 unsigned StartIdx = 1;
154 if (TailFoldTypes[0] ==
"disabled")
155 setInitialBits(TailFoldingOpts::Disabled);
156 else if (TailFoldTypes[0] ==
"all")
157 setInitialBits(TailFoldingOpts::All);
158 else if (TailFoldTypes[0] ==
"default")
159 setNeedsDefault(
true);
160 else if (TailFoldTypes[0] ==
"simple")
161 setInitialBits(TailFoldingOpts::Simple);
164 setInitialBits(TailFoldingOpts::Disabled);
167 for (
unsigned I = StartIdx;
I < TailFoldTypes.
size();
I++) {
168 if (TailFoldTypes[
I] ==
"reductions")
169 setEnableBit(TailFoldingOpts::Reductions);
170 else if (TailFoldTypes[
I] ==
"recurrences")
171 setEnableBit(TailFoldingOpts::Recurrences);
172 else if (TailFoldTypes[
I] ==
"reverse")
173 setEnableBit(TailFoldingOpts::Reverse);
174 else if (TailFoldTypes[
I] ==
"noreductions")
175 setDisableBit(TailFoldingOpts::Reductions);
176 else if (TailFoldTypes[
I] ==
"norecurrences")
177 setDisableBit(TailFoldingOpts::Recurrences);
178 else if (TailFoldTypes[
I] ==
"noreverse")
179 setDisableBit(TailFoldingOpts::Reverse);
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
258 TTI->isMultiversionedFunction(
F) ?
"fmv-features" :
"target-features";
259 StringRef FeatureStr =
F.getFnAttribute(AttributeStr).getValueAsString();
260 FeatureStr.
split(Features,
",");
276 return F.hasFnAttribute(
"fmv-features");
280 AArch64::FeatureExecuteOnly,
320 FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
321 FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;
323 return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
341 auto FVTy = dyn_cast<FixedVectorType>(Ty);
343 FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
352 unsigned DefaultCallPenalty)
const {
377 if (
F ==
Call.getCaller())
383 return DefaultCallPenalty;
394 ST->isSVEorStreamingSVEAvailable() &&
395 !ST->disableMaximizeScalableBandwidth();
419 assert(Ty->isIntegerTy());
421 unsigned BitSize = Ty->getPrimitiveSizeInBits();
428 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
433 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
439 return std::max<InstructionCost>(1,
Cost);
446 assert(Ty->isIntegerTy());
448 unsigned BitSize = Ty->getPrimitiveSizeInBits();
454 unsigned ImmIdx = ~0U;
458 case Instruction::GetElementPtr:
463 case Instruction::Store:
466 case Instruction::Add:
467 case Instruction::Sub:
468 case Instruction::Mul:
469 case Instruction::UDiv:
470 case Instruction::SDiv:
471 case Instruction::URem:
472 case Instruction::SRem:
473 case Instruction::And:
474 case Instruction::Or:
475 case Instruction::Xor:
476 case Instruction::ICmp:
480 case Instruction::Shl:
481 case Instruction::LShr:
482 case Instruction::AShr:
486 case Instruction::Trunc:
487 case Instruction::ZExt:
488 case Instruction::SExt:
489 case Instruction::IntToPtr:
490 case Instruction::PtrToInt:
491 case Instruction::BitCast:
492 case Instruction::PHI:
493 case Instruction::Call:
494 case Instruction::Select:
495 case Instruction::Ret:
496 case Instruction::Load:
501 int NumConstants = (BitSize + 63) / 64;
514 assert(Ty->isIntegerTy());
516 unsigned BitSize = Ty->getPrimitiveSizeInBits();
525 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
531 case Intrinsic::sadd_with_overflow:
532 case Intrinsic::uadd_with_overflow:
533 case Intrinsic::ssub_with_overflow:
534 case Intrinsic::usub_with_overflow:
535 case Intrinsic::smul_with_overflow:
536 case Intrinsic::umul_with_overflow:
538 int NumConstants = (BitSize + 63) / 64;
545 case Intrinsic::experimental_stackmap:
546 if ((Idx < 2) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
549 case Intrinsic::experimental_patchpoint_void:
550 case Intrinsic::experimental_patchpoint:
551 if ((Idx < 4) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
554 case Intrinsic::experimental_gc_statepoint:
555 if ((Idx < 5) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
565 if (TyWidth == 32 || TyWidth == 64)
589 unsigned TotalHistCnts = 1;
599 unsigned EC = VTy->getElementCount().getKnownMinValue();
604 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
606 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
610 TotalHistCnts = EC / NaturalVectorWidth;
630 switch (ICA.
getID()) {
631 case Intrinsic::experimental_vector_histogram_add: {
638 case Intrinsic::umin:
639 case Intrinsic::umax:
640 case Intrinsic::smin:
641 case Intrinsic::smax: {
642 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
643 MVT::v8i16, MVT::v2i32, MVT::v4i32,
644 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
648 if (LT.second == MVT::v2i64)
654 case Intrinsic::scmp:
655 case Intrinsic::ucmp: {
657 {Intrinsic::scmp, MVT::i32, 3},
658 {Intrinsic::scmp, MVT::i64, 3},
659 {Intrinsic::scmp, MVT::v8i8, 3},
660 {Intrinsic::scmp, MVT::v16i8, 3},
661 {Intrinsic::scmp, MVT::v4i16, 3},
662 {Intrinsic::scmp, MVT::v8i16, 3},
663 {Intrinsic::scmp, MVT::v2i32, 3},
664 {Intrinsic::scmp, MVT::v4i32, 3},
665 {Intrinsic::scmp, MVT::v1i64, 3},
666 {Intrinsic::scmp, MVT::v2i64, 3},
672 return Entry->Cost * LT.first;
675 case Intrinsic::sadd_sat:
676 case Intrinsic::ssub_sat:
677 case Intrinsic::uadd_sat:
678 case Intrinsic::usub_sat: {
679 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
680 MVT::v8i16, MVT::v2i32, MVT::v4i32,
686 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
688 return LT.first * Instrs;
693 if (ST->isSVEAvailable() && VectorSize >= 128 &&
isPowerOf2_64(VectorSize))
694 return LT.first * Instrs;
698 case Intrinsic::abs: {
699 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
700 MVT::v8i16, MVT::v2i32, MVT::v4i32,
701 MVT::v2i64, MVT::nxv16i8, MVT::nxv8i16,
702 MVT::nxv4i32, MVT::nxv2i64};
708 case Intrinsic::bswap: {
709 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
710 MVT::v4i32, MVT::v2i64};
713 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
718 case Intrinsic::fmuladd: {
723 (EltTy->
isHalfTy() && ST->hasFullFP16()))
727 case Intrinsic::stepvector: {
736 Cost += AddCost * (LT.first - 1);
740 case Intrinsic::vector_extract:
741 case Intrinsic::vector_insert: {
754 bool IsExtract = ICA.
getID() == Intrinsic::vector_extract;
755 EVT SubVecVT = IsExtract ? getTLI()->getValueType(
DL, RetTy)
763 getTLI()->getTypeConversion(
C, SubVecVT);
765 getTLI()->getTypeConversion(
C, VecVT);
773 case Intrinsic::bitreverse: {
775 {Intrinsic::bitreverse, MVT::i32, 1},
776 {Intrinsic::bitreverse, MVT::i64, 1},
777 {Intrinsic::bitreverse, MVT::v8i8, 1},
778 {Intrinsic::bitreverse, MVT::v16i8, 1},
779 {Intrinsic::bitreverse, MVT::v4i16, 2},
780 {Intrinsic::bitreverse, MVT::v8i16, 2},
781 {Intrinsic::bitreverse, MVT::v2i32, 2},
782 {Intrinsic::bitreverse, MVT::v4i32, 2},
783 {Intrinsic::bitreverse, MVT::v1i64, 2},
784 {Intrinsic::bitreverse, MVT::v2i64, 2},
792 if (TLI->getValueType(
DL, RetTy,
true) == MVT::i8 ||
793 TLI->getValueType(
DL, RetTy,
true) == MVT::i16)
794 return LegalisationCost.first * Entry->Cost + 1;
796 return LegalisationCost.first * Entry->Cost;
800 case Intrinsic::ctpop: {
801 if (!ST->hasNEON()) {
822 RetTy->getScalarSizeInBits()
825 return LT.first * Entry->Cost + ExtraCost;
829 case Intrinsic::sadd_with_overflow:
830 case Intrinsic::uadd_with_overflow:
831 case Intrinsic::ssub_with_overflow:
832 case Intrinsic::usub_with_overflow:
833 case Intrinsic::smul_with_overflow:
834 case Intrinsic::umul_with_overflow: {
836 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
837 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
838 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
839 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
840 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
841 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
842 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
843 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
844 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
845 {Intrinsic::usub_with_overflow, MVT::i8, 3},
846 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
847 {Intrinsic::usub_with_overflow, MVT::i16, 3},
848 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
849 {Intrinsic::usub_with_overflow, MVT::i32, 1},
850 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
851 {Intrinsic::usub_with_overflow, MVT::i64, 1},
852 {Intrinsic::smul_with_overflow, MVT::i8, 5},
853 {Intrinsic::umul_with_overflow, MVT::i8, 4},
854 {Intrinsic::smul_with_overflow, MVT::i16, 5},
855 {Intrinsic::umul_with_overflow, MVT::i16, 4},
856 {Intrinsic::smul_with_overflow, MVT::i32, 2},
857 {Intrinsic::umul_with_overflow, MVT::i32, 2},
858 {Intrinsic::smul_with_overflow, MVT::i64, 3},
859 {Intrinsic::umul_with_overflow, MVT::i64, 3},
861 EVT MTy = TLI->getValueType(
DL, RetTy->getContainedType(0),
true);
868 case Intrinsic::fptosi_sat:
869 case Intrinsic::fptoui_sat: {
872 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
874 EVT MTy = TLI->getValueType(
DL, RetTy);
877 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
878 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
879 LT.second == MVT::v2f64)) {
881 (LT.second == MVT::f64 && MTy == MVT::i32) ||
882 (LT.second == MVT::f32 && MTy == MVT::i64)))
891 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
898 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
899 (LT.second == MVT::f16 && MTy == MVT::i64) ||
900 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
914 if ((LT.second.getScalarType() == MVT::f32 ||
915 LT.second.getScalarType() == MVT::f64 ||
916 LT.second.getScalarType() == MVT::f16) &&
920 if (LT.second.isVector())
924 LegalTy, {LegalTy, LegalTy});
927 LegalTy, {LegalTy, LegalTy});
929 return LT.first *
Cost +
930 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
936 RetTy = RetTy->getScalarType();
937 if (LT.second.isVector()) {
955 return LT.first *
Cost;
957 case Intrinsic::fshl:
958 case Intrinsic::fshr: {
967 if (RetTy->isIntegerTy() && ICA.
getArgs()[0] == ICA.
getArgs()[1] &&
968 (RetTy->getPrimitiveSizeInBits() == 32 ||
969 RetTy->getPrimitiveSizeInBits() == 64)) {
982 {Intrinsic::fshl, MVT::v4i32, 2},
983 {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
984 {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
985 {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
991 return LegalisationCost.first * Entry->Cost;
995 if (!RetTy->isIntegerTy())
1000 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
1001 RetTy->getScalarSizeInBits() < 64) ||
1002 (RetTy->getScalarSizeInBits() % 64 != 0);
1003 unsigned ExtraCost = HigherCost ? 1 : 0;
1004 if (RetTy->getScalarSizeInBits() == 32 ||
1005 RetTy->getScalarSizeInBits() == 64)
1008 else if (HigherCost)
1012 return TyL.first + ExtraCost;
1014 case Intrinsic::get_active_lane_mask: {
1016 EVT RetVT = getTLI()->getValueType(
DL, RetTy);
1018 if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
1021 if (RetTy->isScalableTy()) {
1022 if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
1032 if (ST->hasSVE2p1() || ST->hasSME2()) {
1047 return Cost + (SplitCost * (
Cost - 1));
1062 case Intrinsic::experimental_vector_match: {
1065 unsigned SearchSize = NeedleTy->getNumElements();
1066 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
1079 case Intrinsic::experimental_cttz_elts: {
1081 if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
1089 case Intrinsic::loop_dependence_raw_mask:
1090 case Intrinsic::loop_dependence_war_mask: {
1092 if (ST->hasSVE2() || ST->hasSME()) {
1093 EVT VecVT = getTLI()->getValueType(
DL, RetTy);
1094 unsigned EltSizeInBytes =
1104 case Intrinsic::experimental_vector_extract_last_active:
1105 if (ST->isSVEorStreamingSVEAvailable()) {
1111 case Intrinsic::pow: {
1114 EVT VT = getTLI()->getValueType(
DL, RetTy);
1116 bool HasLibcall = getTLI()->getLibcallImpl(LC) != RTLIB::Unsupported;
1131 bool Is025 = ExpF->getValueAPF().isExactlyValue(0.25);
1132 bool Is075 = ExpF->getValueAPF().isExactlyValue(0.75);
1142 return (Sqrt * 2) +
FMul;
1153 case Intrinsic::sqrt:
1154 case Intrinsic::fabs:
1155 case Intrinsic::ceil:
1156 case Intrinsic::floor:
1157 case Intrinsic::nearbyint:
1158 case Intrinsic::round:
1159 case Intrinsic::rint:
1160 case Intrinsic::roundeven:
1161 case Intrinsic::trunc:
1162 case Intrinsic::minnum:
1163 case Intrinsic::maxnum:
1164 case Intrinsic::minimum:
1165 case Intrinsic::maximum: {
1183 auto RequiredType =
II.getType();
1186 assert(PN &&
"Expected Phi Node!");
1189 if (!PN->hasOneUse())
1190 return std::nullopt;
1192 for (
Value *IncValPhi : PN->incoming_values()) {
1195 Reinterpret->getIntrinsicID() !=
1196 Intrinsic::aarch64_sve_convert_to_svbool ||
1197 RequiredType != Reinterpret->getArgOperand(0)->getType())
1198 return std::nullopt;
1206 for (
unsigned I = 0;
I < PN->getNumIncomingValues();
I++) {
1208 NPN->
addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(
I));
1281 return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
1286 return GoverningPredicateIdx;
1291 GoverningPredicateIdx = Index;
1309 return UndefIntrinsic;
1314 UndefIntrinsic = IID;
1336 return ResultLanes == InactiveLanesTakenFromOperand;
1341 return OperandIdxForInactiveLanes;
1345 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1346 ResultLanes = InactiveLanesTakenFromOperand;
1347 OperandIdxForInactiveLanes = Index;
1352 return ResultLanes == InactiveLanesAreNotDefined;
1356 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1357 ResultLanes = InactiveLanesAreNotDefined;
1362 return ResultLanes == InactiveLanesAreUnused;
1366 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1367 ResultLanes = InactiveLanesAreUnused;
1377 ResultIsZeroInitialized =
true;
1388 return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
1393 return OperandIdxWithNoActiveLanes;
1398 OperandIdxWithNoActiveLanes = Index;
1403 unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
1406 unsigned IROpcode = 0;
1408 enum PredicationStyle {
1410 InactiveLanesTakenFromOperand,
1411 InactiveLanesAreNotDefined,
1412 InactiveLanesAreUnused
1415 bool ResultIsZeroInitialized =
false;
1416 unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
1417 unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
1425 return !isa<ScalableVectorType>(V->getType());
1433 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1434 case Intrinsic::aarch64_sve_fcvt_f16f32:
1435 case Intrinsic::aarch64_sve_fcvt_f16f64:
1436 case Intrinsic::aarch64_sve_fcvt_f32f16:
1437 case Intrinsic::aarch64_sve_fcvt_f32f64:
1438 case Intrinsic::aarch64_sve_fcvt_f64f16:
1439 case Intrinsic::aarch64_sve_fcvt_f64f32:
1440 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1441 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1442 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1443 case Intrinsic::aarch64_sve_fcvtzs:
1444 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1445 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1446 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1447 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1448 case Intrinsic::aarch64_sve_fcvtzu:
1449 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1450 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1451 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1452 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1453 case Intrinsic::aarch64_sve_scvtf:
1454 case Intrinsic::aarch64_sve_scvtf_f16i32:
1455 case Intrinsic::aarch64_sve_scvtf_f16i64:
1456 case Intrinsic::aarch64_sve_scvtf_f32i64:
1457 case Intrinsic::aarch64_sve_scvtf_f64i32:
1458 case Intrinsic::aarch64_sve_ucvtf:
1459 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1460 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1461 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1462 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1465 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1466 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1467 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1468 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1471 case Intrinsic::aarch64_sve_fabd:
1473 case Intrinsic::aarch64_sve_fadd:
1476 case Intrinsic::aarch64_sve_fdiv:
1479 case Intrinsic::aarch64_sve_fmax:
1481 case Intrinsic::aarch64_sve_fmaxnm:
1483 case Intrinsic::aarch64_sve_fmin:
1485 case Intrinsic::aarch64_sve_fminnm:
1487 case Intrinsic::aarch64_sve_fmla:
1489 case Intrinsic::aarch64_sve_fmls:
1491 case Intrinsic::aarch64_sve_fmul:
1494 case Intrinsic::aarch64_sve_fmulx:
1496 case Intrinsic::aarch64_sve_fnmla:
1498 case Intrinsic::aarch64_sve_fnmls:
1500 case Intrinsic::aarch64_sve_fsub:
1503 case Intrinsic::aarch64_sve_add:
1506 case Intrinsic::aarch64_sve_mla:
1508 case Intrinsic::aarch64_sve_mls:
1510 case Intrinsic::aarch64_sve_mul:
1513 case Intrinsic::aarch64_sve_sabd:
1515 case Intrinsic::aarch64_sve_sdiv:
1518 case Intrinsic::aarch64_sve_smax:
1520 case Intrinsic::aarch64_sve_smin:
1522 case Intrinsic::aarch64_sve_smulh:
1524 case Intrinsic::aarch64_sve_sub:
1527 case Intrinsic::aarch64_sve_uabd:
1529 case Intrinsic::aarch64_sve_udiv:
1532 case Intrinsic::aarch64_sve_umax:
1534 case Intrinsic::aarch64_sve_umin:
1536 case Intrinsic::aarch64_sve_umulh:
1538 case Intrinsic::aarch64_sve_asr:
1541 case Intrinsic::aarch64_sve_lsl:
1544 case Intrinsic::aarch64_sve_lsr:
1547 case Intrinsic::aarch64_sve_and:
1550 case Intrinsic::aarch64_sve_bic:
1552 case Intrinsic::aarch64_sve_eor:
1555 case Intrinsic::aarch64_sve_orr:
1558 case Intrinsic::aarch64_sve_shsub:
1560 case Intrinsic::aarch64_sve_shsubr:
1562 case Intrinsic::aarch64_sve_sqrshl:
1564 case Intrinsic::aarch64_sve_sqshl:
1566 case Intrinsic::aarch64_sve_sqsub:
1568 case Intrinsic::aarch64_sve_srshl:
1570 case Intrinsic::aarch64_sve_uhsub:
1572 case Intrinsic::aarch64_sve_uhsubr:
1574 case Intrinsic::aarch64_sve_uqrshl:
1576 case Intrinsic::aarch64_sve_uqshl:
1578 case Intrinsic::aarch64_sve_uqsub:
1580 case Intrinsic::aarch64_sve_urshl:
1583 case Intrinsic::aarch64_sve_add_u:
1586 case Intrinsic::aarch64_sve_and_u:
1589 case Intrinsic::aarch64_sve_asr_u:
1592 case Intrinsic::aarch64_sve_eor_u:
1595 case Intrinsic::aarch64_sve_fadd_u:
1598 case Intrinsic::aarch64_sve_fdiv_u:
1601 case Intrinsic::aarch64_sve_fmul_u:
1604 case Intrinsic::aarch64_sve_fsub_u:
1607 case Intrinsic::aarch64_sve_lsl_u:
1610 case Intrinsic::aarch64_sve_lsr_u:
1613 case Intrinsic::aarch64_sve_mul_u:
1616 case Intrinsic::aarch64_sve_orr_u:
1619 case Intrinsic::aarch64_sve_sdiv_u:
1622 case Intrinsic::aarch64_sve_sub_u:
1625 case Intrinsic::aarch64_sve_udiv_u:
1629 case Intrinsic::aarch64_sve_addqv:
1630 case Intrinsic::aarch64_sve_and_z:
1631 case Intrinsic::aarch64_sve_bic_z:
1632 case Intrinsic::aarch64_sve_brka_z:
1633 case Intrinsic::aarch64_sve_brkb_z:
1634 case Intrinsic::aarch64_sve_brkn_z:
1635 case Intrinsic::aarch64_sve_brkpa_z:
1636 case Intrinsic::aarch64_sve_brkpb_z:
1637 case Intrinsic::aarch64_sve_cntp:
1638 case Intrinsic::aarch64_sve_compact:
1639 case Intrinsic::aarch64_sve_eor_z:
1640 case Intrinsic::aarch64_sve_eorv:
1641 case Intrinsic::aarch64_sve_eorqv:
1642 case Intrinsic::aarch64_sve_nand_z:
1643 case Intrinsic::aarch64_sve_nor_z:
1644 case Intrinsic::aarch64_sve_orn_z:
1645 case Intrinsic::aarch64_sve_orr_z:
1646 case Intrinsic::aarch64_sve_orv:
1647 case Intrinsic::aarch64_sve_orqv:
1648 case Intrinsic::aarch64_sve_pnext:
1649 case Intrinsic::aarch64_sve_rdffr_z:
1650 case Intrinsic::aarch64_sve_saddv:
1651 case Intrinsic::aarch64_sve_uaddv:
1652 case Intrinsic::aarch64_sve_umaxv:
1653 case Intrinsic::aarch64_sve_umaxqv:
1654 case Intrinsic::aarch64_sve_cmpeq:
1655 case Intrinsic::aarch64_sve_cmpeq_wide:
1656 case Intrinsic::aarch64_sve_cmpge:
1657 case Intrinsic::aarch64_sve_cmpge_wide:
1658 case Intrinsic::aarch64_sve_cmpgt:
1659 case Intrinsic::aarch64_sve_cmpgt_wide:
1660 case Intrinsic::aarch64_sve_cmphi:
1661 case Intrinsic::aarch64_sve_cmphi_wide:
1662 case Intrinsic::aarch64_sve_cmphs:
1663 case Intrinsic::aarch64_sve_cmphs_wide:
1664 case Intrinsic::aarch64_sve_cmple_wide:
1665 case Intrinsic::aarch64_sve_cmplo_wide:
1666 case Intrinsic::aarch64_sve_cmpls_wide:
1667 case Intrinsic::aarch64_sve_cmplt_wide:
1668 case Intrinsic::aarch64_sve_cmpne:
1669 case Intrinsic::aarch64_sve_cmpne_wide:
1670 case Intrinsic::aarch64_sve_facge:
1671 case Intrinsic::aarch64_sve_facgt:
1672 case Intrinsic::aarch64_sve_fcmpeq:
1673 case Intrinsic::aarch64_sve_fcmpge:
1674 case Intrinsic::aarch64_sve_fcmpgt:
1675 case Intrinsic::aarch64_sve_fcmpne:
1676 case Intrinsic::aarch64_sve_fcmpuo:
1677 case Intrinsic::aarch64_sve_ld1:
1678 case Intrinsic::aarch64_sve_ld1_gather:
1679 case Intrinsic::aarch64_sve_ld1_gather_index:
1680 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1681 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1682 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1683 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1684 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1685 case Intrinsic::aarch64_sve_ld1q_gather_index:
1686 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1687 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1688 case Intrinsic::aarch64_sve_ld1ro:
1689 case Intrinsic::aarch64_sve_ld1rq:
1690 case Intrinsic::aarch64_sve_ld1udq:
1691 case Intrinsic::aarch64_sve_ld1uwq:
1692 case Intrinsic::aarch64_sve_ld2_sret:
1693 case Intrinsic::aarch64_sve_ld2q_sret:
1694 case Intrinsic::aarch64_sve_ld3_sret:
1695 case Intrinsic::aarch64_sve_ld3q_sret:
1696 case Intrinsic::aarch64_sve_ld4_sret:
1697 case Intrinsic::aarch64_sve_ld4q_sret:
1698 case Intrinsic::aarch64_sve_ldff1:
1699 case Intrinsic::aarch64_sve_ldff1_gather:
1700 case Intrinsic::aarch64_sve_ldff1_gather_index:
1701 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1702 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1703 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1704 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1705 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1706 case Intrinsic::aarch64_sve_ldnf1:
1707 case Intrinsic::aarch64_sve_ldnt1:
1708 case Intrinsic::aarch64_sve_ldnt1_gather:
1709 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1710 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1711 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1714 case Intrinsic::aarch64_sve_prf:
1715 case Intrinsic::aarch64_sve_prfb_gather_index:
1716 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1717 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1718 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1719 case Intrinsic::aarch64_sve_prfd_gather_index:
1720 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1721 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1722 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1723 case Intrinsic::aarch64_sve_prfh_gather_index:
1724 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1725 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1726 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1727 case Intrinsic::aarch64_sve_prfw_gather_index:
1728 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1729 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1730 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1733 case Intrinsic::aarch64_sve_st1_scatter:
1734 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1735 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1736 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1737 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1738 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1739 case Intrinsic::aarch64_sve_st1dq:
1740 case Intrinsic::aarch64_sve_st1q_scatter_index:
1741 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1742 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1743 case Intrinsic::aarch64_sve_st1wq:
1744 case Intrinsic::aarch64_sve_stnt1:
1745 case Intrinsic::aarch64_sve_stnt1_scatter:
1746 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1747 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1748 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1750 case Intrinsic::aarch64_sve_st2:
1751 case Intrinsic::aarch64_sve_st2q:
1753 case Intrinsic::aarch64_sve_st3:
1754 case Intrinsic::aarch64_sve_st3q:
1756 case Intrinsic::aarch64_sve_st4:
1757 case Intrinsic::aarch64_sve_st4q:
1765 Value *UncastedPred;
1771 Pred = UncastedPred;
1777 if (OrigPredTy->getMinNumElements() <=
1779 ->getMinNumElements())
1780 Pred = UncastedPred;
1784 return C &&
C->isAllOnesValue();
1791 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
1792 Dup->getOperand(1) == Pg &&
isa<Constant>(Dup->getOperand(2)))
1800static std::optional<Instruction *>
1807 Value *Op1 =
II.getOperand(1);
1808 Value *Op2 =
II.getOperand(2);
1834 return std::nullopt;
1842 if (SimpleII == Inactive)
1852static std::optional<Instruction *>
1856 return std::nullopt;
1885 II.setCalledFunction(NewDecl);
1895 return std::nullopt;
1907static std::optional<Instruction *>
1911 return std::nullopt;
1913 auto IntrinsicID = BinOp->getIntrinsicID();
1914 switch (IntrinsicID) {
1915 case Intrinsic::aarch64_sve_and_z:
1916 case Intrinsic::aarch64_sve_bic_z:
1917 case Intrinsic::aarch64_sve_eor_z:
1918 case Intrinsic::aarch64_sve_nand_z:
1919 case Intrinsic::aarch64_sve_nor_z:
1920 case Intrinsic::aarch64_sve_orn_z:
1921 case Intrinsic::aarch64_sve_orr_z:
1924 return std::nullopt;
1927 auto BinOpPred = BinOp->getOperand(0);
1928 auto BinOpOp1 = BinOp->getOperand(1);
1929 auto BinOpOp2 = BinOp->getOperand(2);
1933 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
1934 return std::nullopt;
1936 auto PredOp = PredIntr->getOperand(0);
1938 if (PredOpTy !=
II.getType())
1939 return std::nullopt;
1943 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
1944 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1945 if (BinOpOp1 == BinOpOp2)
1946 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1949 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
1951 auto NarrowedBinOp =
1956static std::optional<Instruction *>
1963 return BinOpCombine;
1968 return std::nullopt;
1971 Value *Cursor =
II.getOperand(0), *EarliestReplacement =
nullptr;
1980 if (CursorVTy->getElementCount().getKnownMinValue() <
1981 IVTy->getElementCount().getKnownMinValue())
1985 if (Cursor->getType() == IVTy)
1986 EarliestReplacement = Cursor;
1991 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
1992 Intrinsic::aarch64_sve_convert_to_svbool ||
1993 IntrinsicCursor->getIntrinsicID() ==
1994 Intrinsic::aarch64_sve_convert_from_svbool))
1997 CandidatesForRemoval.
insert(CandidatesForRemoval.
begin(), IntrinsicCursor);
1998 Cursor = IntrinsicCursor->getOperand(0);
2003 if (!EarliestReplacement)
2004 return std::nullopt;
2012 auto *OpPredicate =
II.getOperand(0);
2029 II.getArgOperand(2));
2035 return std::nullopt;
2039 II.getArgOperand(0),
II.getArgOperand(2),
uint64_t(0));
2048 II.getArgOperand(0));
2058 return std::nullopt;
2063 if (!SplatValue || !SplatValue->isZero())
2064 return std::nullopt;
2069 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
2070 return std::nullopt;
2074 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
2075 return std::nullopt;
2078 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
2079 return std::nullopt;
2084 return std::nullopt;
2087 return std::nullopt;
2091 return std::nullopt;
2095 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
2096 return std::nullopt;
2098 unsigned NumElts = VecTy->getNumElements();
2099 unsigned PredicateBits = 0;
2102 for (
unsigned I = 0;
I < NumElts; ++
I) {
2105 return std::nullopt;
2107 PredicateBits |= 1 << (
I * (16 / NumElts));
2111 if (PredicateBits == 0) {
2113 PFalse->takeName(&
II);
2119 for (
unsigned I = 0;
I < 16; ++
I)
2120 if ((PredicateBits & (1 <<
I)) != 0)
2123 unsigned PredSize = Mask & -Mask;
2128 for (
unsigned I = 0;
I < 16;
I += PredSize)
2129 if ((PredicateBits & (1 <<
I)) == 0)
2130 return std::nullopt;
2135 {PredType}, {PTruePat});
2137 Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
2138 auto *ConvertFromSVBool =
2140 {
II.getType()}, {ConvertToSVBool});
2148 Value *Pg =
II.getArgOperand(0);
2149 Value *Vec =
II.getArgOperand(1);
2150 auto IntrinsicID =
II.getIntrinsicID();
2151 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
2163 auto OpC = OldBinOp->getOpcode();
2169 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(),
II.getIterator());
2175 if (IsAfter &&
C &&
C->isNullValue()) {
2179 Extract->insertBefore(
II.getIterator());
2180 Extract->takeName(&
II);
2186 return std::nullopt;
2188 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
2189 return std::nullopt;
2191 const auto PTruePattern =
2197 return std::nullopt;
2199 unsigned Idx = MinNumElts - 1;
2209 if (Idx >= PgVTy->getMinNumElements())
2210 return std::nullopt;
2215 Extract->insertBefore(
II.getIterator());
2216 Extract->takeName(&
II);
2229 Value *Pg =
II.getArgOperand(0);
2231 Value *Vec =
II.getArgOperand(2);
2234 if (!Ty->isIntegerTy())
2235 return std::nullopt;
2240 return std::nullopt;
2257 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
2270 {
II.getType()}, {AllPat});
2277static std::optional<Instruction *>
2281 if (
Pattern == AArch64SVEPredPattern::all) {
2290 return MinNumElts && NumElts >= MinNumElts
2292 II, ConstantInt::get(
II.getType(), MinNumElts)))
2296static std::optional<Instruction *>
2299 if (!ST->isStreaming())
2300 return std::nullopt;
2312 Value *PgVal =
II.getArgOperand(0);
2313 Value *OpVal =
II.getArgOperand(1);
2317 if (PgVal == OpVal &&
2318 (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
2319 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
2334 return std::nullopt;
2338 if (Pg->
getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
2339 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
2353 if ((Pg ==
Op) && (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
2354 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
2355 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
2356 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
2357 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
2358 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
2359 (OpIID == Intrinsic::aarch64_sve_and_z) ||
2360 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
2361 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
2362 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
2363 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
2364 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
2365 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
2375 return std::nullopt;
2378template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
2379static std::optional<Instruction *>
2381 bool MergeIntoAddendOp) {
2383 Value *MulOp0, *MulOp1, *AddendOp, *
Mul;
2384 if (MergeIntoAddendOp) {
2385 AddendOp =
II.getOperand(1);
2386 Mul =
II.getOperand(2);
2388 AddendOp =
II.getOperand(2);
2389 Mul =
II.getOperand(1);
2394 return std::nullopt;
2396 if (!
Mul->hasOneUse())
2397 return std::nullopt;
2400 if (
II.getType()->isFPOrFPVectorTy()) {
2405 return std::nullopt;
2407 return std::nullopt;
2412 if (MergeIntoAddendOp)
2422static std::optional<Instruction *>
2424 Value *Pred =
II.getOperand(0);
2425 Value *PtrOp =
II.getOperand(1);
2426 Type *VecTy =
II.getType();
2430 Load->copyMetadata(
II);
2441static std::optional<Instruction *>
2443 Value *VecOp =
II.getOperand(0);
2444 Value *Pred =
II.getOperand(1);
2445 Value *PtrOp =
II.getOperand(2);
2449 Store->copyMetadata(
II);
2461 case Intrinsic::aarch64_sve_fmul_u:
2462 return Instruction::BinaryOps::FMul;
2463 case Intrinsic::aarch64_sve_fadd_u:
2464 return Instruction::BinaryOps::FAdd;
2465 case Intrinsic::aarch64_sve_fsub_u:
2466 return Instruction::BinaryOps::FSub;
2468 return Instruction::BinaryOpsEnd;
2472static std::optional<Instruction *>
2475 if (
II.isStrictFP())
2476 return std::nullopt;
2478 auto *OpPredicate =
II.getOperand(0);
2480 if (BinOpCode == Instruction::BinaryOpsEnd ||
2482 return std::nullopt;
2484 BinOpCode,
II.getOperand(1),
II.getOperand(2),
II.getFastMathFlags());
2491 Intrinsic::aarch64_sve_mla>(
2495 Intrinsic::aarch64_sve_mad>(
2498 return std::nullopt;
2501static std::optional<Instruction *>
2505 Intrinsic::aarch64_sve_fmla>(IC,
II,
2510 Intrinsic::aarch64_sve_fmad>(IC,
II,
2515 Intrinsic::aarch64_sve_fmla>(IC,
II,
2518 return std::nullopt;
2521static std::optional<Instruction *>
2525 Intrinsic::aarch64_sve_fmla>(IC,
II,
2530 Intrinsic::aarch64_sve_fmad>(IC,
II,
2535 Intrinsic::aarch64_sve_fmla_u>(
2541static std::optional<Instruction *>
2545 Intrinsic::aarch64_sve_fmls>(IC,
II,
2550 Intrinsic::aarch64_sve_fnmsb>(
2555 Intrinsic::aarch64_sve_fmls>(IC,
II,
2558 return std::nullopt;
2561static std::optional<Instruction *>
2565 Intrinsic::aarch64_sve_fmls>(IC,
II,
2570 Intrinsic::aarch64_sve_fnmsb>(
2575 Intrinsic::aarch64_sve_fmls_u>(
2584 Intrinsic::aarch64_sve_mls>(
2587 return std::nullopt;
2592 Value *UnpackArg =
II.getArgOperand(0);
2594 bool IsSigned =
II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
2595 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
2608 return std::nullopt;
2612 auto *OpVal =
II.getOperand(0);
2613 auto *OpIndices =
II.getOperand(1);
2620 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
2621 return std::nullopt;
2636 Type *RetTy =
II.getType();
2637 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
2638 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
2642 if ((
match(
II.getArgOperand(0),
2649 if (TyA ==
B->getType() &&
2654 TyA->getMinNumElements());
2660 return std::nullopt;
2668 if (
match(
II.getArgOperand(0),
2673 II, (
II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ?
A :
B));
2675 return std::nullopt;
2678static std::optional<Instruction *>
2680 Value *Mask =
II.getOperand(0);
2681 Value *BasePtr =
II.getOperand(1);
2682 Value *Index =
II.getOperand(2);
2693 BasePtr->getPointerAlignment(
II.getDataLayout());
2696 BasePtr, IndexBase);
2703 return std::nullopt;
2706static std::optional<Instruction *>
2708 Value *Val =
II.getOperand(0);
2709 Value *Mask =
II.getOperand(1);
2710 Value *BasePtr =
II.getOperand(2);
2711 Value *Index =
II.getOperand(3);
2721 BasePtr->getPointerAlignment(
II.getDataLayout());
2724 BasePtr, IndexBase);
2730 return std::nullopt;
2736 Value *Pred =
II.getOperand(0);
2737 Value *Vec =
II.getOperand(1);
2738 Value *DivVec =
II.getOperand(2);
2742 if (!SplatConstantInt)
2743 return std::nullopt;
2747 if (DivisorValue == -1)
2748 return std::nullopt;
2749 if (DivisorValue == 1)
2755 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2762 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2764 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2768 return std::nullopt;
2772 size_t VecSize = Vec.
size();
2777 size_t HalfVecSize = VecSize / 2;
2781 if (*
LHS !=
nullptr && *
RHS !=
nullptr) {
2789 if (*
LHS ==
nullptr && *
RHS !=
nullptr)
2807 return std::nullopt;
2814 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2815 CurrentInsertElt = InsertElt->getOperand(0);
2821 return std::nullopt;
2825 for (
size_t I = 0;
I < Elts.
size();
I++) {
2826 if (Elts[
I] ==
nullptr)
2831 if (InsertEltChain ==
nullptr)
2832 return std::nullopt;
2838 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.
size();
2839 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2840 IIScalableTy->getMinNumElements() /
2845 auto *WideShuffleMaskTy =
2856 auto NarrowBitcast =
2869 return std::nullopt;
2874 Value *Pred =
II.getOperand(0);
2875 Value *Vec =
II.getOperand(1);
2876 Value *Shift =
II.getOperand(2);
2879 Value *AbsPred, *MergedValue;
2885 return std::nullopt;
2893 return std::nullopt;
2898 return std::nullopt;
2901 {
II.getType()}, {Pred, Vec, Shift});
2908 Value *Vec =
II.getOperand(0);
2913 return std::nullopt;
2919 auto *NI =
II.getNextNode();
2922 return !
I->mayReadOrWriteMemory() && !
I->mayHaveSideEffects();
2924 while (LookaheadThreshold-- && CanSkipOver(NI)) {
2925 auto *NIBB = NI->getParent();
2926 NI = NI->getNextNode();
2928 if (
auto *SuccBB = NIBB->getUniqueSuccessor())
2929 NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();
2935 if (NextII &&
II.isIdenticalTo(NextII))
2938 return std::nullopt;
2946 {II.getType(), II.getOperand(0)->getType()},
2947 {II.getOperand(0), II.getOperand(1)}));
2954 return std::nullopt;
2960 Value *Passthru =
II.getOperand(0);
2968 auto *Mask = ConstantInt::get(Ty, MaskValue);
2974 return std::nullopt;
2977static std::optional<Instruction *>
2984 return std::nullopt;
2987std::optional<Instruction *>
2998 case Intrinsic::aarch64_dmb:
3000 case Intrinsic::aarch64_neon_fmaxnm:
3001 case Intrinsic::aarch64_neon_fminnm:
3003 case Intrinsic::aarch64_sve_convert_from_svbool:
3005 case Intrinsic::aarch64_sve_dup:
3007 case Intrinsic::aarch64_sve_dup_x:
3009 case Intrinsic::aarch64_sve_cmpne:
3010 case Intrinsic::aarch64_sve_cmpne_wide:
3012 case Intrinsic::aarch64_sve_rdffr:
3014 case Intrinsic::aarch64_sve_lasta:
3015 case Intrinsic::aarch64_sve_lastb:
3017 case Intrinsic::aarch64_sve_clasta_n:
3018 case Intrinsic::aarch64_sve_clastb_n:
3020 case Intrinsic::aarch64_sve_cntd:
3022 case Intrinsic::aarch64_sve_cntw:
3024 case Intrinsic::aarch64_sve_cnth:
3026 case Intrinsic::aarch64_sve_cntb:
3028 case Intrinsic::aarch64_sme_cntsd:
3030 case Intrinsic::aarch64_sve_ptest_any:
3031 case Intrinsic::aarch64_sve_ptest_first:
3032 case Intrinsic::aarch64_sve_ptest_last:
3034 case Intrinsic::aarch64_sve_fadd:
3036 case Intrinsic::aarch64_sve_fadd_u:
3038 case Intrinsic::aarch64_sve_fmul_u:
3040 case Intrinsic::aarch64_sve_fsub:
3042 case Intrinsic::aarch64_sve_fsub_u:
3044 case Intrinsic::aarch64_sve_add:
3046 case Intrinsic::aarch64_sve_add_u:
3048 Intrinsic::aarch64_sve_mla_u>(
3050 case Intrinsic::aarch64_sve_sub:
3052 case Intrinsic::aarch64_sve_sub_u:
3054 Intrinsic::aarch64_sve_mls_u>(
3056 case Intrinsic::aarch64_sve_tbl:
3058 case Intrinsic::aarch64_sve_uunpkhi:
3059 case Intrinsic::aarch64_sve_uunpklo:
3060 case Intrinsic::aarch64_sve_sunpkhi:
3061 case Intrinsic::aarch64_sve_sunpklo:
3063 case Intrinsic::aarch64_sve_uzp1:
3065 case Intrinsic::aarch64_sve_zip1:
3066 case Intrinsic::aarch64_sve_zip2:
3068 case Intrinsic::aarch64_sve_ld1_gather_index:
3070 case Intrinsic::aarch64_sve_st1_scatter_index:
3072 case Intrinsic::aarch64_sve_ld1:
3074 case Intrinsic::aarch64_sve_st1:
3076 case Intrinsic::aarch64_sve_sdiv:
3078 case Intrinsic::aarch64_sve_sel:
3080 case Intrinsic::aarch64_sve_srshl:
3082 case Intrinsic::aarch64_sve_dupq_lane:
3084 case Intrinsic::aarch64_sve_insr:
3086 case Intrinsic::aarch64_sve_whilelo:
3088 case Intrinsic::aarch64_sve_ptrue:
3090 case Intrinsic::aarch64_sve_uxtb:
3092 case Intrinsic::aarch64_sve_uxth:
3094 case Intrinsic::aarch64_sve_uxtw:
3096 case Intrinsic::aarch64_sme_in_streaming_mode:
3100 return std::nullopt;
3107 SimplifyAndSetOp)
const {
3108 switch (
II.getIntrinsicID()) {
3111 case Intrinsic::aarch64_neon_fcvtxn:
3112 case Intrinsic::aarch64_neon_rshrn:
3113 case Intrinsic::aarch64_neon_sqrshrn:
3114 case Intrinsic::aarch64_neon_sqrshrun:
3115 case Intrinsic::aarch64_neon_sqshrn:
3116 case Intrinsic::aarch64_neon_sqshrun:
3117 case Intrinsic::aarch64_neon_sqxtn:
3118 case Intrinsic::aarch64_neon_sqxtun:
3119 case Intrinsic::aarch64_neon_uqrshrn:
3120 case Intrinsic::aarch64_neon_uqshrn:
3121 case Intrinsic::aarch64_neon_uqxtn:
3122 SimplifyAndSetOp(&
II, 0, OrigDemandedElts, UndefElts);
3126 return std::nullopt;
3130 return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3140 if (ST->useSVEForFixedLengthVectors() &&
3143 std::max(ST->getMinSVEVectorSizeInBits(), 128u));
3144 else if (ST->isNeonAvailable())
3149 if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3158bool AArch64TTIImpl::isSingleExtWideningInstruction(
3160 Type *SrcOverrideTy)
const {
3175 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3178 Type *SrcTy = SrcOverrideTy;
3180 case Instruction::Add:
3181 case Instruction::Sub: {
3190 if (Opcode == Instruction::Sub)
3214 assert(SrcTy &&
"Expected some SrcTy");
3216 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
3222 DstTyL.first * DstTyL.second.getVectorMinNumElements();
3224 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
3228 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
3231Type *AArch64TTIImpl::isBinExtWideningInstruction(
unsigned Opcode,
Type *DstTy,
3233 Type *SrcOverrideTy)
const {
3234 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
3235 Opcode != Instruction::Mul)
3245 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3248 auto getScalarSizeWithOverride = [&](
const Value *
V) {
3254 ->getScalarSizeInBits();
3257 unsigned MaxEltSize = 0;
3260 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3261 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3262 MaxEltSize = std::max(EltSize0, EltSize1);
3265 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3266 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3269 if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
3271 MaxEltSize = DstEltSize / 2;
3272 }
else if (Opcode == Instruction::Mul &&
3285 getScalarSizeWithOverride(
isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);
3289 if (MaxEltSize * 2 > DstEltSize)
3307 if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(
DL, Src)) ||
3308 (Src->isScalableTy() && !ST->hasSVE2()))
3318 if (AddUser && AddUser->getOpcode() == Instruction::Add)
3322 if (!Shr || Shr->getOpcode() != Instruction::LShr)
3326 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
3327 Src->getScalarSizeInBits() !=
3351 int ISD = TLI->InstructionOpcodeToISD(Opcode);
3355 if (
I &&
I->hasOneUser()) {
3358 if (
Type *ExtTy = isBinExtWideningInstruction(
3359 SingleUser->getOpcode(), Dst, Operands,
3360 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3373 if (isSingleExtWideningInstruction(
3374 SingleUser->getOpcode(), Dst, Operands,
3375 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3379 if (SingleUser->getOpcode() == Instruction::Add) {
3380 if (
I == SingleUser->getOperand(1) ||
3382 cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
3397 EVT SrcTy = TLI->getValueType(
DL, Src);
3398 EVT DstTy = TLI->getValueType(
DL, Dst);
3400 if (!SrcTy.isSimple() || !DstTy.
isSimple())
3405 if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
3437 const unsigned int SVE_EXT_COST = 1;
3438 const unsigned int SVE_FCVT_COST = 1;
3439 const unsigned int SVE_UNPACK_ONCE = 4;
3440 const unsigned int SVE_UNPACK_TWICE = 16;
3569 SVE_EXT_COST + SVE_FCVT_COST},
3574 SVE_EXT_COST + SVE_FCVT_COST},
3581 SVE_EXT_COST + SVE_FCVT_COST},
3585 SVE_EXT_COST + SVE_FCVT_COST},
3591 SVE_EXT_COST + SVE_FCVT_COST},
3594 SVE_EXT_COST + SVE_FCVT_COST},
3599 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3601 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3611 SVE_EXT_COST + SVE_FCVT_COST},
3616 SVE_EXT_COST + SVE_FCVT_COST},
3629 SVE_EXT_COST + SVE_FCVT_COST},
3633 SVE_EXT_COST + SVE_FCVT_COST},
3645 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3647 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3649 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3651 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3655 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3657 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3673 SVE_EXT_COST + SVE_FCVT_COST},
3678 SVE_EXT_COST + SVE_FCVT_COST},
3689 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3691 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3693 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3695 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3697 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3699 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3703 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3705 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3707 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3709 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3908 EVT WiderTy = SrcTy.
bitsGT(DstTy) ? SrcTy : DstTy;
3911 ST->useSVEForFixedLengthVectors(WiderTy)) {
3912 std::pair<InstructionCost, MVT> LT =
3914 unsigned NumElements =
3953 if (ST->hasFullFP16())
3965 Src->getScalarType(), CCH,
CostKind) +
3973 ST->isSVEorStreamingSVEAvailable() &&
3974 TLI->getTypeAction(Src->getContext(), SrcTy) ==
3976 TLI->getTypeAction(Dst->getContext(), DstTy) ==
3985 Opcode, LegalTy, Src, CCH,
CostKind,
I);
3988 return Part1 + Part2;
3995 ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))
4007 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
4020 CostKind, Index,
nullptr,
nullptr);
4024 auto DstVT = TLI->getValueType(
DL, Dst);
4025 auto SrcVT = TLI->getValueType(
DL, Src);
4030 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
4036 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
4046 case Instruction::SExt:
4051 case Instruction::ZExt:
4052 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
4065 return Opcode == Instruction::PHI ? 0 : 1;
4074 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4083 if (!LT.second.isVector())
4088 if (LT.second.isFixedLengthVector()) {
4089 unsigned Width = LT.second.getVectorNumElements();
4090 Index = Index % Width;
4105 if (ST->hasFastLD1Single())
4117 : ST->getVectorInsertExtractBaseCost() + 1;
4141 auto ExtractCanFuseWithFmul = [&]() {
4148 auto IsAllowedScalarTy = [&](
const Type *
T) {
4149 return T->isFloatTy() ||
T->isDoubleTy() ||
4150 (
T->isHalfTy() && ST->hasFullFP16());
4154 auto IsUserFMulScalarTy = [](
const Value *EEUser) {
4157 return BO && BO->getOpcode() == BinaryOperator::FMul &&
4158 !BO->getType()->isVectorTy();
4163 auto IsExtractLaneEquivalentToZero = [&](
unsigned Idx,
unsigned EltSz) {
4167 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
4176 DenseMap<User *, unsigned> UserToExtractIdx;
4177 for (
auto *U :
Scalar->users()) {
4178 if (!IsUserFMulScalarTy(U))
4182 UserToExtractIdx[
U];
4184 if (UserToExtractIdx.
empty())
4186 for (
auto &[S, U, L] : ScalarUserAndIdx) {
4187 for (
auto *U : S->users()) {
4188 if (UserToExtractIdx.
contains(U)) {
4190 auto *Op0 =
FMul->getOperand(0);
4191 auto *Op1 =
FMul->getOperand(1);
4192 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
4193 UserToExtractIdx[
U] =
L;
4199 for (
auto &[U, L] : UserToExtractIdx) {
4211 return !EE->users().empty() &&
all_of(EE->users(), [&](
const User *U) {
4212 if (!IsUserFMulScalarTy(U))
4217 const auto *BO = cast<BinaryOperator>(U);
4218 const auto *OtherEE = dyn_cast<ExtractElementInst>(
4219 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
4221 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
4224 return IsExtractLaneEquivalentToZero(
4225 cast<ConstantInt>(OtherEE->getIndexOperand())
4228 OtherEE->getType()->getScalarSizeInBits());
4236 if (Opcode == Instruction::ExtractElement && (
I || Scalar) &&
4237 ExtractCanFuseWithFmul())
4242 :
ST->getVectorInsertExtractBaseCost();
4251 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
4254 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr,
4260 Value *Scalar,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4262 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr, Scalar,
4263 ScalarUserAndIdx, VIC);
4270 return getVectorInstrCostHelper(
I.getOpcode(), Val,
CostKind, Index, &
I,
4277 unsigned Index)
const {
4289 : ST->getVectorInsertExtractBaseCost() + 1;
4298 if (Ty->getElementType()->isFloatingPointTy())
4301 unsigned VecInstCost =
4303 return DemandedElts.
popcount() * (Insert + Extract) * VecInstCost;
4310 if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
4311 return std::nullopt;
4312 if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
4313 return std::nullopt;
4315 if (CanUseSVE && ST->hasSVEB16B16() && ST->isNonStreamingSVEorSME2Available())
4316 return std::nullopt;
4323 Cost += InstCost(PromotedTy);
4346 Op2Info, Args, CxtI);
4350 int ISD = TLI->InstructionOpcodeToISD(Opcode);
4357 Ty,
CostKind, Op1Info, Op2Info,
true,
4360 [&](
Type *PromotedTy) {
4364 return *PromotedCost;
4370 if (
Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {
4437 auto VT = TLI->getValueType(
DL, Ty);
4438 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
4442 : (3 * AsrCost + AddCost);
4444 return MulCost + AsrCost + 2 * AddCost;
4446 }
else if (VT.isVector()) {
4456 if (Ty->isScalableTy() && ST->hasSVE())
4457 Cost += 2 * AsrCost;
4462 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
4466 }
else if (LT.second == MVT::v2i64) {
4467 return VT.getVectorNumElements() *
4474 if (Ty->isScalableTy() && ST->hasSVE())
4475 return MulCost + 2 * AddCost + 2 * AsrCost;
4476 return 2 * MulCost + AddCost + AsrCost + UsraCost;
4481 LT.second.isFixedLengthVector()) {
4491 return ExtractCost + InsertCost +
4499 auto VT = TLI->getValueType(
DL, Ty);
4515 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
4516 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
4517 LT.second == MVT::nxv16i8;
4518 bool Is128bit = LT.second.is128BitVector();
4530 (HasMULH ? 0 : ShrCost) +
4531 AddCost * 2 + ShrCost;
4532 return DivCost + (
ISD ==
ISD::UREM ? MulCost + AddCost : 0);
4539 if (!VT.isVector() && VT.getSizeInBits() > 64)
4543 Opcode, Ty,
CostKind, Op1Info, Op2Info);
4545 if (TLI->isOperationLegalOrCustom(
ISD, LT.second) && ST->hasSVE()) {
4549 Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
4559 if (
nullptr != Entry)
4564 if (LT.second.getScalarType() == MVT::i8)
4566 else if (LT.second.getScalarType() == MVT::i16)
4578 Opcode, Ty->getScalarType(),
CostKind, Op1Info, Op2Info);
4579 return (4 + DivCost) * VTy->getNumElements();
4585 -1,
nullptr,
nullptr);
4599 if (LT.second == MVT::v2i64 && ST->hasSVE())
4612 if (LT.second != MVT::v2i64)
4634 if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
4635 (Ty->isHalfTy() && ST->hasFullFP16())) &&
4644 if (!Ty->getScalarType()->isFP128Ty())
4651 if (!Ty->getScalarType()->isFP128Ty())
4652 return 2 * LT.first;
4659 if (!Ty->isVectorTy())
4675 int MaxMergeDistance = 64;
4679 return NumVectorInstToHideOverhead;
4689 unsigned Opcode1,
unsigned Opcode2)
const {
4692 if (!
Sched.hasInstrSchedModel())
4696 Sched.getSchedClassDesc(
TII->get(Opcode1).getSchedClass());
4698 Sched.getSchedClassDesc(
TII->get(Opcode2).getSchedClass());
4704 "Cannot handle variant scheduling classes without an MI");
4720 const int AmortizationCost = 20;
4728 VecPred = CurrentPred;
4736 static const auto ValidMinMaxTys = {
4737 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
4738 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
4739 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
4743 (ST->hasFullFP16() &&
4749 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4750 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4751 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4752 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4753 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4754 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4755 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4756 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4757 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4758 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4759 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
4761 EVT SelCondTy = TLI->getValueType(
DL, CondTy);
4762 EVT SelValTy = TLI->getValueType(
DL, ValTy);
4771 if (Opcode == Instruction::FCmp) {
4773 ValTy,
CostKind, Op1Info, Op2Info,
false,
4775 false, [&](
Type *PromotedTy) {
4787 return *PromotedCost;
4791 if (LT.second.getScalarType() != MVT::f64 &&
4792 LT.second.getScalarType() != MVT::f32 &&
4793 LT.second.getScalarType() != MVT::f16)
4798 unsigned Factor = 1;
4799 if (!CondTy->isVectorTy() &&
4813 AArch64::FCMEQv4f32))
4825 TLI->isTypeLegal(TLI->getValueType(
DL, ValTy)) &&
4844 Op1Info, Op2Info,
I);
4850 if (ST->requiresStrictAlign()) {
4855 Options.AllowOverlappingLoads =
true;
4856 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
4861 Options.LoadSizes = {8, 4, 2, 1};
4862 Options.AllowedTailExpansions = {3, 5, 6};
4867 return ST->hasSVE();
4873 switch (MICA.
getID()) {
4874 case Intrinsic::masked_scatter:
4875 case Intrinsic::masked_gather:
4877 case Intrinsic::masked_load:
4878 case Intrinsic::masked_store:
4892 if (!LT.first.isValid())
4897 if (VT->getElementType()->isIntegerTy(1))
4914 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4915 "Should be called on only load or stores.");
4917 case Instruction::Load:
4920 return ST->getGatherOverhead();
4922 case Instruction::Store:
4925 return ST->getScatterOverhead();
4936 unsigned Opcode = (MICA.
getID() == Intrinsic::masked_gather ||
4937 MICA.
getID() == Intrinsic::vp_gather)
4939 : Instruction::Store;
4949 if (!LT.first.isValid())
4953 if (!LT.second.isVector() ||
4955 VT->getElementType()->isIntegerTy(1))
4965 ElementCount LegalVF = LT.second.getVectorElementCount();
4968 {TTI::OK_AnyValue, TTI::OP_None},
I);
4984 EVT VT = TLI->getValueType(
DL, Ty,
true);
4986 if (VT == MVT::Other)
4991 if (!LT.first.isValid())
5001 (VTy->getElementType()->isIntegerTy(1) &&
5002 !VTy->getElementCount().isKnownMultipleOf(
5013 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
5014 LT.second.is128BitVector() && Alignment <
Align(16)) {
5020 const int AmortizationCost = 6;
5022 return LT.first * 2 * AmortizationCost;
5026 if (Ty->isPtrOrPtrVectorTy())
5031 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
5033 if (VT == MVT::v4i8)
5040 if (!
isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
5055 while (!TypeWorklist.
empty()) {
5077 bool UseMaskForCond,
bool UseMaskForGaps)
const {
5078 assert(Factor >= 2 &&
"Invalid interleave factor");
5093 if (!VecTy->
isScalableTy() && (UseMaskForCond || UseMaskForGaps))
5096 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
5097 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
5100 VecVTy->getElementCount().divideCoefficientBy(Factor));
5106 if (MinElts % Factor == 0 &&
5107 TLI->isLegalInterleavedAccessType(SubVecTy,
DL, UseScalable))
5108 return Factor * TLI->getNumInterleavedAccesses(SubVecTy,
DL, UseScalable);
5113 UseMaskForCond, UseMaskForGaps);
5120 for (
auto *
I : Tys) {
5121 if (!
I->isVectorTy())
5132 return ST->getMaxInterleaveFactor();
5142 enum { MaxStridedLoads = 7 };
5144 int StridedLoads = 0;
5147 for (
const auto BB : L->blocks()) {
5148 for (
auto &
I : *BB) {
5154 if (L->isLoopInvariant(PtrValue))
5159 if (!LSCEVAddRec || !LSCEVAddRec->
isAffine())
5168 if (StridedLoads > MaxStridedLoads / 2)
5169 return StridedLoads;
5172 return StridedLoads;
5175 int StridedLoads = countStridedLoads(L, SE);
5177 <<
" strided loads\n");
5193 unsigned *FinalSize) {
5197 for (
auto *BB : L->getBlocks()) {
5198 for (
auto &
I : *BB) {
5204 if (!Cost.isValid())
5208 if (LoopCost > Budget)
5230 if (MaxTC > 0 && MaxTC <= 32)
5241 if (Blocks.
size() != 2)
5263 if (!L->isInnermost() || L->getNumBlocks() > 8)
5267 if (!L->getExitBlock())
5273 bool HasParellelizableReductions =
5274 L->getNumBlocks() == 1 &&
5275 any_of(L->getHeader()->phis(),
5277 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5280 if (HasParellelizableReductions &&
5302 if (HasParellelizableReductions) {
5313 if (Header == Latch) {
5316 unsigned Width = 10;
5322 unsigned MaxInstsPerLine = 16;
5324 unsigned BestUC = 1;
5325 unsigned SizeWithBestUC = BestUC *
Size;
5327 unsigned SizeWithUC = UC *
Size;
5328 if (SizeWithUC > 48)
5330 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5331 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5333 SizeWithBestUC = BestUC *
Size;
5343 for (
auto *BB : L->blocks()) {
5344 for (
auto &
I : *BB) {
5354 for (
auto *U :
I.users())
5356 LoadedValuesPlus.
insert(U);
5363 return LoadedValuesPlus.
contains(
SI->getOperand(0));
5389 auto *I = dyn_cast<Instruction>(V);
5390 return I && DependsOnLoopLoad(I, Depth + 1);
5397 DependsOnLoopLoad(
I, 0)) {
5413 if (L->getLoopDepth() > 1)
5424 for (
auto *BB : L->getBlocks()) {
5425 for (
auto &
I : *BB) {
5429 if (IsVectorized &&
I.getType()->isVectorTy())
5446 if (ST->isAppleMLike())
5448 else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
5470 !ST->getSchedModel().isOutOfOrder()) {
5493 bool CanCreate)
const {
5497 case Intrinsic::aarch64_neon_st2:
5498 case Intrinsic::aarch64_neon_st3:
5499 case Intrinsic::aarch64_neon_st4: {
5502 if (!CanCreate || !ST)
5504 unsigned NumElts = Inst->
arg_size() - 1;
5505 if (ST->getNumElements() != NumElts)
5507 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5513 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5515 Res = Builder.CreateInsertValue(Res, L, i);
5519 case Intrinsic::aarch64_neon_ld2:
5520 case Intrinsic::aarch64_neon_ld3:
5521 case Intrinsic::aarch64_neon_ld4:
5522 if (Inst->
getType() == ExpectedType)
5533 case Intrinsic::aarch64_neon_ld2:
5534 case Intrinsic::aarch64_neon_ld3:
5535 case Intrinsic::aarch64_neon_ld4:
5536 Info.ReadMem =
true;
5537 Info.WriteMem =
false;
5540 case Intrinsic::aarch64_neon_st2:
5541 case Intrinsic::aarch64_neon_st3:
5542 case Intrinsic::aarch64_neon_st4:
5543 Info.ReadMem =
false;
5544 Info.WriteMem =
true;
5552 case Intrinsic::aarch64_neon_ld2:
5553 case Intrinsic::aarch64_neon_st2:
5554 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5556 case Intrinsic::aarch64_neon_ld3:
5557 case Intrinsic::aarch64_neon_st3:
5558 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5560 case Intrinsic::aarch64_neon_ld4:
5561 case Intrinsic::aarch64_neon_st4:
5562 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
5574 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
5575 bool Considerable =
false;
5576 AllowPromotionWithoutCommonHeader =
false;
5579 Type *ConsideredSExtType =
5581 if (
I.getType() != ConsideredSExtType)
5585 for (
const User *U :
I.users()) {
5587 Considerable =
true;
5591 if (GEPInst->getNumOperands() > 2) {
5592 AllowPromotionWithoutCommonHeader =
true;
5597 return Considerable;
5646 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5656 return LegalizationCost + 2;
5666 LegalizationCost *= LT.first - 1;
5669 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5678 return LegalizationCost + 2;
5686 std::optional<FastMathFlags> FMF,
5702 return BaseCost + FixedVTy->getNumElements();
5705 if (Opcode != Instruction::FAdd)
5719 MVT MTy = LT.second;
5720 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5768 MTy.
isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5769 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5771 if (ValTy->getElementCount().getFixedValue() >= 2 && NElts >= 2 &&
5781 return (LT.first - 1) +
Log2_32(NElts);
5786 return (LT.first - 1) + Entry->Cost;
5798 if (LT.first != 1) {
5804 ExtraCost *= LT.first - 1;
5807 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5808 return Cost + ExtraCost;
5816 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *VecTy,
5818 EVT VecVT = TLI->getValueType(
DL, VecTy);
5819 EVT ResVT = TLI->getValueType(
DL, ResTy);
5829 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5831 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5833 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5835 return (LT.first - 1) * 2 + 2;
5846 EVT VecVT = TLI->getValueType(
DL, VecTy);
5847 EVT ResVT = TLI->getValueType(
DL, ResTy);
5850 RedOpcode == Instruction::Add) {
5856 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5858 return LT.first + 2;
5893 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5894 ? TLI->getPromotedVTForPredicate(
EVT(LT.second))
5908 if (LT.second.getScalarType() == MVT::i1) {
5917 assert(Entry &&
"Illegal Type for Splice");
5918 LegalizationCost += Entry->Cost;
5919 return LegalizationCost * LT.first;
5923 unsigned Opcode,
Type *InputTypeA,
Type *InputTypeB,
Type *AccumType,
5932 if (VF.
isFixed() && !ST->isSVEorStreamingSVEAvailable() &&
5933 (!ST->isNeonAvailable() || !ST->hasDotProd()))
5936 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub &&
5937 Opcode != Instruction::FAdd) ||
5944 assert(FMF &&
"Missing FastMathFlags for floating-point partial reduction");
5945 if (!FMF->allowReassoc() || !FMF->allowContract())
5949 "FastMathFlags only apply to floating-point partial reductions");
5953 (!BinOp || (OpBExtend !=
TTI::PR_None && InputTypeB)) &&
5954 "Unexpected values for OpBExtend or InputTypeB");
5958 if (BinOp && ((*BinOp != Instruction::Mul && *BinOp != Instruction::FMul) ||
5959 InputTypeA != InputTypeB))
5962 bool IsUSDot = OpBExtend !=
TTI::PR_None && OpAExtend != OpBExtend;
5963 if (IsUSDot && !ST->hasMatMulInt8())
5975 auto TC = TLI->getTypeConversion(AccumVectorType->
getContext(),
5984 if (TLI->getTypeAction(AccumVectorType->
getContext(), TC.second) !=
5990 std::pair<InstructionCost, MVT> AccumLT =
5992 std::pair<InstructionCost, MVT> InputLT =
5999 if (Opcode == Instruction::Sub)
6010 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
6012 if (AccumLT.second.getScalarType() == MVT::i64 &&
6013 InputLT.second.getScalarType() == MVT::i16)
6016 if (AccumLT.second.getScalarType() == MVT::i32 &&
6017 InputLT.second.getScalarType() == MVT::i16 &&
6018 (ST->hasSVE2p1() || ST->hasSME2()))
6021 if (AccumLT.second.getScalarType() == MVT::i64 &&
6022 InputLT.second.getScalarType() == MVT::i8)
6032 if (ST->isSVEorStreamingSVEAvailable() ||
6033 (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() &&
6034 ST->hasDotProd())) {
6035 if (AccumLT.second.getScalarType() == MVT::i32 &&
6036 InputLT.second.getScalarType() == MVT::i8)
6041 if (Opcode == Instruction::FAdd && (ST->hasSME2() || ST->hasSVE2p1())) {
6042 if (AccumLT.second.getScalarType() == MVT::f32 &&
6043 InputLT.second.getScalarType() == MVT::f16 &&
6044 AccumLT.second.getVectorMinNumElements() == 4 &&
6045 InputLT.second.getVectorMinNumElements() == 8)
6064 "Expected the Mask to match the return size if given");
6066 "Expected the same scalar types");
6072 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
6073 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
6074 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
6082 return std::max<InstructionCost>(1, LT.first / 4);
6090 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
6092 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
6095 unsigned TpNumElts = Mask.size();
6096 unsigned LTNumElts = LT.second.getVectorNumElements();
6097 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
6099 LT.second.getVectorElementCount());
6101 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>,
InstructionCost>
6103 for (
unsigned N = 0;
N < NumVecs;
N++) {
6107 unsigned Source1 = -1U, Source2 = -1U;
6108 unsigned NumSources = 0;
6109 for (
unsigned E = 0; E < LTNumElts; E++) {
6110 int MaskElt = (
N * LTNumElts + E < TpNumElts) ? Mask[
N * LTNumElts + E]
6119 unsigned Source = MaskElt / LTNumElts;
6120 if (NumSources == 0) {
6123 }
else if (NumSources == 1 && Source != Source1) {
6126 }
else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
6132 if (Source == Source1)
6134 else if (Source == Source2)
6135 NMask.
push_back(MaskElt % LTNumElts + LTNumElts);
6144 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
6155 NTp, NTp, NMask,
CostKind, 0,
nullptr, Args,
6158 Result.first->second = NCost;
6172 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
6173 if (LT.second.getFixedSizeInBits() >= 128 &&
6175 LT.second.getVectorNumElements() / 2) {
6178 if (Index == (
int)LT.second.getVectorNumElements() / 2)
6192 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6195 return M.value() < 0 || M.value() == (int)M.index();
6201 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6202 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6211 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6212 ST->isSVEorStreamingSVEAvailable() &&
6217 if (ST->isSVEorStreamingSVEAvailable() &&
6231 if (IsLoad && LT.second.isVector() &&
6233 LT.second.getVectorElementCount()))
6239 if (Mask.size() == 4 &&
6241 (SrcTy->getScalarSizeInBits() == 16 ||
6242 SrcTy->getScalarSizeInBits() == 32) &&
6243 all_of(Mask, [](
int E) {
return E < 8; }))
6249 if (LT.second.isFixedLengthVector() &&
6250 LT.second.getVectorNumElements() == Mask.size() &&
6256 (
isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6257 isTRNMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6258 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6259 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6260 LT.second.getVectorNumElements(), 16) ||
6261 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6262 LT.second.getVectorNumElements(), 32) ||
6263 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6264 LT.second.getVectorNumElements(), 64) ||
6267 [&Mask](
int M) {
return M < 0 || M == Mask[0]; })))
6396 return LT.first * Entry->Cost;
6405 LT.second.getSizeInBits() <= 128 && SubTp) {
6407 if (SubLT.second.isVector()) {
6408 int NumElts = LT.second.getVectorNumElements();
6409 int NumSubElts = SubLT.second.getVectorNumElements();
6410 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6416 if (IsExtractSubvector)
6433 if (
getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
6452 return ST->useFixedOverScalableIfEqualCost();
6456 return ST->getEpilogueVectorizationMinVF();
6491 unsigned NumInsns = 0;
6493 NumInsns += BB->size();
6503 int64_t Scale,
unsigned AddrSpace)
const {
6531 if (
I->getOpcode() == Instruction::Or &&
6535 if (
I->getOpcode() == Instruction::Add ||
6536 I->getOpcode() == Instruction::Sub)
6561 return all_equal(Shuf->getShuffleMask());
6568 bool AllowSplat =
false) {
6573 auto areTypesHalfed = [](
Value *FullV,
Value *HalfV) {
6574 auto *FullTy = FullV->
getType();
6575 auto *HalfTy = HalfV->getType();
6577 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6580 auto extractHalf = [](
Value *FullV,
Value *HalfV) {
6583 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6587 Value *S1Op1 =
nullptr, *S2Op1 =
nullptr;
6601 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6602 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6616 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6617 (M2Start != 0 && M2Start != (NumElements / 2)))
6619 if (S1Op1 && S2Op1 && M1Start != M2Start)
6629 return Ext->getType()->getScalarSizeInBits() ==
6630 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6644 Value *VectorOperand =
nullptr;
6661 if (!
GEP ||
GEP->getNumOperands() != 2)
6665 Value *Offsets =
GEP->getOperand(1);
6668 if (
Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6674 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6675 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6676 Ops.push_back(&
GEP->getOperandUse(1));
6712 switch (
II->getIntrinsicID()) {
6713 case Intrinsic::aarch64_neon_smull:
6714 case Intrinsic::aarch64_neon_umull:
6717 Ops.push_back(&
II->getOperandUse(0));
6718 Ops.push_back(&
II->getOperandUse(1));
6723 case Intrinsic::fma:
6724 case Intrinsic::fmuladd:
6731 Ops.push_back(&
II->getOperandUse(0));
6733 Ops.push_back(&
II->getOperandUse(1));
6736 case Intrinsic::aarch64_neon_sqdmull:
6737 case Intrinsic::aarch64_neon_sqdmulh:
6738 case Intrinsic::aarch64_neon_sqrdmulh:
6741 Ops.push_back(&
II->getOperandUse(0));
6743 Ops.push_back(&
II->getOperandUse(1));
6744 return !
Ops.empty();
6745 case Intrinsic::aarch64_neon_fmlal:
6746 case Intrinsic::aarch64_neon_fmlal2:
6747 case Intrinsic::aarch64_neon_fmlsl:
6748 case Intrinsic::aarch64_neon_fmlsl2:
6751 Ops.push_back(&
II->getOperandUse(1));
6753 Ops.push_back(&
II->getOperandUse(2));
6754 return !
Ops.empty();
6755 case Intrinsic::aarch64_sve_ptest_first:
6756 case Intrinsic::aarch64_sve_ptest_last:
6758 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6759 Ops.push_back(&
II->getOperandUse(0));
6760 return !
Ops.empty();
6761 case Intrinsic::aarch64_sme_write_horiz:
6762 case Intrinsic::aarch64_sme_write_vert:
6763 case Intrinsic::aarch64_sme_writeq_horiz:
6764 case Intrinsic::aarch64_sme_writeq_vert: {
6766 if (!Idx || Idx->getOpcode() != Instruction::Add)
6768 Ops.push_back(&
II->getOperandUse(1));
6771 case Intrinsic::aarch64_sme_read_horiz:
6772 case Intrinsic::aarch64_sme_read_vert:
6773 case Intrinsic::aarch64_sme_readq_horiz:
6774 case Intrinsic::aarch64_sme_readq_vert:
6775 case Intrinsic::aarch64_sme_ld1b_vert:
6776 case Intrinsic::aarch64_sme_ld1h_vert:
6777 case Intrinsic::aarch64_sme_ld1w_vert:
6778 case Intrinsic::aarch64_sme_ld1d_vert:
6779 case Intrinsic::aarch64_sme_ld1q_vert:
6780 case Intrinsic::aarch64_sme_st1b_vert:
6781 case Intrinsic::aarch64_sme_st1h_vert:
6782 case Intrinsic::aarch64_sme_st1w_vert:
6783 case Intrinsic::aarch64_sme_st1d_vert:
6784 case Intrinsic::aarch64_sme_st1q_vert:
6785 case Intrinsic::aarch64_sme_ld1b_horiz:
6786 case Intrinsic::aarch64_sme_ld1h_horiz:
6787 case Intrinsic::aarch64_sme_ld1w_horiz:
6788 case Intrinsic::aarch64_sme_ld1d_horiz:
6789 case Intrinsic::aarch64_sme_ld1q_horiz:
6790 case Intrinsic::aarch64_sme_st1b_horiz:
6791 case Intrinsic::aarch64_sme_st1h_horiz:
6792 case Intrinsic::aarch64_sme_st1w_horiz:
6793 case Intrinsic::aarch64_sme_st1d_horiz:
6794 case Intrinsic::aarch64_sme_st1q_horiz: {
6796 if (!Idx || Idx->getOpcode() != Instruction::Add)
6798 Ops.push_back(&
II->getOperandUse(3));
6801 case Intrinsic::aarch64_neon_pmull:
6804 Ops.push_back(&
II->getOperandUse(0));
6805 Ops.push_back(&
II->getOperandUse(1));
6807 case Intrinsic::aarch64_neon_pmull64:
6809 II->getArgOperand(1)))
6811 Ops.push_back(&
II->getArgOperandUse(0));
6812 Ops.push_back(&
II->getArgOperandUse(1));
6814 case Intrinsic::masked_gather:
6817 Ops.push_back(&
II->getArgOperandUse(0));
6819 case Intrinsic::masked_scatter:
6822 Ops.push_back(&
II->getArgOperandUse(1));
6829 auto ShouldSinkCondition = [](
Value *
Cond,
6834 if (
II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6838 Ops.push_back(&
II->getOperandUse(0));
6842 switch (
I->getOpcode()) {
6843 case Instruction::GetElementPtr:
6844 case Instruction::Add:
6845 case Instruction::Sub:
6847 for (
unsigned Op = 0;
Op <
I->getNumOperands(); ++
Op) {
6849 Ops.push_back(&
I->getOperandUse(
Op));
6854 case Instruction::Select: {
6855 if (!ShouldSinkCondition(
I->getOperand(0),
Ops))
6858 Ops.push_back(&
I->getOperandUse(0));
6861 case Instruction::UncondBr:
6863 case Instruction::CondBr: {
6867 Ops.push_back(&
I->getOperandUse(0));
6870 case Instruction::FMul:
6875 Ops.push_back(&
I->getOperandUse(0));
6877 Ops.push_back(&
I->getOperandUse(1));
6885 if (!
I->getType()->isVectorTy())
6886 return !
Ops.empty();
6888 switch (
I->getOpcode()) {
6889 case Instruction::Sub:
6890 case Instruction::Add: {
6899 Ops.push_back(&Ext1->getOperandUse(0));
6900 Ops.push_back(&Ext2->getOperandUse(0));
6903 Ops.push_back(&
I->getOperandUse(0));
6904 Ops.push_back(&
I->getOperandUse(1));
6908 case Instruction::Or: {
6911 if (ST->hasNEON()) {
6925 if (
I->getParent() != MainAnd->
getParent() ||
6930 if (
I->getParent() != IA->getParent() ||
6931 I->getParent() != IB->getParent())
6936 Ops.push_back(&
I->getOperandUse(0));
6937 Ops.push_back(&
I->getOperandUse(1));
6946 case Instruction::Mul: {
6947 auto ShouldSinkSplatForIndexedVariant = [](
Value *V) {
6950 if (Ty->isScalableTy())
6954 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
6957 int NumZExts = 0, NumSExts = 0;
6958 for (
auto &
Op :
I->operands()) {
6965 auto *ExtOp = Ext->getOperand(0);
6966 if (
isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
6967 Ops.push_back(&Ext->getOperandUse(0));
6975 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
6976 I->getType()->getScalarSizeInBits())
7013 if (!ElementConstant || !ElementConstant->
isZero())
7016 unsigned Opcode = OperandInstr->
getOpcode();
7017 if (Opcode == Instruction::SExt)
7019 else if (Opcode == Instruction::ZExt)
7024 unsigned Bitwidth =
I->getType()->getScalarSizeInBits();
7034 Ops.push_back(&Insert->getOperandUse(1));
7040 if (!
Ops.empty() && (NumSExts == 2 || NumZExts == 2))
7044 if (!ShouldSinkSplatForIndexedVariant(
I))
7049 Ops.push_back(&
I->getOperandUse(0));
7051 Ops.push_back(&
I->getOperandUse(1));
7053 return !
Ops.empty();
7055 case Instruction::FMul: {
7057 if (
I->getType()->isScalableTy())
7058 return !
Ops.empty();
7062 return !
Ops.empty();
7066 Ops.push_back(&
I->getOperandUse(0));
7068 Ops.push_back(&
I->getOperandUse(1));
7069 return !
Ops.empty();
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independent code generator and TargetLowering interfaces.
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
const HexagonInstrInfo * TII
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext)) if the architecture features are not present.
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
APInt getPriorityMask(const Function &F) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded into a target instruction.
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed, or null if the block is not well formed.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to calling a function.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign information.
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool noSignedZeros() const
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e. sometimes conservatively overestimates) the exact backedge-taken count.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
LLVM_ABI APInt getCpuSupportsMask(ArrayRef< StringRef > Features)
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
LLVM_ABI Libcall getPOW(EVT RetVT)
getPOW - Return the POW_* value for the given types, or UNKNOWN_LIBCALL if there is none.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...