23#include "llvm/IR/IntrinsicsAArch64.h"
35#define DEBUG_TYPE "aarch64tti"
41 "sve-prefer-fixed-over-scalable-if-equal",
cl::Hidden);
59 "Penalty of calling a function that requires a change to PSTATE.SM"));
63 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
74 cl::desc(
"The cost of a histcnt instruction"));
78 cl::desc(
"The number of instructions to search for a redundant dmb"));
82 cl::desc(
"Threshold for forced unrolling of small loops in AArch64"));
85class TailFoldingOption {
100 bool NeedsDefault =
true;
104 void setNeedsDefault(
bool V) { NeedsDefault =
V; }
119 assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
120 "Initial bits should only include one of "
121 "(disabled|all|simple|default)");
122 Bits = NeedsDefault ? DefaultBits : InitialBits;
124 Bits &= ~DisableBits;
130 errs() <<
"invalid argument '" << Opt
131 <<
"' to -sve-tail-folding=; the option should be of the form\n"
132 " (disabled|all|default|simple)[+(reductions|recurrences"
133 "|reverse|noreductions|norecurrences|noreverse)]\n";
139 void operator=(
const std::string &Val) {
148 setNeedsDefault(
false);
151 StringRef(Val).split(TailFoldTypes,
'+', -1,
false);
153 unsigned StartIdx = 1;
154 if (TailFoldTypes[0] ==
"disabled")
155 setInitialBits(TailFoldingOpts::Disabled);
156 else if (TailFoldTypes[0] ==
"all")
157 setInitialBits(TailFoldingOpts::All);
158 else if (TailFoldTypes[0] ==
"default")
159 setNeedsDefault(
true);
160 else if (TailFoldTypes[0] ==
"simple")
161 setInitialBits(TailFoldingOpts::Simple);
164 setInitialBits(TailFoldingOpts::Disabled);
167 for (
unsigned I = StartIdx;
I < TailFoldTypes.
size();
I++) {
168 if (TailFoldTypes[
I] ==
"reductions")
169 setEnableBit(TailFoldingOpts::Reductions);
170 else if (TailFoldTypes[
I] ==
"recurrences")
171 setEnableBit(TailFoldingOpts::Recurrences);
172 else if (TailFoldTypes[
I] ==
"reverse")
173 setEnableBit(TailFoldingOpts::Reverse);
174 else if (TailFoldTypes[
I] ==
"noreductions")
175 setDisableBit(TailFoldingOpts::Reductions);
176 else if (TailFoldTypes[
I] ==
"norecurrences")
177 setDisableBit(TailFoldingOpts::Recurrences);
178 else if (TailFoldTypes[
I] ==
"noreverse")
179 setDisableBit(TailFoldingOpts::Reverse);
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
258 TTI->isMultiversionedFunction(
F) ?
"fmv-features" :
"target-features";
259 StringRef FeatureStr =
F.getFnAttribute(AttributeStr).getValueAsString();
260 FeatureStr.
split(Features,
",");
276 return F.hasFnAttribute(
"fmv-features");
280 AArch64::FeatureExecuteOnly,
320 FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
321 FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;
323 return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
341 auto FVTy = dyn_cast<FixedVectorType>(Ty);
343 FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
352 unsigned DefaultCallPenalty)
const {
377 if (
F ==
Call.getCaller())
383 return DefaultCallPenalty;
394 ST->isSVEorStreamingSVEAvailable() &&
395 !ST->disableMaximizeScalableBandwidth();
419 assert(Ty->isIntegerTy());
421 unsigned BitSize = Ty->getPrimitiveSizeInBits();
428 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
433 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
439 return std::max<InstructionCost>(1,
Cost);
446 assert(Ty->isIntegerTy());
448 unsigned BitSize = Ty->getPrimitiveSizeInBits();
454 unsigned ImmIdx = ~0U;
458 case Instruction::GetElementPtr:
463 case Instruction::Store:
466 case Instruction::Add:
467 case Instruction::Sub:
468 case Instruction::Mul:
469 case Instruction::UDiv:
470 case Instruction::SDiv:
471 case Instruction::URem:
472 case Instruction::SRem:
473 case Instruction::And:
474 case Instruction::Or:
475 case Instruction::Xor:
476 case Instruction::ICmp:
480 case Instruction::Shl:
481 case Instruction::LShr:
482 case Instruction::AShr:
486 case Instruction::Trunc:
487 case Instruction::ZExt:
488 case Instruction::SExt:
489 case Instruction::IntToPtr:
490 case Instruction::PtrToInt:
491 case Instruction::BitCast:
492 case Instruction::PHI:
493 case Instruction::Call:
494 case Instruction::Select:
495 case Instruction::Ret:
496 case Instruction::Load:
501 int NumConstants = (BitSize + 63) / 64;
514 assert(Ty->isIntegerTy());
516 unsigned BitSize = Ty->getPrimitiveSizeInBits();
525 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
531 case Intrinsic::sadd_with_overflow:
532 case Intrinsic::uadd_with_overflow:
533 case Intrinsic::ssub_with_overflow:
534 case Intrinsic::usub_with_overflow:
535 case Intrinsic::smul_with_overflow:
536 case Intrinsic::umul_with_overflow:
538 int NumConstants = (BitSize + 63) / 64;
545 case Intrinsic::experimental_stackmap:
546 if ((Idx < 2) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
549 case Intrinsic::experimental_patchpoint_void:
550 case Intrinsic::experimental_patchpoint:
551 if ((Idx < 4) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
554 case Intrinsic::experimental_gc_statepoint:
555 if ((Idx < 5) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
565 if (TyWidth == 32 || TyWidth == 64)
589 unsigned TotalHistCnts = 1;
599 unsigned EC = VTy->getElementCount().getKnownMinValue();
604 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
606 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
610 TotalHistCnts = EC / NaturalVectorWidth;
630 switch (ICA.
getID()) {
631 case Intrinsic::experimental_vector_histogram_add: {
638 case Intrinsic::umin:
639 case Intrinsic::umax:
640 case Intrinsic::smin:
641 case Intrinsic::smax: {
642 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
643 MVT::v8i16, MVT::v2i32, MVT::v4i32,
644 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
648 if (LT.second == MVT::v2i64)
654 case Intrinsic::scmp:
655 case Intrinsic::ucmp: {
657 {Intrinsic::scmp, MVT::i32, 3},
658 {Intrinsic::scmp, MVT::i64, 3},
659 {Intrinsic::scmp, MVT::v8i8, 3},
660 {Intrinsic::scmp, MVT::v16i8, 3},
661 {Intrinsic::scmp, MVT::v4i16, 3},
662 {Intrinsic::scmp, MVT::v8i16, 3},
663 {Intrinsic::scmp, MVT::v2i32, 3},
664 {Intrinsic::scmp, MVT::v4i32, 3},
665 {Intrinsic::scmp, MVT::v1i64, 3},
666 {Intrinsic::scmp, MVT::v2i64, 3},
672 return Entry->Cost * LT.first;
675 case Intrinsic::sadd_sat:
676 case Intrinsic::ssub_sat:
677 case Intrinsic::uadd_sat:
678 case Intrinsic::usub_sat: {
679 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
680 MVT::v8i16, MVT::v2i32, MVT::v4i32,
686 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
688 return LT.first * Instrs;
693 if (ST->isSVEAvailable() && VectorSize >= 128 &&
isPowerOf2_64(VectorSize))
694 return LT.first * Instrs;
698 case Intrinsic::abs: {
699 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
700 MVT::v8i16, MVT::v2i32, MVT::v4i32,
707 case Intrinsic::bswap: {
708 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
709 MVT::v4i32, MVT::v2i64};
712 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
717 case Intrinsic::fmuladd: {
722 (EltTy->
isHalfTy() && ST->hasFullFP16()))
726 case Intrinsic::stepvector: {
735 Cost += AddCost * (LT.first - 1);
739 case Intrinsic::vector_extract:
740 case Intrinsic::vector_insert: {
753 bool IsExtract = ICA.
getID() == Intrinsic::vector_extract;
754 EVT SubVecVT = IsExtract ? getTLI()->getValueType(
DL, RetTy)
762 getTLI()->getTypeConversion(
C, SubVecVT);
764 getTLI()->getTypeConversion(
C, VecVT);
772 case Intrinsic::bitreverse: {
774 {Intrinsic::bitreverse, MVT::i32, 1},
775 {Intrinsic::bitreverse, MVT::i64, 1},
776 {Intrinsic::bitreverse, MVT::v8i8, 1},
777 {Intrinsic::bitreverse, MVT::v16i8, 1},
778 {Intrinsic::bitreverse, MVT::v4i16, 2},
779 {Intrinsic::bitreverse, MVT::v8i16, 2},
780 {Intrinsic::bitreverse, MVT::v2i32, 2},
781 {Intrinsic::bitreverse, MVT::v4i32, 2},
782 {Intrinsic::bitreverse, MVT::v1i64, 2},
783 {Intrinsic::bitreverse, MVT::v2i64, 2},
791 if (TLI->getValueType(
DL, RetTy,
true) == MVT::i8 ||
792 TLI->getValueType(
DL, RetTy,
true) == MVT::i16)
793 return LegalisationCost.first * Entry->Cost + 1;
795 return LegalisationCost.first * Entry->Cost;
799 case Intrinsic::ctpop: {
800 if (!ST->hasNEON()) {
821 RetTy->getScalarSizeInBits()
824 return LT.first * Entry->Cost + ExtraCost;
828 case Intrinsic::sadd_with_overflow:
829 case Intrinsic::uadd_with_overflow:
830 case Intrinsic::ssub_with_overflow:
831 case Intrinsic::usub_with_overflow:
832 case Intrinsic::smul_with_overflow:
833 case Intrinsic::umul_with_overflow: {
835 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
836 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
837 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
838 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
839 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
840 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
841 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
842 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
843 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
844 {Intrinsic::usub_with_overflow, MVT::i8, 3},
845 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
846 {Intrinsic::usub_with_overflow, MVT::i16, 3},
847 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
848 {Intrinsic::usub_with_overflow, MVT::i32, 1},
849 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
850 {Intrinsic::usub_with_overflow, MVT::i64, 1},
851 {Intrinsic::smul_with_overflow, MVT::i8, 5},
852 {Intrinsic::umul_with_overflow, MVT::i8, 4},
853 {Intrinsic::smul_with_overflow, MVT::i16, 5},
854 {Intrinsic::umul_with_overflow, MVT::i16, 4},
855 {Intrinsic::smul_with_overflow, MVT::i32, 2},
856 {Intrinsic::umul_with_overflow, MVT::i32, 2},
857 {Intrinsic::smul_with_overflow, MVT::i64, 3},
858 {Intrinsic::umul_with_overflow, MVT::i64, 3},
860 EVT MTy = TLI->getValueType(
DL, RetTy->getContainedType(0),
true);
867 case Intrinsic::fptosi_sat:
868 case Intrinsic::fptoui_sat: {
871 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
873 EVT MTy = TLI->getValueType(
DL, RetTy);
876 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
877 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
878 LT.second == MVT::v2f64)) {
880 (LT.second == MVT::f64 && MTy == MVT::i32) ||
881 (LT.second == MVT::f32 && MTy == MVT::i64)))
890 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
897 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
898 (LT.second == MVT::f16 && MTy == MVT::i64) ||
899 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
913 if ((LT.second.getScalarType() == MVT::f32 ||
914 LT.second.getScalarType() == MVT::f64 ||
915 LT.second.getScalarType() == MVT::f16) &&
919 if (LT.second.isVector())
923 LegalTy, {LegalTy, LegalTy});
926 LegalTy, {LegalTy, LegalTy});
928 return LT.first *
Cost +
929 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
935 RetTy = RetTy->getScalarType();
936 if (LT.second.isVector()) {
954 return LT.first *
Cost;
956 case Intrinsic::fshl:
957 case Intrinsic::fshr: {
966 if (RetTy->isIntegerTy() && ICA.
getArgs()[0] == ICA.
getArgs()[1] &&
967 (RetTy->getPrimitiveSizeInBits() == 32 ||
968 RetTy->getPrimitiveSizeInBits() == 64)) {
981 {Intrinsic::fshl, MVT::v4i32, 2},
982 {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
983 {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
984 {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
990 return LegalisationCost.first * Entry->Cost;
994 if (!RetTy->isIntegerTy())
999 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
1000 RetTy->getScalarSizeInBits() < 64) ||
1001 (RetTy->getScalarSizeInBits() % 64 != 0);
1002 unsigned ExtraCost = HigherCost ? 1 : 0;
1003 if (RetTy->getScalarSizeInBits() == 32 ||
1004 RetTy->getScalarSizeInBits() == 64)
1007 else if (HigherCost)
1011 return TyL.first + ExtraCost;
1013 case Intrinsic::get_active_lane_mask: {
1015 EVT RetVT = getTLI()->getValueType(
DL, RetTy);
1017 if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
1020 if (RetTy->isScalableTy()) {
1021 if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
1031 if (ST->hasSVE2p1() || ST->hasSME2()) {
1046 return Cost + (SplitCost * (
Cost - 1));
1061 case Intrinsic::experimental_vector_match: {
1064 unsigned SearchSize = NeedleTy->getNumElements();
1065 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
1078 case Intrinsic::experimental_cttz_elts: {
1080 if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
1088 case Intrinsic::loop_dependence_raw_mask:
1089 case Intrinsic::loop_dependence_war_mask: {
1091 if (ST->hasSVE2() || ST->hasSME()) {
1092 EVT VecVT = getTLI()->getValueType(
DL, RetTy);
1093 unsigned EltSizeInBytes =
1103 case Intrinsic::experimental_vector_extract_last_active:
1104 if (ST->isSVEorStreamingSVEAvailable()) {
1121 auto RequiredType =
II.getType();
1124 assert(PN &&
"Expected Phi Node!");
1127 if (!PN->hasOneUse())
1128 return std::nullopt;
1130 for (
Value *IncValPhi : PN->incoming_values()) {
1133 Reinterpret->getIntrinsicID() !=
1134 Intrinsic::aarch64_sve_convert_to_svbool ||
1135 RequiredType != Reinterpret->getArgOperand(0)->getType())
1136 return std::nullopt;
1144 for (
unsigned I = 0;
I < PN->getNumIncomingValues();
I++) {
1146 NPN->
addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(
I));
1219 return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
1224 return GoverningPredicateIdx;
1229 GoverningPredicateIdx = Index;
1247 return UndefIntrinsic;
1252 UndefIntrinsic = IID;
1274 return ResultLanes == InactiveLanesTakenFromOperand;
1279 return OperandIdxForInactiveLanes;
1283 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1284 ResultLanes = InactiveLanesTakenFromOperand;
1285 OperandIdxForInactiveLanes = Index;
1290 return ResultLanes == InactiveLanesAreNotDefined;
1294 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1295 ResultLanes = InactiveLanesAreNotDefined;
1300 return ResultLanes == InactiveLanesAreUnused;
1304 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1305 ResultLanes = InactiveLanesAreUnused;
1315 ResultIsZeroInitialized =
true;
1326 return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
1331 return OperandIdxWithNoActiveLanes;
1336 OperandIdxWithNoActiveLanes = Index;
1341 unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
1344 unsigned IROpcode = 0;
1346 enum PredicationStyle {
1348 InactiveLanesTakenFromOperand,
1349 InactiveLanesAreNotDefined,
1350 InactiveLanesAreUnused
1353 bool ResultIsZeroInitialized =
false;
1354 unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
1355 unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
1363 return !isa<ScalableVectorType>(V->getType());
1371 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1372 case Intrinsic::aarch64_sve_fcvt_f16f32:
1373 case Intrinsic::aarch64_sve_fcvt_f16f64:
1374 case Intrinsic::aarch64_sve_fcvt_f32f16:
1375 case Intrinsic::aarch64_sve_fcvt_f32f64:
1376 case Intrinsic::aarch64_sve_fcvt_f64f16:
1377 case Intrinsic::aarch64_sve_fcvt_f64f32:
1378 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1379 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1380 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1381 case Intrinsic::aarch64_sve_fcvtzs:
1382 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1383 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1384 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1385 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1386 case Intrinsic::aarch64_sve_fcvtzu:
1387 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1388 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1389 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1390 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1391 case Intrinsic::aarch64_sve_scvtf:
1392 case Intrinsic::aarch64_sve_scvtf_f16i32:
1393 case Intrinsic::aarch64_sve_scvtf_f16i64:
1394 case Intrinsic::aarch64_sve_scvtf_f32i64:
1395 case Intrinsic::aarch64_sve_scvtf_f64i32:
1396 case Intrinsic::aarch64_sve_ucvtf:
1397 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1398 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1399 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1400 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1403 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1404 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1405 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1406 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1409 case Intrinsic::aarch64_sve_fabd:
1411 case Intrinsic::aarch64_sve_fadd:
1414 case Intrinsic::aarch64_sve_fdiv:
1417 case Intrinsic::aarch64_sve_fmax:
1419 case Intrinsic::aarch64_sve_fmaxnm:
1421 case Intrinsic::aarch64_sve_fmin:
1423 case Intrinsic::aarch64_sve_fminnm:
1425 case Intrinsic::aarch64_sve_fmla:
1427 case Intrinsic::aarch64_sve_fmls:
1429 case Intrinsic::aarch64_sve_fmul:
1432 case Intrinsic::aarch64_sve_fmulx:
1434 case Intrinsic::aarch64_sve_fnmla:
1436 case Intrinsic::aarch64_sve_fnmls:
1438 case Intrinsic::aarch64_sve_fsub:
1441 case Intrinsic::aarch64_sve_add:
1444 case Intrinsic::aarch64_sve_mla:
1446 case Intrinsic::aarch64_sve_mls:
1448 case Intrinsic::aarch64_sve_mul:
1451 case Intrinsic::aarch64_sve_sabd:
1453 case Intrinsic::aarch64_sve_sdiv:
1456 case Intrinsic::aarch64_sve_smax:
1458 case Intrinsic::aarch64_sve_smin:
1460 case Intrinsic::aarch64_sve_smulh:
1462 case Intrinsic::aarch64_sve_sub:
1465 case Intrinsic::aarch64_sve_uabd:
1467 case Intrinsic::aarch64_sve_udiv:
1470 case Intrinsic::aarch64_sve_umax:
1472 case Intrinsic::aarch64_sve_umin:
1474 case Intrinsic::aarch64_sve_umulh:
1476 case Intrinsic::aarch64_sve_asr:
1479 case Intrinsic::aarch64_sve_lsl:
1482 case Intrinsic::aarch64_sve_lsr:
1485 case Intrinsic::aarch64_sve_and:
1488 case Intrinsic::aarch64_sve_bic:
1490 case Intrinsic::aarch64_sve_eor:
1493 case Intrinsic::aarch64_sve_orr:
1496 case Intrinsic::aarch64_sve_shsub:
1498 case Intrinsic::aarch64_sve_shsubr:
1500 case Intrinsic::aarch64_sve_sqrshl:
1502 case Intrinsic::aarch64_sve_sqshl:
1504 case Intrinsic::aarch64_sve_sqsub:
1506 case Intrinsic::aarch64_sve_srshl:
1508 case Intrinsic::aarch64_sve_uhsub:
1510 case Intrinsic::aarch64_sve_uhsubr:
1512 case Intrinsic::aarch64_sve_uqrshl:
1514 case Intrinsic::aarch64_sve_uqshl:
1516 case Intrinsic::aarch64_sve_uqsub:
1518 case Intrinsic::aarch64_sve_urshl:
1521 case Intrinsic::aarch64_sve_add_u:
1524 case Intrinsic::aarch64_sve_and_u:
1527 case Intrinsic::aarch64_sve_asr_u:
1530 case Intrinsic::aarch64_sve_eor_u:
1533 case Intrinsic::aarch64_sve_fadd_u:
1536 case Intrinsic::aarch64_sve_fdiv_u:
1539 case Intrinsic::aarch64_sve_fmul_u:
1542 case Intrinsic::aarch64_sve_fsub_u:
1545 case Intrinsic::aarch64_sve_lsl_u:
1548 case Intrinsic::aarch64_sve_lsr_u:
1551 case Intrinsic::aarch64_sve_mul_u:
1554 case Intrinsic::aarch64_sve_orr_u:
1557 case Intrinsic::aarch64_sve_sdiv_u:
1560 case Intrinsic::aarch64_sve_sub_u:
1563 case Intrinsic::aarch64_sve_udiv_u:
1567 case Intrinsic::aarch64_sve_addqv:
1568 case Intrinsic::aarch64_sve_and_z:
1569 case Intrinsic::aarch64_sve_bic_z:
1570 case Intrinsic::aarch64_sve_brka_z:
1571 case Intrinsic::aarch64_sve_brkb_z:
1572 case Intrinsic::aarch64_sve_brkn_z:
1573 case Intrinsic::aarch64_sve_brkpa_z:
1574 case Intrinsic::aarch64_sve_brkpb_z:
1575 case Intrinsic::aarch64_sve_cntp:
1576 case Intrinsic::aarch64_sve_compact:
1577 case Intrinsic::aarch64_sve_eor_z:
1578 case Intrinsic::aarch64_sve_eorv:
1579 case Intrinsic::aarch64_sve_eorqv:
1580 case Intrinsic::aarch64_sve_nand_z:
1581 case Intrinsic::aarch64_sve_nor_z:
1582 case Intrinsic::aarch64_sve_orn_z:
1583 case Intrinsic::aarch64_sve_orr_z:
1584 case Intrinsic::aarch64_sve_orv:
1585 case Intrinsic::aarch64_sve_orqv:
1586 case Intrinsic::aarch64_sve_pnext:
1587 case Intrinsic::aarch64_sve_rdffr_z:
1588 case Intrinsic::aarch64_sve_saddv:
1589 case Intrinsic::aarch64_sve_uaddv:
1590 case Intrinsic::aarch64_sve_umaxv:
1591 case Intrinsic::aarch64_sve_umaxqv:
1592 case Intrinsic::aarch64_sve_cmpeq:
1593 case Intrinsic::aarch64_sve_cmpeq_wide:
1594 case Intrinsic::aarch64_sve_cmpge:
1595 case Intrinsic::aarch64_sve_cmpge_wide:
1596 case Intrinsic::aarch64_sve_cmpgt:
1597 case Intrinsic::aarch64_sve_cmpgt_wide:
1598 case Intrinsic::aarch64_sve_cmphi:
1599 case Intrinsic::aarch64_sve_cmphi_wide:
1600 case Intrinsic::aarch64_sve_cmphs:
1601 case Intrinsic::aarch64_sve_cmphs_wide:
1602 case Intrinsic::aarch64_sve_cmple_wide:
1603 case Intrinsic::aarch64_sve_cmplo_wide:
1604 case Intrinsic::aarch64_sve_cmpls_wide:
1605 case Intrinsic::aarch64_sve_cmplt_wide:
1606 case Intrinsic::aarch64_sve_cmpne:
1607 case Intrinsic::aarch64_sve_cmpne_wide:
1608 case Intrinsic::aarch64_sve_facge:
1609 case Intrinsic::aarch64_sve_facgt:
1610 case Intrinsic::aarch64_sve_fcmpeq:
1611 case Intrinsic::aarch64_sve_fcmpge:
1612 case Intrinsic::aarch64_sve_fcmpgt:
1613 case Intrinsic::aarch64_sve_fcmpne:
1614 case Intrinsic::aarch64_sve_fcmpuo:
1615 case Intrinsic::aarch64_sve_ld1:
1616 case Intrinsic::aarch64_sve_ld1_gather:
1617 case Intrinsic::aarch64_sve_ld1_gather_index:
1618 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1619 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1620 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1621 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1622 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1623 case Intrinsic::aarch64_sve_ld1q_gather_index:
1624 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1625 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1626 case Intrinsic::aarch64_sve_ld1ro:
1627 case Intrinsic::aarch64_sve_ld1rq:
1628 case Intrinsic::aarch64_sve_ld1udq:
1629 case Intrinsic::aarch64_sve_ld1uwq:
1630 case Intrinsic::aarch64_sve_ld2_sret:
1631 case Intrinsic::aarch64_sve_ld2q_sret:
1632 case Intrinsic::aarch64_sve_ld3_sret:
1633 case Intrinsic::aarch64_sve_ld3q_sret:
1634 case Intrinsic::aarch64_sve_ld4_sret:
1635 case Intrinsic::aarch64_sve_ld4q_sret:
1636 case Intrinsic::aarch64_sve_ldff1:
1637 case Intrinsic::aarch64_sve_ldff1_gather:
1638 case Intrinsic::aarch64_sve_ldff1_gather_index:
1639 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1640 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1641 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1642 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1643 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1644 case Intrinsic::aarch64_sve_ldnf1:
1645 case Intrinsic::aarch64_sve_ldnt1:
1646 case Intrinsic::aarch64_sve_ldnt1_gather:
1647 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1648 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1649 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1652 case Intrinsic::aarch64_sve_prf:
1653 case Intrinsic::aarch64_sve_prfb_gather_index:
1654 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1655 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1656 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1657 case Intrinsic::aarch64_sve_prfd_gather_index:
1658 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1659 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1660 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1661 case Intrinsic::aarch64_sve_prfh_gather_index:
1662 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1663 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1664 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1665 case Intrinsic::aarch64_sve_prfw_gather_index:
1666 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1667 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1668 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1671 case Intrinsic::aarch64_sve_st1_scatter:
1672 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1673 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1674 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1675 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1676 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1677 case Intrinsic::aarch64_sve_st1dq:
1678 case Intrinsic::aarch64_sve_st1q_scatter_index:
1679 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1680 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1681 case Intrinsic::aarch64_sve_st1wq:
1682 case Intrinsic::aarch64_sve_stnt1:
1683 case Intrinsic::aarch64_sve_stnt1_scatter:
1684 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1685 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1686 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1688 case Intrinsic::aarch64_sve_st2:
1689 case Intrinsic::aarch64_sve_st2q:
1691 case Intrinsic::aarch64_sve_st3:
1692 case Intrinsic::aarch64_sve_st3q:
1694 case Intrinsic::aarch64_sve_st4:
1695 case Intrinsic::aarch64_sve_st4q:
1703 Value *UncastedPred;
1709 Pred = UncastedPred;
1715 if (OrigPredTy->getMinNumElements() <=
1717 ->getMinNumElements())
1718 Pred = UncastedPred;
1722 return C &&
C->isAllOnesValue();
1729 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
1730 Dup->getOperand(1) == Pg &&
isa<Constant>(Dup->getOperand(2)))
1738static std::optional<Instruction *>
1745 Value *Op1 =
II.getOperand(1);
1746 Value *Op2 =
II.getOperand(2);
1772 return std::nullopt;
1780 if (SimpleII == Inactive)
1790static std::optional<Instruction *>
1794 return std::nullopt;
1823 II.setCalledFunction(NewDecl);
1833 return std::nullopt;
1845static std::optional<Instruction *>
1849 return std::nullopt;
1851 auto IntrinsicID = BinOp->getIntrinsicID();
1852 switch (IntrinsicID) {
1853 case Intrinsic::aarch64_sve_and_z:
1854 case Intrinsic::aarch64_sve_bic_z:
1855 case Intrinsic::aarch64_sve_eor_z:
1856 case Intrinsic::aarch64_sve_nand_z:
1857 case Intrinsic::aarch64_sve_nor_z:
1858 case Intrinsic::aarch64_sve_orn_z:
1859 case Intrinsic::aarch64_sve_orr_z:
1862 return std::nullopt;
1865 auto BinOpPred = BinOp->getOperand(0);
1866 auto BinOpOp1 = BinOp->getOperand(1);
1867 auto BinOpOp2 = BinOp->getOperand(2);
1871 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
1872 return std::nullopt;
1874 auto PredOp = PredIntr->getOperand(0);
1876 if (PredOpTy !=
II.getType())
1877 return std::nullopt;
1881 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
1882 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1883 if (BinOpOp1 == BinOpOp2)
1884 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1887 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
1889 auto NarrowedBinOp =
1894static std::optional<Instruction *>
1901 return BinOpCombine;
1906 return std::nullopt;
1909 Value *Cursor =
II.getOperand(0), *EarliestReplacement =
nullptr;
1918 if (CursorVTy->getElementCount().getKnownMinValue() <
1919 IVTy->getElementCount().getKnownMinValue())
1923 if (Cursor->getType() == IVTy)
1924 EarliestReplacement = Cursor;
1929 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
1930 Intrinsic::aarch64_sve_convert_to_svbool ||
1931 IntrinsicCursor->getIntrinsicID() ==
1932 Intrinsic::aarch64_sve_convert_from_svbool))
1935 CandidatesForRemoval.
insert(CandidatesForRemoval.
begin(), IntrinsicCursor);
1936 Cursor = IntrinsicCursor->getOperand(0);
1941 if (!EarliestReplacement)
1942 return std::nullopt;
1950 auto *OpPredicate =
II.getOperand(0);
1967 II.getArgOperand(2));
1973 return std::nullopt;
1977 II.getArgOperand(0),
II.getArgOperand(2),
uint64_t(0));
1986 II.getArgOperand(0));
1996 return std::nullopt;
2001 if (!SplatValue || !SplatValue->isZero())
2002 return std::nullopt;
2007 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
2008 return std::nullopt;
2012 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
2013 return std::nullopt;
2016 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
2017 return std::nullopt;
2022 return std::nullopt;
2025 return std::nullopt;
2029 return std::nullopt;
2033 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
2034 return std::nullopt;
2036 unsigned NumElts = VecTy->getNumElements();
2037 unsigned PredicateBits = 0;
2040 for (
unsigned I = 0;
I < NumElts; ++
I) {
2043 return std::nullopt;
2045 PredicateBits |= 1 << (
I * (16 / NumElts));
2049 if (PredicateBits == 0) {
2051 PFalse->takeName(&
II);
2057 for (
unsigned I = 0;
I < 16; ++
I)
2058 if ((PredicateBits & (1 <<
I)) != 0)
2061 unsigned PredSize = Mask & -Mask;
2066 for (
unsigned I = 0;
I < 16;
I += PredSize)
2067 if ((PredicateBits & (1 <<
I)) == 0)
2068 return std::nullopt;
2073 {PredType}, {PTruePat});
2075 Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
2076 auto *ConvertFromSVBool =
2078 {
II.getType()}, {ConvertToSVBool});
2086 Value *Pg =
II.getArgOperand(0);
2087 Value *Vec =
II.getArgOperand(1);
2088 auto IntrinsicID =
II.getIntrinsicID();
2089 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
2101 auto OpC = OldBinOp->getOpcode();
2107 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(),
II.getIterator());
2113 if (IsAfter &&
C &&
C->isNullValue()) {
2117 Extract->insertBefore(
II.getIterator());
2118 Extract->takeName(&
II);
2124 return std::nullopt;
2126 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
2127 return std::nullopt;
2129 const auto PTruePattern =
2135 return std::nullopt;
2137 unsigned Idx = MinNumElts - 1;
2147 if (Idx >= PgVTy->getMinNumElements())
2148 return std::nullopt;
2153 Extract->insertBefore(
II.getIterator());
2154 Extract->takeName(&
II);
2167 Value *Pg =
II.getArgOperand(0);
2169 Value *Vec =
II.getArgOperand(2);
2172 if (!Ty->isIntegerTy())
2173 return std::nullopt;
2178 return std::nullopt;
2195 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
2208 {
II.getType()}, {AllPat});
2215static std::optional<Instruction *>
2219 if (
Pattern == AArch64SVEPredPattern::all) {
2228 return MinNumElts && NumElts >= MinNumElts
2230 II, ConstantInt::get(
II.getType(), MinNumElts)))
2234static std::optional<Instruction *>
2237 if (!ST->isStreaming())
2238 return std::nullopt;
2250 Value *PgVal =
II.getArgOperand(0);
2251 Value *OpVal =
II.getArgOperand(1);
2255 if (PgVal == OpVal &&
2256 (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
2257 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
2272 return std::nullopt;
2276 if (Pg->
getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
2277 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
2291 if ((Pg ==
Op) && (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
2292 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
2293 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
2294 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
2295 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
2296 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
2297 (OpIID == Intrinsic::aarch64_sve_and_z) ||
2298 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
2299 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
2300 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
2301 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
2302 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
2303 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
2313 return std::nullopt;
2316template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
2317static std::optional<Instruction *>
2319 bool MergeIntoAddendOp) {
2321 Value *MulOp0, *MulOp1, *AddendOp, *
Mul;
2322 if (MergeIntoAddendOp) {
2323 AddendOp =
II.getOperand(1);
2324 Mul =
II.getOperand(2);
2326 AddendOp =
II.getOperand(2);
2327 Mul =
II.getOperand(1);
2332 return std::nullopt;
2334 if (!
Mul->hasOneUse())
2335 return std::nullopt;
2338 if (
II.getType()->isFPOrFPVectorTy()) {
2343 return std::nullopt;
2345 return std::nullopt;
2350 if (MergeIntoAddendOp)
2360static std::optional<Instruction *>
2362 Value *Pred =
II.getOperand(0);
2363 Value *PtrOp =
II.getOperand(1);
2364 Type *VecTy =
II.getType();
2368 Load->copyMetadata(
II);
2379static std::optional<Instruction *>
2381 Value *VecOp =
II.getOperand(0);
2382 Value *Pred =
II.getOperand(1);
2383 Value *PtrOp =
II.getOperand(2);
2387 Store->copyMetadata(
II);
2399 case Intrinsic::aarch64_sve_fmul_u:
2400 return Instruction::BinaryOps::FMul;
2401 case Intrinsic::aarch64_sve_fadd_u:
2402 return Instruction::BinaryOps::FAdd;
2403 case Intrinsic::aarch64_sve_fsub_u:
2404 return Instruction::BinaryOps::FSub;
2406 return Instruction::BinaryOpsEnd;
2410static std::optional<Instruction *>
2413 if (
II.isStrictFP())
2414 return std::nullopt;
2416 auto *OpPredicate =
II.getOperand(0);
2418 if (BinOpCode == Instruction::BinaryOpsEnd ||
2420 return std::nullopt;
2422 BinOpCode,
II.getOperand(1),
II.getOperand(2),
II.getFastMathFlags());
2429 Intrinsic::aarch64_sve_mla>(
2433 Intrinsic::aarch64_sve_mad>(
2436 return std::nullopt;
2439static std::optional<Instruction *>
2443 Intrinsic::aarch64_sve_fmla>(IC,
II,
2448 Intrinsic::aarch64_sve_fmad>(IC,
II,
2453 Intrinsic::aarch64_sve_fmla>(IC,
II,
2456 return std::nullopt;
2459static std::optional<Instruction *>
2463 Intrinsic::aarch64_sve_fmla>(IC,
II,
2468 Intrinsic::aarch64_sve_fmad>(IC,
II,
2473 Intrinsic::aarch64_sve_fmla_u>(
2479static std::optional<Instruction *>
2483 Intrinsic::aarch64_sve_fmls>(IC,
II,
2488 Intrinsic::aarch64_sve_fnmsb>(
2493 Intrinsic::aarch64_sve_fmls>(IC,
II,
2496 return std::nullopt;
2499static std::optional<Instruction *>
2503 Intrinsic::aarch64_sve_fmls>(IC,
II,
2508 Intrinsic::aarch64_sve_fnmsb>(
2513 Intrinsic::aarch64_sve_fmls_u>(
2522 Intrinsic::aarch64_sve_mls>(
2525 return std::nullopt;
2530 Value *UnpackArg =
II.getArgOperand(0);
2532 bool IsSigned =
II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
2533 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
2546 return std::nullopt;
2550 auto *OpVal =
II.getOperand(0);
2551 auto *OpIndices =
II.getOperand(1);
2558 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
2559 return std::nullopt;
2574 Type *RetTy =
II.getType();
2575 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
2576 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
2580 if ((
match(
II.getArgOperand(0),
2587 if (TyA ==
B->getType() &&
2592 TyA->getMinNumElements());
2598 return std::nullopt;
2606 if (
match(
II.getArgOperand(0),
2611 II, (
II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ?
A :
B));
2613 return std::nullopt;
2616static std::optional<Instruction *>
2618 Value *Mask =
II.getOperand(0);
2619 Value *BasePtr =
II.getOperand(1);
2620 Value *Index =
II.getOperand(2);
2631 BasePtr->getPointerAlignment(
II.getDataLayout());
2634 BasePtr, IndexBase);
2641 return std::nullopt;
2644static std::optional<Instruction *>
2646 Value *Val =
II.getOperand(0);
2647 Value *Mask =
II.getOperand(1);
2648 Value *BasePtr =
II.getOperand(2);
2649 Value *Index =
II.getOperand(3);
2659 BasePtr->getPointerAlignment(
II.getDataLayout());
2662 BasePtr, IndexBase);
2668 return std::nullopt;
2674 Value *Pred =
II.getOperand(0);
2675 Value *Vec =
II.getOperand(1);
2676 Value *DivVec =
II.getOperand(2);
2680 if (!SplatConstantInt)
2681 return std::nullopt;
2685 if (DivisorValue == -1)
2686 return std::nullopt;
2687 if (DivisorValue == 1)
2693 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2700 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2702 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2706 return std::nullopt;
2710 size_t VecSize = Vec.
size();
2715 size_t HalfVecSize = VecSize / 2;
2719 if (*
LHS !=
nullptr && *
RHS !=
nullptr) {
2727 if (*
LHS ==
nullptr && *
RHS !=
nullptr)
2745 return std::nullopt;
2752 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2753 CurrentInsertElt = InsertElt->getOperand(0);
2759 return std::nullopt;
2763 for (
size_t I = 0;
I < Elts.
size();
I++) {
2764 if (Elts[
I] ==
nullptr)
2769 if (InsertEltChain ==
nullptr)
2770 return std::nullopt;
2776 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.
size();
2777 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2778 IIScalableTy->getMinNumElements() /
2783 auto *WideShuffleMaskTy =
2794 auto NarrowBitcast =
2807 return std::nullopt;
2812 Value *Pred =
II.getOperand(0);
2813 Value *Vec =
II.getOperand(1);
2814 Value *Shift =
II.getOperand(2);
2817 Value *AbsPred, *MergedValue;
2823 return std::nullopt;
2831 return std::nullopt;
2836 return std::nullopt;
2839 {
II.getType()}, {Pred, Vec, Shift});
2846 Value *Vec =
II.getOperand(0);
2851 return std::nullopt;
2857 auto *NI =
II.getNextNode();
2860 return !
I->mayReadOrWriteMemory() && !
I->mayHaveSideEffects();
2862 while (LookaheadThreshold-- && CanSkipOver(NI)) {
2863 auto *NIBB = NI->getParent();
2864 NI = NI->getNextNode();
2866 if (
auto *SuccBB = NIBB->getUniqueSuccessor())
2867 NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();
2873 if (NextII &&
II.isIdenticalTo(NextII))
2876 return std::nullopt;
2884 {II.getType(), II.getOperand(0)->getType()},
2885 {II.getOperand(0), II.getOperand(1)}));
2892 return std::nullopt;
2898 Value *Passthru =
II.getOperand(0);
2906 auto *Mask = ConstantInt::get(Ty, MaskValue);
2912 return std::nullopt;
2915static std::optional<Instruction *>
2922 return std::nullopt;
2925std::optional<Instruction *>
2936 case Intrinsic::aarch64_dmb:
2938 case Intrinsic::aarch64_neon_fmaxnm:
2939 case Intrinsic::aarch64_neon_fminnm:
2941 case Intrinsic::aarch64_sve_convert_from_svbool:
2943 case Intrinsic::aarch64_sve_dup:
2945 case Intrinsic::aarch64_sve_dup_x:
2947 case Intrinsic::aarch64_sve_cmpne:
2948 case Intrinsic::aarch64_sve_cmpne_wide:
2950 case Intrinsic::aarch64_sve_rdffr:
2952 case Intrinsic::aarch64_sve_lasta:
2953 case Intrinsic::aarch64_sve_lastb:
2955 case Intrinsic::aarch64_sve_clasta_n:
2956 case Intrinsic::aarch64_sve_clastb_n:
2958 case Intrinsic::aarch64_sve_cntd:
2960 case Intrinsic::aarch64_sve_cntw:
2962 case Intrinsic::aarch64_sve_cnth:
2964 case Intrinsic::aarch64_sve_cntb:
2966 case Intrinsic::aarch64_sme_cntsd:
2968 case Intrinsic::aarch64_sve_ptest_any:
2969 case Intrinsic::aarch64_sve_ptest_first:
2970 case Intrinsic::aarch64_sve_ptest_last:
2972 case Intrinsic::aarch64_sve_fadd:
2974 case Intrinsic::aarch64_sve_fadd_u:
2976 case Intrinsic::aarch64_sve_fmul_u:
2978 case Intrinsic::aarch64_sve_fsub:
2980 case Intrinsic::aarch64_sve_fsub_u:
2982 case Intrinsic::aarch64_sve_add:
2984 case Intrinsic::aarch64_sve_add_u:
2986 Intrinsic::aarch64_sve_mla_u>(
2988 case Intrinsic::aarch64_sve_sub:
2990 case Intrinsic::aarch64_sve_sub_u:
2992 Intrinsic::aarch64_sve_mls_u>(
2994 case Intrinsic::aarch64_sve_tbl:
2996 case Intrinsic::aarch64_sve_uunpkhi:
2997 case Intrinsic::aarch64_sve_uunpklo:
2998 case Intrinsic::aarch64_sve_sunpkhi:
2999 case Intrinsic::aarch64_sve_sunpklo:
3001 case Intrinsic::aarch64_sve_uzp1:
3003 case Intrinsic::aarch64_sve_zip1:
3004 case Intrinsic::aarch64_sve_zip2:
3006 case Intrinsic::aarch64_sve_ld1_gather_index:
3008 case Intrinsic::aarch64_sve_st1_scatter_index:
3010 case Intrinsic::aarch64_sve_ld1:
3012 case Intrinsic::aarch64_sve_st1:
3014 case Intrinsic::aarch64_sve_sdiv:
3016 case Intrinsic::aarch64_sve_sel:
3018 case Intrinsic::aarch64_sve_srshl:
3020 case Intrinsic::aarch64_sve_dupq_lane:
3022 case Intrinsic::aarch64_sve_insr:
3024 case Intrinsic::aarch64_sve_whilelo:
3026 case Intrinsic::aarch64_sve_ptrue:
3028 case Intrinsic::aarch64_sve_uxtb:
3030 case Intrinsic::aarch64_sve_uxth:
3032 case Intrinsic::aarch64_sve_uxtw:
3034 case Intrinsic::aarch64_sme_in_streaming_mode:
3038 return std::nullopt;
3045 SimplifyAndSetOp)
const {
3046 switch (
II.getIntrinsicID()) {
3049 case Intrinsic::aarch64_neon_fcvtxn:
3050 case Intrinsic::aarch64_neon_rshrn:
3051 case Intrinsic::aarch64_neon_sqrshrn:
3052 case Intrinsic::aarch64_neon_sqrshrun:
3053 case Intrinsic::aarch64_neon_sqshrn:
3054 case Intrinsic::aarch64_neon_sqshrun:
3055 case Intrinsic::aarch64_neon_sqxtn:
3056 case Intrinsic::aarch64_neon_sqxtun:
3057 case Intrinsic::aarch64_neon_uqrshrn:
3058 case Intrinsic::aarch64_neon_uqshrn:
3059 case Intrinsic::aarch64_neon_uqxtn:
3060 SimplifyAndSetOp(&
II, 0, OrigDemandedElts, UndefElts);
3064 return std::nullopt;
3068 return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3078 if (ST->useSVEForFixedLengthVectors() &&
3081 std::max(ST->getMinSVEVectorSizeInBits(), 128u));
3082 else if (ST->isNeonAvailable())
3087 if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3096bool AArch64TTIImpl::isSingleExtWideningInstruction(
3098 Type *SrcOverrideTy)
const {
3113 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3116 Type *SrcTy = SrcOverrideTy;
3118 case Instruction::Add:
3119 case Instruction::Sub: {
3128 if (Opcode == Instruction::Sub)
3152 assert(SrcTy &&
"Expected some SrcTy");
3154 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
3160 DstTyL.first * DstTyL.second.getVectorMinNumElements();
3162 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
3166 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
3169Type *AArch64TTIImpl::isBinExtWideningInstruction(
unsigned Opcode,
Type *DstTy,
3171 Type *SrcOverrideTy)
const {
3172 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
3173 Opcode != Instruction::Mul)
3183 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3186 auto getScalarSizeWithOverride = [&](
const Value *
V) {
3192 ->getScalarSizeInBits();
3195 unsigned MaxEltSize = 0;
3198 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3199 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3200 MaxEltSize = std::max(EltSize0, EltSize1);
3203 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3204 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3207 if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
3209 MaxEltSize = DstEltSize / 2;
3210 }
else if (Opcode == Instruction::Mul &&
3223 getScalarSizeWithOverride(
isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);
3227 if (MaxEltSize * 2 > DstEltSize)
3245 if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(
DL, Src)) ||
3246 (Src->isScalableTy() && !ST->hasSVE2()))
3256 if (AddUser && AddUser->getOpcode() == Instruction::Add)
3260 if (!Shr || Shr->getOpcode() != Instruction::LShr)
3264 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
3265 Src->getScalarSizeInBits() !=
3289 int ISD = TLI->InstructionOpcodeToISD(Opcode);
3293 if (
I &&
I->hasOneUser()) {
3296 if (
Type *ExtTy = isBinExtWideningInstruction(
3297 SingleUser->getOpcode(), Dst, Operands,
3298 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3311 if (isSingleExtWideningInstruction(
3312 SingleUser->getOpcode(), Dst, Operands,
3313 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3317 if (SingleUser->getOpcode() == Instruction::Add) {
3318 if (
I == SingleUser->getOperand(1) ||
3320 cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
3335 EVT SrcTy = TLI->getValueType(
DL, Src);
3336 EVT DstTy = TLI->getValueType(
DL, Dst);
3338 if (!SrcTy.isSimple() || !DstTy.
isSimple())
3343 if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
3375 const unsigned int SVE_EXT_COST = 1;
3376 const unsigned int SVE_FCVT_COST = 1;
3377 const unsigned int SVE_UNPACK_ONCE = 4;
3378 const unsigned int SVE_UNPACK_TWICE = 16;
3507 SVE_EXT_COST + SVE_FCVT_COST},
3512 SVE_EXT_COST + SVE_FCVT_COST},
3519 SVE_EXT_COST + SVE_FCVT_COST},
3523 SVE_EXT_COST + SVE_FCVT_COST},
3529 SVE_EXT_COST + SVE_FCVT_COST},
3532 SVE_EXT_COST + SVE_FCVT_COST},
3537 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3539 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3549 SVE_EXT_COST + SVE_FCVT_COST},
3554 SVE_EXT_COST + SVE_FCVT_COST},
3567 SVE_EXT_COST + SVE_FCVT_COST},
3571 SVE_EXT_COST + SVE_FCVT_COST},
3583 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3585 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3587 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3589 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3593 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3595 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3611 SVE_EXT_COST + SVE_FCVT_COST},
3616 SVE_EXT_COST + SVE_FCVT_COST},
3627 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3629 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3631 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3633 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3635 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3637 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3641 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3643 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3645 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3647 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3846 EVT WiderTy = SrcTy.
bitsGT(DstTy) ? SrcTy : DstTy;
3849 ST->useSVEForFixedLengthVectors(WiderTy)) {
3850 std::pair<InstructionCost, MVT> LT =
3852 unsigned NumElements =
3891 if (ST->hasFullFP16())
3903 Src->getScalarType(), CCH,
CostKind) +
3911 ST->isSVEorStreamingSVEAvailable() &&
3912 TLI->getTypeAction(Src->getContext(), SrcTy) ==
3914 TLI->getTypeAction(Dst->getContext(), DstTy) ==
3923 Opcode, LegalTy, Src, CCH,
CostKind,
I);
3926 return Part1 + Part2;
3933 ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))
3945 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
3958 CostKind, Index,
nullptr,
nullptr);
3962 auto DstVT = TLI->getValueType(
DL, Dst);
3963 auto SrcVT = TLI->getValueType(
DL, Src);
3968 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
3974 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
3984 case Instruction::SExt:
3989 case Instruction::ZExt:
3990 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
4003 return Opcode == Instruction::PHI ? 0 : 1;
4012 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4021 if (!LT.second.isVector())
4026 if (LT.second.isFixedLengthVector()) {
4027 unsigned Width = LT.second.getVectorNumElements();
4028 Index = Index % Width;
4043 if (ST->hasFastLD1Single())
4055 : ST->getVectorInsertExtractBaseCost() + 1;
4079 auto ExtractCanFuseWithFmul = [&]() {
4086 auto IsAllowedScalarTy = [&](
const Type *
T) {
4087 return T->isFloatTy() ||
T->isDoubleTy() ||
4088 (
T->isHalfTy() && ST->hasFullFP16());
4092 auto IsUserFMulScalarTy = [](
const Value *EEUser) {
4095 return BO && BO->getOpcode() == BinaryOperator::FMul &&
4096 !BO->getType()->isVectorTy();
4101 auto IsExtractLaneEquivalentToZero = [&](
unsigned Idx,
unsigned EltSz) {
4105 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
4114 DenseMap<User *, unsigned> UserToExtractIdx;
4115 for (
auto *U :
Scalar->users()) {
4116 if (!IsUserFMulScalarTy(U))
4120 UserToExtractIdx[
U];
4122 if (UserToExtractIdx.
empty())
4124 for (
auto &[S, U, L] : ScalarUserAndIdx) {
4125 for (
auto *U : S->users()) {
4126 if (UserToExtractIdx.
contains(U)) {
4128 auto *Op0 =
FMul->getOperand(0);
4129 auto *Op1 =
FMul->getOperand(1);
4130 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
4131 UserToExtractIdx[
U] =
L;
4137 for (
auto &[U, L] : UserToExtractIdx) {
4149 return !EE->users().empty() &&
all_of(EE->users(), [&](
const User *U) {
4150 if (!IsUserFMulScalarTy(U))
4155 const auto *BO = cast<BinaryOperator>(U);
4156 const auto *OtherEE = dyn_cast<ExtractElementInst>(
4157 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
4159 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
4162 return IsExtractLaneEquivalentToZero(
4163 cast<ConstantInt>(OtherEE->getIndexOperand())
4166 OtherEE->getType()->getScalarSizeInBits());
4174 if (Opcode == Instruction::ExtractElement && (
I || Scalar) &&
4175 ExtractCanFuseWithFmul())
4180 :
ST->getVectorInsertExtractBaseCost();
4189 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
4192 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr,
4198 Value *Scalar,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4200 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr, Scalar,
4201 ScalarUserAndIdx, VIC);
4208 return getVectorInstrCostHelper(
I.getOpcode(), Val,
CostKind, Index, &
I,
4215 unsigned Index)
const {
4227 : ST->getVectorInsertExtractBaseCost() + 1;
4236 if (Ty->getElementType()->isFloatingPointTy())
4239 unsigned VecInstCost =
4241 return DemandedElts.
popcount() * (Insert + Extract) * VecInstCost;
4248 if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
4249 return std::nullopt;
4250 if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
4251 return std::nullopt;
4252 if (CanUseSVE && Ty->isScalableTy() && ST->hasSVEB16B16() &&
4253 ST->isNonStreamingSVEorSME2Available())
4254 return std::nullopt;
4261 Cost += InstCost(PromotedTy);
4284 Op2Info, Args, CxtI);
4288 int ISD = TLI->InstructionOpcodeToISD(Opcode);
4295 Ty,
CostKind, Op1Info, Op2Info,
true,
4298 [&](
Type *PromotedTy) {
4302 return *PromotedCost;
4308 if (
Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {
4375 auto VT = TLI->getValueType(
DL, Ty);
4376 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
4380 : (3 * AsrCost + AddCost);
4382 return MulCost + AsrCost + 2 * AddCost;
4384 }
else if (VT.isVector()) {
4394 if (Ty->isScalableTy() && ST->hasSVE())
4395 Cost += 2 * AsrCost;
4400 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
4404 }
else if (LT.second == MVT::v2i64) {
4405 return VT.getVectorNumElements() *
4412 if (Ty->isScalableTy() && ST->hasSVE())
4413 return MulCost + 2 * AddCost + 2 * AsrCost;
4414 return 2 * MulCost + AddCost + AsrCost + UsraCost;
4419 LT.second.isFixedLengthVector()) {
4429 return ExtractCost + InsertCost +
4437 auto VT = TLI->getValueType(
DL, Ty);
4453 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
4454 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
4455 LT.second == MVT::nxv16i8;
4456 bool Is128bit = LT.second.is128BitVector();
4468 (HasMULH ? 0 : ShrCost) +
4469 AddCost * 2 + ShrCost;
4470 return DivCost + (
ISD ==
ISD::UREM ? MulCost + AddCost : 0);
4477 if (!VT.isVector() && VT.getSizeInBits() > 64)
4481 Opcode, Ty,
CostKind, Op1Info, Op2Info);
4483 if (TLI->isOperationLegalOrCustom(
ISD, LT.second) && ST->hasSVE()) {
4487 Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
4497 if (
nullptr != Entry)
4502 if (LT.second.getScalarType() == MVT::i8)
4504 else if (LT.second.getScalarType() == MVT::i16)
4516 Opcode, Ty->getScalarType(),
CostKind, Op1Info, Op2Info);
4517 return (4 + DivCost) * VTy->getNumElements();
4523 -1,
nullptr,
nullptr);
4537 if (LT.second == MVT::v2i64 && ST->hasSVE())
4550 if (LT.second != MVT::v2i64)
4572 if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
4573 (Ty->isHalfTy() && ST->hasFullFP16())) &&
4582 if (!Ty->getScalarType()->isFP128Ty())
4589 if (!Ty->getScalarType()->isFP128Ty())
4590 return 2 * LT.first;
4597 if (!Ty->isVectorTy())
4613 int MaxMergeDistance = 64;
4617 return NumVectorInstToHideOverhead;
4627 unsigned Opcode1,
unsigned Opcode2)
const {
4630 if (!
Sched.hasInstrSchedModel())
4634 Sched.getSchedClassDesc(
TII->get(Opcode1).getSchedClass());
4636 Sched.getSchedClassDesc(
TII->get(Opcode2).getSchedClass());
4642 "Cannot handle variant scheduling classes without an MI");
4658 const int AmortizationCost = 20;
4666 VecPred = CurrentPred;
4674 static const auto ValidMinMaxTys = {
4675 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
4676 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
4677 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
4681 (ST->hasFullFP16() &&
4687 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4688 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4689 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4690 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4691 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4692 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4693 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4694 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4695 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4696 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4697 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
4699 EVT SelCondTy = TLI->getValueType(
DL, CondTy);
4700 EVT SelValTy = TLI->getValueType(
DL, ValTy);
4709 if (Opcode == Instruction::FCmp) {
4711 ValTy,
CostKind, Op1Info, Op2Info,
false,
4713 false, [&](
Type *PromotedTy) {
4725 return *PromotedCost;
4729 if (LT.second.getScalarType() != MVT::f64 &&
4730 LT.second.getScalarType() != MVT::f32 &&
4731 LT.second.getScalarType() != MVT::f16)
4736 unsigned Factor = 1;
4751 AArch64::FCMEQv4f32))
4763 TLI->isTypeLegal(TLI->getValueType(
DL, ValTy)) &&
4782 Op1Info, Op2Info,
I);
4788 if (ST->requiresStrictAlign()) {
4793 Options.AllowOverlappingLoads =
true;
4794 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
4799 Options.LoadSizes = {8, 4, 2, 1};
4800 Options.AllowedTailExpansions = {3, 5, 6};
4805 return ST->hasSVE();
4811 switch (MICA.
getID()) {
4812 case Intrinsic::masked_scatter:
4813 case Intrinsic::masked_gather:
4815 case Intrinsic::masked_load:
4816 case Intrinsic::masked_store:
4830 if (!LT.first.isValid())
4835 if (VT->getElementType()->isIntegerTy(1))
4852 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4853 "Should be called on only load or stores.");
4855 case Instruction::Load:
4858 return ST->getGatherOverhead();
4860 case Instruction::Store:
4863 return ST->getScatterOverhead();
4874 unsigned Opcode = (MICA.
getID() == Intrinsic::masked_gather ||
4875 MICA.
getID() == Intrinsic::vp_gather)
4877 : Instruction::Store;
4887 if (!LT.first.isValid())
4891 if (!LT.second.isVector() ||
4893 VT->getElementType()->isIntegerTy(1))
4903 ElementCount LegalVF = LT.second.getVectorElementCount();
4906 {TTI::OK_AnyValue, TTI::OP_None},
I);
4922 EVT VT = TLI->getValueType(
DL, Ty,
true);
4924 if (VT == MVT::Other)
4929 if (!LT.first.isValid())
4939 (VTy->getElementType()->isIntegerTy(1) &&
4940 !VTy->getElementCount().isKnownMultipleOf(
4951 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
4952 LT.second.is128BitVector() && Alignment <
Align(16)) {
4958 const int AmortizationCost = 6;
4960 return LT.first * 2 * AmortizationCost;
4964 if (Ty->isPtrOrPtrVectorTy())
4969 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
4971 if (VT == MVT::v4i8)
4978 if (!
isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
4993 while (!TypeWorklist.
empty()) {
5015 bool UseMaskForCond,
bool UseMaskForGaps)
const {
5016 assert(Factor >= 2 &&
"Invalid interleave factor");
5031 if (!VecTy->
isScalableTy() && (UseMaskForCond || UseMaskForGaps))
5034 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
5035 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
5038 VecVTy->getElementCount().divideCoefficientBy(Factor));
5044 if (MinElts % Factor == 0 &&
5045 TLI->isLegalInterleavedAccessType(SubVecTy,
DL, UseScalable))
5046 return Factor * TLI->getNumInterleavedAccesses(SubVecTy,
DL, UseScalable);
5051 UseMaskForCond, UseMaskForGaps);
5058 for (
auto *
I : Tys) {
5059 if (!
I->isVectorTy())
5070 return ST->getMaxInterleaveFactor();
5080 enum { MaxStridedLoads = 7 };
5082 int StridedLoads = 0;
5085 for (
const auto BB : L->blocks()) {
5086 for (
auto &
I : *BB) {
5092 if (L->isLoopInvariant(PtrValue))
5097 if (!LSCEVAddRec || !LSCEVAddRec->
isAffine())
5106 if (StridedLoads > MaxStridedLoads / 2)
5107 return StridedLoads;
5110 return StridedLoads;
5113 int StridedLoads = countStridedLoads(L, SE);
5115 <<
" strided loads\n");
5131 unsigned *FinalSize) {
5135 for (
auto *BB : L->getBlocks()) {
5136 for (
auto &
I : *BB) {
5142 if (!Cost.isValid())
5146 if (LoopCost > Budget)
5168 if (MaxTC > 0 && MaxTC <= 32)
5179 if (Blocks.
size() != 2)
5201 if (!L->isInnermost() || L->getNumBlocks() > 8)
5205 if (!L->getExitBlock())
5211 bool HasParellelizableReductions =
5212 L->getNumBlocks() == 1 &&
5213 any_of(L->getHeader()->phis(),
5215 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5218 if (HasParellelizableReductions &&
5240 if (HasParellelizableReductions) {
5251 if (Header == Latch) {
5254 unsigned Width = 10;
5260 unsigned MaxInstsPerLine = 16;
5262 unsigned BestUC = 1;
5263 unsigned SizeWithBestUC = BestUC *
Size;
5265 unsigned SizeWithUC = UC *
Size;
5266 if (SizeWithUC > 48)
5268 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5269 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5271 SizeWithBestUC = BestUC *
Size;
5281 for (
auto *BB : L->blocks()) {
5282 for (
auto &
I : *BB) {
5292 for (
auto *U :
I.users())
5294 LoadedValuesPlus.
insert(U);
5301 return LoadedValuesPlus.
contains(
SI->getOperand(0));
5314 if (!Term || !Term->isConditional() || Preds.
size() == 1 ||
5328 auto *I = dyn_cast<Instruction>(V);
5329 return I && DependsOnLoopLoad(I, Depth + 1);
5336 DependsOnLoopLoad(
I, 0)) {
5352 if (L->getLoopDepth() > 1)
5363 for (
auto *BB : L->getBlocks()) {
5364 for (
auto &
I : *BB) {
5368 if (IsVectorized &&
I.getType()->isVectorTy())
5385 if (ST->isAppleMLike())
5387 else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
5409 !ST->getSchedModel().isOutOfOrder()) {
5432 bool CanCreate)
const {
5436 case Intrinsic::aarch64_neon_st2:
5437 case Intrinsic::aarch64_neon_st3:
5438 case Intrinsic::aarch64_neon_st4: {
5441 if (!CanCreate || !ST)
5443 unsigned NumElts = Inst->
arg_size() - 1;
5444 if (ST->getNumElements() != NumElts)
5446 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5452 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5454 Res = Builder.CreateInsertValue(Res, L, i);
5458 case Intrinsic::aarch64_neon_ld2:
5459 case Intrinsic::aarch64_neon_ld3:
5460 case Intrinsic::aarch64_neon_ld4:
5461 if (Inst->
getType() == ExpectedType)
5472 case Intrinsic::aarch64_neon_ld2:
5473 case Intrinsic::aarch64_neon_ld3:
5474 case Intrinsic::aarch64_neon_ld4:
5475 Info.ReadMem =
true;
5476 Info.WriteMem =
false;
5479 case Intrinsic::aarch64_neon_st2:
5480 case Intrinsic::aarch64_neon_st3:
5481 case Intrinsic::aarch64_neon_st4:
5482 Info.ReadMem =
false;
5483 Info.WriteMem =
true;
5491 case Intrinsic::aarch64_neon_ld2:
5492 case Intrinsic::aarch64_neon_st2:
5493 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5495 case Intrinsic::aarch64_neon_ld3:
5496 case Intrinsic::aarch64_neon_st3:
5497 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5499 case Intrinsic::aarch64_neon_ld4:
5500 case Intrinsic::aarch64_neon_st4:
5501 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
5513 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
5514 bool Considerable =
false;
5515 AllowPromotionWithoutCommonHeader =
false;
5518 Type *ConsideredSExtType =
5520 if (
I.getType() != ConsideredSExtType)
5524 for (
const User *U :
I.users()) {
5526 Considerable =
true;
5530 if (GEPInst->getNumOperands() > 2) {
5531 AllowPromotionWithoutCommonHeader =
true;
5536 return Considerable;
5585 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5595 return LegalizationCost + 2;
5605 LegalizationCost *= LT.first - 1;
5608 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5617 return LegalizationCost + 2;
5625 std::optional<FastMathFlags> FMF,
5641 return BaseCost + FixedVTy->getNumElements();
5644 if (Opcode != Instruction::FAdd)
5658 MVT MTy = LT.second;
5659 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5707 MTy.
isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5708 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5710 if (ValTy->getElementCount().getFixedValue() >= 2 && NElts >= 2 &&
5720 return (LT.first - 1) +
Log2_32(NElts);
5725 return (LT.first - 1) + Entry->Cost;
5737 if (LT.first != 1) {
5743 ExtraCost *= LT.first - 1;
5746 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5747 return Cost + ExtraCost;
5755 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *VecTy,
5757 EVT VecVT = TLI->getValueType(
DL, VecTy);
5758 EVT ResVT = TLI->getValueType(
DL, ResTy);
5768 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5770 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5772 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5774 return (LT.first - 1) * 2 + 2;
5785 EVT VecVT = TLI->getValueType(
DL, VecTy);
5786 EVT ResVT = TLI->getValueType(
DL, ResTy);
5789 RedOpcode == Instruction::Add) {
5795 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5797 return LT.first + 2;
5832 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5833 ? TLI->getPromotedVTForPredicate(
EVT(LT.second))
5847 if (LT.second.getScalarType() == MVT::i1) {
5856 assert(Entry &&
"Illegal Type for Splice");
5857 LegalizationCost += Entry->Cost;
5858 return LegalizationCost * LT.first;
5862 unsigned Opcode,
Type *InputTypeA,
Type *InputTypeB,
Type *AccumType,
5871 if (VF.
isFixed() && !ST->isSVEorStreamingSVEAvailable() &&
5872 (!ST->isNeonAvailable() || !ST->hasDotProd()))
5875 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub &&
5876 Opcode != Instruction::FAdd) ||
5883 assert(FMF &&
"Missing FastMathFlags for floating-point partial reduction");
5884 if (!FMF->allowReassoc() || !FMF->allowContract())
5888 "FastMathFlags only apply to floating-point partial reductions");
5892 (!BinOp || (OpBExtend !=
TTI::PR_None && InputTypeB)) &&
5893 "Unexpected values for OpBExtend or InputTypeB");
5897 if (BinOp && ((*BinOp != Instruction::Mul && *BinOp != Instruction::FMul) ||
5898 InputTypeA != InputTypeB))
5901 bool IsUSDot = OpBExtend !=
TTI::PR_None && OpAExtend != OpBExtend;
5902 if (IsUSDot && !ST->hasMatMulInt8())
5914 auto TC = TLI->getTypeConversion(AccumVectorType->
getContext(),
5923 if (TLI->getTypeAction(AccumVectorType->
getContext(), TC.second) !=
5929 std::pair<InstructionCost, MVT> AccumLT =
5931 std::pair<InstructionCost, MVT> InputLT =
5938 if (Opcode == Instruction::Sub)
5949 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
5951 if (AccumLT.second.getScalarType() == MVT::i64 &&
5952 InputLT.second.getScalarType() == MVT::i16)
5955 if (AccumLT.second.getScalarType() == MVT::i32 &&
5956 InputLT.second.getScalarType() == MVT::i16 &&
5957 (ST->hasSVE2p1() || ST->hasSME2()))
5960 if (AccumLT.second.getScalarType() == MVT::i64 &&
5961 InputLT.second.getScalarType() == MVT::i8)
5971 if (ST->isSVEorStreamingSVEAvailable() ||
5972 (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() &&
5973 ST->hasDotProd())) {
5974 if (AccumLT.second.getScalarType() == MVT::i32 &&
5975 InputLT.second.getScalarType() == MVT::i8)
5980 if (Opcode == Instruction::FAdd && (ST->hasSME2() || ST->hasSVE2p1())) {
5981 if (AccumLT.second.getScalarType() == MVT::f32 &&
5982 InputLT.second.getScalarType() == MVT::f16 &&
5983 AccumLT.second.getVectorMinNumElements() == 4 &&
5984 InputLT.second.getVectorMinNumElements() == 8)
6003 "Expected the Mask to match the return size if given");
6005 "Expected the same scalar types");
6011 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
6012 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
6013 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
6021 return std::max<InstructionCost>(1, LT.first / 4);
6029 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
6031 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
6034 unsigned TpNumElts = Mask.size();
6035 unsigned LTNumElts = LT.second.getVectorNumElements();
6036 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
6038 LT.second.getVectorElementCount());
6040 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>,
InstructionCost>
6042 for (
unsigned N = 0;
N < NumVecs;
N++) {
6046 unsigned Source1 = -1U, Source2 = -1U;
6047 unsigned NumSources = 0;
6048 for (
unsigned E = 0; E < LTNumElts; E++) {
6049 int MaskElt = (
N * LTNumElts + E < TpNumElts) ? Mask[
N * LTNumElts + E]
6058 unsigned Source = MaskElt / LTNumElts;
6059 if (NumSources == 0) {
6062 }
else if (NumSources == 1 && Source != Source1) {
6065 }
else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
6071 if (Source == Source1)
6073 else if (Source == Source2)
6074 NMask.
push_back(MaskElt % LTNumElts + LTNumElts);
6083 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
6094 NTp, NTp, NMask,
CostKind, 0,
nullptr, Args,
6097 Result.first->second = NCost;
6111 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
6112 if (LT.second.getFixedSizeInBits() >= 128 &&
6114 LT.second.getVectorNumElements() / 2) {
6117 if (Index == (
int)LT.second.getVectorNumElements() / 2)
6131 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6134 return M.value() < 0 || M.value() == (int)M.index();
6140 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6141 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6150 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6151 ST->isSVEorStreamingSVEAvailable() &&
6156 if (ST->isSVEorStreamingSVEAvailable() &&
6170 if (IsLoad && LT.second.isVector() &&
6172 LT.second.getVectorElementCount()))
6178 if (Mask.size() == 4 &&
6180 (SrcTy->getScalarSizeInBits() == 16 ||
6181 SrcTy->getScalarSizeInBits() == 32) &&
6182 all_of(Mask, [](
int E) {
return E < 8; }))
6188 if (LT.second.isFixedLengthVector() &&
6189 LT.second.getVectorNumElements() == Mask.size() &&
6195 (
isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6196 isTRNMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6197 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6198 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6199 LT.second.getVectorNumElements(), 16) ||
6200 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6201 LT.second.getVectorNumElements(), 32) ||
6202 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6203 LT.second.getVectorNumElements(), 64) ||
6206 [&Mask](
int M) {
return M < 0 || M == Mask[0]; })))
6335 return LT.first * Entry->Cost;
6344 LT.second.getSizeInBits() <= 128 && SubTp) {
6346 if (SubLT.second.isVector()) {
6347 int NumElts = LT.second.getVectorNumElements();
6348 int NumSubElts = SubLT.second.getVectorNumElements();
6349 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6355 if (IsExtractSubvector)
6372 if (
getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
6391 return ST->useFixedOverScalableIfEqualCost();
6395 return ST->getEpilogueVectorizationMinVF();
6430 unsigned NumInsns = 0;
6432 NumInsns += BB->sizeWithoutDebug();
6442 int64_t Scale,
unsigned AddrSpace)
const {
6470 if (
I->getOpcode() == Instruction::Or &&
6475 if (
I->getOpcode() == Instruction::Add ||
6476 I->getOpcode() == Instruction::Sub)
6501 return all_equal(Shuf->getShuffleMask());
6508 bool AllowSplat =
false) {
6513 auto areTypesHalfed = [](
Value *FullV,
Value *HalfV) {
6514 auto *FullTy = FullV->
getType();
6515 auto *HalfTy = HalfV->getType();
6517 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6520 auto extractHalf = [](
Value *FullV,
Value *HalfV) {
6523 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6527 Value *S1Op1 =
nullptr, *S2Op1 =
nullptr;
6541 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6542 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6556 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6557 (M2Start != 0 && M2Start != (NumElements / 2)))
6559 if (S1Op1 && S2Op1 && M1Start != M2Start)
6569 return Ext->getType()->getScalarSizeInBits() ==
6570 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6584 Value *VectorOperand =
nullptr;
6601 if (!
GEP ||
GEP->getNumOperands() != 2)
6605 Value *Offsets =
GEP->getOperand(1);
6608 if (
Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6614 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6615 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6616 Ops.push_back(&
GEP->getOperandUse(1));
6652 switch (
II->getIntrinsicID()) {
6653 case Intrinsic::aarch64_neon_smull:
6654 case Intrinsic::aarch64_neon_umull:
6657 Ops.push_back(&
II->getOperandUse(0));
6658 Ops.push_back(&
II->getOperandUse(1));
6663 case Intrinsic::fma:
6664 case Intrinsic::fmuladd:
6671 Ops.push_back(&
II->getOperandUse(0));
6673 Ops.push_back(&
II->getOperandUse(1));
6676 case Intrinsic::aarch64_neon_sqdmull:
6677 case Intrinsic::aarch64_neon_sqdmulh:
6678 case Intrinsic::aarch64_neon_sqrdmulh:
6681 Ops.push_back(&
II->getOperandUse(0));
6683 Ops.push_back(&
II->getOperandUse(1));
6684 return !
Ops.empty();
6685 case Intrinsic::aarch64_neon_fmlal:
6686 case Intrinsic::aarch64_neon_fmlal2:
6687 case Intrinsic::aarch64_neon_fmlsl:
6688 case Intrinsic::aarch64_neon_fmlsl2:
6691 Ops.push_back(&
II->getOperandUse(1));
6693 Ops.push_back(&
II->getOperandUse(2));
6694 return !
Ops.empty();
6695 case Intrinsic::aarch64_sve_ptest_first:
6696 case Intrinsic::aarch64_sve_ptest_last:
6698 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6699 Ops.push_back(&
II->getOperandUse(0));
6700 return !
Ops.empty();
6701 case Intrinsic::aarch64_sme_write_horiz:
6702 case Intrinsic::aarch64_sme_write_vert:
6703 case Intrinsic::aarch64_sme_writeq_horiz:
6704 case Intrinsic::aarch64_sme_writeq_vert: {
6706 if (!Idx || Idx->getOpcode() != Instruction::Add)
6708 Ops.push_back(&
II->getOperandUse(1));
6711 case Intrinsic::aarch64_sme_read_horiz:
6712 case Intrinsic::aarch64_sme_read_vert:
6713 case Intrinsic::aarch64_sme_readq_horiz:
6714 case Intrinsic::aarch64_sme_readq_vert:
6715 case Intrinsic::aarch64_sme_ld1b_vert:
6716 case Intrinsic::aarch64_sme_ld1h_vert:
6717 case Intrinsic::aarch64_sme_ld1w_vert:
6718 case Intrinsic::aarch64_sme_ld1d_vert:
6719 case Intrinsic::aarch64_sme_ld1q_vert:
6720 case Intrinsic::aarch64_sme_st1b_vert:
6721 case Intrinsic::aarch64_sme_st1h_vert:
6722 case Intrinsic::aarch64_sme_st1w_vert:
6723 case Intrinsic::aarch64_sme_st1d_vert:
6724 case Intrinsic::aarch64_sme_st1q_vert:
6725 case Intrinsic::aarch64_sme_ld1b_horiz:
6726 case Intrinsic::aarch64_sme_ld1h_horiz:
6727 case Intrinsic::aarch64_sme_ld1w_horiz:
6728 case Intrinsic::aarch64_sme_ld1d_horiz:
6729 case Intrinsic::aarch64_sme_ld1q_horiz:
6730 case Intrinsic::aarch64_sme_st1b_horiz:
6731 case Intrinsic::aarch64_sme_st1h_horiz:
6732 case Intrinsic::aarch64_sme_st1w_horiz:
6733 case Intrinsic::aarch64_sme_st1d_horiz:
6734 case Intrinsic::aarch64_sme_st1q_horiz: {
6736 if (!Idx || Idx->getOpcode() != Instruction::Add)
6738 Ops.push_back(&
II->getOperandUse(3));
6741 case Intrinsic::aarch64_neon_pmull:
6744 Ops.push_back(&
II->getOperandUse(0));
6745 Ops.push_back(&
II->getOperandUse(1));
6747 case Intrinsic::aarch64_neon_pmull64:
6749 II->getArgOperand(1)))
6751 Ops.push_back(&
II->getArgOperandUse(0));
6752 Ops.push_back(&
II->getArgOperandUse(1));
6754 case Intrinsic::masked_gather:
6757 Ops.push_back(&
II->getArgOperandUse(0));
6759 case Intrinsic::masked_scatter:
6762 Ops.push_back(&
II->getArgOperandUse(1));
6769 auto ShouldSinkCondition = [](
Value *
Cond,
6774 if (
II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6778 Ops.push_back(&
II->getOperandUse(0));
6782 switch (
I->getOpcode()) {
6783 case Instruction::GetElementPtr:
6784 case Instruction::Add:
6785 case Instruction::Sub:
6787 for (
unsigned Op = 0;
Op <
I->getNumOperands(); ++
Op) {
6789 Ops.push_back(&
I->getOperandUse(
Op));
6794 case Instruction::Select: {
6795 if (!ShouldSinkCondition(
I->getOperand(0),
Ops))
6798 Ops.push_back(&
I->getOperandUse(0));
6801 case Instruction::Br: {
6808 Ops.push_back(&
I->getOperandUse(0));
6811 case Instruction::FMul:
6816 Ops.push_back(&
I->getOperandUse(0));
6818 Ops.push_back(&
I->getOperandUse(1));
6826 if (!
I->getType()->isVectorTy())
6827 return !
Ops.empty();
6829 switch (
I->getOpcode()) {
6830 case Instruction::Sub:
6831 case Instruction::Add: {
6840 Ops.push_back(&Ext1->getOperandUse(0));
6841 Ops.push_back(&Ext2->getOperandUse(0));
6844 Ops.push_back(&
I->getOperandUse(0));
6845 Ops.push_back(&
I->getOperandUse(1));
6849 case Instruction::Or: {
6852 if (ST->hasNEON()) {
6866 if (
I->getParent() != MainAnd->
getParent() ||
6871 if (
I->getParent() != IA->getParent() ||
6872 I->getParent() != IB->getParent())
6877 Ops.push_back(&
I->getOperandUse(0));
6878 Ops.push_back(&
I->getOperandUse(1));
6887 case Instruction::Mul: {
6888 auto ShouldSinkSplatForIndexedVariant = [](
Value *V) {
6891 if (Ty->isScalableTy())
6895 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
6898 int NumZExts = 0, NumSExts = 0;
6899 for (
auto &
Op :
I->operands()) {
6906 auto *ExtOp = Ext->getOperand(0);
6907 if (
isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
6908 Ops.push_back(&Ext->getOperandUse(0));
6916 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
6917 I->getType()->getScalarSizeInBits())
6954 if (!ElementConstant || !ElementConstant->
isZero())
6957 unsigned Opcode = OperandInstr->
getOpcode();
6958 if (Opcode == Instruction::SExt)
6960 else if (Opcode == Instruction::ZExt)
6965 unsigned Bitwidth =
I->getType()->getScalarSizeInBits();
6975 Ops.push_back(&Insert->getOperandUse(1));
6981 if (!
Ops.empty() && (NumSExts == 2 || NumZExts == 2))
6985 if (!ShouldSinkSplatForIndexedVariant(
I))
6990 Ops.push_back(&
I->getOperandUse(0));
6992 Ops.push_back(&
I->getOperandUse(1));
6994 return !
Ops.empty();
6996 case Instruction::FMul: {
6998 if (
I->getType()->isScalableTy())
6999 return !
Ops.empty();
7003 return !
Ops.empty();
7007 Ops.push_back(&
I->getOperandUse(0));
7009 Ops.push_back(&
I->getOperandUse(1));
7010 return !
Ops.empty();
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
const HexagonInstrInfo * TII
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext)) if the architecture features are not...
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
APInt getPriorityMask(const Function &F) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e. a conservative over-approximation of) the value returned by getBackedgeTakenCount.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
LLVM_ABI APInt getCpuSupportsMask(ArrayRef< StringRef > Features)
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...