23#include "llvm/IR/IntrinsicsAArch64.h"
35#define DEBUG_TYPE "aarch64tti"
41 "sve-prefer-fixed-over-scalable-if-equal",
cl::Hidden);
59 "Penalty of calling a function that requires a change to PSTATE.SM"));
63 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
74 cl::desc(
"The cost of a histcnt instruction"));
78 cl::desc(
"The number of instructions to search for a redundant dmb"));
82 cl::desc(
"Threshold for forced unrolling of small loops in AArch64"));
85class TailFoldingOption {
100 bool NeedsDefault =
true;
104 void setNeedsDefault(
bool V) { NeedsDefault =
V; }
119 assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
120 "Initial bits should only include one of "
121 "(disabled|all|simple|default)");
122 Bits = NeedsDefault ? DefaultBits : InitialBits;
124 Bits &= ~DisableBits;
130 errs() <<
"invalid argument '" << Opt
131 <<
"' to -sve-tail-folding=; the option should be of the form\n"
132 " (disabled|all|default|simple)[+(reductions|recurrences"
133 "|reverse|noreductions|norecurrences|noreverse)]\n";
139 void operator=(
const std::string &Val) {
148 setNeedsDefault(
false);
151 StringRef(Val).split(TailFoldTypes,
'+', -1,
false);
153 unsigned StartIdx = 1;
154 if (TailFoldTypes[0] ==
"disabled")
155 setInitialBits(TailFoldingOpts::Disabled);
156 else if (TailFoldTypes[0] ==
"all")
157 setInitialBits(TailFoldingOpts::All);
158 else if (TailFoldTypes[0] ==
"default")
159 setNeedsDefault(
true);
160 else if (TailFoldTypes[0] ==
"simple")
161 setInitialBits(TailFoldingOpts::Simple);
164 setInitialBits(TailFoldingOpts::Disabled);
167 for (
unsigned I = StartIdx;
I < TailFoldTypes.
size();
I++) {
168 if (TailFoldTypes[
I] ==
"reductions")
169 setEnableBit(TailFoldingOpts::Reductions);
170 else if (TailFoldTypes[
I] ==
"recurrences")
171 setEnableBit(TailFoldingOpts::Recurrences);
172 else if (TailFoldTypes[
I] ==
"reverse")
173 setEnableBit(TailFoldingOpts::Reverse);
174 else if (TailFoldTypes[
I] ==
"noreductions")
175 setDisableBit(TailFoldingOpts::Reductions);
176 else if (TailFoldTypes[
I] ==
"norecurrences")
177 setDisableBit(TailFoldingOpts::Recurrences);
178 else if (TailFoldTypes[
I] ==
"noreverse")
179 setDisableBit(TailFoldingOpts::Reverse);
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
258 TTI->isMultiversionedFunction(
F) ?
"fmv-features" :
"target-features";
259 StringRef FeatureStr =
F.getFnAttribute(AttributeStr).getValueAsString();
260 FeatureStr.
split(Features,
",");
276 return F.hasFnAttribute(
"fmv-features");
280 AArch64::FeatureExecuteOnly,
320 FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
321 FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;
323 return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
341 auto FVTy = dyn_cast<FixedVectorType>(Ty);
343 FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
352 unsigned DefaultCallPenalty)
const {
377 if (
F ==
Call.getCaller())
383 return DefaultCallPenalty;
394 ST->isSVEorStreamingSVEAvailable() &&
395 !ST->disableMaximizeScalableBandwidth();
419 assert(Ty->isIntegerTy());
421 unsigned BitSize = Ty->getPrimitiveSizeInBits();
428 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
433 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
439 return std::max<InstructionCost>(1,
Cost);
446 assert(Ty->isIntegerTy());
448 unsigned BitSize = Ty->getPrimitiveSizeInBits();
454 unsigned ImmIdx = ~0U;
458 case Instruction::GetElementPtr:
463 case Instruction::Store:
466 case Instruction::Add:
467 case Instruction::Sub:
468 case Instruction::Mul:
469 case Instruction::UDiv:
470 case Instruction::SDiv:
471 case Instruction::URem:
472 case Instruction::SRem:
473 case Instruction::And:
474 case Instruction::Or:
475 case Instruction::Xor:
476 case Instruction::ICmp:
480 case Instruction::Shl:
481 case Instruction::LShr:
482 case Instruction::AShr:
486 case Instruction::Trunc:
487 case Instruction::ZExt:
488 case Instruction::SExt:
489 case Instruction::IntToPtr:
490 case Instruction::PtrToInt:
491 case Instruction::BitCast:
492 case Instruction::PHI:
493 case Instruction::Call:
494 case Instruction::Select:
495 case Instruction::Ret:
496 case Instruction::Load:
501 int NumConstants = (BitSize + 63) / 64;
514 assert(Ty->isIntegerTy());
516 unsigned BitSize = Ty->getPrimitiveSizeInBits();
525 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
531 case Intrinsic::sadd_with_overflow:
532 case Intrinsic::uadd_with_overflow:
533 case Intrinsic::ssub_with_overflow:
534 case Intrinsic::usub_with_overflow:
535 case Intrinsic::smul_with_overflow:
536 case Intrinsic::umul_with_overflow:
538 int NumConstants = (BitSize + 63) / 64;
545 case Intrinsic::experimental_stackmap:
546 if ((Idx < 2) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
549 case Intrinsic::experimental_patchpoint_void:
550 case Intrinsic::experimental_patchpoint:
551 if ((Idx < 4) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
554 case Intrinsic::experimental_gc_statepoint:
555 if ((Idx < 5) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
565 if (TyWidth == 32 || TyWidth == 64)
589 unsigned TotalHistCnts = 1;
599 unsigned EC = VTy->getElementCount().getKnownMinValue();
604 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
606 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
610 TotalHistCnts = EC / NaturalVectorWidth;
630 switch (ICA.
getID()) {
631 case Intrinsic::experimental_vector_histogram_add: {
638 case Intrinsic::umin:
639 case Intrinsic::umax:
640 case Intrinsic::smin:
641 case Intrinsic::smax: {
642 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
643 MVT::v8i16, MVT::v2i32, MVT::v4i32,
644 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
648 if (LT.second == MVT::v2i64)
654 case Intrinsic::scmp:
655 case Intrinsic::ucmp: {
657 {Intrinsic::scmp, MVT::i32, 3},
658 {Intrinsic::scmp, MVT::i64, 3},
659 {Intrinsic::scmp, MVT::v8i8, 3},
660 {Intrinsic::scmp, MVT::v16i8, 3},
661 {Intrinsic::scmp, MVT::v4i16, 3},
662 {Intrinsic::scmp, MVT::v8i16, 3},
663 {Intrinsic::scmp, MVT::v2i32, 3},
664 {Intrinsic::scmp, MVT::v4i32, 3},
665 {Intrinsic::scmp, MVT::v1i64, 3},
666 {Intrinsic::scmp, MVT::v2i64, 3},
672 return Entry->Cost * LT.first;
675 case Intrinsic::sadd_sat:
676 case Intrinsic::ssub_sat:
677 case Intrinsic::uadd_sat:
678 case Intrinsic::usub_sat: {
679 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
680 MVT::v8i16, MVT::v2i32, MVT::v4i32,
686 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
688 return LT.first * Instrs;
693 if (ST->isSVEAvailable() && VectorSize >= 128 &&
isPowerOf2_64(VectorSize))
694 return LT.first * Instrs;
698 case Intrinsic::abs: {
699 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
700 MVT::v8i16, MVT::v2i32, MVT::v4i32,
707 case Intrinsic::bswap: {
708 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
709 MVT::v4i32, MVT::v2i64};
712 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
717 case Intrinsic::fmuladd: {
722 (EltTy->
isHalfTy() && ST->hasFullFP16()))
726 case Intrinsic::stepvector: {
735 Cost += AddCost * (LT.first - 1);
739 case Intrinsic::vector_extract:
740 case Intrinsic::vector_insert: {
753 bool IsExtract = ICA.
getID() == Intrinsic::vector_extract;
754 EVT SubVecVT = IsExtract ? getTLI()->getValueType(
DL, RetTy)
762 getTLI()->getTypeConversion(
C, SubVecVT);
764 getTLI()->getTypeConversion(
C, VecVT);
772 case Intrinsic::bitreverse: {
774 {Intrinsic::bitreverse, MVT::i32, 1},
775 {Intrinsic::bitreverse, MVT::i64, 1},
776 {Intrinsic::bitreverse, MVT::v8i8, 1},
777 {Intrinsic::bitreverse, MVT::v16i8, 1},
778 {Intrinsic::bitreverse, MVT::v4i16, 2},
779 {Intrinsic::bitreverse, MVT::v8i16, 2},
780 {Intrinsic::bitreverse, MVT::v2i32, 2},
781 {Intrinsic::bitreverse, MVT::v4i32, 2},
782 {Intrinsic::bitreverse, MVT::v1i64, 2},
783 {Intrinsic::bitreverse, MVT::v2i64, 2},
791 if (TLI->getValueType(
DL, RetTy,
true) == MVT::i8 ||
792 TLI->getValueType(
DL, RetTy,
true) == MVT::i16)
793 return LegalisationCost.first * Entry->Cost + 1;
795 return LegalisationCost.first * Entry->Cost;
799 case Intrinsic::ctpop: {
800 if (!ST->hasNEON()) {
821 RetTy->getScalarSizeInBits()
824 return LT.first * Entry->Cost + ExtraCost;
828 case Intrinsic::sadd_with_overflow:
829 case Intrinsic::uadd_with_overflow:
830 case Intrinsic::ssub_with_overflow:
831 case Intrinsic::usub_with_overflow:
832 case Intrinsic::smul_with_overflow:
833 case Intrinsic::umul_with_overflow: {
835 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
836 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
837 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
838 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
839 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
840 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
841 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
842 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
843 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
844 {Intrinsic::usub_with_overflow, MVT::i8, 3},
845 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
846 {Intrinsic::usub_with_overflow, MVT::i16, 3},
847 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
848 {Intrinsic::usub_with_overflow, MVT::i32, 1},
849 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
850 {Intrinsic::usub_with_overflow, MVT::i64, 1},
851 {Intrinsic::smul_with_overflow, MVT::i8, 5},
852 {Intrinsic::umul_with_overflow, MVT::i8, 4},
853 {Intrinsic::smul_with_overflow, MVT::i16, 5},
854 {Intrinsic::umul_with_overflow, MVT::i16, 4},
855 {Intrinsic::smul_with_overflow, MVT::i32, 2},
856 {Intrinsic::umul_with_overflow, MVT::i32, 2},
857 {Intrinsic::smul_with_overflow, MVT::i64, 3},
858 {Intrinsic::umul_with_overflow, MVT::i64, 3},
860 EVT MTy = TLI->getValueType(
DL, RetTy->getContainedType(0),
true);
867 case Intrinsic::fptosi_sat:
868 case Intrinsic::fptoui_sat: {
871 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
873 EVT MTy = TLI->getValueType(
DL, RetTy);
876 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
877 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
878 LT.second == MVT::v2f64)) {
880 (LT.second == MVT::f64 && MTy == MVT::i32) ||
881 (LT.second == MVT::f32 && MTy == MVT::i64)))
890 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
897 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
898 (LT.second == MVT::f16 && MTy == MVT::i64) ||
899 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
913 if ((LT.second.getScalarType() == MVT::f32 ||
914 LT.second.getScalarType() == MVT::f64 ||
915 LT.second.getScalarType() == MVT::f16) &&
919 if (LT.second.isVector())
923 LegalTy, {LegalTy, LegalTy});
926 LegalTy, {LegalTy, LegalTy});
928 return LT.first *
Cost +
929 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
935 RetTy = RetTy->getScalarType();
936 if (LT.second.isVector()) {
954 return LT.first *
Cost;
956 case Intrinsic::fshl:
957 case Intrinsic::fshr: {
966 if (RetTy->isIntegerTy() && ICA.
getArgs()[0] == ICA.
getArgs()[1] &&
967 (RetTy->getPrimitiveSizeInBits() == 32 ||
968 RetTy->getPrimitiveSizeInBits() == 64)) {
981 {Intrinsic::fshl, MVT::v4i32, 2},
982 {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
983 {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
984 {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
990 return LegalisationCost.first * Entry->Cost;
994 if (!RetTy->isIntegerTy())
999 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
1000 RetTy->getScalarSizeInBits() < 64) ||
1001 (RetTy->getScalarSizeInBits() % 64 != 0);
1002 unsigned ExtraCost = HigherCost ? 1 : 0;
1003 if (RetTy->getScalarSizeInBits() == 32 ||
1004 RetTy->getScalarSizeInBits() == 64)
1007 else if (HigherCost)
1011 return TyL.first + ExtraCost;
1013 case Intrinsic::get_active_lane_mask: {
1015 EVT RetVT = getTLI()->getValueType(
DL, RetTy);
1017 if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
1020 if (RetTy->isScalableTy()) {
1021 if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
1031 if (ST->hasSVE2p1() || ST->hasSME2()) {
1046 return Cost + (SplitCost * (
Cost - 1));
1061 case Intrinsic::experimental_vector_match: {
1064 unsigned SearchSize = NeedleTy->getNumElements();
1065 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
1078 case Intrinsic::experimental_cttz_elts: {
1080 if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
1088 case Intrinsic::loop_dependence_raw_mask:
1089 case Intrinsic::loop_dependence_war_mask: {
1091 if (ST->hasSVE2() || ST->hasSME()) {
1092 EVT VecVT = getTLI()->getValueType(
DL, RetTy);
1093 unsigned EltSizeInBytes =
1103 case Intrinsic::experimental_vector_extract_last_active:
1104 if (ST->isSVEorStreamingSVEAvailable()) {
1110 case Intrinsic::pow: {
1111 EVT VT = getTLI()->getValueType(
DL, RetTy);
1113 if (getTLI()->getLibcallImpl(LC) != RTLIB::Unsupported)
1128 auto RequiredType =
II.getType();
1131 assert(PN &&
"Expected Phi Node!");
1134 if (!PN->hasOneUse())
1135 return std::nullopt;
1137 for (
Value *IncValPhi : PN->incoming_values()) {
1140 Reinterpret->getIntrinsicID() !=
1141 Intrinsic::aarch64_sve_convert_to_svbool ||
1142 RequiredType != Reinterpret->getArgOperand(0)->getType())
1143 return std::nullopt;
1151 for (
unsigned I = 0;
I < PN->getNumIncomingValues();
I++) {
1153 NPN->
addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(
I));
1226 return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
1231 return GoverningPredicateIdx;
1236 GoverningPredicateIdx = Index;
1254 return UndefIntrinsic;
1259 UndefIntrinsic = IID;
1281 return ResultLanes == InactiveLanesTakenFromOperand;
1286 return OperandIdxForInactiveLanes;
1290 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1291 ResultLanes = InactiveLanesTakenFromOperand;
1292 OperandIdxForInactiveLanes = Index;
1297 return ResultLanes == InactiveLanesAreNotDefined;
1301 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1302 ResultLanes = InactiveLanesAreNotDefined;
1307 return ResultLanes == InactiveLanesAreUnused;
1311 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1312 ResultLanes = InactiveLanesAreUnused;
1322 ResultIsZeroInitialized =
true;
1333 return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
1338 return OperandIdxWithNoActiveLanes;
1343 OperandIdxWithNoActiveLanes = Index;
1348 unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
1351 unsigned IROpcode = 0;
1353 enum PredicationStyle {
1355 InactiveLanesTakenFromOperand,
1356 InactiveLanesAreNotDefined,
1357 InactiveLanesAreUnused
1360 bool ResultIsZeroInitialized =
false;
1361 unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
1362 unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
1370 return !isa<ScalableVectorType>(V->getType());
1378 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1379 case Intrinsic::aarch64_sve_fcvt_f16f32:
1380 case Intrinsic::aarch64_sve_fcvt_f16f64:
1381 case Intrinsic::aarch64_sve_fcvt_f32f16:
1382 case Intrinsic::aarch64_sve_fcvt_f32f64:
1383 case Intrinsic::aarch64_sve_fcvt_f64f16:
1384 case Intrinsic::aarch64_sve_fcvt_f64f32:
1385 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1386 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1387 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1388 case Intrinsic::aarch64_sve_fcvtzs:
1389 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1390 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1391 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1392 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1393 case Intrinsic::aarch64_sve_fcvtzu:
1394 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1395 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1396 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1397 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1398 case Intrinsic::aarch64_sve_scvtf:
1399 case Intrinsic::aarch64_sve_scvtf_f16i32:
1400 case Intrinsic::aarch64_sve_scvtf_f16i64:
1401 case Intrinsic::aarch64_sve_scvtf_f32i64:
1402 case Intrinsic::aarch64_sve_scvtf_f64i32:
1403 case Intrinsic::aarch64_sve_ucvtf:
1404 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1405 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1406 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1407 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1410 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1411 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1412 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1413 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1416 case Intrinsic::aarch64_sve_fabd:
1418 case Intrinsic::aarch64_sve_fadd:
1421 case Intrinsic::aarch64_sve_fdiv:
1424 case Intrinsic::aarch64_sve_fmax:
1426 case Intrinsic::aarch64_sve_fmaxnm:
1428 case Intrinsic::aarch64_sve_fmin:
1430 case Intrinsic::aarch64_sve_fminnm:
1432 case Intrinsic::aarch64_sve_fmla:
1434 case Intrinsic::aarch64_sve_fmls:
1436 case Intrinsic::aarch64_sve_fmul:
1439 case Intrinsic::aarch64_sve_fmulx:
1441 case Intrinsic::aarch64_sve_fnmla:
1443 case Intrinsic::aarch64_sve_fnmls:
1445 case Intrinsic::aarch64_sve_fsub:
1448 case Intrinsic::aarch64_sve_add:
1451 case Intrinsic::aarch64_sve_mla:
1453 case Intrinsic::aarch64_sve_mls:
1455 case Intrinsic::aarch64_sve_mul:
1458 case Intrinsic::aarch64_sve_sabd:
1460 case Intrinsic::aarch64_sve_sdiv:
1463 case Intrinsic::aarch64_sve_smax:
1465 case Intrinsic::aarch64_sve_smin:
1467 case Intrinsic::aarch64_sve_smulh:
1469 case Intrinsic::aarch64_sve_sub:
1472 case Intrinsic::aarch64_sve_uabd:
1474 case Intrinsic::aarch64_sve_udiv:
1477 case Intrinsic::aarch64_sve_umax:
1479 case Intrinsic::aarch64_sve_umin:
1481 case Intrinsic::aarch64_sve_umulh:
1483 case Intrinsic::aarch64_sve_asr:
1486 case Intrinsic::aarch64_sve_lsl:
1489 case Intrinsic::aarch64_sve_lsr:
1492 case Intrinsic::aarch64_sve_and:
1495 case Intrinsic::aarch64_sve_bic:
1497 case Intrinsic::aarch64_sve_eor:
1500 case Intrinsic::aarch64_sve_orr:
1503 case Intrinsic::aarch64_sve_shsub:
1505 case Intrinsic::aarch64_sve_shsubr:
1507 case Intrinsic::aarch64_sve_sqrshl:
1509 case Intrinsic::aarch64_sve_sqshl:
1511 case Intrinsic::aarch64_sve_sqsub:
1513 case Intrinsic::aarch64_sve_srshl:
1515 case Intrinsic::aarch64_sve_uhsub:
1517 case Intrinsic::aarch64_sve_uhsubr:
1519 case Intrinsic::aarch64_sve_uqrshl:
1521 case Intrinsic::aarch64_sve_uqshl:
1523 case Intrinsic::aarch64_sve_uqsub:
1525 case Intrinsic::aarch64_sve_urshl:
1528 case Intrinsic::aarch64_sve_add_u:
1531 case Intrinsic::aarch64_sve_and_u:
1534 case Intrinsic::aarch64_sve_asr_u:
1537 case Intrinsic::aarch64_sve_eor_u:
1540 case Intrinsic::aarch64_sve_fadd_u:
1543 case Intrinsic::aarch64_sve_fdiv_u:
1546 case Intrinsic::aarch64_sve_fmul_u:
1549 case Intrinsic::aarch64_sve_fsub_u:
1552 case Intrinsic::aarch64_sve_lsl_u:
1555 case Intrinsic::aarch64_sve_lsr_u:
1558 case Intrinsic::aarch64_sve_mul_u:
1561 case Intrinsic::aarch64_sve_orr_u:
1564 case Intrinsic::aarch64_sve_sdiv_u:
1567 case Intrinsic::aarch64_sve_sub_u:
1570 case Intrinsic::aarch64_sve_udiv_u:
1574 case Intrinsic::aarch64_sve_addqv:
1575 case Intrinsic::aarch64_sve_and_z:
1576 case Intrinsic::aarch64_sve_bic_z:
1577 case Intrinsic::aarch64_sve_brka_z:
1578 case Intrinsic::aarch64_sve_brkb_z:
1579 case Intrinsic::aarch64_sve_brkn_z:
1580 case Intrinsic::aarch64_sve_brkpa_z:
1581 case Intrinsic::aarch64_sve_brkpb_z:
1582 case Intrinsic::aarch64_sve_cntp:
1583 case Intrinsic::aarch64_sve_compact:
1584 case Intrinsic::aarch64_sve_eor_z:
1585 case Intrinsic::aarch64_sve_eorv:
1586 case Intrinsic::aarch64_sve_eorqv:
1587 case Intrinsic::aarch64_sve_nand_z:
1588 case Intrinsic::aarch64_sve_nor_z:
1589 case Intrinsic::aarch64_sve_orn_z:
1590 case Intrinsic::aarch64_sve_orr_z:
1591 case Intrinsic::aarch64_sve_orv:
1592 case Intrinsic::aarch64_sve_orqv:
1593 case Intrinsic::aarch64_sve_pnext:
1594 case Intrinsic::aarch64_sve_rdffr_z:
1595 case Intrinsic::aarch64_sve_saddv:
1596 case Intrinsic::aarch64_sve_uaddv:
1597 case Intrinsic::aarch64_sve_umaxv:
1598 case Intrinsic::aarch64_sve_umaxqv:
1599 case Intrinsic::aarch64_sve_cmpeq:
1600 case Intrinsic::aarch64_sve_cmpeq_wide:
1601 case Intrinsic::aarch64_sve_cmpge:
1602 case Intrinsic::aarch64_sve_cmpge_wide:
1603 case Intrinsic::aarch64_sve_cmpgt:
1604 case Intrinsic::aarch64_sve_cmpgt_wide:
1605 case Intrinsic::aarch64_sve_cmphi:
1606 case Intrinsic::aarch64_sve_cmphi_wide:
1607 case Intrinsic::aarch64_sve_cmphs:
1608 case Intrinsic::aarch64_sve_cmphs_wide:
1609 case Intrinsic::aarch64_sve_cmple_wide:
1610 case Intrinsic::aarch64_sve_cmplo_wide:
1611 case Intrinsic::aarch64_sve_cmpls_wide:
1612 case Intrinsic::aarch64_sve_cmplt_wide:
1613 case Intrinsic::aarch64_sve_cmpne:
1614 case Intrinsic::aarch64_sve_cmpne_wide:
1615 case Intrinsic::aarch64_sve_facge:
1616 case Intrinsic::aarch64_sve_facgt:
1617 case Intrinsic::aarch64_sve_fcmpeq:
1618 case Intrinsic::aarch64_sve_fcmpge:
1619 case Intrinsic::aarch64_sve_fcmpgt:
1620 case Intrinsic::aarch64_sve_fcmpne:
1621 case Intrinsic::aarch64_sve_fcmpuo:
1622 case Intrinsic::aarch64_sve_ld1:
1623 case Intrinsic::aarch64_sve_ld1_gather:
1624 case Intrinsic::aarch64_sve_ld1_gather_index:
1625 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1626 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1627 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1628 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1629 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1630 case Intrinsic::aarch64_sve_ld1q_gather_index:
1631 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1632 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1633 case Intrinsic::aarch64_sve_ld1ro:
1634 case Intrinsic::aarch64_sve_ld1rq:
1635 case Intrinsic::aarch64_sve_ld1udq:
1636 case Intrinsic::aarch64_sve_ld1uwq:
1637 case Intrinsic::aarch64_sve_ld2_sret:
1638 case Intrinsic::aarch64_sve_ld2q_sret:
1639 case Intrinsic::aarch64_sve_ld3_sret:
1640 case Intrinsic::aarch64_sve_ld3q_sret:
1641 case Intrinsic::aarch64_sve_ld4_sret:
1642 case Intrinsic::aarch64_sve_ld4q_sret:
1643 case Intrinsic::aarch64_sve_ldff1:
1644 case Intrinsic::aarch64_sve_ldff1_gather:
1645 case Intrinsic::aarch64_sve_ldff1_gather_index:
1646 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1647 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1648 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1649 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1650 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1651 case Intrinsic::aarch64_sve_ldnf1:
1652 case Intrinsic::aarch64_sve_ldnt1:
1653 case Intrinsic::aarch64_sve_ldnt1_gather:
1654 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1655 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1656 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1659 case Intrinsic::aarch64_sve_prf:
1660 case Intrinsic::aarch64_sve_prfb_gather_index:
1661 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1662 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1663 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1664 case Intrinsic::aarch64_sve_prfd_gather_index:
1665 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1666 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1667 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1668 case Intrinsic::aarch64_sve_prfh_gather_index:
1669 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1670 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1671 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1672 case Intrinsic::aarch64_sve_prfw_gather_index:
1673 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1674 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1675 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1678 case Intrinsic::aarch64_sve_st1_scatter:
1679 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1680 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1681 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1682 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1683 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1684 case Intrinsic::aarch64_sve_st1dq:
1685 case Intrinsic::aarch64_sve_st1q_scatter_index:
1686 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1687 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1688 case Intrinsic::aarch64_sve_st1wq:
1689 case Intrinsic::aarch64_sve_stnt1:
1690 case Intrinsic::aarch64_sve_stnt1_scatter:
1691 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1692 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1693 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1695 case Intrinsic::aarch64_sve_st2:
1696 case Intrinsic::aarch64_sve_st2q:
1698 case Intrinsic::aarch64_sve_st3:
1699 case Intrinsic::aarch64_sve_st3q:
1701 case Intrinsic::aarch64_sve_st4:
1702 case Intrinsic::aarch64_sve_st4q:
1710 Value *UncastedPred;
1716 Pred = UncastedPred;
1722 if (OrigPredTy->getMinNumElements() <=
1724 ->getMinNumElements())
1725 Pred = UncastedPred;
1729 return C &&
C->isAllOnesValue();
1736 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
1737 Dup->getOperand(1) == Pg &&
isa<Constant>(Dup->getOperand(2)))
1745static std::optional<Instruction *>
1752 Value *Op1 =
II.getOperand(1);
1753 Value *Op2 =
II.getOperand(2);
1779 return std::nullopt;
1787 if (SimpleII == Inactive)
1797static std::optional<Instruction *>
1801 return std::nullopt;
1830 II.setCalledFunction(NewDecl);
1840 return std::nullopt;
1852static std::optional<Instruction *>
1856 return std::nullopt;
1858 auto IntrinsicID = BinOp->getIntrinsicID();
1859 switch (IntrinsicID) {
1860 case Intrinsic::aarch64_sve_and_z:
1861 case Intrinsic::aarch64_sve_bic_z:
1862 case Intrinsic::aarch64_sve_eor_z:
1863 case Intrinsic::aarch64_sve_nand_z:
1864 case Intrinsic::aarch64_sve_nor_z:
1865 case Intrinsic::aarch64_sve_orn_z:
1866 case Intrinsic::aarch64_sve_orr_z:
1869 return std::nullopt;
1872 auto BinOpPred = BinOp->getOperand(0);
1873 auto BinOpOp1 = BinOp->getOperand(1);
1874 auto BinOpOp2 = BinOp->getOperand(2);
1878 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
1879 return std::nullopt;
1881 auto PredOp = PredIntr->getOperand(0);
1883 if (PredOpTy !=
II.getType())
1884 return std::nullopt;
1888 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
1889 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1890 if (BinOpOp1 == BinOpOp2)
1891 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1894 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
1896 auto NarrowedBinOp =
1901static std::optional<Instruction *>
1908 return BinOpCombine;
1913 return std::nullopt;
1916 Value *Cursor =
II.getOperand(0), *EarliestReplacement =
nullptr;
1925 if (CursorVTy->getElementCount().getKnownMinValue() <
1926 IVTy->getElementCount().getKnownMinValue())
1930 if (Cursor->getType() == IVTy)
1931 EarliestReplacement = Cursor;
1936 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
1937 Intrinsic::aarch64_sve_convert_to_svbool ||
1938 IntrinsicCursor->getIntrinsicID() ==
1939 Intrinsic::aarch64_sve_convert_from_svbool))
1942 CandidatesForRemoval.
insert(CandidatesForRemoval.
begin(), IntrinsicCursor);
1943 Cursor = IntrinsicCursor->getOperand(0);
1948 if (!EarliestReplacement)
1949 return std::nullopt;
1957 auto *OpPredicate =
II.getOperand(0);
1974 II.getArgOperand(2));
1980 return std::nullopt;
1984 II.getArgOperand(0),
II.getArgOperand(2),
uint64_t(0));
1993 II.getArgOperand(0));
2003 return std::nullopt;
2008 if (!SplatValue || !SplatValue->isZero())
2009 return std::nullopt;
2014 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
2015 return std::nullopt;
2019 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
2020 return std::nullopt;
2023 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
2024 return std::nullopt;
2029 return std::nullopt;
2032 return std::nullopt;
2036 return std::nullopt;
2040 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
2041 return std::nullopt;
2043 unsigned NumElts = VecTy->getNumElements();
2044 unsigned PredicateBits = 0;
2047 for (
unsigned I = 0;
I < NumElts; ++
I) {
2050 return std::nullopt;
2052 PredicateBits |= 1 << (
I * (16 / NumElts));
2056 if (PredicateBits == 0) {
2058 PFalse->takeName(&
II);
2064 for (
unsigned I = 0;
I < 16; ++
I)
2065 if ((PredicateBits & (1 <<
I)) != 0)
2068 unsigned PredSize = Mask & -Mask;
2073 for (
unsigned I = 0;
I < 16;
I += PredSize)
2074 if ((PredicateBits & (1 <<
I)) == 0)
2075 return std::nullopt;
2080 {PredType}, {PTruePat});
2082 Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
2083 auto *ConvertFromSVBool =
2085 {
II.getType()}, {ConvertToSVBool});
2093 Value *Pg =
II.getArgOperand(0);
2094 Value *Vec =
II.getArgOperand(1);
2095 auto IntrinsicID =
II.getIntrinsicID();
2096 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
2108 auto OpC = OldBinOp->getOpcode();
2114 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(),
II.getIterator());
2120 if (IsAfter &&
C &&
C->isNullValue()) {
2124 Extract->insertBefore(
II.getIterator());
2125 Extract->takeName(&
II);
2131 return std::nullopt;
2133 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
2134 return std::nullopt;
2136 const auto PTruePattern =
2142 return std::nullopt;
2144 unsigned Idx = MinNumElts - 1;
2154 if (Idx >= PgVTy->getMinNumElements())
2155 return std::nullopt;
2160 Extract->insertBefore(
II.getIterator());
2161 Extract->takeName(&
II);
2174 Value *Pg =
II.getArgOperand(0);
2176 Value *Vec =
II.getArgOperand(2);
2179 if (!Ty->isIntegerTy())
2180 return std::nullopt;
2185 return std::nullopt;
2202 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
2215 {
II.getType()}, {AllPat});
2222static std::optional<Instruction *>
2226 if (
Pattern == AArch64SVEPredPattern::all) {
2235 return MinNumElts && NumElts >= MinNumElts
2237 II, ConstantInt::get(
II.getType(), MinNumElts)))
2241static std::optional<Instruction *>
2244 if (!ST->isStreaming())
2245 return std::nullopt;
2257 Value *PgVal =
II.getArgOperand(0);
2258 Value *OpVal =
II.getArgOperand(1);
2262 if (PgVal == OpVal &&
2263 (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
2264 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
2279 return std::nullopt;
2283 if (Pg->
getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
2284 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
2298 if ((Pg ==
Op) && (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
2299 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
2300 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
2301 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
2302 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
2303 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
2304 (OpIID == Intrinsic::aarch64_sve_and_z) ||
2305 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
2306 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
2307 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
2308 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
2309 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
2310 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
2320 return std::nullopt;
2323template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
2324static std::optional<Instruction *>
2326 bool MergeIntoAddendOp) {
2328 Value *MulOp0, *MulOp1, *AddendOp, *
Mul;
2329 if (MergeIntoAddendOp) {
2330 AddendOp =
II.getOperand(1);
2331 Mul =
II.getOperand(2);
2333 AddendOp =
II.getOperand(2);
2334 Mul =
II.getOperand(1);
2339 return std::nullopt;
2341 if (!
Mul->hasOneUse())
2342 return std::nullopt;
2345 if (
II.getType()->isFPOrFPVectorTy()) {
2350 return std::nullopt;
2352 return std::nullopt;
2357 if (MergeIntoAddendOp)
2367static std::optional<Instruction *>
2369 Value *Pred =
II.getOperand(0);
2370 Value *PtrOp =
II.getOperand(1);
2371 Type *VecTy =
II.getType();
2375 Load->copyMetadata(
II);
2386static std::optional<Instruction *>
2388 Value *VecOp =
II.getOperand(0);
2389 Value *Pred =
II.getOperand(1);
2390 Value *PtrOp =
II.getOperand(2);
2394 Store->copyMetadata(
II);
2406 case Intrinsic::aarch64_sve_fmul_u:
2407 return Instruction::BinaryOps::FMul;
2408 case Intrinsic::aarch64_sve_fadd_u:
2409 return Instruction::BinaryOps::FAdd;
2410 case Intrinsic::aarch64_sve_fsub_u:
2411 return Instruction::BinaryOps::FSub;
2413 return Instruction::BinaryOpsEnd;
2417static std::optional<Instruction *>
2420 if (
II.isStrictFP())
2421 return std::nullopt;
2423 auto *OpPredicate =
II.getOperand(0);
2425 if (BinOpCode == Instruction::BinaryOpsEnd ||
2427 return std::nullopt;
2429 BinOpCode,
II.getOperand(1),
II.getOperand(2),
II.getFastMathFlags());
2436 Intrinsic::aarch64_sve_mla>(
2440 Intrinsic::aarch64_sve_mad>(
2443 return std::nullopt;
2446static std::optional<Instruction *>
2450 Intrinsic::aarch64_sve_fmla>(IC,
II,
2455 Intrinsic::aarch64_sve_fmad>(IC,
II,
2460 Intrinsic::aarch64_sve_fmla>(IC,
II,
2463 return std::nullopt;
2466static std::optional<Instruction *>
2470 Intrinsic::aarch64_sve_fmla>(IC,
II,
2475 Intrinsic::aarch64_sve_fmad>(IC,
II,
2480 Intrinsic::aarch64_sve_fmla_u>(
2486static std::optional<Instruction *>
2490 Intrinsic::aarch64_sve_fmls>(IC,
II,
2495 Intrinsic::aarch64_sve_fnmsb>(
2500 Intrinsic::aarch64_sve_fmls>(IC,
II,
2503 return std::nullopt;
2506static std::optional<Instruction *>
2510 Intrinsic::aarch64_sve_fmls>(IC,
II,
2515 Intrinsic::aarch64_sve_fnmsb>(
2520 Intrinsic::aarch64_sve_fmls_u>(
2529 Intrinsic::aarch64_sve_mls>(
2532 return std::nullopt;
2537 Value *UnpackArg =
II.getArgOperand(0);
2539 bool IsSigned =
II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
2540 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
2553 return std::nullopt;
2557 auto *OpVal =
II.getOperand(0);
2558 auto *OpIndices =
II.getOperand(1);
2565 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
2566 return std::nullopt;
2581 Type *RetTy =
II.getType();
2582 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
2583 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
2587 if ((
match(
II.getArgOperand(0),
2594 if (TyA ==
B->getType() &&
2599 TyA->getMinNumElements());
2605 return std::nullopt;
2613 if (
match(
II.getArgOperand(0),
2618 II, (
II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ?
A :
B));
2620 return std::nullopt;
2623static std::optional<Instruction *>
2625 Value *Mask =
II.getOperand(0);
2626 Value *BasePtr =
II.getOperand(1);
2627 Value *Index =
II.getOperand(2);
2638 BasePtr->getPointerAlignment(
II.getDataLayout());
2641 BasePtr, IndexBase);
2648 return std::nullopt;
2651static std::optional<Instruction *>
2653 Value *Val =
II.getOperand(0);
2654 Value *Mask =
II.getOperand(1);
2655 Value *BasePtr =
II.getOperand(2);
2656 Value *Index =
II.getOperand(3);
2666 BasePtr->getPointerAlignment(
II.getDataLayout());
2669 BasePtr, IndexBase);
2675 return std::nullopt;
2681 Value *Pred =
II.getOperand(0);
2682 Value *Vec =
II.getOperand(1);
2683 Value *DivVec =
II.getOperand(2);
2687 if (!SplatConstantInt)
2688 return std::nullopt;
2692 if (DivisorValue == -1)
2693 return std::nullopt;
2694 if (DivisorValue == 1)
2700 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2707 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2709 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2713 return std::nullopt;
2717 size_t VecSize = Vec.
size();
2722 size_t HalfVecSize = VecSize / 2;
2726 if (*
LHS !=
nullptr && *
RHS !=
nullptr) {
2734 if (*
LHS ==
nullptr && *
RHS !=
nullptr)
2752 return std::nullopt;
2759 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2760 CurrentInsertElt = InsertElt->getOperand(0);
2766 return std::nullopt;
2770 for (
size_t I = 0;
I < Elts.
size();
I++) {
2771 if (Elts[
I] ==
nullptr)
2776 if (InsertEltChain ==
nullptr)
2777 return std::nullopt;
2783 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.
size();
2784 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2785 IIScalableTy->getMinNumElements() /
2790 auto *WideShuffleMaskTy =
2801 auto NarrowBitcast =
2814 return std::nullopt;
2819 Value *Pred =
II.getOperand(0);
2820 Value *Vec =
II.getOperand(1);
2821 Value *Shift =
II.getOperand(2);
2824 Value *AbsPred, *MergedValue;
2830 return std::nullopt;
2838 return std::nullopt;
2843 return std::nullopt;
2846 {
II.getType()}, {Pred, Vec, Shift});
2853 Value *Vec =
II.getOperand(0);
2858 return std::nullopt;
2864 auto *NI =
II.getNextNode();
2867 return !
I->mayReadOrWriteMemory() && !
I->mayHaveSideEffects();
2869 while (LookaheadThreshold-- && CanSkipOver(NI)) {
2870 auto *NIBB = NI->getParent();
2871 NI = NI->getNextNode();
2873 if (
auto *SuccBB = NIBB->getUniqueSuccessor())
2874 NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();
2880 if (NextII &&
II.isIdenticalTo(NextII))
2883 return std::nullopt;
2891 {II.getType(), II.getOperand(0)->getType()},
2892 {II.getOperand(0), II.getOperand(1)}));
2899 return std::nullopt;
2905 Value *Passthru =
II.getOperand(0);
2913 auto *Mask = ConstantInt::get(Ty, MaskValue);
2919 return std::nullopt;
2922static std::optional<Instruction *>
2929 return std::nullopt;
2932std::optional<Instruction *>
2943 case Intrinsic::aarch64_dmb:
2945 case Intrinsic::aarch64_neon_fmaxnm:
2946 case Intrinsic::aarch64_neon_fminnm:
2948 case Intrinsic::aarch64_sve_convert_from_svbool:
2950 case Intrinsic::aarch64_sve_dup:
2952 case Intrinsic::aarch64_sve_dup_x:
2954 case Intrinsic::aarch64_sve_cmpne:
2955 case Intrinsic::aarch64_sve_cmpne_wide:
2957 case Intrinsic::aarch64_sve_rdffr:
2959 case Intrinsic::aarch64_sve_lasta:
2960 case Intrinsic::aarch64_sve_lastb:
2962 case Intrinsic::aarch64_sve_clasta_n:
2963 case Intrinsic::aarch64_sve_clastb_n:
2965 case Intrinsic::aarch64_sve_cntd:
2967 case Intrinsic::aarch64_sve_cntw:
2969 case Intrinsic::aarch64_sve_cnth:
2971 case Intrinsic::aarch64_sve_cntb:
2973 case Intrinsic::aarch64_sme_cntsd:
2975 case Intrinsic::aarch64_sve_ptest_any:
2976 case Intrinsic::aarch64_sve_ptest_first:
2977 case Intrinsic::aarch64_sve_ptest_last:
2979 case Intrinsic::aarch64_sve_fadd:
2981 case Intrinsic::aarch64_sve_fadd_u:
2983 case Intrinsic::aarch64_sve_fmul_u:
2985 case Intrinsic::aarch64_sve_fsub:
2987 case Intrinsic::aarch64_sve_fsub_u:
2989 case Intrinsic::aarch64_sve_add:
2991 case Intrinsic::aarch64_sve_add_u:
2993 Intrinsic::aarch64_sve_mla_u>(
2995 case Intrinsic::aarch64_sve_sub:
2997 case Intrinsic::aarch64_sve_sub_u:
2999 Intrinsic::aarch64_sve_mls_u>(
3001 case Intrinsic::aarch64_sve_tbl:
3003 case Intrinsic::aarch64_sve_uunpkhi:
3004 case Intrinsic::aarch64_sve_uunpklo:
3005 case Intrinsic::aarch64_sve_sunpkhi:
3006 case Intrinsic::aarch64_sve_sunpklo:
3008 case Intrinsic::aarch64_sve_uzp1:
3010 case Intrinsic::aarch64_sve_zip1:
3011 case Intrinsic::aarch64_sve_zip2:
3013 case Intrinsic::aarch64_sve_ld1_gather_index:
3015 case Intrinsic::aarch64_sve_st1_scatter_index:
3017 case Intrinsic::aarch64_sve_ld1:
3019 case Intrinsic::aarch64_sve_st1:
3021 case Intrinsic::aarch64_sve_sdiv:
3023 case Intrinsic::aarch64_sve_sel:
3025 case Intrinsic::aarch64_sve_srshl:
3027 case Intrinsic::aarch64_sve_dupq_lane:
3029 case Intrinsic::aarch64_sve_insr:
3031 case Intrinsic::aarch64_sve_whilelo:
3033 case Intrinsic::aarch64_sve_ptrue:
3035 case Intrinsic::aarch64_sve_uxtb:
3037 case Intrinsic::aarch64_sve_uxth:
3039 case Intrinsic::aarch64_sve_uxtw:
3041 case Intrinsic::aarch64_sme_in_streaming_mode:
3045 return std::nullopt;
3052 SimplifyAndSetOp)
const {
3053 switch (
II.getIntrinsicID()) {
3056 case Intrinsic::aarch64_neon_fcvtxn:
3057 case Intrinsic::aarch64_neon_rshrn:
3058 case Intrinsic::aarch64_neon_sqrshrn:
3059 case Intrinsic::aarch64_neon_sqrshrun:
3060 case Intrinsic::aarch64_neon_sqshrn:
3061 case Intrinsic::aarch64_neon_sqshrun:
3062 case Intrinsic::aarch64_neon_sqxtn:
3063 case Intrinsic::aarch64_neon_sqxtun:
3064 case Intrinsic::aarch64_neon_uqrshrn:
3065 case Intrinsic::aarch64_neon_uqshrn:
3066 case Intrinsic::aarch64_neon_uqxtn:
3067 SimplifyAndSetOp(&
II, 0, OrigDemandedElts, UndefElts);
3071 return std::nullopt;
3075 return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3085 if (ST->useSVEForFixedLengthVectors() &&
3088 std::max(ST->getMinSVEVectorSizeInBits(), 128u));
3089 else if (ST->isNeonAvailable())
3094 if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3103bool AArch64TTIImpl::isSingleExtWideningInstruction(
3105 Type *SrcOverrideTy)
const {
3120 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3123 Type *SrcTy = SrcOverrideTy;
3125 case Instruction::Add:
3126 case Instruction::Sub: {
3135 if (Opcode == Instruction::Sub)
3159 assert(SrcTy &&
"Expected some SrcTy");
3161 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
3167 DstTyL.first * DstTyL.second.getVectorMinNumElements();
3169 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
3173 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
3176Type *AArch64TTIImpl::isBinExtWideningInstruction(
unsigned Opcode,
Type *DstTy,
3178 Type *SrcOverrideTy)
const {
3179 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
3180 Opcode != Instruction::Mul)
3190 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3193 auto getScalarSizeWithOverride = [&](
const Value *
V) {
3199 ->getScalarSizeInBits();
3202 unsigned MaxEltSize = 0;
3205 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3206 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3207 MaxEltSize = std::max(EltSize0, EltSize1);
3210 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3211 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3214 if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
3216 MaxEltSize = DstEltSize / 2;
3217 }
else if (Opcode == Instruction::Mul &&
3230 getScalarSizeWithOverride(
isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);
3234 if (MaxEltSize * 2 > DstEltSize)
3252 if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(
DL, Src)) ||
3253 (Src->isScalableTy() && !ST->hasSVE2()))
3263 if (AddUser && AddUser->getOpcode() == Instruction::Add)
3267 if (!Shr || Shr->getOpcode() != Instruction::LShr)
3271 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
3272 Src->getScalarSizeInBits() !=
3296 int ISD = TLI->InstructionOpcodeToISD(Opcode);
3300 if (
I &&
I->hasOneUser()) {
3303 if (
Type *ExtTy = isBinExtWideningInstruction(
3304 SingleUser->getOpcode(), Dst, Operands,
3305 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3318 if (isSingleExtWideningInstruction(
3319 SingleUser->getOpcode(), Dst, Operands,
3320 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3324 if (SingleUser->getOpcode() == Instruction::Add) {
3325 if (
I == SingleUser->getOperand(1) ||
3327 cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
3342 EVT SrcTy = TLI->getValueType(
DL, Src);
3343 EVT DstTy = TLI->getValueType(
DL, Dst);
3345 if (!SrcTy.isSimple() || !DstTy.
isSimple())
3350 if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
3382 const unsigned int SVE_EXT_COST = 1;
3383 const unsigned int SVE_FCVT_COST = 1;
3384 const unsigned int SVE_UNPACK_ONCE = 4;
3385 const unsigned int SVE_UNPACK_TWICE = 16;
3514 SVE_EXT_COST + SVE_FCVT_COST},
3519 SVE_EXT_COST + SVE_FCVT_COST},
3526 SVE_EXT_COST + SVE_FCVT_COST},
3530 SVE_EXT_COST + SVE_FCVT_COST},
3536 SVE_EXT_COST + SVE_FCVT_COST},
3539 SVE_EXT_COST + SVE_FCVT_COST},
3544 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3546 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3556 SVE_EXT_COST + SVE_FCVT_COST},
3561 SVE_EXT_COST + SVE_FCVT_COST},
3574 SVE_EXT_COST + SVE_FCVT_COST},
3578 SVE_EXT_COST + SVE_FCVT_COST},
3590 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3592 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3594 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3596 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3600 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3602 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3618 SVE_EXT_COST + SVE_FCVT_COST},
3623 SVE_EXT_COST + SVE_FCVT_COST},
3634 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3636 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3638 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3640 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3642 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3644 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3648 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3650 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3652 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3654 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3853 EVT WiderTy = SrcTy.
bitsGT(DstTy) ? SrcTy : DstTy;
3856 ST->useSVEForFixedLengthVectors(WiderTy)) {
3857 std::pair<InstructionCost, MVT> LT =
3859 unsigned NumElements =
3898 if (ST->hasFullFP16())
3910 Src->getScalarType(), CCH,
CostKind) +
3918 ST->isSVEorStreamingSVEAvailable() &&
3919 TLI->getTypeAction(Src->getContext(), SrcTy) ==
3921 TLI->getTypeAction(Dst->getContext(), DstTy) ==
3930 Opcode, LegalTy, Src, CCH,
CostKind,
I);
3933 return Part1 + Part2;
3940 ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))
3952 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
3965 CostKind, Index,
nullptr,
nullptr);
3969 auto DstVT = TLI->getValueType(
DL, Dst);
3970 auto SrcVT = TLI->getValueType(
DL, Src);
3975 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
3981 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
3991 case Instruction::SExt:
3996 case Instruction::ZExt:
3997 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
4010 return Opcode == Instruction::PHI ? 0 : 1;
4019 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4028 if (!LT.second.isVector())
4033 if (LT.second.isFixedLengthVector()) {
4034 unsigned Width = LT.second.getVectorNumElements();
4035 Index = Index % Width;
4050 if (ST->hasFastLD1Single())
4062 : ST->getVectorInsertExtractBaseCost() + 1;
4086 auto ExtractCanFuseWithFmul = [&]() {
4093 auto IsAllowedScalarTy = [&](
const Type *
T) {
4094 return T->isFloatTy() ||
T->isDoubleTy() ||
4095 (
T->isHalfTy() && ST->hasFullFP16());
4099 auto IsUserFMulScalarTy = [](
const Value *EEUser) {
4102 return BO && BO->getOpcode() == BinaryOperator::FMul &&
4103 !BO->getType()->isVectorTy();
4108 auto IsExtractLaneEquivalentToZero = [&](
unsigned Idx,
unsigned EltSz) {
4112 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
4121 DenseMap<User *, unsigned> UserToExtractIdx;
4122 for (
auto *U :
Scalar->users()) {
4123 if (!IsUserFMulScalarTy(U))
4127 UserToExtractIdx[
U];
4129 if (UserToExtractIdx.
empty())
4131 for (
auto &[S, U, L] : ScalarUserAndIdx) {
4132 for (
auto *U : S->users()) {
4133 if (UserToExtractIdx.
contains(U)) {
4135 auto *Op0 =
FMul->getOperand(0);
4136 auto *Op1 =
FMul->getOperand(1);
4137 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
4138 UserToExtractIdx[
U] =
L;
4144 for (
auto &[U, L] : UserToExtractIdx) {
4156 return !EE->users().empty() &&
all_of(EE->users(), [&](
const User *U) {
4157 if (!IsUserFMulScalarTy(U))
4162 const auto *BO = cast<BinaryOperator>(U);
4163 const auto *OtherEE = dyn_cast<ExtractElementInst>(
4164 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
4166 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
4169 return IsExtractLaneEquivalentToZero(
4170 cast<ConstantInt>(OtherEE->getIndexOperand())
4173 OtherEE->getType()->getScalarSizeInBits());
4181 if (Opcode == Instruction::ExtractElement && (
I || Scalar) &&
4182 ExtractCanFuseWithFmul())
4187 :
ST->getVectorInsertExtractBaseCost();
4196 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
4199 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr,
4205 Value *Scalar,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4207 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr, Scalar,
4208 ScalarUserAndIdx, VIC);
4215 return getVectorInstrCostHelper(
I.getOpcode(), Val,
CostKind, Index, &
I,
4222 unsigned Index)
const {
4234 : ST->getVectorInsertExtractBaseCost() + 1;
4243 if (Ty->getElementType()->isFloatingPointTy())
4246 unsigned VecInstCost =
4248 return DemandedElts.
popcount() * (Insert + Extract) * VecInstCost;
4255 if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
4256 return std::nullopt;
4257 if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
4258 return std::nullopt;
4259 if (CanUseSVE && Ty->isScalableTy() && ST->hasSVEB16B16() &&
4260 ST->isNonStreamingSVEorSME2Available())
4261 return std::nullopt;
4268 Cost += InstCost(PromotedTy);
4291 Op2Info, Args, CxtI);
4295 int ISD = TLI->InstructionOpcodeToISD(Opcode);
4302 Ty,
CostKind, Op1Info, Op2Info,
true,
4305 [&](
Type *PromotedTy) {
4309 return *PromotedCost;
4315 if (
Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {
4382 auto VT = TLI->getValueType(
DL, Ty);
4383 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
4387 : (3 * AsrCost + AddCost);
4389 return MulCost + AsrCost + 2 * AddCost;
4391 }
else if (VT.isVector()) {
4401 if (Ty->isScalableTy() && ST->hasSVE())
4402 Cost += 2 * AsrCost;
4407 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
4411 }
else if (LT.second == MVT::v2i64) {
4412 return VT.getVectorNumElements() *
4419 if (Ty->isScalableTy() && ST->hasSVE())
4420 return MulCost + 2 * AddCost + 2 * AsrCost;
4421 return 2 * MulCost + AddCost + AsrCost + UsraCost;
4426 LT.second.isFixedLengthVector()) {
4436 return ExtractCost + InsertCost +
4444 auto VT = TLI->getValueType(
DL, Ty);
4460 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
4461 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
4462 LT.second == MVT::nxv16i8;
4463 bool Is128bit = LT.second.is128BitVector();
4475 (HasMULH ? 0 : ShrCost) +
4476 AddCost * 2 + ShrCost;
4477 return DivCost + (
ISD ==
ISD::UREM ? MulCost + AddCost : 0);
4484 if (!VT.isVector() && VT.getSizeInBits() > 64)
4488 Opcode, Ty,
CostKind, Op1Info, Op2Info);
4490 if (TLI->isOperationLegalOrCustom(
ISD, LT.second) && ST->hasSVE()) {
4494 Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
4504 if (
nullptr != Entry)
4509 if (LT.second.getScalarType() == MVT::i8)
4511 else if (LT.second.getScalarType() == MVT::i16)
4523 Opcode, Ty->getScalarType(),
CostKind, Op1Info, Op2Info);
4524 return (4 + DivCost) * VTy->getNumElements();
4530 -1,
nullptr,
nullptr);
4544 if (LT.second == MVT::v2i64 && ST->hasSVE())
4557 if (LT.second != MVT::v2i64)
4579 if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
4580 (Ty->isHalfTy() && ST->hasFullFP16())) &&
4589 if (!Ty->getScalarType()->isFP128Ty())
4596 if (!Ty->getScalarType()->isFP128Ty())
4597 return 2 * LT.first;
4604 if (!Ty->isVectorTy())
4620 int MaxMergeDistance = 64;
4624 return NumVectorInstToHideOverhead;
4634 unsigned Opcode1,
unsigned Opcode2)
const {
4637 if (!
Sched.hasInstrSchedModel())
4641 Sched.getSchedClassDesc(
TII->get(Opcode1).getSchedClass());
4643 Sched.getSchedClassDesc(
TII->get(Opcode2).getSchedClass());
4649 "Cannot handle variant scheduling classes without an MI");
4665 const int AmortizationCost = 20;
4673 VecPred = CurrentPred;
4681 static const auto ValidMinMaxTys = {
4682 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
4683 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
4684 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
4688 (ST->hasFullFP16() &&
4694 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4695 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4696 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4697 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4698 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4699 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4700 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4701 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4702 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4703 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4704 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
4706 EVT SelCondTy = TLI->getValueType(
DL, CondTy);
4707 EVT SelValTy = TLI->getValueType(
DL, ValTy);
4716 if (Opcode == Instruction::FCmp) {
4718 ValTy,
CostKind, Op1Info, Op2Info,
false,
4720 false, [&](
Type *PromotedTy) {
4732 return *PromotedCost;
4736 if (LT.second.getScalarType() != MVT::f64 &&
4737 LT.second.getScalarType() != MVT::f32 &&
4738 LT.second.getScalarType() != MVT::f16)
4743 unsigned Factor = 1;
4758 AArch64::FCMEQv4f32))
4770 TLI->isTypeLegal(TLI->getValueType(
DL, ValTy)) &&
4789 Op1Info, Op2Info,
I);
4795 if (ST->requiresStrictAlign()) {
4800 Options.AllowOverlappingLoads =
true;
4801 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
4806 Options.LoadSizes = {8, 4, 2, 1};
4807 Options.AllowedTailExpansions = {3, 5, 6};
4812 return ST->hasSVE();
4818 switch (MICA.
getID()) {
4819 case Intrinsic::masked_scatter:
4820 case Intrinsic::masked_gather:
4822 case Intrinsic::masked_load:
4823 case Intrinsic::masked_store:
4837 if (!LT.first.isValid())
4842 if (VT->getElementType()->isIntegerTy(1))
4859 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4860 "Should be called on only load or stores.");
4862 case Instruction::Load:
4865 return ST->getGatherOverhead();
4867 case Instruction::Store:
4870 return ST->getScatterOverhead();
4881 unsigned Opcode = (MICA.
getID() == Intrinsic::masked_gather ||
4882 MICA.
getID() == Intrinsic::vp_gather)
4884 : Instruction::Store;
4894 if (!LT.first.isValid())
4898 if (!LT.second.isVector() ||
4900 VT->getElementType()->isIntegerTy(1))
4910 ElementCount LegalVF = LT.second.getVectorElementCount();
4913 {TTI::OK_AnyValue, TTI::OP_None},
I);
4929 EVT VT = TLI->getValueType(
DL, Ty,
true);
4931 if (VT == MVT::Other)
4936 if (!LT.first.isValid())
4946 (VTy->getElementType()->isIntegerTy(1) &&
4947 !VTy->getElementCount().isKnownMultipleOf(
4958 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
4959 LT.second.is128BitVector() && Alignment <
Align(16)) {
4965 const int AmortizationCost = 6;
4967 return LT.first * 2 * AmortizationCost;
4971 if (Ty->isPtrOrPtrVectorTy())
4976 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
4978 if (VT == MVT::v4i8)
4985 if (!
isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
5000 while (!TypeWorklist.
empty()) {
5022 bool UseMaskForCond,
bool UseMaskForGaps)
const {
5023 assert(Factor >= 2 &&
"Invalid interleave factor");
5038 if (!VecTy->
isScalableTy() && (UseMaskForCond || UseMaskForGaps))
5041 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
5042 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
5045 VecVTy->getElementCount().divideCoefficientBy(Factor));
5051 if (MinElts % Factor == 0 &&
5052 TLI->isLegalInterleavedAccessType(SubVecTy,
DL, UseScalable))
5053 return Factor * TLI->getNumInterleavedAccesses(SubVecTy,
DL, UseScalable);
5058 UseMaskForCond, UseMaskForGaps);
5065 for (
auto *
I : Tys) {
5066 if (!
I->isVectorTy())
5077 return ST->getMaxInterleaveFactor();
5087 enum { MaxStridedLoads = 7 };
5089 int StridedLoads = 0;
5092 for (
const auto BB : L->blocks()) {
5093 for (
auto &
I : *BB) {
5099 if (L->isLoopInvariant(PtrValue))
5104 if (!LSCEVAddRec || !LSCEVAddRec->
isAffine())
5113 if (StridedLoads > MaxStridedLoads / 2)
5114 return StridedLoads;
5117 return StridedLoads;
5120 int StridedLoads = countStridedLoads(L, SE);
5122 <<
" strided loads\n");
5138 unsigned *FinalSize) {
5142 for (
auto *BB : L->getBlocks()) {
5143 for (
auto &
I : *BB) {
5149 if (!Cost.isValid())
5153 if (LoopCost > Budget)
5175 if (MaxTC > 0 && MaxTC <= 32)
5186 if (Blocks.
size() != 2)
5208 if (!L->isInnermost() || L->getNumBlocks() > 8)
5212 if (!L->getExitBlock())
5218 bool HasParellelizableReductions =
5219 L->getNumBlocks() == 1 &&
5220 any_of(L->getHeader()->phis(),
5222 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5225 if (HasParellelizableReductions &&
5247 if (HasParellelizableReductions) {
5258 if (Header == Latch) {
5261 unsigned Width = 10;
5267 unsigned MaxInstsPerLine = 16;
5269 unsigned BestUC = 1;
5270 unsigned SizeWithBestUC = BestUC *
Size;
5272 unsigned SizeWithUC = UC *
Size;
5273 if (SizeWithUC > 48)
5275 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5276 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5278 SizeWithBestUC = BestUC *
Size;
5288 for (
auto *BB : L->blocks()) {
5289 for (
auto &
I : *BB) {
5299 for (
auto *U :
I.users())
5301 LoadedValuesPlus.
insert(U);
5308 return LoadedValuesPlus.
contains(
SI->getOperand(0));
5321 if (!Term || !Term->isConditional() || Preds.
size() == 1 ||
5335 auto *I = dyn_cast<Instruction>(V);
5336 return I && DependsOnLoopLoad(I, Depth + 1);
5343 DependsOnLoopLoad(
I, 0)) {
5359 if (L->getLoopDepth() > 1)
5370 for (
auto *BB : L->getBlocks()) {
5371 for (
auto &
I : *BB) {
5375 if (IsVectorized &&
I.getType()->isVectorTy())
5392 if (ST->isAppleMLike())
5394 else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
5416 !ST->getSchedModel().isOutOfOrder()) {
5439 bool CanCreate)
const {
5443 case Intrinsic::aarch64_neon_st2:
5444 case Intrinsic::aarch64_neon_st3:
5445 case Intrinsic::aarch64_neon_st4: {
5448 if (!CanCreate || !ST)
5450 unsigned NumElts = Inst->
arg_size() - 1;
5451 if (ST->getNumElements() != NumElts)
5453 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5459 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5461 Res = Builder.CreateInsertValue(Res, L, i);
5465 case Intrinsic::aarch64_neon_ld2:
5466 case Intrinsic::aarch64_neon_ld3:
5467 case Intrinsic::aarch64_neon_ld4:
5468 if (Inst->
getType() == ExpectedType)
5479 case Intrinsic::aarch64_neon_ld2:
5480 case Intrinsic::aarch64_neon_ld3:
5481 case Intrinsic::aarch64_neon_ld4:
5482 Info.ReadMem =
true;
5483 Info.WriteMem =
false;
5486 case Intrinsic::aarch64_neon_st2:
5487 case Intrinsic::aarch64_neon_st3:
5488 case Intrinsic::aarch64_neon_st4:
5489 Info.ReadMem =
false;
5490 Info.WriteMem =
true;
5498 case Intrinsic::aarch64_neon_ld2:
5499 case Intrinsic::aarch64_neon_st2:
5500 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5502 case Intrinsic::aarch64_neon_ld3:
5503 case Intrinsic::aarch64_neon_st3:
5504 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5506 case Intrinsic::aarch64_neon_ld4:
5507 case Intrinsic::aarch64_neon_st4:
5508 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
5520 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
5521 bool Considerable =
false;
5522 AllowPromotionWithoutCommonHeader =
false;
5525 Type *ConsideredSExtType =
5527 if (
I.getType() != ConsideredSExtType)
5531 for (
const User *U :
I.users()) {
5533 Considerable =
true;
5537 if (GEPInst->getNumOperands() > 2) {
5538 AllowPromotionWithoutCommonHeader =
true;
5543 return Considerable;
5592 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5602 return LegalizationCost + 2;
5612 LegalizationCost *= LT.first - 1;
5615 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5624 return LegalizationCost + 2;
5632 std::optional<FastMathFlags> FMF,
5648 return BaseCost + FixedVTy->getNumElements();
5651 if (Opcode != Instruction::FAdd)
5665 MVT MTy = LT.second;
5666 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5714 MTy.
isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5715 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5717 if (ValTy->getElementCount().getFixedValue() >= 2 && NElts >= 2 &&
5727 return (LT.first - 1) +
Log2_32(NElts);
5732 return (LT.first - 1) + Entry->Cost;
5744 if (LT.first != 1) {
5750 ExtraCost *= LT.first - 1;
5753 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5754 return Cost + ExtraCost;
5762 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *VecTy,
5764 EVT VecVT = TLI->getValueType(
DL, VecTy);
5765 EVT ResVT = TLI->getValueType(
DL, ResTy);
5775 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5777 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5779 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5781 return (LT.first - 1) * 2 + 2;
5792 EVT VecVT = TLI->getValueType(
DL, VecTy);
5793 EVT ResVT = TLI->getValueType(
DL, ResTy);
5796 RedOpcode == Instruction::Add) {
5802 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5804 return LT.first + 2;
5839 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5840 ? TLI->getPromotedVTForPredicate(
EVT(LT.second))
5854 if (LT.second.getScalarType() == MVT::i1) {
5863 assert(Entry &&
"Illegal Type for Splice");
5864 LegalizationCost += Entry->Cost;
5865 return LegalizationCost * LT.first;
5869 unsigned Opcode,
Type *InputTypeA,
Type *InputTypeB,
Type *AccumType,
5878 if (VF.
isFixed() && !ST->isSVEorStreamingSVEAvailable() &&
5879 (!ST->isNeonAvailable() || !ST->hasDotProd()))
5882 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub &&
5883 Opcode != Instruction::FAdd) ||
5890 assert(FMF &&
"Missing FastMathFlags for floating-point partial reduction");
5891 if (!FMF->allowReassoc() || !FMF->allowContract())
5895 "FastMathFlags only apply to floating-point partial reductions");
5899 (!BinOp || (OpBExtend !=
TTI::PR_None && InputTypeB)) &&
5900 "Unexpected values for OpBExtend or InputTypeB");
5904 if (BinOp && ((*BinOp != Instruction::Mul && *BinOp != Instruction::FMul) ||
5905 InputTypeA != InputTypeB))
5908 bool IsUSDot = OpBExtend !=
TTI::PR_None && OpAExtend != OpBExtend;
5909 if (IsUSDot && !ST->hasMatMulInt8())
5921 auto TC = TLI->getTypeConversion(AccumVectorType->
getContext(),
5930 if (TLI->getTypeAction(AccumVectorType->
getContext(), TC.second) !=
5936 std::pair<InstructionCost, MVT> AccumLT =
5938 std::pair<InstructionCost, MVT> InputLT =
5945 if (Opcode == Instruction::Sub)
5956 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
5958 if (AccumLT.second.getScalarType() == MVT::i64 &&
5959 InputLT.second.getScalarType() == MVT::i16)
5962 if (AccumLT.second.getScalarType() == MVT::i32 &&
5963 InputLT.second.getScalarType() == MVT::i16 &&
5964 (ST->hasSVE2p1() || ST->hasSME2()))
5967 if (AccumLT.second.getScalarType() == MVT::i64 &&
5968 InputLT.second.getScalarType() == MVT::i8)
5978 if (ST->isSVEorStreamingSVEAvailable() ||
5979 (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() &&
5980 ST->hasDotProd())) {
5981 if (AccumLT.second.getScalarType() == MVT::i32 &&
5982 InputLT.second.getScalarType() == MVT::i8)
5987 if (Opcode == Instruction::FAdd && (ST->hasSME2() || ST->hasSVE2p1())) {
5988 if (AccumLT.second.getScalarType() == MVT::f32 &&
5989 InputLT.second.getScalarType() == MVT::f16 &&
5990 AccumLT.second.getVectorMinNumElements() == 4 &&
5991 InputLT.second.getVectorMinNumElements() == 8)
6010 "Expected the Mask to match the return size if given");
6012 "Expected the same scalar types");
6018 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
6019 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
6020 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
6028 return std::max<InstructionCost>(1, LT.first / 4);
6036 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
6038 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
6041 unsigned TpNumElts = Mask.size();
6042 unsigned LTNumElts = LT.second.getVectorNumElements();
6043 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
6045 LT.second.getVectorElementCount());
6047 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>,
InstructionCost>
6049 for (
unsigned N = 0;
N < NumVecs;
N++) {
6053 unsigned Source1 = -1U, Source2 = -1U;
6054 unsigned NumSources = 0;
6055 for (
unsigned E = 0; E < LTNumElts; E++) {
6056 int MaskElt = (
N * LTNumElts + E < TpNumElts) ? Mask[
N * LTNumElts + E]
6065 unsigned Source = MaskElt / LTNumElts;
6066 if (NumSources == 0) {
6069 }
else if (NumSources == 1 && Source != Source1) {
6072 }
else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
6078 if (Source == Source1)
6080 else if (Source == Source2)
6081 NMask.
push_back(MaskElt % LTNumElts + LTNumElts);
6090 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
6101 NTp, NTp, NMask,
CostKind, 0,
nullptr, Args,
6104 Result.first->second = NCost;
6118 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
6119 if (LT.second.getFixedSizeInBits() >= 128 &&
6121 LT.second.getVectorNumElements() / 2) {
6124 if (Index == (
int)LT.second.getVectorNumElements() / 2)
6138 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6141 return M.value() < 0 || M.value() == (int)M.index();
6147 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6148 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6157 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6158 ST->isSVEorStreamingSVEAvailable() &&
6163 if (ST->isSVEorStreamingSVEAvailable() &&
6177 if (IsLoad && LT.second.isVector() &&
6179 LT.second.getVectorElementCount()))
6185 if (Mask.size() == 4 &&
6187 (SrcTy->getScalarSizeInBits() == 16 ||
6188 SrcTy->getScalarSizeInBits() == 32) &&
6189 all_of(Mask, [](
int E) {
return E < 8; }))
6195 if (LT.second.isFixedLengthVector() &&
6196 LT.second.getVectorNumElements() == Mask.size() &&
6202 (
isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6203 isTRNMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6204 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6205 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6206 LT.second.getVectorNumElements(), 16) ||
6207 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6208 LT.second.getVectorNumElements(), 32) ||
6209 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6210 LT.second.getVectorNumElements(), 64) ||
6213 [&Mask](
int M) {
return M < 0 || M == Mask[0]; })))
6342 return LT.first * Entry->Cost;
6351 LT.second.getSizeInBits() <= 128 && SubTp) {
6353 if (SubLT.second.isVector()) {
6354 int NumElts = LT.second.getVectorNumElements();
6355 int NumSubElts = SubLT.second.getVectorNumElements();
6356 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6362 if (IsExtractSubvector)
6379 if (
getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
6398 return ST->useFixedOverScalableIfEqualCost();
6402 return ST->getEpilogueVectorizationMinVF();
6437 unsigned NumInsns = 0;
6439 NumInsns += BB->sizeWithoutDebug();
6449 int64_t Scale,
unsigned AddrSpace)
const {
6477 if (
I->getOpcode() == Instruction::Or &&
6482 if (
I->getOpcode() == Instruction::Add ||
6483 I->getOpcode() == Instruction::Sub)
6508 return all_equal(Shuf->getShuffleMask());
6515 bool AllowSplat =
false) {
6520 auto areTypesHalfed = [](
Value *FullV,
Value *HalfV) {
6521 auto *FullTy = FullV->
getType();
6522 auto *HalfTy = HalfV->getType();
6524 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6527 auto extractHalf = [](
Value *FullV,
Value *HalfV) {
6530 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6534 Value *S1Op1 =
nullptr, *S2Op1 =
nullptr;
6548 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6549 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6563 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6564 (M2Start != 0 && M2Start != (NumElements / 2)))
6566 if (S1Op1 && S2Op1 && M1Start != M2Start)
6576 return Ext->getType()->getScalarSizeInBits() ==
6577 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6591 Value *VectorOperand =
nullptr;
6608 if (!
GEP ||
GEP->getNumOperands() != 2)
6612 Value *Offsets =
GEP->getOperand(1);
6615 if (
Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6621 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6622 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6623 Ops.push_back(&
GEP->getOperandUse(1));
6659 switch (
II->getIntrinsicID()) {
6660 case Intrinsic::aarch64_neon_smull:
6661 case Intrinsic::aarch64_neon_umull:
6664 Ops.push_back(&
II->getOperandUse(0));
6665 Ops.push_back(&
II->getOperandUse(1));
6670 case Intrinsic::fma:
6671 case Intrinsic::fmuladd:
6678 Ops.push_back(&
II->getOperandUse(0));
6680 Ops.push_back(&
II->getOperandUse(1));
6683 case Intrinsic::aarch64_neon_sqdmull:
6684 case Intrinsic::aarch64_neon_sqdmulh:
6685 case Intrinsic::aarch64_neon_sqrdmulh:
6688 Ops.push_back(&
II->getOperandUse(0));
6690 Ops.push_back(&
II->getOperandUse(1));
6691 return !
Ops.empty();
6692 case Intrinsic::aarch64_neon_fmlal:
6693 case Intrinsic::aarch64_neon_fmlal2:
6694 case Intrinsic::aarch64_neon_fmlsl:
6695 case Intrinsic::aarch64_neon_fmlsl2:
6698 Ops.push_back(&
II->getOperandUse(1));
6700 Ops.push_back(&
II->getOperandUse(2));
6701 return !
Ops.empty();
6702 case Intrinsic::aarch64_sve_ptest_first:
6703 case Intrinsic::aarch64_sve_ptest_last:
6705 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6706 Ops.push_back(&
II->getOperandUse(0));
6707 return !
Ops.empty();
6708 case Intrinsic::aarch64_sme_write_horiz:
6709 case Intrinsic::aarch64_sme_write_vert:
6710 case Intrinsic::aarch64_sme_writeq_horiz:
6711 case Intrinsic::aarch64_sme_writeq_vert: {
6713 if (!Idx || Idx->getOpcode() != Instruction::Add)
6715 Ops.push_back(&
II->getOperandUse(1));
6718 case Intrinsic::aarch64_sme_read_horiz:
6719 case Intrinsic::aarch64_sme_read_vert:
6720 case Intrinsic::aarch64_sme_readq_horiz:
6721 case Intrinsic::aarch64_sme_readq_vert:
6722 case Intrinsic::aarch64_sme_ld1b_vert:
6723 case Intrinsic::aarch64_sme_ld1h_vert:
6724 case Intrinsic::aarch64_sme_ld1w_vert:
6725 case Intrinsic::aarch64_sme_ld1d_vert:
6726 case Intrinsic::aarch64_sme_ld1q_vert:
6727 case Intrinsic::aarch64_sme_st1b_vert:
6728 case Intrinsic::aarch64_sme_st1h_vert:
6729 case Intrinsic::aarch64_sme_st1w_vert:
6730 case Intrinsic::aarch64_sme_st1d_vert:
6731 case Intrinsic::aarch64_sme_st1q_vert:
6732 case Intrinsic::aarch64_sme_ld1b_horiz:
6733 case Intrinsic::aarch64_sme_ld1h_horiz:
6734 case Intrinsic::aarch64_sme_ld1w_horiz:
6735 case Intrinsic::aarch64_sme_ld1d_horiz:
6736 case Intrinsic::aarch64_sme_ld1q_horiz:
6737 case Intrinsic::aarch64_sme_st1b_horiz:
6738 case Intrinsic::aarch64_sme_st1h_horiz:
6739 case Intrinsic::aarch64_sme_st1w_horiz:
6740 case Intrinsic::aarch64_sme_st1d_horiz:
6741 case Intrinsic::aarch64_sme_st1q_horiz: {
6743 if (!Idx || Idx->getOpcode() != Instruction::Add)
6745 Ops.push_back(&
II->getOperandUse(3));
6748 case Intrinsic::aarch64_neon_pmull:
6751 Ops.push_back(&
II->getOperandUse(0));
6752 Ops.push_back(&
II->getOperandUse(1));
6754 case Intrinsic::aarch64_neon_pmull64:
6756 II->getArgOperand(1)))
6758 Ops.push_back(&
II->getArgOperandUse(0));
6759 Ops.push_back(&
II->getArgOperandUse(1));
6761 case Intrinsic::masked_gather:
6764 Ops.push_back(&
II->getArgOperandUse(0));
6766 case Intrinsic::masked_scatter:
6769 Ops.push_back(&
II->getArgOperandUse(1));
6776 auto ShouldSinkCondition = [](
Value *
Cond,
6781 if (
II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6785 Ops.push_back(&
II->getOperandUse(0));
6789 switch (
I->getOpcode()) {
6790 case Instruction::GetElementPtr:
6791 case Instruction::Add:
6792 case Instruction::Sub:
6794 for (
unsigned Op = 0;
Op <
I->getNumOperands(); ++
Op) {
6796 Ops.push_back(&
I->getOperandUse(
Op));
6801 case Instruction::Select: {
6802 if (!ShouldSinkCondition(
I->getOperand(0),
Ops))
6805 Ops.push_back(&
I->getOperandUse(0));
6808 case Instruction::Br: {
6815 Ops.push_back(&
I->getOperandUse(0));
6818 case Instruction::FMul:
6823 Ops.push_back(&
I->getOperandUse(0));
6825 Ops.push_back(&
I->getOperandUse(1));
6833 if (!
I->getType()->isVectorTy())
6834 return !
Ops.empty();
6836 switch (
I->getOpcode()) {
6837 case Instruction::Sub:
6838 case Instruction::Add: {
6847 Ops.push_back(&Ext1->getOperandUse(0));
6848 Ops.push_back(&Ext2->getOperandUse(0));
6851 Ops.push_back(&
I->getOperandUse(0));
6852 Ops.push_back(&
I->getOperandUse(1));
6856 case Instruction::Or: {
6859 if (ST->hasNEON()) {
6873 if (
I->getParent() != MainAnd->
getParent() ||
6878 if (
I->getParent() != IA->getParent() ||
6879 I->getParent() != IB->getParent())
6884 Ops.push_back(&
I->getOperandUse(0));
6885 Ops.push_back(&
I->getOperandUse(1));
6894 case Instruction::Mul: {
6895 auto ShouldSinkSplatForIndexedVariant = [](
Value *V) {
6898 if (Ty->isScalableTy())
6902 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
6905 int NumZExts = 0, NumSExts = 0;
6906 for (
auto &
Op :
I->operands()) {
6913 auto *ExtOp = Ext->getOperand(0);
6914 if (
isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
6915 Ops.push_back(&Ext->getOperandUse(0));
6923 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
6924 I->getType()->getScalarSizeInBits())
6961 if (!ElementConstant || !ElementConstant->
isZero())
6964 unsigned Opcode = OperandInstr->
getOpcode();
6965 if (Opcode == Instruction::SExt)
6967 else if (Opcode == Instruction::ZExt)
6972 unsigned Bitwidth =
I->getType()->getScalarSizeInBits();
6982 Ops.push_back(&Insert->getOperandUse(1));
6988 if (!
Ops.empty() && (NumSExts == 2 || NumZExts == 2))
6992 if (!ShouldSinkSplatForIndexedVariant(
I))
6997 Ops.push_back(&
I->getOperandUse(0));
6999 Ops.push_back(&
I->getOperandUse(1));
7001 return !
Ops.empty();
7003 case Instruction::FMul: {
7005 if (
I->getType()->isScalableTy())
7006 return !
Ops.empty();
7010 return !
Ops.empty();
7014 Ops.push_back(&
I->getOperandUse(0));
7016 Ops.push_back(&
I->getOperandUse(1));
7017 return !
Ops.empty();
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
const HexagonInstrInfo * TII
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the architecture features are not...
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
APInt getPriorityMask(const Function &F) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
LLVM_ABI APInt getCpuSupportsMask(ArrayRef< StringRef > Features)
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
LLVM_ABI Libcall getPOW(EVT RetVT)
getPOW - Return the POW_* value for the given types, or UNKNOWN_LIBCALL if there is none.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...