#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64tti"

    "sve-prefer-fixed-over-scalable-if-equal", cl::Hidden);

        "Penalty of calling a function that requires a change to PSTATE.SM"));

    cl::desc(
        "Penalty of inlining a call that requires a change to PSTATE.SM"));

    cl::desc("The cost of a histcnt instruction"));

    cl::desc("The number of instructions to search for a redundant dmb"));

    cl::desc("Threshold for forced unrolling of small loops in AArch64"));
class TailFoldingOption {

  bool NeedsDefault = true;

  void setNeedsDefault(bool V) { NeedsDefault = V; }

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
    Bits &= ~DisableBits;

    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              "  (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";

  void operator=(const std::string &Val) {

    setNeedsDefault(false);

    StringRef(Val).split(TailFoldTypes, '+', -1, false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);
    else {
      StartIdx = 0;
      setInitialBits(TailFoldingOpts::Disabled);
    }
    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
  StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();

  FeatureStr.split(Features, ",");

  return F.hasFnAttribute("fmv-features");

      AArch64::FeatureExecuteOnly,
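// Features listed as "inline inverse" (such as execute-only above) flip the
// usual direction of the compatibility check: after XOR-ing them in, the
// callee may only be inlined if its effective feature set is a subset of the
// caller's.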
  FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
  FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;

  return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
  auto FVTy = dyn_cast<FixedVectorType>(Ty);
  return FVTy &&
         FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
                                     unsigned DefaultCallPenalty) const {

  if (F == Call.getCaller())

  return DefaultCallPenalty;

                                        ST->isNeonAvailable());
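// Scalar immediate cost: the value is sign-extended to a multiple of 64 bits
// and costed one instruction per 64-bit chunk, with a minimum total cost of 1.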
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {

  return std::max<InstructionCost>(1, Cost);
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  unsigned ImmIdx = ~0U;

  case Instruction::GetElementPtr:

  case Instruction::Store:

  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:

  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:

  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:

    int NumConstants = (BitSize + 63) / 64;
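// The number of 64-bit chunks approximates how many instructions are needed to
// materialize an immediate that cannot be encoded directly in the instruction.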
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)

  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:

    int NumConstants = (BitSize + 63) / 64;

  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
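// Stackmap, patchpoint and statepoint immediates do not need to be
// materialized when they index the leading fixed operands or fit in a signed
// 64-bit value.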
  if (TyWidth == 32 || TyWidth == 64)

  unsigned TotalHistCnts = 1;

  unsigned EC = VTy->getElementCount().getKnownMinValue();

  unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;

  if (EC == 2 || (LegalEltSize == 32 && EC == 4))

  TotalHistCnts = EC / NaturalVectorWidth;
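// The histogram cost scales with how many legal HISTCNT operations are needed
// to cover the requested element count at the legalized element size.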
  switch (ICA.getID()) {
  case Intrinsic::experimental_vector_histogram_add: {

  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8,    MVT::v16i8,   MVT::v4i16,
                                        MVT::v8i16,   MVT::v2i32,   MVT::v4i32,
                                        MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,

    if (LT.second == MVT::v2i64)

    if (any_of(ValidMinMaxTys, [&](MVT M) { return M == LT.second; }))

  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    static const auto ValidSatTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,

        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&](MVT M) { return M == LT.second; }))
      return LT.first * Instrs;

    if (ST->isSVEAvailable() && VectorSize >= 128 && isPowerOf2_64(VectorSize))
      return LT.first * Instrs;
  case Intrinsic::abs: {
    static const auto ValidAbsTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,

    if (any_of(ValidAbsTys, [&](MVT M) { return M == LT.second; }))

  case Intrinsic::bswap: {
    static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
                                     MVT::v4i32, MVT::v2i64};

    if (any_of(ValidAbsTys, [&](MVT M) { return M == LT.second; }) &&
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())

  case Intrinsic::fmuladd: {

        (EltTy->isHalfTy() && ST->hasFullFP16()))

  case Intrinsic::stepvector: {

    Cost += AddCost * (LT.first - 1);
  case Intrinsic::vector_extract:
  case Intrinsic::vector_insert: {

    bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
    EVT SubVecVT = IsExtract ? getTLI()->getValueType(DL, RetTy)

        getTLI()->getTypeConversion(C, SubVecVT);

        getTLI()->getTypeConversion(C, VecVT);
  case Intrinsic::bitreverse: {
        {Intrinsic::bitreverse, MVT::i32, 1},
        {Intrinsic::bitreverse, MVT::i64, 1},
        {Intrinsic::bitreverse, MVT::v8i8, 1},
        {Intrinsic::bitreverse, MVT::v16i8, 1},
        {Intrinsic::bitreverse, MVT::v4i16, 2},
        {Intrinsic::bitreverse, MVT::v8i16, 2},
        {Intrinsic::bitreverse, MVT::v2i32, 2},
        {Intrinsic::bitreverse, MVT::v4i32, 2},
        {Intrinsic::bitreverse, MVT::v1i64, 2},
        {Intrinsic::bitreverse, MVT::v2i64, 2},

    if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||
        TLI->getValueType(DL, RetTy, true) == MVT::i16)
      return LegalisationCost.first * Entry->Cost + 1;

    return LegalisationCost.first * Entry->Cost;
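// Scalar i8/i16 bitreverse is costed as the legalized i32 operation plus one
// extra instruction, matching the actual lowering.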
  case Intrinsic::ctpop: {
    if (!ST->hasNEON()) {

        RetTy->getScalarSizeInBits()

    return LT.first * Entry->Cost + ExtraCost;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
        {Intrinsic::sadd_with_overflow, MVT::i8, 3},
        {Intrinsic::uadd_with_overflow, MVT::i8, 3},
        {Intrinsic::sadd_with_overflow, MVT::i16, 3},
        {Intrinsic::uadd_with_overflow, MVT::i16, 3},
        {Intrinsic::sadd_with_overflow, MVT::i32, 1},
        {Intrinsic::uadd_with_overflow, MVT::i32, 1},
        {Intrinsic::sadd_with_overflow, MVT::i64, 1},
        {Intrinsic::uadd_with_overflow, MVT::i64, 1},
        {Intrinsic::ssub_with_overflow, MVT::i8, 3},
        {Intrinsic::usub_with_overflow, MVT::i8, 3},
        {Intrinsic::ssub_with_overflow, MVT::i16, 3},
        {Intrinsic::usub_with_overflow, MVT::i16, 3},
        {Intrinsic::ssub_with_overflow, MVT::i32, 1},
        {Intrinsic::usub_with_overflow, MVT::i32, 1},
        {Intrinsic::ssub_with_overflow, MVT::i64, 1},
        {Intrinsic::usub_with_overflow, MVT::i64, 1},
        {Intrinsic::smul_with_overflow, MVT::i8, 5},
        {Intrinsic::umul_with_overflow, MVT::i8, 4},
        {Intrinsic::smul_with_overflow, MVT::i16, 5},
        {Intrinsic::umul_with_overflow, MVT::i16, 4},
        {Intrinsic::smul_with_overflow, MVT::i32, 2},
        {Intrinsic::umul_with_overflow, MVT::i32, 2},
        {Intrinsic::smul_with_overflow, MVT::i64, 3},
        {Intrinsic::umul_with_overflow, MVT::i64, 3},

    EVT MTy = TLI->getValueType(DL, RetTy->getContainedType(0), true);
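// Narrow (i8/i16) overflow checks cost more in this table because the operands
// must be extended and compared explicitly, while 32/64-bit add/sub overflow
// maps directly onto flag-setting instructions; multiplies sit in between.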
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {

    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;

    EVT MTy = TLI->getValueType(DL, RetTy);

    if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
         LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
         LT.second == MVT::v2f64)) {

          (LT.second == MVT::f64 && MTy == MVT::i32) ||
          (LT.second == MVT::f32 && MTy == MVT::i64)))

    if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())

    if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
        (LT.second == MVT::f16 && MTy == MVT::i64) ||
        ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&

    if ((LT.second.getScalarType() == MVT::f32 ||
         LT.second.getScalarType() == MVT::f64 ||
         LT.second.getScalarType() == MVT::f16) &&

    if (LT.second.isVector())

                                        LegalTy, {LegalTy, LegalTy});

                                        LegalTy, {LegalTy, LegalTy});

    return LT.first * Cost +
           ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0

    RetTy = RetTy->getScalarType();
    if (LT.second.isVector()) {

    return LT.first * Cost;
  case Intrinsic::fshl:
  case Intrinsic::fshr: {

        {Intrinsic::fshl, MVT::v4i32, 2}, {Intrinsic::fshl, MVT::v2i64, 2},
        {Intrinsic::fshl, MVT::v16i8, 2}, {Intrinsic::fshl, MVT::v8i16, 2},
        {Intrinsic::fshl, MVT::v2i32, 2}, {Intrinsic::fshl, MVT::v8i8, 2},
        {Intrinsic::fshl, MVT::v4i16, 2}};

      return LegalisationCost.first * Entry->Cost;

    if (!RetTy->isIntegerTy())

    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
                       RetTy->getScalarSizeInBits() < 64) ||
                      (RetTy->getScalarSizeInBits() % 64 != 0);
    unsigned ExtraCost = HigherCost ? 1 : 0;
    if (RetTy->getScalarSizeInBits() == 32 ||
        RetTy->getScalarSizeInBits() == 64)

    return TyL.first + ExtraCost;
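// Scalar funnel shifts whose width is not 32 bits or a multiple of 64 bits pay
// one extra instruction on top of the legalized cost.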
  case Intrinsic::get_active_lane_mask: {

    EVT RetVT = getTLI()->getValueType(DL, RetTy);

    if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))

    if (RetTy->isScalableTy()) {
      if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=

      if (ST->hasSVE2p1() || ST->hasSME2()) {

      return Cost + (SplitCost * (Cost - 1));

  case Intrinsic::experimental_vector_match: {

    unsigned SearchSize = NeedleTy->getNumElements();
    if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {

  case Intrinsic::experimental_cttz_elts: {

    if (!getTLI()->shouldExpandCttzElements(ArgVT)) {

  case Intrinsic::experimental_vector_extract_last_active:
    if (ST->isSVEorStreamingSVEAvailable()) {
  auto RequiredType = II.getType();

  assert(PN && "Expected Phi Node!");

  if (!PN->hasOneUse())
    return std::nullopt;

  for (Value *IncValPhi : PN->incoming_values()) {

        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())
      return std::nullopt;

  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {

    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
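// If every incoming value of the phi is a convert.to.svbool of the required
// type, a new phi is built over the original (unconverted) operands instead.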
  return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();

  return GoverningPredicateIdx;

    GoverningPredicateIdx = Index;

  return UndefIntrinsic;

    UndefIntrinsic = IID;

  return ResultLanes == InactiveLanesTakenFromOperand;

  return OperandIdxForInactiveLanes;

    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesTakenFromOperand;
    OperandIdxForInactiveLanes = Index;

  return ResultLanes == InactiveLanesAreNotDefined;

    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesAreNotDefined;

  return ResultLanes == InactiveLanesAreUnused;

    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesAreUnused;

    ResultIsZeroInitialized = true;

  return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();

  return OperandIdxWithNoActiveLanes;

    OperandIdxWithNoActiveLanes = Index;

  unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();

  unsigned IROpcode = 0;

  enum PredicationStyle {
    InactiveLanesTakenFromOperand,
    InactiveLanesAreNotDefined,
    InactiveLanesAreUnused

  bool ResultIsZeroInitialized = false;
  unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
  unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
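// These fields describe how a predicated SVE intrinsic treats its inactive
// lanes: taken from a merge operand, undefined, or unused entirely, plus which
// operand (if any) carries the governing predicate.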
  return !isa<ScalableVectorType>(V->getType());

  case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
  case Intrinsic::aarch64_sve_fcvt_f16f32:
  case Intrinsic::aarch64_sve_fcvt_f16f64:
  case Intrinsic::aarch64_sve_fcvt_f32f16:
  case Intrinsic::aarch64_sve_fcvt_f32f64:
  case Intrinsic::aarch64_sve_fcvt_f64f16:
  case Intrinsic::aarch64_sve_fcvt_f64f32:
  case Intrinsic::aarch64_sve_fcvtlt_f32f16:
  case Intrinsic::aarch64_sve_fcvtlt_f64f32:
  case Intrinsic::aarch64_sve_fcvtx_f32f64:
  case Intrinsic::aarch64_sve_fcvtzs:
  case Intrinsic::aarch64_sve_fcvtzs_i32f16:
  case Intrinsic::aarch64_sve_fcvtzs_i32f64:
  case Intrinsic::aarch64_sve_fcvtzs_i64f16:
  case Intrinsic::aarch64_sve_fcvtzs_i64f32:
  case Intrinsic::aarch64_sve_fcvtzu:
  case Intrinsic::aarch64_sve_fcvtzu_i32f16:
  case Intrinsic::aarch64_sve_fcvtzu_i32f64:
  case Intrinsic::aarch64_sve_fcvtzu_i64f16:
  case Intrinsic::aarch64_sve_fcvtzu_i64f32:
  case Intrinsic::aarch64_sve_scvtf:
  case Intrinsic::aarch64_sve_scvtf_f16i32:
  case Intrinsic::aarch64_sve_scvtf_f16i64:
  case Intrinsic::aarch64_sve_scvtf_f32i64:
  case Intrinsic::aarch64_sve_scvtf_f64i32:
  case Intrinsic::aarch64_sve_ucvtf:
  case Intrinsic::aarch64_sve_ucvtf_f16i32:
  case Intrinsic::aarch64_sve_ucvtf_f16i64:
  case Intrinsic::aarch64_sve_ucvtf_f32i64:
  case Intrinsic::aarch64_sve_ucvtf_f64i32:

  case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
  case Intrinsic::aarch64_sve_fcvtnt_f16f32:
  case Intrinsic::aarch64_sve_fcvtnt_f32f64:
  case Intrinsic::aarch64_sve_fcvtxnt_f32f64:

  case Intrinsic::aarch64_sve_fabd:
  case Intrinsic::aarch64_sve_fadd:
  case Intrinsic::aarch64_sve_fdiv:
  case Intrinsic::aarch64_sve_fmax:
  case Intrinsic::aarch64_sve_fmaxnm:
  case Intrinsic::aarch64_sve_fmin:
  case Intrinsic::aarch64_sve_fminnm:
  case Intrinsic::aarch64_sve_fmla:
  case Intrinsic::aarch64_sve_fmls:
  case Intrinsic::aarch64_sve_fmul:
  case Intrinsic::aarch64_sve_fmulx:
  case Intrinsic::aarch64_sve_fnmla:
  case Intrinsic::aarch64_sve_fnmls:
  case Intrinsic::aarch64_sve_fsub:
  case Intrinsic::aarch64_sve_add:
  case Intrinsic::aarch64_sve_mla:
  case Intrinsic::aarch64_sve_mls:
  case Intrinsic::aarch64_sve_mul:
  case Intrinsic::aarch64_sve_sabd:
  case Intrinsic::aarch64_sve_sdiv:
  case Intrinsic::aarch64_sve_smax:
  case Intrinsic::aarch64_sve_smin:
  case Intrinsic::aarch64_sve_smulh:
  case Intrinsic::aarch64_sve_sub:
  case Intrinsic::aarch64_sve_uabd:
  case Intrinsic::aarch64_sve_udiv:
  case Intrinsic::aarch64_sve_umax:
  case Intrinsic::aarch64_sve_umin:
  case Intrinsic::aarch64_sve_umulh:
  case Intrinsic::aarch64_sve_asr:
  case Intrinsic::aarch64_sve_lsl:
  case Intrinsic::aarch64_sve_lsr:
  case Intrinsic::aarch64_sve_and:
  case Intrinsic::aarch64_sve_bic:
  case Intrinsic::aarch64_sve_eor:
  case Intrinsic::aarch64_sve_orr:
  case Intrinsic::aarch64_sve_sqsub:
  case Intrinsic::aarch64_sve_uqsub:

  case Intrinsic::aarch64_sve_add_u:
  case Intrinsic::aarch64_sve_and_u:
  case Intrinsic::aarch64_sve_asr_u:
  case Intrinsic::aarch64_sve_eor_u:
  case Intrinsic::aarch64_sve_fadd_u:
  case Intrinsic::aarch64_sve_fdiv_u:
  case Intrinsic::aarch64_sve_fmul_u:
  case Intrinsic::aarch64_sve_fsub_u:
  case Intrinsic::aarch64_sve_lsl_u:
  case Intrinsic::aarch64_sve_lsr_u:
  case Intrinsic::aarch64_sve_mul_u:
  case Intrinsic::aarch64_sve_orr_u:
  case Intrinsic::aarch64_sve_sdiv_u:
  case Intrinsic::aarch64_sve_sub_u:
  case Intrinsic::aarch64_sve_udiv_u:

  case Intrinsic::aarch64_sve_addqv:
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_brka_z:
  case Intrinsic::aarch64_sve_brkb_z:
  case Intrinsic::aarch64_sve_brkn_z:
  case Intrinsic::aarch64_sve_brkpa_z:
  case Intrinsic::aarch64_sve_brkpb_z:
  case Intrinsic::aarch64_sve_cntp:
  case Intrinsic::aarch64_sve_compact:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_eorv:
  case Intrinsic::aarch64_sve_eorqv:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:
  case Intrinsic::aarch64_sve_orv:
  case Intrinsic::aarch64_sve_orqv:
  case Intrinsic::aarch64_sve_pnext:
  case Intrinsic::aarch64_sve_rdffr_z:
  case Intrinsic::aarch64_sve_saddv:
  case Intrinsic::aarch64_sve_uaddv:
  case Intrinsic::aarch64_sve_umaxv:
  case Intrinsic::aarch64_sve_umaxqv:
  case Intrinsic::aarch64_sve_cmpeq:
  case Intrinsic::aarch64_sve_cmpeq_wide:
  case Intrinsic::aarch64_sve_cmpge:
  case Intrinsic::aarch64_sve_cmpge_wide:
  case Intrinsic::aarch64_sve_cmpgt:
  case Intrinsic::aarch64_sve_cmpgt_wide:
  case Intrinsic::aarch64_sve_cmphi:
  case Intrinsic::aarch64_sve_cmphi_wide:
  case Intrinsic::aarch64_sve_cmphs:
  case Intrinsic::aarch64_sve_cmphs_wide:
  case Intrinsic::aarch64_sve_cmple_wide:
  case Intrinsic::aarch64_sve_cmplo_wide:
  case Intrinsic::aarch64_sve_cmpls_wide:
  case Intrinsic::aarch64_sve_cmplt_wide:
  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpne_wide:
  case Intrinsic::aarch64_sve_facge:
  case Intrinsic::aarch64_sve_facgt:
  case Intrinsic::aarch64_sve_fcmpeq:
  case Intrinsic::aarch64_sve_fcmpge:
  case Intrinsic::aarch64_sve_fcmpgt:
  case Intrinsic::aarch64_sve_fcmpne:
  case Intrinsic::aarch64_sve_fcmpuo:
  case Intrinsic::aarch64_sve_ld1:
  case Intrinsic::aarch64_sve_ld1_gather:
  case Intrinsic::aarch64_sve_ld1_gather_index:
  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ld1q_gather_index:
  case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
  case Intrinsic::aarch64_sve_ld1ro:
  case Intrinsic::aarch64_sve_ld1rq:
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
  case Intrinsic::aarch64_sve_ldff1:
  case Intrinsic::aarch64_sve_ldff1_gather:
  case Intrinsic::aarch64_sve_ldff1_gather_index:
  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ldnf1:
  case Intrinsic::aarch64_sve_ldnt1:
  case Intrinsic::aarch64_sve_ldnt1_gather:
  case Intrinsic::aarch64_sve_ldnt1_gather_index:
  case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:

  case Intrinsic::aarch64_sve_prf:
  case Intrinsic::aarch64_sve_prfb_gather_index:
  case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfd_gather_index:
  case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfh_gather_index:
  case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfw_gather_index:
  case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:

  case Intrinsic::aarch64_sve_st1_scatter:
  case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
  case Intrinsic::aarch64_sve_st1dq:
  case Intrinsic::aarch64_sve_st1q_scatter_index:
  case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
  case Intrinsic::aarch64_sve_st1wq:
  case Intrinsic::aarch64_sve_stnt1:
  case Intrinsic::aarch64_sve_stnt1_scatter:
  case Intrinsic::aarch64_sve_stnt1_scatter_index:
  case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:

  case Intrinsic::aarch64_sve_st2:
  case Intrinsic::aarch64_sve_st2q:

  case Intrinsic::aarch64_sve_st3:
  case Intrinsic::aarch64_sve_st3q:

  case Intrinsic::aarch64_sve_st4:
  case Intrinsic::aarch64_sve_st4q:
  Value *UncastedPred;

    Pred = UncastedPred;

    if (OrigPredTy->getMinNumElements() <=
            ->getMinNumElements())
      Pred = UncastedPred;

  return C && C->isAllOnesValue();

  if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
      Dup->getOperand(1) == Pg && isa<Constant>(Dup->getOperand(2)))
static std::optional<Instruction *>

  Value *Op1 = II.getOperand(1);
  Value *Op2 = II.getOperand(2);

  return std::nullopt;

  if (SimpleII == Inactive)

static std::optional<Instruction *>

    return std::nullopt;

  II.setCalledFunction(NewDecl);

  return std::nullopt;
static std::optional<Instruction *>

    return std::nullopt;

  auto IntrinsicID = BinOp->getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:

    return std::nullopt;

  auto BinOpPred = BinOp->getOperand(0);
  auto BinOpOp1 = BinOp->getOperand(1);
  auto BinOpOp2 = BinOp->getOperand(2);

      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
    return std::nullopt;

  auto PredOp = PredIntr->getOperand(0);

  if (PredOpTy != II.getType())
    return std::nullopt;

      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
  if (BinOpOp1 == BinOpOp2)
    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);

      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));

  auto NarrowedBinOp =
static std::optional<Instruction *>

    return BinOpCombine;

    return std::nullopt;

  Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;

    if (CursorVTy->getElementCount().getKnownMinValue() <
        IVTy->getElementCount().getKnownMinValue())

    if (Cursor->getType() == IVTy)
      EarliestReplacement = Cursor;

    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_to_svbool ||
                              IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_from_svbool))

    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
    Cursor = IntrinsicCursor->getOperand(0);

  if (!EarliestReplacement)
    return std::nullopt;
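// Walk back through a chain of convert.to/from.svbool reinterprets and reuse
// the earliest value that already has the required predicate type, queueing
// the intermediate conversions for removal.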
  auto *OpPredicate = II.getOperand(0);

    return std::nullopt;

    return std::nullopt;

  const auto PTruePattern =

  if (PTruePattern != AArch64SVEPredPattern::vl1)
    return std::nullopt;

      II.getArgOperand(0), II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
  Insert->insertBefore(II.getIterator());
  Insert->takeName(&II);

      II.getArgOperand(0));
    return std::nullopt;

  if (!SplatValue || !SplatValue->isZero())
    return std::nullopt;

      DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
    return std::nullopt;

  if (!DupQLaneIdx || !DupQLaneIdx->isZero())
    return std::nullopt;

  if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

  if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
    return std::nullopt;

  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;

  for (unsigned I = 0; I < NumElts; ++I) {

      return std::nullopt;

    PredicateBits |= 1 << (I * (16 / NumElts));

  if (PredicateBits == 0) {

    PFalse->takeName(&II);
  for (unsigned I = 0; I < 16; ++I)
    if ((PredicateBits & (1 << I)) != 0)

  unsigned PredSize = Mask & -Mask;

  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
      return std::nullopt;

                                        {PredType}, {PTruePat});

      Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
  auto *ConvertFromSVBool =

                              {II.getType()}, {ConvertToSVBool});
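// If the constant predicate forms a repeating all-active prefix pattern, the
// whole dup/insert sequence is replaced by an equivalent ptrue converted back
// to the original predicate type.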
  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(1);
  auto IntrinsicID = II.getIntrinsicID();
  bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;

  auto OpC = OldBinOp->getOpcode();

      OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());

  if (IsAfter && C && C->isNullValue()) {

    Extract->insertBefore(II.getIterator());
    Extract->takeName(&II);

    return std::nullopt;

  if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =

    return std::nullopt;

  unsigned Idx = MinNumElts - 1;

  if (Idx >= PgVTy->getMinNumElements())
    return std::nullopt;

  Extract->insertBefore(II.getIterator());
  Extract->takeName(&II);
  Value *Pg = II.getArgOperand(0);

  Value *Vec = II.getArgOperand(2);

  if (!Ty->isIntegerTy())
    return std::nullopt;

    return std::nullopt;

      II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});

                                   {II.getType()}, {AllPat});

static std::optional<Instruction *>

  if (Pattern == AArch64SVEPredPattern::all) {

  return MinNumElts && NumElts >= MinNumElts

                 II, ConstantInt::get(II.getType(), MinNumElts)))

static std::optional<Instruction *>

  if (!ST->isStreaming())
    return std::nullopt;
  Value *PgVal = II.getArgOperand(0);
  Value *OpVal = II.getArgOperand(1);

  if (PgVal == OpVal &&
      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
       II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {

    return std::nullopt;

  if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&

  if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
      ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
       (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
       (OpIID == Intrinsic::aarch64_sve_and_z) ||
       (OpIID == Intrinsic::aarch64_sve_bic_z) ||
       (OpIID == Intrinsic::aarch64_sve_eor_z) ||
       (OpIID == Intrinsic::aarch64_sve_nand_z) ||
       (OpIID == Intrinsic::aarch64_sve_nor_z) ||
       (OpIID == Intrinsic::aarch64_sve_orn_z) ||
       (OpIID == Intrinsic::aarch64_sve_orr_z))) {

  return std::nullopt;
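// ptest_any(pg, op) is redundant when op is a flag-setting operation (a break,
// rdffr or zeroing logical op) governed by the same predicate, since the
// condition flags are already set appropriately.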
template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
                                     bool MergeIntoAddendOp) {

  Value *MulOp0, *MulOp1, *AddendOp, *Mul;
  if (MergeIntoAddendOp) {
    AddendOp = II.getOperand(1);
    Mul = II.getOperand(2);

    AddendOp = II.getOperand(2);
    Mul = II.getOperand(1);

    return std::nullopt;

  if (!Mul->hasOneUse())
    return std::nullopt;

  if (II.getType()->isFPOrFPVectorTy()) {

      return std::nullopt;

      return std::nullopt;

  if (MergeIntoAddendOp)
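// Fold a single-use multiply feeding an add/sub intrinsic into the fused
// multiply-accumulate form (MulOpc -> FuseOpc); for floating-point types the
// additional checks above must pass first.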
static std::optional<Instruction *>

  Value *Pred = II.getOperand(0);
  Value *PtrOp = II.getOperand(1);
  Type *VecTy = II.getType();

  Load->copyMetadata(II);

static std::optional<Instruction *>

  Value *VecOp = II.getOperand(0);
  Value *Pred = II.getOperand(1);
  Value *PtrOp = II.getOperand(2);

  Store->copyMetadata(II);
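// sve.ld1/sve.st1 are rewritten as equivalent LLVM (masked) load/store
// instructions so later passes can reason about them; IR metadata from the
// intrinsic call is carried over.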
  case Intrinsic::aarch64_sve_fmul_u:
    return Instruction::BinaryOps::FMul;
  case Intrinsic::aarch64_sve_fadd_u:
    return Instruction::BinaryOps::FAdd;
  case Intrinsic::aarch64_sve_fsub_u:
    return Instruction::BinaryOps::FSub;

    return Instruction::BinaryOpsEnd;

static std::optional<Instruction *>

  if (II.isStrictFP())
    return std::nullopt;

  auto *OpPredicate = II.getOperand(0);

  if (BinOpCode == Instruction::BinaryOpsEnd ||
    return std::nullopt;

      BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());

                                           Intrinsic::aarch64_sve_mla>(

                                           Intrinsic::aarch64_sve_mad>(

  return std::nullopt;
static std::optional<Instruction *>

                                            Intrinsic::aarch64_sve_fmla>(IC, II,

                                            Intrinsic::aarch64_sve_fmad>(IC, II,

                                            Intrinsic::aarch64_sve_fmla>(IC, II,

  return std::nullopt;

static std::optional<Instruction *>

                                            Intrinsic::aarch64_sve_fmla>(IC, II,

                                            Intrinsic::aarch64_sve_fmad>(IC, II,

                                            Intrinsic::aarch64_sve_fmla_u>(

static std::optional<Instruction *>

                                            Intrinsic::aarch64_sve_fmls>(IC, II,

                                            Intrinsic::aarch64_sve_fnmsb>(

                                            Intrinsic::aarch64_sve_fmls>(IC, II,

  return std::nullopt;

static std::optional<Instruction *>

                                            Intrinsic::aarch64_sve_fmls>(IC, II,

                                            Intrinsic::aarch64_sve_fnmsb>(

                                            Intrinsic::aarch64_sve_fmls_u>(

                                           Intrinsic::aarch64_sve_mls>(

  return std::nullopt;
  Value *UnpackArg = II.getArgOperand(0);

  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
                  II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;

  return std::nullopt;

  auto *OpVal = II.getOperand(0);
  auto *OpIndices = II.getOperand(1);

      SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
    return std::nullopt;

  Type *RetTy = II.getType();
  constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
  constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;

  if ((match(II.getArgOperand(0),

    if (TyA == B->getType() &&

                                      TyA->getMinNumElements());

  return std::nullopt;

  if (match(II.getArgOperand(0),

        II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));

  return std::nullopt;
static std::optional<Instruction *>

  Value *Mask = II.getOperand(0);
  Value *BasePtr = II.getOperand(1);
  Value *Index = II.getOperand(2);

        BasePtr->getPointerAlignment(II.getDataLayout());

                                      BasePtr, IndexBase);

  return std::nullopt;

static std::optional<Instruction *>

  Value *Val = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *BasePtr = II.getOperand(2);
  Value *Index = II.getOperand(3);

        BasePtr->getPointerAlignment(II.getDataLayout());

                                      BasePtr, IndexBase);

  return std::nullopt;
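// A gather/scatter whose index operand is a unit-stride sve.index sequence is
// rewritten as a contiguous masked load/store from the adjusted base pointer.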
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *DivVec = II.getOperand(2);

  if (!SplatConstantInt)
    return std::nullopt;

  if (DivisorValue == -1)
    return std::nullopt;
  if (DivisorValue == 1)

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

        Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});

  return std::nullopt;
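// Signed division by a splatted power of two becomes a rounding arithmetic
// shift right (asrd); for a negative power of two the result is additionally
// negated.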
  size_t VecSize = Vec.size();

  size_t HalfVecSize = VecSize / 2;

    if (*LHS != nullptr && *RHS != nullptr) {

    if (*LHS == nullptr && *RHS != nullptr)

  return std::nullopt;

    Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
    CurrentInsertElt = InsertElt->getOperand(0);

    return std::nullopt;

  for (size_t I = 0; I < Elts.size(); I++) {
    if (Elts[I] == nullptr)

  if (InsertEltChain == nullptr)
    return std::nullopt;

  unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
  unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
                                 IIScalableTy->getMinNumElements() /
                                 PatternWidth;
  auto *WideShuffleMaskTy =

  auto NarrowBitcast =

  return std::nullopt;
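// An insertelement chain that fills a repeating pattern is turned into a
// dupq-style splat: the elements are bitcast to a wide type, shuffled, and
// bitcast back to the original element type.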
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *Shift = II.getOperand(2);

  Value *AbsPred, *MergedValue;

    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

                                     {II.getType()}, {Pred, Vec, Shift});
  Value *Vec = II.getOperand(0);

    return std::nullopt;

  auto *NI = II.getNextNode();

    return !I->mayReadOrWriteMemory() && !I->mayHaveSideEffects();

  while (LookaheadThreshold-- && CanSkipOver(NI)) {
    auto *NIBB = NI->getParent();
    NI = NI->getNextNode();

    if (auto *SuccBB = NIBB->getUniqueSuccessor())
      NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();

  if (NextII && II.isIdenticalTo(NextII))

  return std::nullopt;
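// A dmb is removed as redundant if, skipping over side-effect-free
// instructions within the lookahead budget, the next relevant instruction is
// an identical barrier.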
                          {II.getType(), II.getOperand(0)->getType()},
                          {II.getOperand(0), II.getOperand(1)}));

  return std::nullopt;

  Value *Passthru = II.getOperand(0);

  auto *Mask = ConstantInt::get(Ty, MaskValue);

  return std::nullopt;

static std::optional<Instruction *>

  return std::nullopt;
std::optional<Instruction *>

  case Intrinsic::aarch64_dmb:
  case Intrinsic::aarch64_neon_fmaxnm:
  case Intrinsic::aarch64_neon_fminnm:
  case Intrinsic::aarch64_sve_convert_from_svbool:
  case Intrinsic::aarch64_sve_dup:
  case Intrinsic::aarch64_sve_dup_x:
  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpne_wide:
  case Intrinsic::aarch64_sve_rdffr:
  case Intrinsic::aarch64_sve_lasta:
  case Intrinsic::aarch64_sve_lastb:
  case Intrinsic::aarch64_sve_clasta_n:
  case Intrinsic::aarch64_sve_clastb_n:
  case Intrinsic::aarch64_sve_cntd:
  case Intrinsic::aarch64_sve_cntw:
  case Intrinsic::aarch64_sve_cnth:
  case Intrinsic::aarch64_sve_cntb:
  case Intrinsic::aarch64_sme_cntsd:
  case Intrinsic::aarch64_sve_ptest_any:
  case Intrinsic::aarch64_sve_ptest_first:
  case Intrinsic::aarch64_sve_ptest_last:
  case Intrinsic::aarch64_sve_fadd:
  case Intrinsic::aarch64_sve_fadd_u:
  case Intrinsic::aarch64_sve_fmul_u:
  case Intrinsic::aarch64_sve_fsub:
  case Intrinsic::aarch64_sve_fsub_u:
  case Intrinsic::aarch64_sve_add:
  case Intrinsic::aarch64_sve_add_u:
                                             Intrinsic::aarch64_sve_mla_u>(
  case Intrinsic::aarch64_sve_sub:
  case Intrinsic::aarch64_sve_sub_u:
                                             Intrinsic::aarch64_sve_mls_u>(
  case Intrinsic::aarch64_sve_tbl:
  case Intrinsic::aarch64_sve_uunpkhi:
  case Intrinsic::aarch64_sve_uunpklo:
  case Intrinsic::aarch64_sve_sunpkhi:
  case Intrinsic::aarch64_sve_sunpklo:
  case Intrinsic::aarch64_sve_uzp1:
  case Intrinsic::aarch64_sve_zip1:
  case Intrinsic::aarch64_sve_zip2:
  case Intrinsic::aarch64_sve_ld1_gather_index:
  case Intrinsic::aarch64_sve_st1_scatter_index:
  case Intrinsic::aarch64_sve_ld1:
  case Intrinsic::aarch64_sve_st1:
  case Intrinsic::aarch64_sve_sdiv:
  case Intrinsic::aarch64_sve_sel:
  case Intrinsic::aarch64_sve_srshl:
  case Intrinsic::aarch64_sve_dupq_lane:
  case Intrinsic::aarch64_sve_insr:
  case Intrinsic::aarch64_sve_whilelo:
  case Intrinsic::aarch64_sve_ptrue:
  case Intrinsic::aarch64_sve_uxtb:
  case Intrinsic::aarch64_sve_uxth:
  case Intrinsic::aarch64_sve_uxtw:
  case Intrinsic::aarch64_sme_in_streaming_mode:

  return std::nullopt;
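// instCombineIntrinsic dispatches each AArch64 intrinsic to its dedicated
// combine helper above; unhandled intrinsics fall through and return
// std::nullopt.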
                                      SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {

  case Intrinsic::aarch64_neon_fcvtxn:
  case Intrinsic::aarch64_neon_rshrn:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqxtn:
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);

  return std::nullopt;
  return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&

  if (ST->useSVEForFixedLengthVectors() &&
        std::max(ST->getMinSVEVectorSizeInBits(), 128u));
  else if (ST->isNeonAvailable())

  if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
bool AArch64TTIImpl::isSingleExtWideningInstruction(
    Type *SrcOverrideTy) const {

      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  Type *SrcTy = SrcOverrideTy;

  case Instruction::Add:
  case Instruction::Sub: {

    if (Opcode == Instruction::Sub)

  assert(SrcTy && "Expected some SrcTy");

  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();

      DstTyL.first * DstTyL.second.getVectorMinNumElements();

      SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
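// A single-extend widening pattern requires source and destination to cover
// the same number of elements, with the destination element exactly twice the
// source width.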
Type *AArch64TTIImpl::isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,
                                                  Type *SrcOverrideTy) const {
  if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
      Opcode != Instruction::Mul)

      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  auto getScalarSizeWithOverride = [&](const Value *V) {
        ->getScalarSizeInBits();

  unsigned MaxEltSize = 0;

    unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
    unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
    MaxEltSize = std::max(EltSize0, EltSize1);

    unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
    unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);

    if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)

    MaxEltSize = DstEltSize / 2;
  } else if (Opcode == Instruction::Mul &&

        getScalarSizeWithOverride(isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);

  if (MaxEltSize * 2 > DstEltSize)
  if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(DL, Src)) ||
      (Src->isScalableTy() && !ST->hasSVE2()))

  if (AddUser && AddUser->getOpcode() == Instruction::Add)

  if (!Shr || Shr->getOpcode() != Instruction::LShr)

  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
      Src->getScalarSizeInBits() !=
  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (I && I->hasOneUser()) {

    if (Type *ExtTy = isBinExtWideningInstruction(
            SingleUser->getOpcode(), Dst, Operands,
            Src != I->getOperand(0)->getType() ? Src : nullptr)) {

    if (isSingleExtWideningInstruction(
            SingleUser->getOpcode(), Dst, Operands,
            Src != I->getOperand(0)->getType() ? Src : nullptr)) {

      if (SingleUser->getOpcode() == Instruction::Add) {
        if (I == SingleUser->getOperand(1) ||
            cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))

    return Cost == 0 ? 0 : 1;
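// An extend whose single user is a widening add/sub/mul is folded into that
// instruction, so the cast itself costs nothing; otherwise any non-zero cost
// is clamped to 1 here.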
  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())

  if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&

    return AdjustCost(Entry->Cost);

  const unsigned int SVE_EXT_COST = 1;
  const unsigned int SVE_FCVT_COST = 1;
  const unsigned int SVE_UNPACK_ONCE = 4;
  const unsigned int SVE_UNPACK_TWICE = 16;
      {ISD::FP_EXTEND, MVT::f64, MVT::f32, 1},
      {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f32, 1},
      {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 2},

      {ISD::FP_EXTEND, MVT::f32, MVT::f16, 1},
      {ISD::FP_EXTEND, MVT::f64, MVT::f16, 1},
      {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
      {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 2},
      {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2},
      {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3},
      {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6},

      {ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1},
      {ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2},
      {ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1},
      {ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2},
      {ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2},
      {ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3},
      {ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
       SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
       SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
       SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
      {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},
      {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
      {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},

      {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1},
      {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1},
      {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 4},

      {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
      {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
      {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},

      {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2},
      {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6},
      {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14},

      {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
      {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
      {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},

      {ISD::BITCAST, MVT::nxv2f16, MVT::nxv2i16, 0},
      {ISD::BITCAST, MVT::nxv4f16, MVT::nxv4i16, 0},
      {ISD::BITCAST, MVT::nxv2f32, MVT::nxv2i32, 0},

      {ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0},
      {ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0},
      {ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0},
  EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;

      ST->useSVEForFixedLengthVectors(WiderTy)) {
    std::pair<InstructionCost, MVT> LT =

    unsigned NumElements =

    return AdjustCost(Entry->Cost);

  if (ST->hasFullFP16())

    return AdjustCost(Entry->Cost);

      ST->isSVEorStreamingSVEAvailable() &&
      TLI->getTypeAction(Src->getContext(), SrcTy) ==
      TLI->getTypeAction(Dst->getContext(), DstTy) ==

        Opcode, LegalTy, Src, CCH, CostKind, I);

    return Part1 + Part2;

      ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))

  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&

                             CostKind, Index, nullptr, nullptr);
  auto DstVT = TLI->getValueType(DL, Dst);
  auto SrcVT = TLI->getValueType(DL, Src);

  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))

  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())

  case Instruction::SExt:

  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)

  return Opcode == Instruction::PHI ? 0 : 1;
    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {

  if (!LT.second.isVector())

  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;
  auto ExtractCanFuseWithFmul = [&]() {

    auto IsAllowedScalarTy = [&](const Type *T) {
      return T->isFloatTy() || T->isDoubleTy() ||
             (T->isHalfTy() && ST->hasFullFP16());

    auto IsUserFMulScalarTy = [](const Value *EEUser) {

      return BO && BO->getOpcode() == BinaryOperator::FMul &&
             !BO->getType()->isVectorTy();

    auto IsExtractLaneEquivalentToZero = [&](unsigned Idx, unsigned EltSz) {

      return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);

      DenseMap<User *, unsigned> UserToExtractIdx;
      for (auto *U : Scalar->users()) {
        if (!IsUserFMulScalarTy(U))

        UserToExtractIdx[U];

      if (UserToExtractIdx.empty())

      for (auto &[S, U, L] : ScalarUserAndIdx) {
        for (auto *U : S->users()) {
          if (UserToExtractIdx.contains(U)) {

            auto *Op0 = FMul->getOperand(0);
            auto *Op1 = FMul->getOperand(1);
            if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
              UserToExtractIdx[U] = L;

      for (auto &[U, L] : UserToExtractIdx) {

    return !EE->users().empty() &&
           all_of(EE->users(), [&](const User *U) {
             if (!IsUserFMulScalarTy(U))

             const auto *BO = cast<BinaryOperator>(U);
             const auto *OtherEE = dyn_cast<ExtractElementInst>(
                 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));

             const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());

             return IsExtractLaneEquivalentToZero(
                 cast<ConstantInt>(OtherEE->getIndexOperand())
                 OtherEE->getType()->getScalarSizeInBits());

  if (Opcode == Instruction::ExtractElement && (I || Scalar) &&
      ExtractCanFuseWithFmul())

             : ST->getVectorInsertExtractBaseCost();
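// Lane extracts whose only scalar users are fmul instructions can be folded
// into indexed FMUL (by-element) forms and are therefore treated as free; all
// other inserts/extracts pay the subtarget's base insert/extract cost.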
                                                   const Value *Op1) const {

  if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&

  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index);

    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, nullptr, Scalar,

                                                   unsigned Index) const {
  return getVectorInstrCostHelper(I.getOpcode(), Val, CostKind, Index, &I);

                                                   unsigned Index) const {

             : ST->getVectorInsertExtractBaseCost() + 1;

  if (Ty->getElementType()->isFloatingPointTy())

  unsigned VecInstCost =

  return DemandedElts.popcount() * (Insert + Extract) * VecInstCost;
  if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
    return std::nullopt;
  if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
    return std::nullopt;
  if (CanUseSVE && Ty->isScalableTy() && ST->hasSVEB16B16() &&
      ST->isNonStreamingSVEorSME2Available())
    return std::nullopt;

  Cost += InstCost(PromotedTy);
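// Half and bfloat operations without native support are costed by promoting
// the type and charging the operation at the promoted width; natively
// supported cases bail out above and are costed directly.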
                                     Op2Info, Args, CxtI);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

          Ty, CostKind, Op1Info, Op2Info, true,

          [&](Type *PromotedTy) {

    return *PromotedCost;

  if (Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {

    auto VT = TLI->getValueType(DL, Ty);
    if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {

                 : (3 * AsrCost + AddCost);

      return MulCost + AsrCost + 2 * AddCost;

    } else if (VT.isVector()) {

      if (Ty->isScalableTy() && ST->hasSVE())
        Cost += 2 * AsrCost;

            ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost

      } else if (LT.second == MVT::v2i64) {
        return VT.getVectorNumElements() *

      if (Ty->isScalableTy() && ST->hasSVE())
        return MulCost + 2 * AddCost + 2 * AsrCost;
      return 2 * MulCost + AddCost + AsrCost + UsraCost;

      LT.second.isFixedLengthVector()) {

    return ExtractCost + InsertCost +

    auto VT = TLI->getValueType(DL, Ty);

    bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
                   LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
                   LT.second == MVT::nxv16i8;
    bool Is128bit = LT.second.is128BitVector();

        (HasMULH ? 0 : ShrCost) + AddCost * 2 + ShrCost;
    return DivCost + (ISD == ISD::UREM ? MulCost + AddCost : 0);

    if (!VT.isVector() && VT.getSizeInBits() > 64)

          Opcode, Ty, CostKind, Op1Info, Op2Info);

    if (TLI->isOperationLegalOrCustom(ISD, LT.second) && ST->hasSVE()) {

        Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {

      if (nullptr != Entry)

      if (LT.second.getScalarType() == MVT::i8)

      else if (LT.second.getScalarType() == MVT::i16)

          Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info);
      return (4 + DivCost) * VTy->getNumElements();
                                -1, nullptr, nullptr);

    if (LT.second == MVT::v2i64 && ST->hasSVE())

    if (LT.second != MVT::v2i64)

    if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
         (Ty->isHalfTy() && ST->hasFullFP16())) &&

    if (!Ty->getScalarType()->isFP128Ty())

    if (!Ty->getScalarType()->isFP128Ty())
      return 2 * LT.first;

  if (!Ty->isVectorTy())

  int MaxMergeDistance = 64;

    return NumVectorInstToHideOverhead;
                                                  unsigned Opcode1,
                                                  unsigned Opcode2) const {

  if (!Sched.hasInstrSchedModel())

      Sched.getSchedClassDesc(TII->get(Opcode1).getSchedClass());

      Sched.getSchedClassDesc(TII->get(Opcode2).getSchedClass());

         "Cannot handle variant scheduling classes without an MI");
  const int AmortizationCost = 20;

      VecPred = CurrentPred;

    static const auto ValidMinMaxTys = {
        MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
        MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
    static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};

    if (any_of(ValidMinMaxTys, [&](MVT M) { return M == LT.second; }) ||
        (ST->hasFullFP16() &&
         any_of(ValidFP16MinMaxTys, [&](MVT M) { return M == LT.second; })))

      {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
      {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
      {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
      {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
      {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
      {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
      {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
      {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
      {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
      {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
      {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
  if (Opcode == Instruction::FCmp) {

            ValTy, CostKind, Op1Info, Op2Info, false,
            false, [&](Type *PromotedTy) {

      return *PromotedCost;

    if (LT.second.getScalarType() != MVT::f64 &&
        LT.second.getScalarType() != MVT::f32 &&
        LT.second.getScalarType() != MVT::f16)

    unsigned Factor = 1;

                                  AArch64::FCMEQv4f32))

      TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&

                               Op1Info, Op2Info, I);
  if (ST->requiresStrictAlign()) {

  Options.AllowOverlappingLoads = true;
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);

  Options.LoadSizes = {8, 4, 2, 1};
  Options.AllowedTailExpansions = {3, 5, 6};
4740 if (!LT.first.isValid())
4745 if (VT->getElementType()->isIntegerTy(1))
4762 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4763 "Should be called on only load or stores.");
4765 case Instruction::Load:
4768 return ST->getGatherOverhead();
4770 case Instruction::Store:
4773 return ST->getScatterOverhead();
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,

  if (!LT.first.isValid())

  if (!LT.second.isVector() ||
      VT->getElementType()->isIntegerTy(1))

  ElementCount LegalVF = LT.second.getVectorElementCount();

      {TTI::OK_AnyValue, TTI::OP_None}, I);
  EVT VT = TLI->getValueType(DL, Ty, true);

  if (VT == MVT::Other)

  if (!LT.first.isValid())

      (VTy->getElementType()->isIntegerTy(1) &&
       !VTy->getElementCount().isKnownMultipleOf(

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && Alignment < Align(16)) {

    const int AmortizationCost = 6;

    return LT.first * 2 * AmortizationCost;

  if (Ty->isPtrOrPtrVectorTy())

  if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {

    if (VT == MVT::v4i8)

  if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||

  while (!TypeWorklist.empty()) {
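// Misaligned 128-bit stores on subtargets where they are slow are charged a
// large amortized penalty (two stores times AmortizationCost) to discourage
// them.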
    bool UseMaskForCond, bool UseMaskForGaps) const {
  assert(Factor >= 2 && "Invalid interleave factor");

  if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))

  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();

        VecVTy->getElementCount().divideCoefficientBy(Factor));

    if (MinElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);

                                           UseMaskForCond, UseMaskForGaps);
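// Legal interleaved groups are costed as Factor times the number of ldN/stN
// accesses; anything else falls back to the generic interleaved-access cost.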
  for (auto *I : Tys) {
    if (!I->isVectorTy())

  return ST->getMaxInterleaveFactor();
  enum { MaxStridedLoads = 7 };

    int StridedLoads = 0;

    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {

        if (L->isLoopInvariant(PtrValue))

        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())

        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;

    return StridedLoads;

  int StridedLoads = countStridedLoads(L, SE);

                    << " strided loads\n");
5032 unsigned *FinalSize) {
5036 for (
auto *BB : L->getBlocks()) {
5037 for (
auto &
I : *BB) {
5043 if (!Cost.isValid())
5047 if (LoopCost > Budget)
5069 if (MaxTC > 0 && MaxTC <= 32)
5080 if (Blocks.size() != 2)
5102 if (!L->isInnermost() || L->getNumBlocks() > 8)
5106 if (!L->getExitBlock())
5112 bool HasParellelizableReductions =
5113 L->getNumBlocks() == 1 &&
5114 any_of(L->getHeader()->phis(),
5116 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5119 if (HasParellelizableReductions &&
5141 if (HasParellelizableReductions) {
5152 if (Header == Latch) {
5155 unsigned Width = 10;
5161 unsigned MaxInstsPerLine = 16;
5163 unsigned BestUC = 1;
5164 unsigned SizeWithBestUC = BestUC * Size;
5166 unsigned SizeWithUC = UC * Size;
5167 if (SizeWithUC > 48)
5169 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5170 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5172 SizeWithBestUC = BestUC * Size;
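// Worked example (illustrative, not in the original source): with a loop body
// Size = 12 and MaxInstsPerLine = 16, the candidate unroll counts give
// SizeWithUC = 12, 24, 36, 48; UC = 4 wins because 48 % 16 == 0, while larger
// counts are rejected by the SizeWithUC > 48 cut-off above.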
5182 for (auto *BB : L->blocks()) {
5183 for (auto &I : *BB) {
5193 for (auto *U : I.users())
5195 LoadedValuesPlus.insert(U);
5202 return LoadedValuesPlus.contains(SI->getOperand(0));
5215 if (!Term || !Term->isConditional() || Preds.size() == 1 ||
5229 auto *I = dyn_cast<Instruction>(V);
5230 return I && DependsOnLoopLoad(I, Depth + 1);
5237 DependsOnLoopLoad(I, 0)) {
5253 if (L->getLoopDepth() > 1)
5264 for (auto *BB : L->getBlocks()) {
5265 for (auto &I : *BB) {
5269 if (IsVectorized && I.getType()->isVectorTy())
5286 switch (ST->getProcFamily()) {
5287 case AArch64Subtarget::AppleA14:
5288 case AArch64Subtarget::AppleA15:
5289 case AArch64Subtarget::AppleA16:
5290 case AArch64Subtarget::AppleM4:
5293 case AArch64Subtarget::Falkor:
5319 !ST->getSchedModel().isOutOfOrder()) {
5342 bool CanCreate) const {
5346 case Intrinsic::aarch64_neon_st2:
5347 case Intrinsic::aarch64_neon_st3:
5348 case Intrinsic::aarch64_neon_st4: {
5351 if (!CanCreate || !ST)
5353 unsigned NumElts = Inst->arg_size() - 1;
5354 if (ST->getNumElements() != NumElts)
5356 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5362 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5364 Res = Builder.CreateInsertValue(Res, L, i);
5368 case Intrinsic::aarch64_neon_ld2:
5369 case Intrinsic::aarch64_neon_ld3:
5370 case Intrinsic::aarch64_neon_ld4:
5371 if (Inst->getType() == ExpectedType)
5382 case Intrinsic::aarch64_neon_ld2:
5383 case Intrinsic::aarch64_neon_ld3:
5384 case Intrinsic::aarch64_neon_ld4:
5385 Info.ReadMem = true;
5386 Info.WriteMem = false;
5389 case Intrinsic::aarch64_neon_st2:
5390 case Intrinsic::aarch64_neon_st3:
5391 case Intrinsic::aarch64_neon_st4:
5392 Info.ReadMem = false;
5393 Info.WriteMem = true;
5401 case Intrinsic::aarch64_neon_ld2:
5402 case Intrinsic::aarch64_neon_st2:
5403 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5405 case Intrinsic::aarch64_neon_ld3:
5406 case Intrinsic::aarch64_neon_st3:
5407 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5409 case Intrinsic::aarch64_neon_ld4:
5410 case Intrinsic::aarch64_neon_st4:
5411 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
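// Roughly how an EarlyCSE-style client is expected to use the two hooks above
// (a minimal sketch under assumptions: EarlierSt / LaterLd stand for a matched
// aarch64.neon.stN / ldN pair the pass has already paired via MatchingId, and
// the real pass performs far more checking):
static bool forwardStNToLdN(const TargetTransformInfo &TTI,
                            IntrinsicInst *EarlierSt, IntrinsicInst *LaterLd) {
  MemIntrinsicInfo Info;
  if (!TTI.getTgtMemIntrinsic(EarlierSt, Info) || !Info.WriteMem)
    return false;
  // For stN the value is rebuilt from the store's operands (the
  // CreateInsertValue loop above); for ldN the earlier load is reused directly.
  Value *V =
      TTI.getOrCreateResultFromMemIntrinsic(EarlierSt, LaterLd->getType());
  if (!V)
    return false;
  LaterLd->replaceAllUsesWith(V);
  return true;
}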
5423 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
5424 bool Considerable = false;
5425 AllowPromotionWithoutCommonHeader = false;
5428 Type *ConsideredSExtType =
5430 if (I.getType() != ConsideredSExtType)
5434 for (const User *U : I.users()) {
5436 Considerable = true;
5440 if (GEPInst->getNumOperands() > 2) {
5441 AllowPromotionWithoutCommonHeader = true;
5446 return Considerable;
5494 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5504 return LegalizationCost + 2;
5514 LegalizationCost *= LT.first - 1;
5517 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5526 return LegalizationCost + 2;
5534 std::optional<FastMathFlags> FMF,
5550 return BaseCost + FixedVTy->getNumElements();
5553 if (Opcode != Instruction::FAdd)
5567 MVT MTy = LT.second;
5568 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5616 MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5617 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5619 if (ValTy->getElementCount().getFixedValue() >= 2 && NElts >= 2 &&
5629 return (LT.first - 1) + Log2_32(NElts);
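// Worked example (sketch, assuming NElts is the legalized vector width): a
// v8f32 min/max reduction legalizes to two v4f32 halves, so LT.first == 2 and
// NElts == 4, and the formula above yields (2 - 1) + log2(4) = 3 pairwise steps.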
5634 return (LT.first - 1) + Entry->Cost;
5646 if (LT.first != 1) {
5652 ExtraCost *= LT.first - 1;
5655 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5656 return Cost + ExtraCost;
5664 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *VecTy,
5666 EVT VecVT = TLI->getValueType(DL, VecTy);
5667 EVT ResVT = TLI->getValueType(DL, ResTy);
5677 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5679 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5681 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5683 return (LT.first - 1) * 2 + 2;
5694 EVT VecVT = TLI->getValueType(DL, VecTy);
5695 EVT ResVT = TLI->getValueType(DL, ResTy);
5698 RedOpcode == Instruction::Add) {
5704 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5706 return LT.first + 2;
5741 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5742 ? TLI->getPromotedVTForPredicate(EVT(LT.second))
5756 if (LT.second.getScalarType() == MVT::i1) {
5765 assert(Entry && "Illegal Type for Splice");
5766 LegalizationCost += Entry->Cost;
5767 return LegalizationCost * LT.first;
5771 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
5780 if (VF.isFixed() && !ST->isSVEorStreamingSVEAvailable() &&
5781 (!ST->isNeonAvailable() || !ST->hasDotProd()))
5784 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub) ||
5789 (!BinOp || (OpBExtend != TTI::PR_None && InputTypeB)) &&
5790 "Unexpected values for OpBExtend or InputTypeB");
5794 if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB))
5797 bool IsUSDot = OpBExtend != TTI::PR_None && OpAExtend != OpBExtend;
5798 if (IsUSDot && !ST->hasMatMulInt8())
5810 auto TC = TLI->getTypeConversion(AccumVectorType->getContext(),
5819 if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) !=
5825 std::pair<InstructionCost, MVT> AccumLT =
5827 std::pair<InstructionCost, MVT> InputLT =
5840 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
5842 if (AccumLT.second.getScalarType() == MVT::i64 &&
5843 InputLT.second.getScalarType() == MVT::i16)
5846 if (AccumLT.second.getScalarType() == MVT::i64 &&
5847 InputLT.second.getScalarType() == MVT::i8)
5857 if (ST->isSVEorStreamingSVEAvailable() ||
5858 (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() &&
5859 ST->hasDotProd())) {
5860 if (AccumLT.second.getScalarType() == MVT::i32 &&
5861 InputLT.second.getScalarType() == MVT::i8)
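// Sketch of the mapping assumed here: an i32 accumulator fed by i8 inputs is a
// single [us]dot (four i8 products summed into each i32 lane), which is why
// that case is given a flat, small cost; with SVE available, i64 accumulators
// fed by i16 (and, less directly, i8) inputs are likewise costed specially.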
5877 "Expected the Mask to match the return size if given");
5879 "Expected the same scalar types");
5885 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
5886 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
5887 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
5895 return std::max<InstructionCost>(1, LT.first / 4);
5903 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
5905 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
5908 unsigned TpNumElts = Mask.size();
5909 unsigned LTNumElts = LT.second.getVectorNumElements();
5910 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
5912 LT.second.getVectorElementCount());
5914 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>, InstructionCost>
5916 for (unsigned N = 0; N < NumVecs; N++) {
5920 unsigned Source1 = -1U, Source2 = -1U;
5921 unsigned NumSources = 0;
5922 for (unsigned E = 0; E < LTNumElts; E++) {
5923 int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
5932 unsigned Source = MaskElt / LTNumElts;
5933 if (NumSources == 0) {
5936 } else if (NumSources == 1 && Source != Source1) {
5939 } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
5945 if (Source == Source1)
5947 else if (Source == Source2)
5948 NMask.push_back(MaskElt % LTNumElts + LTNumElts);
5957 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
5968 NTp, NTp, NMask, CostKind, 0, nullptr, Args,
5971 Result.first->second = NCost;
5985 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
5986 if (LT.second.getFixedSizeInBits() >= 128 &&
5988 LT.second.getVectorNumElements() / 2) {
5991 if (Index == (int)LT.second.getVectorNumElements() / 2)
6005 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6008 return M.value() < 0 || M.value() == (int)M.index();
6014 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6015 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6024 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6025 ST->isSVEorStreamingSVEAvailable() &&
6030 if (ST->isSVEorStreamingSVEAvailable() &&
6044 if (IsLoad && LT.second.isVector() &&
6046 LT.second.getVectorElementCount()))
6052 if (Mask.size() == 4 &&
6054 (SrcTy->getScalarSizeInBits() == 16 ||
6055 SrcTy->getScalarSizeInBits() == 32) &&
6056 all_of(Mask, [](int E) { return E < 8; }))
6062 if (LT.second.isFixedLengthVector() &&
6063 LT.second.getVectorNumElements() == Mask.size() &&
6065 (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6066 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6067 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6068 LT.second.getVectorNumElements(), 16) ||
6069 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6070 LT.second.getVectorNumElements(), 32) ||
6071 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6072 LT.second.getVectorNumElements(), 64) ||
6075 [&Mask](int M) { return M < 0 || M == Mask[0]; })))
6204 return LT.first * Entry->Cost;
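// Example masks handled above (sketch): for 8 lanes, <0,8,1,9,2,10,3,11> is a
// zip1, <0,2,4,6,8,10,12,14> is a uzp1, and <1,0,3,2,5,4,7,6> on i16 lanes is a
// rev32-style reversal; each folds to a single NEON permute, so the lookup cost
// is only scaled by LT.first, the number of legalized registers.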
6213 LT.second.getSizeInBits() <= 128 && SubTp) {
6215 if (SubLT.second.isVector()) {
6216 int NumElts = LT.second.getVectorNumElements();
6217 int NumSubElts = SubLT.second.getVectorNumElements();
6218 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6224 if (IsExtractSubvector)
6260 return ST->useFixedOverScalableIfEqualCost();
6264 return ST->getEpilogueVectorizationMinVF();
6299 unsigned NumInsns = 0;
6301 NumInsns += BB->sizeWithoutDebug();
6311 int64_t Scale, unsigned AddrSpace) const {
6339 if (I->getOpcode() == Instruction::Or &&
6344 if (I->getOpcode() == Instruction::Add ||
6345 I->getOpcode() == Instruction::Sub)
6370 return all_equal(Shuf->getShuffleMask());
6377 bool AllowSplat = false) {
6382 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
6383 auto *FullTy = FullV->getType();
6384 auto *HalfTy = HalfV->getType();
6386 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6389 auto extractHalf = [](Value *FullV, Value *HalfV) {
6392 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6396 Value *S1Op1 = nullptr, *S2Op1 = nullptr;
6410 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6411 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6425 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6426 (M2Start != 0 && M2Start != (NumElements / 2)))
6428 if (S1Op1 && S2Op1 && M1Start != M2Start)
6438 return Ext->getType()->getScalarSizeInBits() ==
6439 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6453 Value *VectorOperand = nullptr;
6470 if (!GEP || GEP->getNumOperands() != 2)
6474 Value *Offsets = GEP->getOperand(1);
6477 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6483 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6484 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6485 Ops.push_back(&GEP->getOperandUse(1));
6519 switch (II->getIntrinsicID()) {
6520 case Intrinsic::aarch64_neon_smull:
6521 case Intrinsic::aarch64_neon_umull:
6524 Ops.push_back(&II->getOperandUse(0));
6525 Ops.push_back(&II->getOperandUse(1));
6530 case Intrinsic::fma:
6531 case Intrinsic::fmuladd:
6537 case Intrinsic::aarch64_neon_sqdmull:
6538 case Intrinsic::aarch64_neon_sqdmulh:
6539 case Intrinsic::aarch64_neon_sqrdmulh:
6542 Ops.push_back(&II->getOperandUse(0));
6544 Ops.push_back(&II->getOperandUse(1));
6545 return !Ops.empty();
6546 case Intrinsic::aarch64_neon_fmlal:
6547 case Intrinsic::aarch64_neon_fmlal2:
6548 case Intrinsic::aarch64_neon_fmlsl:
6549 case Intrinsic::aarch64_neon_fmlsl2:
6552 Ops.push_back(&II->getOperandUse(1));
6554 Ops.push_back(&II->getOperandUse(2));
6555 return !Ops.empty();
6556 case Intrinsic::aarch64_sve_ptest_first:
6557 case Intrinsic::aarch64_sve_ptest_last:
6559 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6560 Ops.push_back(&II->getOperandUse(0));
6561 return !Ops.empty();
6562 case Intrinsic::aarch64_sme_write_horiz:
6563 case Intrinsic::aarch64_sme_write_vert:
6564 case Intrinsic::aarch64_sme_writeq_horiz:
6565 case Intrinsic::aarch64_sme_writeq_vert: {
6567 if (!Idx || Idx->getOpcode() != Instruction::Add)
6569 Ops.push_back(&II->getOperandUse(1));
6572 case Intrinsic::aarch64_sme_read_horiz:
6573 case Intrinsic::aarch64_sme_read_vert:
6574 case Intrinsic::aarch64_sme_readq_horiz:
6575 case Intrinsic::aarch64_sme_readq_vert:
6576 case Intrinsic::aarch64_sme_ld1b_vert:
6577 case Intrinsic::aarch64_sme_ld1h_vert:
6578 case Intrinsic::aarch64_sme_ld1w_vert:
6579 case Intrinsic::aarch64_sme_ld1d_vert:
6580 case Intrinsic::aarch64_sme_ld1q_vert:
6581 case Intrinsic::aarch64_sme_st1b_vert:
6582 case Intrinsic::aarch64_sme_st1h_vert:
6583 case Intrinsic::aarch64_sme_st1w_vert:
6584 case Intrinsic::aarch64_sme_st1d_vert:
6585 case Intrinsic::aarch64_sme_st1q_vert:
6586 case Intrinsic::aarch64_sme_ld1b_horiz:
6587 case Intrinsic::aarch64_sme_ld1h_horiz:
6588 case Intrinsic::aarch64_sme_ld1w_horiz:
6589 case Intrinsic::aarch64_sme_ld1d_horiz:
6590 case Intrinsic::aarch64_sme_ld1q_horiz:
6591 case Intrinsic::aarch64_sme_st1b_horiz:
6592 case Intrinsic::aarch64_sme_st1h_horiz:
6593 case Intrinsic::aarch64_sme_st1w_horiz:
6594 case Intrinsic::aarch64_sme_st1d_horiz:
6595 case Intrinsic::aarch64_sme_st1q_horiz: {
6597 if (!Idx || Idx->getOpcode() != Instruction::Add)
6599 Ops.push_back(&II->getOperandUse(3));
6602 case Intrinsic::aarch64_neon_pmull:
6605 Ops.push_back(&II->getOperandUse(0));
6606 Ops.push_back(&II->getOperandUse(1));
6608 case Intrinsic::aarch64_neon_pmull64:
6610 II->getArgOperand(1)))
6612 Ops.push_back(&II->getArgOperandUse(0));
6613 Ops.push_back(&II->getArgOperandUse(1));
6615 case Intrinsic::masked_gather:
6618 Ops.push_back(&II->getArgOperandUse(0));
6620 case Intrinsic::masked_scatter:
6623 Ops.push_back(&II->getArgOperandUse(1));
6630 auto ShouldSinkCondition = [](Value *Cond,
6635 if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6639 Ops.push_back(&II->getOperandUse(0));
6643 switch (I->getOpcode()) {
6644 case Instruction::GetElementPtr:
6645 case Instruction::Add:
6646 case Instruction::Sub:
6648 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
6650 Ops.push_back(&I->getOperandUse(Op));
6655 case Instruction::Select: {
6656 if (!ShouldSinkCondition(I->getOperand(0), Ops))
6659 Ops.push_back(&I->getOperandUse(0));
6662 case Instruction::Br: {
6669 Ops.push_back(&I->getOperandUse(0));
6676 if (!I->getType()->isVectorTy())
6679 switch (I->getOpcode()) {
6680 case Instruction::Sub:
6681 case Instruction::Add: {
6690 Ops.push_back(&Ext1->getOperandUse(0));
6691 Ops.push_back(&Ext2->getOperandUse(0));
6694 Ops.push_back(&I->getOperandUse(0));
6695 Ops.push_back(&I->getOperandUse(1));
6699 case Instruction::Or: {
6702 if (ST->hasNEON()) {
6716 if (I->getParent() != MainAnd->getParent() ||
6721 if (I->getParent() != IA->getParent() ||
6722 I->getParent() != IB->getParent())
6727 Ops.push_back(&I->getOperandUse(0));
6728 Ops.push_back(&I->getOperandUse(1));
6737 case Instruction::Mul: {
6738 auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
6741 if (Ty->isScalableTy())
6745 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
6748 int NumZExts = 0, NumSExts = 0;
6749 for (auto &Op : I->operands()) {
6756 auto *ExtOp = Ext->getOperand(0);
6757 if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
6758 Ops.push_back(&Ext->getOperandUse(0));
6766 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
6767 I->getType()->getScalarSizeInBits())
6804 if (!ElementConstant || !ElementConstant->isZero())
6807 unsigned Opcode = OperandInstr->getOpcode();
6808 if (Opcode == Instruction::SExt)
6810 else if (Opcode == Instruction::ZExt)
6815 unsigned Bitwidth = I->getType()->getScalarSizeInBits();
6825 Ops.push_back(&Insert->getOperandUse(1));
6831 if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
6835 if (!ShouldSinkSplatForIndexedVariant(I))
6840 Ops.push_back(&I->getOperandUse(0));
6842 Ops.push_back(&I->getOperandUse(1));
6844 return !Ops.empty();
6846 case Instruction::FMul: {
6848 if (I->getType()->isScalableTy())
6857 Ops.push_back(&I->getOperandUse(0));
6859 Ops.push_back(&I->getOperandUse(1));
6860 return !Ops.empty();
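// Why sinking the splat pays off, as a sketch (the IR below is illustrative and
// not taken from this file): when the duplicated lane sits in the same block as
// the multiply, instruction selection can use the by-element form directly.
//
//   %s = shufflevector <4 x i16> %v, <4 x i16> poison, <4 x i32> zeroinitializer
//   %m = mul <4 x i16> %x, %s            ; selects MUL Vd.4H, Vn.4H, Vm.H[0]
//
// If the shufflevector stays in another block, the splat is instead materialized
// as a separate DUP before the multiply, which is what the checks above avoid.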
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
Information for memory intrinsic cost model.
Type * getDataType() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1, 3, 5, 7, 9, 11, 13, 15>.
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
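A short, hedged sketch of how these shuffle-mask classifiers are queried; it assumes the AArch64 headers declaring them are in scope, and the mask literal is the 8-lane uzp1 pattern from the description above:

// Sketch only; isUZPMask is the helper documented above.
static bool classifyExampleMask() {
  const int Mask[] = {0, 2, 4, 6, 8, 10, 12, 14}; // uzp1 pattern for 8 lanes
  unsigned WhichResult = 0;
  bool IsUZP = isUZPMask(Mask, /*NumElts=*/8, WhichResult);
  // On a match, WhichResult distinguishes the uzp1 form from the uzp2 form.
  return IsUZP;
}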
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
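A hedged sketch of folding through simplifyBinOp; building the SimplifyQuery from just a DataLayout is one of its simpler constructors:

#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"

// Try to fold LHS + RHS; returns nullptr when nothing simplifies.
static llvm::Value *trySimplifyAdd(llvm::Value *LHS, llvm::Value *RHS,
                                   const llvm::DataLayout &DL) {
  return llvm::simplifyBinOp(llvm::Instruction::Add, LHS, RHS,
                             llvm::SimplifyQuery(DL));
}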
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> or <4, 12, 5, 13, 6, 14, 7, 15>.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are integer type.
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
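The entries above are values of llvm::RecurKind. As a hedged sketch, a cost model might dispatch over the integer min/max kinds like this (the mapping is purely illustrative):

#include "llvm/Analysis/IVDescriptors.h"
#include <optional>

// Illustrative only: name the integer min/max reduction kinds.
static std::optional<const char *> minMaxName(llvm::RecurKind Kind) {
  switch (Kind) {
  case llvm::RecurKind::SMin: return "smin";
  case llvm::RecurKind::SMax: return "smax";
  case llvm::RecurKind::UMin: return "umin";
  case llvm::RecurKind::UMax: return "umax";
  default:                    return std::nullopt;
  }
}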
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
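For instance, the fixed-length VL patterns map directly to element counts; a sketch that assumes the AArch64 headers declaring AArch64SVEPredPattern and this helper are in scope:

// vl8 encodes "first 8 elements active"; non-VL patterns (e.g. all) yield 0.
static unsigned exampleActiveElements() {
  return getNumElementsFromSVEPredPattern(AArch64SVEPredPattern::vl8); // == 8
}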
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
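A hedged sketch of a unit-stride check built on getPtrStride; all analyses are taken as parameters rather than assumed globals:

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/IR/Dominators.h"
#include <optional>

// Does Ptr advance by exactly one AccessTy element per iteration of L?
static bool isUnitStride(llvm::PredicatedScalarEvolution &PSE,
                         llvm::Type *AccessTy, llvm::Value *Ptr,
                         const llvm::Loop *L, const llvm::DominatorTree &DT) {
  std::optional<int64_t> Stride = llvm::getPtrStride(PSE, AccessTy, Ptr, L, DT);
  return Stride && *Stride == 1;
}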
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
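The type-conversion variant is keyed on both destination and source types; again the entries are made up for the example:

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"

static unsigned lookupExampleConvertCost(llvm::MVT Dst, llvm::MVT Src) {
  // Illustrative entries only: {ISD opcode, destination type, source type, cost}.
  static const llvm::TypeConversionCostTblEntry ExampleTbl[] = {
      {llvm::ISD::ZERO_EXTEND, llvm::MVT::v4i32, llvm::MVT::v4i16, 1},
      {llvm::ISD::SIGN_EXTEND, llvm::MVT::v4i32, llvm::MVT::v4i8, 2},
  };
  if (const auto *Entry = llvm::ConvertCostTableLookup(
          ExampleTbl, llvm::ISD::ZERO_EXTEND, Dst, Src))
    return Entry->Cost;
  return 0;
}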
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
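The members above describe a small builder-style record used by this file's SVE intrinsic handling. A hedged sketch of constructing and querying one; the intrinsic ID and IR opcode are examples only, not a claim about how any particular intrinsic is actually classified:

// Sketch: a merging binary op whose inactive lanes come from the passthru
// operand and which corresponds to a plain IR add.
static void sketchSVEIntrinsicInfoUsage() {
  SVEIntrinsicInfo Info =
      SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_add_u)
          .setMatchingIROpcode(Instruction::Add);
  if (Info.hasGoverningPredicate()) {
    unsigned GPIdx = Info.getGoverningPredicateOperandIdx();
    (void)GPIdx; // e.g. used to test whether the predicate is all-active.
  }
}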
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
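A hedged sketch of the usual EVT guard in cost hooks: translate the IR type and only consult MVT-keyed tables when the result is simple:

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Type.h"

// Returns the element count if Ty maps to a simple fixed-length vector EVT,
// and 0 otherwise.
static unsigned simpleFixedVectorNumElts(llvm::Type *Ty) {
  llvm::EVT VT = llvm::EVT::getEVT(Ty, /*HandleUnknown=*/true);
  if (!VT.isSimple() || !VT.isFixedLengthVector())
    return 0;
  return VT.getVectorNumElements();
}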
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.