#include "llvm/IR/IntrinsicsAArch64.h"
#define DEBUG_TYPE "aarch64tti"
    "sve-prefer-fixed-over-scalable-if-equal", cl::Hidden);
        "Penalty of calling a function that requires a change to PSTATE.SM"));
    cl::desc(
        "Penalty of inlining a call that requires a change to PSTATE.SM"));
    cl::desc("The cost of a histcnt instruction"));
    cl::desc("The number of instructions to search for a redundant dmb"));
class TailFoldingOption {
  bool NeedsDefault = true;

  void setNeedsDefault(bool V) { NeedsDefault = V; }

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
    Bits &= ~DisableBits;
126 errs() <<
"invalid argument '" << Opt
127 <<
"' to -sve-tail-folding=; the option should be of the form\n"
128 " (disabled|all|default|simple)[+(reductions|recurrences"
129 "|reverse|noreductions|norecurrences|noreverse)]\n";
  void operator=(const std::string &Val) {
    setNeedsDefault(false);

    StringRef(Val).split(TailFoldTypes, '+', -1, false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);
      setInitialBits(TailFoldingOpts::Disabled);
    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
192 "Control the use of vectorisation using tail-folding for SVE where the"
193 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
194 "\ndisabled (Initial) No loop types will vectorize using "
196 "\ndefault (Initial) Uses the default tail-folding settings for "
198 "\nall (Initial) All legal loop types will vectorize using "
200 "\nsimple (Initial) Use tail-folding for simple loops (not "
201 "reductions or recurrences)"
202 "\nreductions Use tail-folding for loops containing reductions"
203 "\nnoreductions Inverse of above"
204 "\nrecurrences Use tail-folding for loops containing fixed order "
206 "\nnorecurrences Inverse of above"
207 "\nreverse Use tail-folding for loops requiring reversed "
209 "\nnoreverse Inverse of above"),
  StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
  FeatureStr.split(Features, ",");
  return F.hasFnAttribute("fmv-features");
      AArch64::FeatureExecuteOnly,
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
      TM.getSubtargetImpl(*Callee)->getFeatureBits();
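  // Flip the bits listed in InlineInverseFeatures in both sets, then require
  // the callee's adjusted features to be a subset of the caller's.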
  FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
  FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;
  return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
  auto FVTy = dyn_cast<FixedVectorType>(Ty);
         FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
                                       unsigned DefaultCallPenalty) const {
  if (F == Call.getCaller())
  return DefaultCallPenalty;
                          ST->isNeonAvailable());
399 assert(Ty->isIntegerTy());
401 unsigned BitSize = Ty->getPrimitiveSizeInBits();
408 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
413 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
419 return std::max<InstructionCost>(1,
Cost);
426 assert(Ty->isIntegerTy());
428 unsigned BitSize = Ty->getPrimitiveSizeInBits();
434 unsigned ImmIdx = ~0U;
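  // Work out which operand, if any, of this opcode can directly encode an
  // immediate; other operand positions need the constant to be materialised.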
  case Instruction::GetElementPtr:
  case Instruction::Store:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
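    // Estimate the materialisation cost as roughly one instruction per 64 bits
    // of the immediate.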
    int NumConstants = (BitSize + 63) / 64;
  assert(Ty->isIntegerTy());
  unsigned BitSize = Ty->getPrimitiveSizeInBits();
505 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
511 case Intrinsic::sadd_with_overflow:
512 case Intrinsic::uadd_with_overflow:
513 case Intrinsic::ssub_with_overflow:
514 case Intrinsic::usub_with_overflow:
515 case Intrinsic::smul_with_overflow:
516 case Intrinsic::umul_with_overflow:
518 int NumConstants = (BitSize + 63) / 64;
525 case Intrinsic::experimental_stackmap:
526 if ((Idx < 2) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
529 case Intrinsic::experimental_patchpoint_void:
530 case Intrinsic::experimental_patchpoint:
531 if ((Idx < 4) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
534 case Intrinsic::experimental_gc_statepoint:
535 if ((Idx < 5) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
  if (TyWidth == 32 || TyWidth == 64)
  unsigned TotalHistCnts = 1;
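  // Scale the base cost by how many histcnt operations are needed to cover the
  // full element count once the type is broken into legal vectors.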
  unsigned EC = VTy->getElementCount().getKnownMinValue();
  unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
  if (EC == 2 || (LegalEltSize == 32 && EC == 4))
  TotalHistCnts = EC / NaturalVectorWidth;
  switch (ICA.getID()) {
  case Intrinsic::experimental_vector_histogram_add: {
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                        MVT::v8i16, MVT::v2i32, MVT::v4i32,
                                        MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
    if (LT.second == MVT::v2i64)
    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    static const auto ValidSatTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
      return LT.first * Instrs;
    if (ST->isSVEAvailable() && VectorSize >= 128 && isPowerOf2_64(VectorSize))
      return LT.first * Instrs;
  case Intrinsic::abs: {
    static const auto ValidAbsTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }))
  case Intrinsic::bswap: {
    static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
                                     MVT::v4i32, MVT::v2i64};
    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }) &&
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
  case Intrinsic::fmuladd: {
        (EltTy->isHalfTy() && ST->hasFullFP16()))
  case Intrinsic::stepvector: {
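    // A legalised stepvector is split into LT.first parts; each part after the
    // first needs an extra vector add to offset the series.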
      Cost += AddCost * (LT.first - 1);
  case Intrinsic::vector_extract:
  case Intrinsic::vector_insert: {
    bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
    EVT SubVecVT = IsExtract ? getTLI()->getValueType(DL, RetTy)
        getTLI()->getTypeConversion(C, SubVecVT);
        getTLI()->getTypeConversion(C, VecVT);
  case Intrinsic::bitreverse: {
        {Intrinsic::bitreverse, MVT::i32, 1},
        {Intrinsic::bitreverse, MVT::i64, 1},
        {Intrinsic::bitreverse, MVT::v8i8, 1},
        {Intrinsic::bitreverse, MVT::v16i8, 1},
        {Intrinsic::bitreverse, MVT::v4i16, 2},
        {Intrinsic::bitreverse, MVT::v8i16, 2},
        {Intrinsic::bitreverse, MVT::v2i32, 2},
        {Intrinsic::bitreverse, MVT::v4i32, 2},
        {Intrinsic::bitreverse, MVT::v1i64, 2},
        {Intrinsic::bitreverse, MVT::v2i64, 2},
    if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||
        TLI->getValueType(DL, RetTy, true) == MVT::i16)
      return LegalisationCost.first * Entry->Cost + 1;
    return LegalisationCost.first * Entry->Cost;
  case Intrinsic::ctpop: {
    if (!ST->hasNEON()) {
        RetTy->getScalarSizeInBits()
    return LT.first * Entry->Cost + ExtraCost;
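  // Overflow-checking arithmetic lowers to flag-setting add/sub/mul sequences;
  // the table below gives the approximate instruction count per scalar type.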
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
        {Intrinsic::sadd_with_overflow, MVT::i8, 3},
        {Intrinsic::uadd_with_overflow, MVT::i8, 3},
        {Intrinsic::sadd_with_overflow, MVT::i16, 3},
        {Intrinsic::uadd_with_overflow, MVT::i16, 3},
        {Intrinsic::sadd_with_overflow, MVT::i32, 1},
        {Intrinsic::uadd_with_overflow, MVT::i32, 1},
        {Intrinsic::sadd_with_overflow, MVT::i64, 1},
        {Intrinsic::uadd_with_overflow, MVT::i64, 1},
        {Intrinsic::ssub_with_overflow, MVT::i8, 3},
        {Intrinsic::usub_with_overflow, MVT::i8, 3},
        {Intrinsic::ssub_with_overflow, MVT::i16, 3},
        {Intrinsic::usub_with_overflow, MVT::i16, 3},
        {Intrinsic::ssub_with_overflow, MVT::i32, 1},
        {Intrinsic::usub_with_overflow, MVT::i32, 1},
        {Intrinsic::ssub_with_overflow, MVT::i64, 1},
        {Intrinsic::usub_with_overflow, MVT::i64, 1},
        {Intrinsic::smul_with_overflow, MVT::i8, 5},
        {Intrinsic::umul_with_overflow, MVT::i8, 4},
        {Intrinsic::smul_with_overflow, MVT::i16, 5},
        {Intrinsic::umul_with_overflow, MVT::i16, 4},
        {Intrinsic::smul_with_overflow, MVT::i32, 2},
        {Intrinsic::umul_with_overflow, MVT::i32, 2},
        {Intrinsic::smul_with_overflow, MVT::i64, 3},
        {Intrinsic::umul_with_overflow, MVT::i64, 3},
    EVT MTy = TLI->getValueType(DL, RetTy->getContainedType(0), true);
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
    EVT MTy = TLI->getValueType(DL, RetTy);
    if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
         LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
         LT.second == MVT::v2f64)) {
          (LT.second == MVT::f64 && MTy == MVT::i32) ||
          (LT.second == MVT::f32 && MTy == MVT::i64)))
    if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
    if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
        (LT.second == MVT::f16 && MTy == MVT::i64) ||
        ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
    if ((LT.second.getScalarType() == MVT::f32 ||
         LT.second.getScalarType() == MVT::f64 ||
         LT.second.getScalarType() == MVT::f16) &&
      if (LT.second.isVector())
          LegalTy, {LegalTy, LegalTy});
          LegalTy, {LegalTy, LegalTy});
      return LT.first * Cost +
             ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
    RetTy = RetTy->getScalarType();
    if (LT.second.isVector()) {
    return LT.first * Cost;
  case Intrinsic::fshl:
  case Intrinsic::fshr: {
        {Intrinsic::fshl, MVT::v4i32, 2},
        {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
        {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
        {Intrinsic::fshl, MVT::v8i8, 2},  {Intrinsic::fshl, MVT::v4i16, 2}};
      return LegalisationCost.first * Entry->Cost;
    if (!RetTy->isIntegerTy())
    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
                       RetTy->getScalarSizeInBits() < 64) ||
                      (RetTy->getScalarSizeInBits() % 64 != 0);
    unsigned ExtraCost = HigherCost ? 1 : 0;
    if (RetTy->getScalarSizeInBits() == 32 ||
        RetTy->getScalarSizeInBits() == 64)
    return TyL.first + ExtraCost;
  case Intrinsic::get_active_lane_mask: {
    EVT RetVT = getTLI()->getValueType(DL, RetTy);
    if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
    if (RetTy->isScalableTy()) {
      if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
      if (ST->hasSVE2p1() || ST->hasSME2()) {
      return Cost + (SplitCost * (Cost - 1));
  case Intrinsic::experimental_vector_match: {
    unsigned SearchSize = NeedleTy->getNumElements();
    if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
  case Intrinsic::experimental_cttz_elts: {
    if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
  auto RequiredType = II.getType();
  assert(PN && "Expected Phi Node!");
  if (!PN->hasOneUse())
    return std::nullopt;
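  // Every incoming value of the phi must be a convert.to.svbool whose source
  // already has the required predicate type, otherwise the cast chain cannot
  // be folded away.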
  for (Value *IncValPhi : PN->incoming_values()) {
        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())
      return std::nullopt;
  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
    return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
    return GoverningPredicateIdx;
    GoverningPredicateIdx = Index;
    return UndefIntrinsic;
    UndefIntrinsic = IID;
    return ResultLanes == InactiveLanesTakenFromOperand;
    return OperandIdxForInactiveLanes;
    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesTakenFromOperand;
    OperandIdxForInactiveLanes = Index;
    return ResultLanes == InactiveLanesAreNotDefined;
    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesAreNotDefined;
    return ResultLanes == InactiveLanesAreUnused;
    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesAreUnused;
    ResultIsZeroInitialized = true;
    return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
    return OperandIdxWithNoActiveLanes;
    OperandIdxWithNoActiveLanes = Index;
  unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
  unsigned IROpcode = 0;
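  // Describes what the inactive (predicated-off) result lanes contain: taken
  // from an operand, not defined, or never observed by any user.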
  enum PredicationStyle {
    InactiveLanesTakenFromOperand,
    InactiveLanesAreNotDefined,
    InactiveLanesAreUnused
  bool ResultIsZeroInitialized = false;
  unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
  unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
  return !isa<ScalableVectorType>(V->getType());
1296 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1297 case Intrinsic::aarch64_sve_fcvt_f16f32:
1298 case Intrinsic::aarch64_sve_fcvt_f16f64:
1299 case Intrinsic::aarch64_sve_fcvt_f32f16:
1300 case Intrinsic::aarch64_sve_fcvt_f32f64:
1301 case Intrinsic::aarch64_sve_fcvt_f64f16:
1302 case Intrinsic::aarch64_sve_fcvt_f64f32:
1303 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1304 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1305 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1306 case Intrinsic::aarch64_sve_fcvtzs:
1307 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1308 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1309 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1310 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1311 case Intrinsic::aarch64_sve_fcvtzu:
1312 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1313 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1314 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1315 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1316 case Intrinsic::aarch64_sve_scvtf:
1317 case Intrinsic::aarch64_sve_scvtf_f16i32:
1318 case Intrinsic::aarch64_sve_scvtf_f16i64:
1319 case Intrinsic::aarch64_sve_scvtf_f32i64:
1320 case Intrinsic::aarch64_sve_scvtf_f64i32:
1321 case Intrinsic::aarch64_sve_ucvtf:
1322 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1323 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1324 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1325 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1328 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1329 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1330 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1331 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1334 case Intrinsic::aarch64_sve_fabd:
1336 case Intrinsic::aarch64_sve_fadd:
1339 case Intrinsic::aarch64_sve_fdiv:
1342 case Intrinsic::aarch64_sve_fmax:
1344 case Intrinsic::aarch64_sve_fmaxnm:
1346 case Intrinsic::aarch64_sve_fmin:
1348 case Intrinsic::aarch64_sve_fminnm:
1350 case Intrinsic::aarch64_sve_fmla:
1352 case Intrinsic::aarch64_sve_fmls:
1354 case Intrinsic::aarch64_sve_fmul:
1357 case Intrinsic::aarch64_sve_fmulx:
1359 case Intrinsic::aarch64_sve_fnmla:
1361 case Intrinsic::aarch64_sve_fnmls:
1363 case Intrinsic::aarch64_sve_fsub:
1366 case Intrinsic::aarch64_sve_add:
1369 case Intrinsic::aarch64_sve_mla:
1371 case Intrinsic::aarch64_sve_mls:
1373 case Intrinsic::aarch64_sve_mul:
1376 case Intrinsic::aarch64_sve_sabd:
1378 case Intrinsic::aarch64_sve_sdiv:
1381 case Intrinsic::aarch64_sve_smax:
1383 case Intrinsic::aarch64_sve_smin:
1385 case Intrinsic::aarch64_sve_smulh:
1387 case Intrinsic::aarch64_sve_sub:
1390 case Intrinsic::aarch64_sve_uabd:
1392 case Intrinsic::aarch64_sve_udiv:
1395 case Intrinsic::aarch64_sve_umax:
1397 case Intrinsic::aarch64_sve_umin:
1399 case Intrinsic::aarch64_sve_umulh:
1401 case Intrinsic::aarch64_sve_asr:
1404 case Intrinsic::aarch64_sve_lsl:
1407 case Intrinsic::aarch64_sve_lsr:
1410 case Intrinsic::aarch64_sve_and:
1413 case Intrinsic::aarch64_sve_bic:
1415 case Intrinsic::aarch64_sve_eor:
1418 case Intrinsic::aarch64_sve_orr:
1421 case Intrinsic::aarch64_sve_sqsub:
1423 case Intrinsic::aarch64_sve_uqsub:
1426 case Intrinsic::aarch64_sve_add_u:
1429 case Intrinsic::aarch64_sve_and_u:
1432 case Intrinsic::aarch64_sve_asr_u:
1435 case Intrinsic::aarch64_sve_eor_u:
1438 case Intrinsic::aarch64_sve_fadd_u:
1441 case Intrinsic::aarch64_sve_fdiv_u:
1444 case Intrinsic::aarch64_sve_fmul_u:
1447 case Intrinsic::aarch64_sve_fsub_u:
1450 case Intrinsic::aarch64_sve_lsl_u:
1453 case Intrinsic::aarch64_sve_lsr_u:
1456 case Intrinsic::aarch64_sve_mul_u:
1459 case Intrinsic::aarch64_sve_orr_u:
1462 case Intrinsic::aarch64_sve_sdiv_u:
1465 case Intrinsic::aarch64_sve_sub_u:
1468 case Intrinsic::aarch64_sve_udiv_u:
1472 case Intrinsic::aarch64_sve_addqv:
1473 case Intrinsic::aarch64_sve_and_z:
1474 case Intrinsic::aarch64_sve_bic_z:
1475 case Intrinsic::aarch64_sve_brka_z:
1476 case Intrinsic::aarch64_sve_brkb_z:
1477 case Intrinsic::aarch64_sve_brkn_z:
1478 case Intrinsic::aarch64_sve_brkpa_z:
1479 case Intrinsic::aarch64_sve_brkpb_z:
1480 case Intrinsic::aarch64_sve_cntp:
1481 case Intrinsic::aarch64_sve_compact:
1482 case Intrinsic::aarch64_sve_eor_z:
1483 case Intrinsic::aarch64_sve_eorv:
1484 case Intrinsic::aarch64_sve_eorqv:
1485 case Intrinsic::aarch64_sve_nand_z:
1486 case Intrinsic::aarch64_sve_nor_z:
1487 case Intrinsic::aarch64_sve_orn_z:
1488 case Intrinsic::aarch64_sve_orr_z:
1489 case Intrinsic::aarch64_sve_orv:
1490 case Intrinsic::aarch64_sve_orqv:
1491 case Intrinsic::aarch64_sve_pnext:
1492 case Intrinsic::aarch64_sve_rdffr_z:
1493 case Intrinsic::aarch64_sve_saddv:
1494 case Intrinsic::aarch64_sve_uaddv:
1495 case Intrinsic::aarch64_sve_umaxv:
1496 case Intrinsic::aarch64_sve_umaxqv:
1497 case Intrinsic::aarch64_sve_cmpeq:
1498 case Intrinsic::aarch64_sve_cmpeq_wide:
1499 case Intrinsic::aarch64_sve_cmpge:
1500 case Intrinsic::aarch64_sve_cmpge_wide:
1501 case Intrinsic::aarch64_sve_cmpgt:
1502 case Intrinsic::aarch64_sve_cmpgt_wide:
1503 case Intrinsic::aarch64_sve_cmphi:
1504 case Intrinsic::aarch64_sve_cmphi_wide:
1505 case Intrinsic::aarch64_sve_cmphs:
1506 case Intrinsic::aarch64_sve_cmphs_wide:
1507 case Intrinsic::aarch64_sve_cmple_wide:
1508 case Intrinsic::aarch64_sve_cmplo_wide:
1509 case Intrinsic::aarch64_sve_cmpls_wide:
1510 case Intrinsic::aarch64_sve_cmplt_wide:
1511 case Intrinsic::aarch64_sve_cmpne:
1512 case Intrinsic::aarch64_sve_cmpne_wide:
1513 case Intrinsic::aarch64_sve_facge:
1514 case Intrinsic::aarch64_sve_facgt:
1515 case Intrinsic::aarch64_sve_fcmpeq:
1516 case Intrinsic::aarch64_sve_fcmpge:
1517 case Intrinsic::aarch64_sve_fcmpgt:
1518 case Intrinsic::aarch64_sve_fcmpne:
1519 case Intrinsic::aarch64_sve_fcmpuo:
1520 case Intrinsic::aarch64_sve_ld1:
1521 case Intrinsic::aarch64_sve_ld1_gather:
1522 case Intrinsic::aarch64_sve_ld1_gather_index:
1523 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1524 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1525 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1526 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1527 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1528 case Intrinsic::aarch64_sve_ld1q_gather_index:
1529 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1530 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1531 case Intrinsic::aarch64_sve_ld1ro:
1532 case Intrinsic::aarch64_sve_ld1rq:
1533 case Intrinsic::aarch64_sve_ld1udq:
1534 case Intrinsic::aarch64_sve_ld1uwq:
1535 case Intrinsic::aarch64_sve_ld2_sret:
1536 case Intrinsic::aarch64_sve_ld2q_sret:
1537 case Intrinsic::aarch64_sve_ld3_sret:
1538 case Intrinsic::aarch64_sve_ld3q_sret:
1539 case Intrinsic::aarch64_sve_ld4_sret:
1540 case Intrinsic::aarch64_sve_ld4q_sret:
1541 case Intrinsic::aarch64_sve_ldff1:
1542 case Intrinsic::aarch64_sve_ldff1_gather:
1543 case Intrinsic::aarch64_sve_ldff1_gather_index:
1544 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1545 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1546 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1547 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1548 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1549 case Intrinsic::aarch64_sve_ldnf1:
1550 case Intrinsic::aarch64_sve_ldnt1:
1551 case Intrinsic::aarch64_sve_ldnt1_gather:
1552 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1553 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1554 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1557 case Intrinsic::aarch64_sve_prf:
1558 case Intrinsic::aarch64_sve_prfb_gather_index:
1559 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1560 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1561 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1562 case Intrinsic::aarch64_sve_prfd_gather_index:
1563 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1564 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1565 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1566 case Intrinsic::aarch64_sve_prfh_gather_index:
1567 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1568 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1569 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1570 case Intrinsic::aarch64_sve_prfw_gather_index:
1571 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1572 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1573 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1576 case Intrinsic::aarch64_sve_st1_scatter:
1577 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1578 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1579 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1580 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1581 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1582 case Intrinsic::aarch64_sve_st1dq:
1583 case Intrinsic::aarch64_sve_st1q_scatter_index:
1584 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1585 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1586 case Intrinsic::aarch64_sve_st1wq:
1587 case Intrinsic::aarch64_sve_stnt1:
1588 case Intrinsic::aarch64_sve_stnt1_scatter:
1589 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1590 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1591 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1593 case Intrinsic::aarch64_sve_st2:
1594 case Intrinsic::aarch64_sve_st2q:
1596 case Intrinsic::aarch64_sve_st3:
1597 case Intrinsic::aarch64_sve_st3q:
1599 case Intrinsic::aarch64_sve_st4:
1600 case Intrinsic::aarch64_sve_st4q:
1608 Value *UncastedPred;
1614 Pred = UncastedPred;
1620 if (OrigPredTy->getMinNumElements() <=
1622 ->getMinNumElements())
1623 Pred = UncastedPred;
1627 return C &&
C->isAllOnesValue();
1634 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
1635 Dup->getOperand(1) == Pg &&
isa<Constant>(Dup->getOperand(2)))
1643static std::optional<Instruction *>
1650 Value *Op1 =
II.getOperand(1);
1651 Value *Op2 =
II.getOperand(2);
1677 return std::nullopt;
1685 if (SimpleII == Inactive)
1695static std::optional<Instruction *>
1699 return std::nullopt;
1728 II.setCalledFunction(NewDecl);
1738 return std::nullopt;
1750static std::optional<Instruction *>
1754 return std::nullopt;
1756 auto IntrinsicID = BinOp->getIntrinsicID();
1757 switch (IntrinsicID) {
1758 case Intrinsic::aarch64_sve_and_z:
1759 case Intrinsic::aarch64_sve_bic_z:
1760 case Intrinsic::aarch64_sve_eor_z:
1761 case Intrinsic::aarch64_sve_nand_z:
1762 case Intrinsic::aarch64_sve_nor_z:
1763 case Intrinsic::aarch64_sve_orn_z:
1764 case Intrinsic::aarch64_sve_orr_z:
1767 return std::nullopt;
1770 auto BinOpPred = BinOp->getOperand(0);
1771 auto BinOpOp1 = BinOp->getOperand(1);
1772 auto BinOpOp2 = BinOp->getOperand(2);
1776 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
1777 return std::nullopt;
1779 auto PredOp = PredIntr->getOperand(0);
1781 if (PredOpTy !=
II.getType())
1782 return std::nullopt;
1786 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
1787 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1788 if (BinOpOp1 == BinOpOp2)
1789 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1792 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
1794 auto NarrowedBinOp =
1799static std::optional<Instruction *>
1806 return BinOpCombine;
1811 return std::nullopt;
1814 Value *Cursor =
II.getOperand(0), *EarliestReplacement =
nullptr;
1823 if (CursorVTy->getElementCount().getKnownMinValue() <
1824 IVTy->getElementCount().getKnownMinValue())
1828 if (Cursor->getType() == IVTy)
1829 EarliestReplacement = Cursor;
1834 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
1835 Intrinsic::aarch64_sve_convert_to_svbool ||
1836 IntrinsicCursor->getIntrinsicID() ==
1837 Intrinsic::aarch64_sve_convert_from_svbool))
1840 CandidatesForRemoval.
insert(CandidatesForRemoval.
begin(), IntrinsicCursor);
1841 Cursor = IntrinsicCursor->getOperand(0);
1846 if (!EarliestReplacement)
1847 return std::nullopt;
1855 auto *OpPredicate =
II.getOperand(0);
1868 return std::nullopt;
1871 return std::nullopt;
1873 const auto PTruePattern =
1875 if (PTruePattern != AArch64SVEPredPattern::vl1)
1876 return std::nullopt;
1881 II.getArgOperand(0),
II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
1882 Insert->insertBefore(
II.getIterator());
1883 Insert->takeName(&
II);
1893 II.getArgOperand(0));
1903 return std::nullopt;
1908 if (!SplatValue || !SplatValue->isZero())
1909 return std::nullopt;
1914 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
1915 return std::nullopt;
1919 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
1920 return std::nullopt;
1923 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
1924 return std::nullopt;
1929 return std::nullopt;
1932 return std::nullopt;
1936 return std::nullopt;
1940 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
1941 return std::nullopt;
  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;
  for (unsigned I = 0; I < NumElts; ++I) {
      return std::nullopt;
    PredicateBits |= 1 << (I * (16 / NumElts));
  if (PredicateBits == 0) {
    PFalse->takeName(&II);
  for (unsigned I = 0; I < 16; ++I)
    if ((PredicateBits & (1 << I)) != 0)
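  // Mask & -Mask isolates the lowest set bit, i.e. the stride between set
  // predicate bits; the pattern is only usable if every PredSize-th bit is set.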
  unsigned PredSize = Mask & -Mask;
  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
      return std::nullopt;
                                     {PredType}, {PTruePat});
      Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
  auto *ConvertFromSVBool =
      {II.getType()}, {ConvertToSVBool});
1993 Value *Pg =
II.getArgOperand(0);
1994 Value *Vec =
II.getArgOperand(1);
1995 auto IntrinsicID =
II.getIntrinsicID();
1996 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
2008 auto OpC = OldBinOp->getOpcode();
2014 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(),
II.getIterator());
2020 if (IsAfter &&
C &&
C->isNullValue()) {
2024 Extract->insertBefore(
II.getIterator());
2025 Extract->takeName(&
II);
2031 return std::nullopt;
2033 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
2034 return std::nullopt;
2036 const auto PTruePattern =
2042 return std::nullopt;
2044 unsigned Idx = MinNumElts - 1;
2054 if (Idx >= PgVTy->getMinNumElements())
2055 return std::nullopt;
2060 Extract->insertBefore(
II.getIterator());
2061 Extract->takeName(&
II);
2074 Value *Pg =
II.getArgOperand(0);
2076 Value *Vec =
II.getArgOperand(2);
2079 if (!Ty->isIntegerTy())
2080 return std::nullopt;
2085 return std::nullopt;
2102 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
2115 {
II.getType()}, {AllPat});
2122static std::optional<Instruction *>
2126 if (
Pattern == AArch64SVEPredPattern::all) {
2135 return MinNumElts && NumElts >= MinNumElts
2137 II, ConstantInt::get(
II.getType(), MinNumElts)))
2141static std::optional<Instruction *>
2144 if (!ST->isStreaming())
2145 return std::nullopt;
2157 Value *PgVal =
II.getArgOperand(0);
2158 Value *OpVal =
II.getArgOperand(1);
2162 if (PgVal == OpVal &&
2163 (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
2164 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
2179 return std::nullopt;
2183 if (Pg->
getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
2184 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
2198 if ((Pg ==
Op) && (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
2199 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
2200 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
2201 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
2202 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
2203 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
2204 (OpIID == Intrinsic::aarch64_sve_and_z) ||
2205 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
2206 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
2207 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
2208 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
2209 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
2210 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
2220 return std::nullopt;
2223template <Intrinsic::ID MulOpc,
typename Intrinsic::ID FuseOpc>
2224static std::optional<Instruction *>
2226 bool MergeIntoAddendOp) {
2228 Value *MulOp0, *MulOp1, *AddendOp, *
Mul;
2229 if (MergeIntoAddendOp) {
2230 AddendOp =
II.getOperand(1);
2231 Mul =
II.getOperand(2);
2233 AddendOp =
II.getOperand(2);
2234 Mul =
II.getOperand(1);
2239 return std::nullopt;
2241 if (!
Mul->hasOneUse())
2242 return std::nullopt;
2245 if (
II.getType()->isFPOrFPVectorTy()) {
2250 return std::nullopt;
2252 return std::nullopt;
2257 if (MergeIntoAddendOp)
2267static std::optional<Instruction *>
2269 Value *Pred =
II.getOperand(0);
2270 Value *PtrOp =
II.getOperand(1);
2271 Type *VecTy =
II.getType();
2275 Load->copyMetadata(
II);
2286static std::optional<Instruction *>
2288 Value *VecOp =
II.getOperand(0);
2289 Value *Pred =
II.getOperand(1);
2290 Value *PtrOp =
II.getOperand(2);
2294 Store->copyMetadata(
II);
2306 case Intrinsic::aarch64_sve_fmul_u:
2307 return Instruction::BinaryOps::FMul;
2308 case Intrinsic::aarch64_sve_fadd_u:
2309 return Instruction::BinaryOps::FAdd;
2310 case Intrinsic::aarch64_sve_fsub_u:
2311 return Instruction::BinaryOps::FSub;
2313 return Instruction::BinaryOpsEnd;
2317static std::optional<Instruction *>
2320 if (
II.isStrictFP())
2321 return std::nullopt;
2323 auto *OpPredicate =
II.getOperand(0);
2325 if (BinOpCode == Instruction::BinaryOpsEnd ||
2327 return std::nullopt;
2329 BinOpCode,
II.getOperand(1),
II.getOperand(2),
II.getFastMathFlags());
2336 Intrinsic::aarch64_sve_mla>(
2340 Intrinsic::aarch64_sve_mad>(
2343 return std::nullopt;
2346static std::optional<Instruction *>
2350 Intrinsic::aarch64_sve_fmla>(IC,
II,
2355 Intrinsic::aarch64_sve_fmad>(IC,
II,
2360 Intrinsic::aarch64_sve_fmla>(IC,
II,
2363 return std::nullopt;
2366static std::optional<Instruction *>
2370 Intrinsic::aarch64_sve_fmla>(IC,
II,
2375 Intrinsic::aarch64_sve_fmad>(IC,
II,
2380 Intrinsic::aarch64_sve_fmla_u>(
2386static std::optional<Instruction *>
2390 Intrinsic::aarch64_sve_fmls>(IC,
II,
2395 Intrinsic::aarch64_sve_fnmsb>(
2400 Intrinsic::aarch64_sve_fmls>(IC,
II,
2403 return std::nullopt;
2406static std::optional<Instruction *>
2410 Intrinsic::aarch64_sve_fmls>(IC,
II,
2415 Intrinsic::aarch64_sve_fnmsb>(
2420 Intrinsic::aarch64_sve_fmls_u>(
2429 Intrinsic::aarch64_sve_mls>(
2432 return std::nullopt;
2437 Value *UnpackArg =
II.getArgOperand(0);
2439 bool IsSigned =
II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
2440 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
2453 return std::nullopt;
2457 auto *OpVal =
II.getOperand(0);
2458 auto *OpIndices =
II.getOperand(1);
2465 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
2466 return std::nullopt;
2481 Type *RetTy =
II.getType();
2482 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
2483 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
2487 if ((
match(
II.getArgOperand(0),
2494 if (TyA ==
B->getType() &&
2499 TyA->getMinNumElements());
2505 return std::nullopt;
2513 if (
match(
II.getArgOperand(0),
2518 II, (
II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ?
A :
B));
2520 return std::nullopt;
2523static std::optional<Instruction *>
2525 Value *Mask =
II.getOperand(0);
2526 Value *BasePtr =
II.getOperand(1);
2527 Value *Index =
II.getOperand(2);
2538 BasePtr->getPointerAlignment(
II.getDataLayout());
2541 BasePtr, IndexBase);
2548 return std::nullopt;
2551static std::optional<Instruction *>
2553 Value *Val =
II.getOperand(0);
2554 Value *Mask =
II.getOperand(1);
2555 Value *BasePtr =
II.getOperand(2);
2556 Value *Index =
II.getOperand(3);
2566 BasePtr->getPointerAlignment(
II.getDataLayout());
2569 BasePtr, IndexBase);
2575 return std::nullopt;
2581 Value *Pred =
II.getOperand(0);
2582 Value *Vec =
II.getOperand(1);
2583 Value *DivVec =
II.getOperand(2);
2587 if (!SplatConstantInt)
2588 return std::nullopt;
2592 if (DivisorValue == -1)
2593 return std::nullopt;
2594 if (DivisorValue == 1)
2600 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2607 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2609 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2613 return std::nullopt;
2617 size_t VecSize = Vec.
size();
2622 size_t HalfVecSize = VecSize / 2;
2626 if (*
LHS !=
nullptr && *
RHS !=
nullptr) {
2634 if (*
LHS ==
nullptr && *
RHS !=
nullptr)
2652 return std::nullopt;
2659 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2660 CurrentInsertElt = InsertElt->getOperand(0);
2666 return std::nullopt;
2670 for (
size_t I = 0;
I < Elts.
size();
I++) {
2671 if (Elts[
I] ==
nullptr)
2676 if (InsertEltChain ==
nullptr)
2677 return std::nullopt;
2683 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.
size();
2684 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2685 IIScalableTy->getMinNumElements() /
2690 auto *WideShuffleMaskTy =
2701 auto NarrowBitcast =
2714 return std::nullopt;
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *Shift = II.getOperand(2);
  Value *AbsPred, *MergedValue;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
      {II.getType()}, {Pred, Vec, Shift});
  Value *Vec = II.getOperand(0);
    return std::nullopt;
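  // Scan forward over a bounded number of instructions with no memory effects
  // or side effects (following unique successors across blocks); if an
  // identical barrier is found, this one is redundant.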
  auto *NI = II.getNextNode();
    return !I->mayReadOrWriteMemory() && !I->mayHaveSideEffects();
  while (LookaheadThreshold-- && CanSkipOver(NI)) {
    auto *NIBB = NI->getParent();
    NI = NI->getNextNode();
    if (auto *SuccBB = NIBB->getUniqueSuccessor())
      NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();
  if (NextII && II.isIdenticalTo(NextII))
  return std::nullopt;
2791 {II.getType(), II.getOperand(0)->getType()},
2792 {II.getOperand(0), II.getOperand(1)}));
2799 return std::nullopt;
2805 Value *Passthru =
II.getOperand(0);
2813 auto *Mask = ConstantInt::get(Ty, MaskValue);
2819 return std::nullopt;
2822static std::optional<Instruction *>
2829 return std::nullopt;
2832std::optional<Instruction *>
2843 case Intrinsic::aarch64_dmb:
2845 case Intrinsic::aarch64_neon_fmaxnm:
2846 case Intrinsic::aarch64_neon_fminnm:
2848 case Intrinsic::aarch64_sve_convert_from_svbool:
2850 case Intrinsic::aarch64_sve_dup:
2852 case Intrinsic::aarch64_sve_dup_x:
2854 case Intrinsic::aarch64_sve_cmpne:
2855 case Intrinsic::aarch64_sve_cmpne_wide:
2857 case Intrinsic::aarch64_sve_rdffr:
2859 case Intrinsic::aarch64_sve_lasta:
2860 case Intrinsic::aarch64_sve_lastb:
2862 case Intrinsic::aarch64_sve_clasta_n:
2863 case Intrinsic::aarch64_sve_clastb_n:
2865 case Intrinsic::aarch64_sve_cntd:
2867 case Intrinsic::aarch64_sve_cntw:
2869 case Intrinsic::aarch64_sve_cnth:
2871 case Intrinsic::aarch64_sve_cntb:
2873 case Intrinsic::aarch64_sme_cntsd:
2875 case Intrinsic::aarch64_sve_ptest_any:
2876 case Intrinsic::aarch64_sve_ptest_first:
2877 case Intrinsic::aarch64_sve_ptest_last:
2879 case Intrinsic::aarch64_sve_fadd:
2881 case Intrinsic::aarch64_sve_fadd_u:
2883 case Intrinsic::aarch64_sve_fmul_u:
2885 case Intrinsic::aarch64_sve_fsub:
2887 case Intrinsic::aarch64_sve_fsub_u:
2889 case Intrinsic::aarch64_sve_add:
2891 case Intrinsic::aarch64_sve_add_u:
2893 Intrinsic::aarch64_sve_mla_u>(
2895 case Intrinsic::aarch64_sve_sub:
2897 case Intrinsic::aarch64_sve_sub_u:
2899 Intrinsic::aarch64_sve_mls_u>(
2901 case Intrinsic::aarch64_sve_tbl:
2903 case Intrinsic::aarch64_sve_uunpkhi:
2904 case Intrinsic::aarch64_sve_uunpklo:
2905 case Intrinsic::aarch64_sve_sunpkhi:
2906 case Intrinsic::aarch64_sve_sunpklo:
2908 case Intrinsic::aarch64_sve_uzp1:
2910 case Intrinsic::aarch64_sve_zip1:
2911 case Intrinsic::aarch64_sve_zip2:
2913 case Intrinsic::aarch64_sve_ld1_gather_index:
2915 case Intrinsic::aarch64_sve_st1_scatter_index:
2917 case Intrinsic::aarch64_sve_ld1:
2919 case Intrinsic::aarch64_sve_st1:
2921 case Intrinsic::aarch64_sve_sdiv:
2923 case Intrinsic::aarch64_sve_sel:
2925 case Intrinsic::aarch64_sve_srshl:
2927 case Intrinsic::aarch64_sve_dupq_lane:
2929 case Intrinsic::aarch64_sve_insr:
2931 case Intrinsic::aarch64_sve_whilelo:
2933 case Intrinsic::aarch64_sve_ptrue:
2935 case Intrinsic::aarch64_sve_uxtb:
2937 case Intrinsic::aarch64_sve_uxth:
2939 case Intrinsic::aarch64_sve_uxtw:
2941 case Intrinsic::aarch64_sme_in_streaming_mode:
2945 return std::nullopt;
2952 SimplifyAndSetOp)
const {
2953 switch (
II.getIntrinsicID()) {
2956 case Intrinsic::aarch64_neon_fcvtxn:
2957 case Intrinsic::aarch64_neon_rshrn:
2958 case Intrinsic::aarch64_neon_sqrshrn:
2959 case Intrinsic::aarch64_neon_sqrshrun:
2960 case Intrinsic::aarch64_neon_sqshrn:
2961 case Intrinsic::aarch64_neon_sqshrun:
2962 case Intrinsic::aarch64_neon_sqxtn:
2963 case Intrinsic::aarch64_neon_sqxtun:
2964 case Intrinsic::aarch64_neon_uqrshrn:
2965 case Intrinsic::aarch64_neon_uqshrn:
2966 case Intrinsic::aarch64_neon_uqxtn:
2967 SimplifyAndSetOp(&
II, 0, OrigDemandedElts, UndefElts);
2971 return std::nullopt;
2975 return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
2985 if (ST->useSVEForFixedLengthVectors() &&
2988 std::max(ST->getMinSVEVectorSizeInBits(), 128u));
2989 else if (ST->isNeonAvailable())
2994 if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3003bool AArch64TTIImpl::isWideningInstruction(
Type *DstTy,
unsigned Opcode,
3005 Type *SrcOverrideTy)
const {
3020 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3030 Type *SrcTy = SrcOverrideTy;
3032 case Instruction::Add:
3033 case Instruction::Sub:
3042 case Instruction::Mul: {
3078 assert(SrcTy &&
"Expected some SrcTy");
3080 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
3086 DstTyL.first * DstTyL.second.getVectorMinNumElements();
3088 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
3092 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
3104 if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(
DL, Src)) ||
3105 (Src->isScalableTy() && !ST->hasSVE2()))
3115 if (AddUser && AddUser->getOpcode() == Instruction::Add)
3119 if (!Shr || Shr->getOpcode() != Instruction::LShr)
3123 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
3124 Src->getScalarSizeInBits() !=
3148 int ISD = TLI->InstructionOpcodeToISD(Opcode);
3152 if (
I &&
I->hasOneUser()) {
3155 if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands, Src)) {
3159 if (SingleUser->getOpcode() == Instruction::Add) {
3160 if (
I == SingleUser->getOperand(1) ||
3162 cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
3177 return Cost == 0 ? 0 : 1;
3181 EVT SrcTy = TLI->getValueType(
DL, Src);
3182 EVT DstTy = TLI->getValueType(
DL, Dst);
3184 if (!SrcTy.isSimple() || !DstTy.
isSimple())
3190 if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
3214 return AdjustCost(Entry->Cost);
3222 const unsigned int SVE_EXT_COST = 1;
3223 const unsigned int SVE_FCVT_COST = 1;
3224 const unsigned int SVE_UNPACK_ONCE = 4;
3225 const unsigned int SVE_UNPACK_TWICE = 16;
3303 {ISD::FP_EXTEND, MVT::f64, MVT::f32, 1},
3304 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f32, 1},
3305 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 2},
3307 {ISD::FP_EXTEND, MVT::f32, MVT::f16, 1},
3308 {ISD::FP_EXTEND, MVT::f64, MVT::f16, 1},
3309 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
3310 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 2},
3311 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2},
3312 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3},
3313 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6},
3315 {ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1},
3316 {ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2},
3317 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1},
3318 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2},
3319 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2},
3320 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3},
3321 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6},
3354 SVE_EXT_COST + SVE_FCVT_COST},
3359 SVE_EXT_COST + SVE_FCVT_COST},
3366 SVE_EXT_COST + SVE_FCVT_COST},
3370 SVE_EXT_COST + SVE_FCVT_COST},
3376 SVE_EXT_COST + SVE_FCVT_COST},
3379 SVE_EXT_COST + SVE_FCVT_COST},
3384 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3386 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3396 SVE_EXT_COST + SVE_FCVT_COST},
3401 SVE_EXT_COST + SVE_FCVT_COST},
3414 SVE_EXT_COST + SVE_FCVT_COST},
3418 SVE_EXT_COST + SVE_FCVT_COST},
3430 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3432 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3434 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3436 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3440 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3442 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3458 SVE_EXT_COST + SVE_FCVT_COST},
3463 SVE_EXT_COST + SVE_FCVT_COST},
3474 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3476 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3478 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3480 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3482 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3484 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3488 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3490 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3492 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3494 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3638 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},
3639 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
3640 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
3643 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1},
3644 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1},
3645 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 4},
3648 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
3649 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
3650 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
3653 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2},
3654 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6},
3655 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14},
3658 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
3659 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
3660 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},
3663 {ISD::BITCAST, MVT::nxv2f16, MVT::nxv2i16, 0},
3664 {ISD::BITCAST, MVT::nxv4f16, MVT::nxv4i16, 0},
3665 {ISD::BITCAST, MVT::nxv2f32, MVT::nxv2i32, 0},
3668 {ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0},
3669 {ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0},
3670 {ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0},
3693 EVT WiderTy = SrcTy.
bitsGT(DstTy) ? SrcTy : DstTy;
3696 ST->useSVEForFixedLengthVectors(WiderTy)) {
3697 std::pair<InstructionCost, MVT> LT =
3699 unsigned NumElements =
3711 return AdjustCost(Entry->Cost);
3738 if (ST->hasFullFP16())
3741 return AdjustCost(Entry->Cost);
3759 ST->isSVEorStreamingSVEAvailable() &&
3760 TLI->getTypeAction(Src->getContext(), SrcTy) ==
3762 TLI->getTypeAction(Dst->getContext(), DstTy) ==
3771 Opcode, LegalTy, Src, CCH,
CostKind,
I);
3774 return Part1 + Part2;
3781 ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))
3794 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
3807 CostKind, Index,
nullptr,
nullptr);
3811 auto DstVT = TLI->getValueType(
DL, Dst);
3812 auto SrcVT = TLI->getValueType(
DL, Src);
3817 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
3823 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
3833 case Instruction::SExt:
3838 case Instruction::ZExt:
3839 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
3852 return Opcode == Instruction::PHI ? 0 : 1;
3861 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx)
const {
3869 if (!LT.second.isVector())
3874 if (LT.second.isFixedLengthVector()) {
3875 unsigned Width = LT.second.getVectorNumElements();
3876 Index = Index % Width;
3924 auto ExtractCanFuseWithFmul = [&]() {
3931 auto IsAllowedScalarTy = [&](
const Type *
T) {
3932 return T->isFloatTy() ||
T->isDoubleTy() ||
3933 (
T->isHalfTy() && ST->hasFullFP16());
3937 auto IsUserFMulScalarTy = [](
const Value *EEUser) {
3940 return BO && BO->getOpcode() == BinaryOperator::FMul &&
3941 !BO->getType()->isVectorTy();
3946 auto IsExtractLaneEquivalentToZero = [&](
unsigned Idx,
unsigned EltSz) {
3950 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
3959 DenseMap<User *, unsigned> UserToExtractIdx;
3960 for (
auto *U :
Scalar->users()) {
3961 if (!IsUserFMulScalarTy(U))
3965 UserToExtractIdx[
U];
3967 if (UserToExtractIdx.
empty())
3969 for (
auto &[S, U, L] : ScalarUserAndIdx) {
3970 for (
auto *U : S->users()) {
3971 if (UserToExtractIdx.
contains(U)) {
3973 auto *Op0 =
FMul->getOperand(0);
3974 auto *Op1 =
FMul->getOperand(1);
3975 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
3976 UserToExtractIdx[
U] =
L;
3982 for (
auto &[U, L] : UserToExtractIdx) {
3994 return !EE->users().empty() &&
all_of(EE->users(), [&](
const User *U) {
3995 if (!IsUserFMulScalarTy(U))
4000 const auto *BO = cast<BinaryOperator>(U);
4001 const auto *OtherEE = dyn_cast<ExtractElementInst>(
4002 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
4004 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
4007 return IsExtractLaneEquivalentToZero(
4008 cast<ConstantInt>(OtherEE->getIndexOperand())
4011 OtherEE->getType()->getScalarSizeInBits());
4019 if (Opcode == Instruction::ExtractElement && (
I || Scalar) &&
4020 ExtractCanFuseWithFmul())
4025 :
ST->getVectorInsertExtractBaseCost();
4032 const Value *Op1)
const {
4036 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
4039 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index);
4045 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx)
const {
4046 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr, Scalar,
4053 unsigned Index)
const {
4054 return getVectorInstrCostHelper(
I.getOpcode(), Val,
CostKind, Index, &
I);
4060 unsigned Index)
const {
4072 : ST->getVectorInsertExtractBaseCost() + 1;
4081 if (Ty->getElementType()->isFloatingPointTy())
4084 unsigned VecInstCost =
4086 return DemandedElts.
popcount() * (Insert + Extract) * VecInstCost;
4093 if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
4094 return std::nullopt;
4095 if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
4096 return std::nullopt;
4103 Cost += InstCost(PromotedTy);
4126 Op2Info, Args, CxtI);
4130 int ISD = TLI->InstructionOpcodeToISD(Opcode);
4137 Ty,
CostKind, Op1Info, Op2Info,
true,
4138 [&](
Type *PromotedTy) {
4142 return *PromotedCost;
4203 auto VT = TLI->getValueType(
DL, Ty);
4204 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
4208 : (3 * AsrCost + AddCost);
4210 return MulCost + AsrCost + 2 * AddCost;
4212 }
else if (VT.isVector()) {
4222 if (Ty->isScalableTy() && ST->hasSVE())
4223 Cost += 2 * AsrCost;
4228 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
4232 }
else if (LT.second == MVT::v2i64) {
4233 return VT.getVectorNumElements() *
4240 if (Ty->isScalableTy() && ST->hasSVE())
4241 return MulCost + 2 * AddCost + 2 * AsrCost;
4242 return 2 * MulCost + AddCost + AsrCost + UsraCost;
4247 LT.second.isFixedLengthVector()) {
4257 return ExtractCost + InsertCost +
4265 auto VT = TLI->getValueType(
DL, Ty);
4281 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
4282 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
4283 LT.second == MVT::nxv16i8;
4284 bool Is128bit = LT.second.is128BitVector();
4296 (HasMULH ? 0 : ShrCost) +
4297 AddCost * 2 + ShrCost;
4298 return DivCost + (
ISD ==
ISD::UREM ? MulCost + AddCost : 0);
4305 if (!VT.isVector() && VT.getSizeInBits() > 64)
4309 Opcode, Ty,
CostKind, Op1Info, Op2Info);
4311 if (TLI->isOperationLegalOrCustom(
ISD, LT.second) && ST->hasSVE()) {
4315 Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
4325 if (
nullptr != Entry)
4330 if (LT.second.getScalarType() == MVT::i8)
4332 else if (LT.second.getScalarType() == MVT::i16)
4344 Opcode, Ty->getScalarType(),
CostKind, Op1Info, Op2Info);
4345 return (4 + DivCost) * VTy->getNumElements();
4351 -1,
nullptr,
nullptr);
4365 if (LT.second == MVT::v2i64 && ST->hasSVE())
4380 if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
4402 if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
4403 (Ty->isHalfTy() && ST->hasFullFP16())) &&
4412 if (!Ty->getScalarType()->isFP128Ty())
4419 if (!Ty->getScalarType()->isFP128Ty())
4420 return 2 * LT.first;
4427 if (!Ty->isVectorTy())
4443 int MaxMergeDistance = 64;
4447 return NumVectorInstToHideOverhead;
4457 unsigned Opcode1,
unsigned Opcode2)
const {
4460 if (!
Sched.hasInstrSchedModel())
4464 Sched.getSchedClassDesc(
TII->get(Opcode1).getSchedClass());
4466 Sched.getSchedClassDesc(
TII->get(Opcode2).getSchedClass());
4472 "Cannot handle variant scheduling classes without an MI");
  const int AmortizationCost = 20;
      VecPred = CurrentPred;
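    // Vector selects matching a min/max pattern on these legal types can use a
    // single NEON min/max instruction, so treat them as cheap.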
    static const auto ValidMinMaxTys = {
        MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
        MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
    static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
        (ST->hasFullFP16() &&
         any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))
4517 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4518 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4519 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4520 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4521 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4522 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4523 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4524 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4525 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4526 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4527 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
4529 EVT SelCondTy = TLI->getValueType(
DL, CondTy);
4530 EVT SelValTy = TLI->getValueType(
DL, ValTy);
4539 if (Opcode == Instruction::FCmp) {
4541 ValTy,
CostKind, Op1Info, Op2Info,
false,
4542 [&](
Type *PromotedTy) {
4554 return *PromotedCost;
4558 if (LT.second.getScalarType() != MVT::f64 &&
4559 LT.second.getScalarType() != MVT::f32 &&
4560 LT.second.getScalarType() != MVT::f16)
4565 unsigned Factor = 1;
4580 AArch64::FCMEQv4f32))
4592 TLI->isTypeLegal(TLI->getValueType(
DL, ValTy)) &&
4611 Op1Info, Op2Info,
I);
4617 if (ST->requiresStrictAlign()) {
4622 Options.AllowOverlappingLoads =
true;
4623 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
4628 Options.LoadSizes = {8, 4, 2, 1};
4629 Options.AllowedTailExpansions = {3, 5, 6};
4634 return ST->hasSVE();
4645 if (!LT.first.isValid())
4650 if (VT->getElementType()->isIntegerTy(1))
4667 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4668 "Should be called on only load or stores.");
4670 case Instruction::Load:
4673 return ST->getGatherOverhead();
4675 case Instruction::Store:
4678 return ST->getScatterOverhead();
4686 unsigned Opcode,
Type *DataTy,
const Value *
Ptr,
bool VariableMask,
4693 if (!LT.first.isValid())
4697 if (!LT.second.isVector() ||
4699 VT->getElementType()->isIntegerTy(1))
4709 ElementCount LegalVF = LT.second.getVectorElementCount();
4712 {TTI::OK_AnyValue, TTI::OP_None},
I);
4728 EVT VT = TLI->getValueType(
DL, Ty,
true);
4730 if (VT == MVT::Other)
4735 if (!LT.first.isValid())
4745 (VTy->getElementType()->isIntegerTy(1) &&
4746 !VTy->getElementCount().isKnownMultipleOf(
4757 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
4758 LT.second.is128BitVector() && Alignment <
Align(16)) {
4764 const int AmortizationCost = 6;
4766 return LT.first * 2 * AmortizationCost;
4770 if (Ty->isPtrOrPtrVectorTy())
4775 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
4777 if (VT == MVT::v4i8)
4784 if (!
isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
4799 while (!TypeWorklist.
empty()) {
4821 bool UseMaskForCond,
bool UseMaskForGaps)
const {
4822 assert(Factor >= 2 &&
"Invalid interleave factor");
4837 if (!VecTy->
isScalableTy() && (UseMaskForCond || UseMaskForGaps))
4840 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
4841 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
4844 VecVTy->getElementCount().divideCoefficientBy(Factor));
4850 if (MinElts % Factor == 0 &&
4851 TLI->isLegalInterleavedAccessType(SubVecTy,
DL, UseScalable))
4852 return Factor * TLI->getNumInterleavedAccesses(SubVecTy,
DL, UseScalable);
4857 UseMaskForCond, UseMaskForGaps);
4864 for (
auto *
I : Tys) {
4865 if (!
I->isVectorTy())
4876 return ST->getMaxInterleaveFactor();
  enum { MaxStridedLoads = 7 };
  int StridedLoads = 0;
  for (const auto BB : L->blocks()) {
    for (auto &I : *BB) {
      if (L->isLoopInvariant(PtrValue))
      if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
      if (StridedLoads > MaxStridedLoads / 2)
        return StridedLoads;
  return StridedLoads;
  int StridedLoads = countStridedLoads(L, SE);
                    << " strided loads\n");
4937 unsigned *FinalSize) {
4941 for (
auto *BB : L->getBlocks()) {
4942 for (
auto &
I : *BB) {
4948 if (!Cost.isValid())
4952 if (LoopCost > Budget)
4974 if (MaxTC > 0 && MaxTC <= 32)
4985 if (Blocks.
size() != 2)
5007 if (!L->isInnermost() || L->getNumBlocks() > 8)
5011 if (!L->getExitBlock())
5017 bool HasParellelizableReductions =
5018 L->getNumBlocks() == 1 &&
5019 any_of(L->getHeader()->phis(),
5021 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5024 if (HasParellelizableReductions &&
5046 if (HasParellelizableReductions) {
5057 if (Header == Latch) {
5060 unsigned Width = 10;
5066 unsigned MaxInstsPerLine = 16;
5068 unsigned BestUC = 1;
5069 unsigned SizeWithBestUC = BestUC *
Size;
5071 unsigned SizeWithUC = UC *
Size;
5072 if (SizeWithUC > 48)
5074 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5075 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5077 SizeWithBestUC = BestUC *
Size;
5087 for (
auto *BB : L->blocks()) {
5088 for (
auto &
I : *BB) {
5098 for (
auto *U :
I.users())
5100 LoadedValuesPlus.
insert(U);
5107 return LoadedValuesPlus.
contains(
SI->getOperand(0));
5120 if (!Term || !Term->isConditional() || Preds.
size() == 1 ||
5134 auto *I = dyn_cast<Instruction>(V);
5135 return I && DependsOnLoopLoad(I, Depth + 1);
5142 DependsOnLoopLoad(
I, 0)) {
5158 if (L->getLoopDepth() > 1)
5168 for (auto *BB : L->getBlocks()) {
5169 for (auto &I : *BB) {
5173 if (IsVectorized && I.getType()->isVectorTy())
5186 switch (ST->getProcFamily()) {
5187 case AArch64Subtarget::AppleA14:
5188 case AArch64Subtarget::AppleA15:
5189 case AArch64Subtarget::AppleA16:
5190 case AArch64Subtarget::AppleM4:
5193 case AArch64Subtarget::Falkor:
5219 !ST->getSchedModel().isOutOfOrder()) {
5237 bool CanCreate) const {
5241 case Intrinsic::aarch64_neon_st2:
5242 case Intrinsic::aarch64_neon_st3:
5243 case Intrinsic::aarch64_neon_st4: {
5246 if (!CanCreate || !ST)
5248 unsigned NumElts = Inst->arg_size() - 1;
5249 if (ST->getNumElements() != NumElts)
5251 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5257 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5259 Res = Builder.CreateInsertValue(Res, L, i);
5263 case Intrinsic::aarch64_neon_ld2:
5264 case Intrinsic::aarch64_neon_ld3:
5265 case Intrinsic::aarch64_neon_ld4:
5266 if (Inst->getType() == ExpectedType)
5277 case Intrinsic::aarch64_neon_ld2:
5278 case Intrinsic::aarch64_neon_ld3:
5279 case Intrinsic::aarch64_neon_ld4:
5280 Info.ReadMem = true;
5281 Info.WriteMem = false;
5284 case Intrinsic::aarch64_neon_st2:
5285 case Intrinsic::aarch64_neon_st3:
5286 case Intrinsic::aarch64_neon_st4:
5287 Info.ReadMem = false;
5288 Info.WriteMem = true;
5296 case Intrinsic::aarch64_neon_ld2:
5297 case Intrinsic::aarch64_neon_st2:
5298 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5300 case Intrinsic::aarch64_neon_ld3:
5301 case Intrinsic::aarch64_neon_st3:
5302 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5304 case Intrinsic::aarch64_neon_ld4:
5305 case Intrinsic::aarch64_neon_st4:
5306 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
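For context (this note and the snippet below are editorial additions, not part of the listing): the MemIntrinsicInfo populated above is consumed by generic passes such as EarlyCSE, which use ReadMem/WriteMem to reason about the structured NEON ld2/ld3/ld4 and st2/st3/st4 intrinsics, while MatchingId lets them pair accesses of the same element count. A trivial, self-contained sketch of a consumer-side check:

#include "llvm/Analysis/TargetTransformInfo.h"

// Returns true for the ld2/ld3/ld4 cases filled in above (ReadMem, !WriteMem).
static bool readsButDoesNotWriteSketch(const llvm::MemIntrinsicInfo &Info) {
  return Info.ReadMem && !Info.WriteMem;
}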
5318 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
5319 bool Considerable = false;
5320 AllowPromotionWithoutCommonHeader = false;
5323 Type *ConsideredSExtType =
5325 if (I.getType() != ConsideredSExtType)
5329 for (const User *U : I.users()) {
5331 Considerable = true;
5335 if (GEPInst->getNumOperands() > 2) {
5336 AllowPromotionWithoutCommonHeader = true;
5341 return Considerable;
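The hook above reports whether a sign extension is worth promoting for addressing. The sketch below is illustrative only (the helper name and builder code are not from this file); it shows the kind of IR the hook targets: an extension of a narrow index to the 64-bit register width whose users are GEPs, where a GEP with more than two operands allows promotion without a common header.

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *buildSExtGEPSketch(IRBuilder<> &B, Value *BasePtr, Value *Idx32) {
  Value *Idx64 = B.CreateSExt(Idx32, B.getInt64Ty()); // candidate sext
  return B.CreateGEP(B.getInt8Ty(), BasePtr, Idx64);  // GEP user of the sext
}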
5389 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5399 return LegalizationCost + 2;
5409 LegalizationCost *= LT.first - 1;
5412 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5421 return LegalizationCost + 2;
5429 std::optional<FastMathFlags> FMF,
5445 return BaseCost + FixedVTy->getNumElements();
5448 if (Opcode != Instruction::FAdd)
5462 MVT MTy = LT.second;
5463 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5511 MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5512 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5514 if (ValTy->getElementCount().getFixedValue() >= 2 && NElts >= 2 &&
5524 return (LT.first - 1) + Log2_32(NElts);
5529 return (LT.first - 1) + Entry->Cost;
5541 if (LT.first != 1) {
5547 ExtraCost *= LT.first - 1;
5550 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5551 return Cost + ExtraCost;
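As an editorial aside, the recurring "(LT.first - 1) + per-part cost" shape in these reduction paths can be read as: a type that legalizes into LT.first registers pays the table cost once, plus one extra combining step per additional register. A minimal sketch with illustrative names, not code from this file:

// E.g. two legal parts with a per-part table cost of 2 gives (2 - 1) + 2 == 3.
static unsigned legalizedReductionCostSketch(unsigned NumLegalParts,
                                             unsigned PerPartCost) {
  return (NumLegalParts - 1) + PerPartCost;
}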
5559 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *VecTy,
5561 EVT VecVT = TLI->getValueType(DL, VecTy);
5562 EVT ResVT = TLI->getValueType(DL, ResTy);
5572 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5574 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5576 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5578 return (LT.first - 1) * 2 + 2;
5589 EVT VecVT = TLI->getValueType(DL, VecTy);
5590 EVT ResVT = TLI->getValueType(DL, ResTy);
5593 RedOpcode == Instruction::Add) {
5599 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5601 return LT.first + 2;
5636 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5637 ? TLI->getPromotedVTForPredicate(EVT(LT.second))
5651 if (LT.second.getScalarType() == MVT::i1) {
5660 assert(Entry && "Illegal Type for Splice");
5661 LegalizationCost += Entry->Cost;
5662 return LegalizationCost * LT.first;
5666 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
5675 if (VF.isFixed() && !ST->isSVEorStreamingSVEAvailable() &&
5676 (!ST->isNeonAvailable() || !ST->hasDotProd()))
5679 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub) ||
5684 (!BinOp || (OpBExtend != TTI::PR_None && InputTypeB)) &&
5685 "Unexpected values for OpBExtend or InputTypeB");
5689 if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB))
5692 bool IsUSDot = OpBExtend != TTI::PR_None && OpAExtend != OpBExtend;
5693 if (IsUSDot && !ST->hasMatMulInt8())
5705 auto TC = TLI->getTypeConversion(AccumVectorType->getContext(),
5714 if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) !=
5720 std::pair<InstructionCost, MVT> AccumLT =
5722 std::pair<InstructionCost, MVT> InputLT =
5735 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
5737 if (AccumLT.second.getScalarType() == MVT::i64 &&
5738 InputLT.second.getScalarType() == MVT::i16)
5741 if (AccumLT.second.getScalarType() == MVT::i64 &&
5742 InputLT.second.getScalarType() == MVT::i8)
5752 if (ST->isSVEorStreamingSVEAvailable() ||
5753 (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() &&
5754 ST->hasDotProd())) {
5755 if (AccumLT.second.getScalarType() == MVT::i32 &&
5756 InputLT.second.getScalarType() == MVT::i8)
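The dot-product costing above models an extend-multiply-accumulate at a 4:1 lane ratio (e.g. i8 inputs into i32 accumulators). The scalar reference below is an editorial sketch of those semantics, not code from this file:

#include <cstdint>

// Each group of four i8 products folds into one 32-bit accumulator lane,
// which is what a udot-style instruction performs per lane.
static void partialReduceRefSketch(const uint8_t *A, const uint8_t *B,
                                   uint32_t *Acc, int NumInputs) {
  for (int I = 0; I < NumInputs; ++I)
    Acc[I / 4] += static_cast<uint32_t>(A[I]) * static_cast<uint32_t>(B[I]);
}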
5772 "Expected the Mask to match the return size if given");
5774 "Expected the same scalar types");
5780 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
5781 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
5782 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
5790 return std::max<InstructionCost>(1, LT.first / 4);
5798 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
5800 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
5803 unsigned TpNumElts = Mask.size();
5804 unsigned LTNumElts = LT.second.getVectorNumElements();
5805 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
5807 LT.second.getVectorElementCount());
5809 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>, InstructionCost>
5811 for (unsigned N = 0; N < NumVecs; N++) {
5815 unsigned Source1 = -1U, Source2 = -1U;
5816 unsigned NumSources = 0;
5817 for (unsigned E = 0; E < LTNumElts; E++) {
5818 int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
5827 unsigned Source = MaskElt / LTNumElts;
5828 if (NumSources == 0) {
5831 } else if (NumSources == 1 && Source != Source1) {
5834 } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
5840 if (Source == Source1)
5842 else if (Source == Source2)
5843 NMask.push_back(MaskElt % LTNumElts + LTNumElts);
5852 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
5863 NTp, NTp, NMask, CostKind, 0, nullptr, Args,
5866 Result.first->second = NCost;
5880 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
5881 if (LT.second.getFixedSizeInBits() >= 128 &&
5883 LT.second.getVectorNumElements() / 2) {
5886 if (Index == (int)LT.second.getVectorNumElements() / 2)
5900 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
5901 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
5910 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
5911 ST->isSVEorStreamingSVEAvailable() &&
5916 if (ST->isSVEorStreamingSVEAvailable() &&
5930 if (IsLoad && LT.second.isVector() &&
5932 LT.second.getVectorElementCount()))
5938 if (Mask.size() == 4 &&
5940 (SrcTy->getScalarSizeInBits() == 16 ||
5941 SrcTy->getScalarSizeInBits() == 32) &&
5942 all_of(Mask, [](int E) { return E < 8; }))
5946 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
5949 return M.value() < 0 || M.value() == (int)M.index();
5956 if (LT.second.isFixedLengthVector() &&
5957 LT.second.getVectorNumElements() == Mask.size() &&
5959 (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
5960 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
5961 isREVMask(Mask, LT.second.getScalarSizeInBits(),
5962 LT.second.getVectorNumElements(), 16) ||
5963 isREVMask(Mask, LT.second.getScalarSizeInBits(),
5964 LT.second.getVectorNumElements(), 32) ||
5965 isREVMask(Mask, LT.second.getScalarSizeInBits(),
5966 LT.second.getVectorNumElements(), 64) ||
5969 [&Mask](int M) { return M < 0 || M == Mask[0]; })))
6098 return LT.first * Entry->Cost;
6107 LT.second.getSizeInBits() <= 128 && SubTp) {
6109 if (SubLT.second.isVector()) {
6110 int NumElts = LT.second.getVectorNumElements();
6111 int NumSubElts = SubLT.second.getVectorNumElements();
6112 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6118 if (IsExtractSubvector)
6153 return ST->useFixedOverScalableIfEqualCost();
6157 return ST->getEpilogueVectorizationMinVF();
6191 unsigned NumInsns = 0;
6193 NumInsns += BB->sizeWithoutDebug();
6203 int64_t Scale, unsigned AddrSpace) const {
6231 if (I->getOpcode() == Instruction::Or &&
6236 if (I->getOpcode() == Instruction::Add ||
6237 I->getOpcode() == Instruction::Sub)
6262 return all_equal(Shuf->getShuffleMask());
6269 bool AllowSplat = false) {
6274 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
6275 auto *FullTy = FullV->getType();
6276 auto *HalfTy = HalfV->getType();
6278 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6281 auto extractHalf = [](Value *FullV, Value *HalfV) {
6284 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6288 Value *S1Op1 = nullptr, *S2Op1 = nullptr;
6302 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6303 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6317 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6318 (M2Start != 0 && M2Start != (NumElements / 2)))
6320 if (S1Op1 && S2Op1 && M1Start != M2Start)
6330 return Ext->getType()->getScalarSizeInBits() ==
6331 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6345 Value *VectorOperand = nullptr;
6362 if (!GEP || GEP->getNumOperands() != 2)
6366 Value *Offsets = GEP->getOperand(1);
6369 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6375 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6376 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6377 Ops.push_back(&GEP->getOperandUse(1));
6411 switch (II->getIntrinsicID()) {
6412 case Intrinsic::aarch64_neon_smull:
6413 case Intrinsic::aarch64_neon_umull:
6416 Ops.push_back(&II->getOperandUse(0));
6417 Ops.push_back(&II->getOperandUse(1));
6422 case Intrinsic::fma:
6423 case Intrinsic::fmuladd:
6429 case Intrinsic::aarch64_neon_sqdmull:
6430 case Intrinsic::aarch64_neon_sqdmulh:
6431 case Intrinsic::aarch64_neon_sqrdmulh:
6434 Ops.push_back(&II->getOperandUse(0));
6436 Ops.push_back(&II->getOperandUse(1));
6437 return !Ops.empty();
6438 case Intrinsic::aarch64_neon_fmlal:
6439 case Intrinsic::aarch64_neon_fmlal2:
6440 case Intrinsic::aarch64_neon_fmlsl:
6441 case Intrinsic::aarch64_neon_fmlsl2:
6444 Ops.push_back(&II->getOperandUse(1));
6446 Ops.push_back(&II->getOperandUse(2));
6447 return !Ops.empty();
6448 case Intrinsic::aarch64_sve_ptest_first:
6449 case Intrinsic::aarch64_sve_ptest_last:
6451 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6452 Ops.push_back(&II->getOperandUse(0));
6453 return !Ops.empty();
6454 case Intrinsic::aarch64_sme_write_horiz:
6455 case Intrinsic::aarch64_sme_write_vert:
6456 case Intrinsic::aarch64_sme_writeq_horiz:
6457 case Intrinsic::aarch64_sme_writeq_vert: {
6459 if (!Idx || Idx->getOpcode() != Instruction::Add)
6461 Ops.push_back(&II->getOperandUse(1));
6464 case Intrinsic::aarch64_sme_read_horiz:
6465 case Intrinsic::aarch64_sme_read_vert:
6466 case Intrinsic::aarch64_sme_readq_horiz:
6467 case Intrinsic::aarch64_sme_readq_vert:
6468 case Intrinsic::aarch64_sme_ld1b_vert:
6469 case Intrinsic::aarch64_sme_ld1h_vert:
6470 case Intrinsic::aarch64_sme_ld1w_vert:
6471 case Intrinsic::aarch64_sme_ld1d_vert:
6472 case Intrinsic::aarch64_sme_ld1q_vert:
6473 case Intrinsic::aarch64_sme_st1b_vert:
6474 case Intrinsic::aarch64_sme_st1h_vert:
6475 case Intrinsic::aarch64_sme_st1w_vert:
6476 case Intrinsic::aarch64_sme_st1d_vert:
6477 case Intrinsic::aarch64_sme_st1q_vert:
6478 case Intrinsic::aarch64_sme_ld1b_horiz:
6479 case Intrinsic::aarch64_sme_ld1h_horiz:
6480 case Intrinsic::aarch64_sme_ld1w_horiz:
6481 case Intrinsic::aarch64_sme_ld1d_horiz:
6482 case Intrinsic::aarch64_sme_ld1q_horiz:
6483 case Intrinsic::aarch64_sme_st1b_horiz:
6484 case Intrinsic::aarch64_sme_st1h_horiz:
6485 case Intrinsic::aarch64_sme_st1w_horiz:
6486 case Intrinsic::aarch64_sme_st1d_horiz:
6487 case Intrinsic::aarch64_sme_st1q_horiz: {
6489 if (!Idx || Idx->getOpcode() != Instruction::Add)
6491 Ops.push_back(&II->getOperandUse(3));
6494 case Intrinsic::aarch64_neon_pmull:
6497 Ops.push_back(&II->getOperandUse(0));
6498 Ops.push_back(&II->getOperandUse(1));
6500 case Intrinsic::aarch64_neon_pmull64:
6502 II->getArgOperand(1)))
6504 Ops.push_back(&II->getArgOperandUse(0));
6505 Ops.push_back(&II->getArgOperandUse(1));
6507 case Intrinsic::masked_gather:
6510 Ops.push_back(&II->getArgOperandUse(0));
6512 case Intrinsic::masked_scatter:
6515 Ops.push_back(&II->getArgOperandUse(1));
6522 auto ShouldSinkCondition = [](Value *Cond,
6527 if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6531 Ops.push_back(&II->getOperandUse(0));
6535 switch (I->getOpcode()) {
6536 case Instruction::GetElementPtr:
6537 case Instruction::Add:
6538 case Instruction::Sub:
6540 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
6542 Ops.push_back(&I->getOperandUse(Op));
6547 case Instruction::Select: {
6548 if (!ShouldSinkCondition(I->getOperand(0), Ops))
6551 Ops.push_back(&I->getOperandUse(0));
6554 case Instruction::Br: {
6561 Ops.push_back(&I->getOperandUse(0));
6568 if (!I->getType()->isVectorTy())
6571 switch (I->getOpcode()) {
6572 case Instruction::Sub:
6573 case Instruction::Add: {
6582 Ops.push_back(&Ext1->getOperandUse(0));
6583 Ops.push_back(&Ext2->getOperandUse(0));
6586 Ops.push_back(&I->getOperandUse(0));
6587 Ops.push_back(&I->getOperandUse(1));
6591 case Instruction::Or: {
6594 if (ST->hasNEON()) {
6608 if (I->getParent() != MainAnd->getParent() ||
6613 if (I->getParent() != IA->getParent() ||
6614 I->getParent() != IB->getParent())
6619 Ops.push_back(&I->getOperandUse(0));
6620 Ops.push_back(&I->getOperandUse(1));
6629 case Instruction::Mul: {
6630 auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
6633 if (Ty->isScalableTy())
6637 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
6640 int NumZExts = 0, NumSExts = 0;
6641 for (auto &Op : I->operands()) {
6648 auto *ExtOp = Ext->getOperand(0);
6649 if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
6650 Ops.push_back(&Ext->getOperandUse(0));
6691 if (!ElementConstant || !ElementConstant->isZero())
6694 unsigned Opcode = OperandInstr->getOpcode();
6695 if (Opcode == Instruction::SExt)
6697 else if (Opcode == Instruction::ZExt)
6702 unsigned Bitwidth = I->getType()->getScalarSizeInBits();
6712 Ops.push_back(&Insert->getOperandUse(1));
6718 if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
6722 if (!ShouldSinkSplatForIndexedVariant(I))
6727 Ops.push_back(&I->getOperandUse(0));
6729 Ops.push_back(&I->getOperandUse(1));
6731 return !Ops.empty();
6733 case Instruction::FMul: {
6735 if (I->getType()->isScalableTy())
6744 Ops.push_back(&I->getOperandUse(0));
6746 Ops.push_back(&I->getOperandUse(1));
6747 return !Ops.empty();
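Several of the operand-sinking cases above hinge on splat operands being free to rematerialize next to their user, so the backend can select an indexed (by-lane) instruction form. The helper below is a self-contained editorial sketch that mirrors the splat-shuffle check shown at line 6262 of the listing; it is not code from this file.

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instructions.h"

// A shufflevector is a splat when every mask element selects the same lane.
static bool isSplatShuffleSketch(llvm::Value *V) {
  if (auto *Shuf = llvm::dyn_cast<llvm::ShuffleVectorInst>(V))
    return llvm::all_equal(Shuf->getShuffleMask());
  return false;
}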
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the architecture features are not...
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const override
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool enableScalableVectorization() const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> or <4, 12,...
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
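These reduction kinds describe scalar loop patterns; as a plain C++ illustration (not an LLVM API), the "select(cmp())" shape behind an SMax reduction looks like this:

    #include <climits>

    static int smaxReduce(const int *A, int N) {
      int Max = INT_MIN;
      for (int I = 0; I < N; ++I)
        Max = (A[I] > Max) ? A[I] : Max; // a compare feeding a select
      return Max;
    }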
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
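Two small, self-contained uses of these range helpers (the values are arbitrary):

    #include "llvm/ADT/STLExtras.h"

    static void rangeHelperExamples() {
      int Vals[] = {4, 4, 4};
      bool AllSame = llvm::all_equal({Vals[0], Vals[1], Vals[2]}); // true
      bool HasFour = llvm::is_contained(Vals, 4);                  // true
      (void)AllSame;
      (void)HasFour;
    }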
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
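A hedged sketch of the lookup pattern used throughout this file; the table contents and the fallback cost are made up for illustration, and each entry is {ISD opcode, destination MVT, source MVT, cost}:

    #include "llvm/CodeGen/CostTable.h"
    #include "llvm/CodeGen/ISDOpcodes.h"

    using namespace llvm;

    static unsigned sextV4I16ToV4I32Cost() {
      static const TypeConversionCostTblEntry Tbl[] = {
          {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1},
      };
      if (const auto *Entry = ConvertCostTableLookup(Tbl, ISD::SIGN_EXTEND,
                                                     MVT::v4i32, MVT::v4i16))
        return Entry->Cost;
      return 4; // hypothetical fallback when no table entry matches
    }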
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
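Two concrete values, since "strictly greater" also applies when the input is already a power of two:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    static void nextPowerOf2Examples() {
      assert(llvm::NextPowerOf2(0) == 1);
      assert(llvm::NextPowerOf2(32) == 64); // strictly greater than 32
    }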
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
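A hedged sketch exercising the EVT queries listed above for a fixed-length v4i32 type (the LLVMContext is assumed to come from the caller):

    #include "llvm/CodeGen/ValueTypes.h"

    using namespace llvm;

    static void evtQueries(LLVMContext &Ctx) {
      EVT V4I32 = EVT::getVectorVT(Ctx, MVT::i32, /*NumElements=*/4);
      (void)V4I32.isSimple();             // true: maps onto MVT::v4i32
      (void)V4I32.getVectorNumElements(); // 4
      (void)V4I32.getScalarSizeInBits();  // 32
      (void)V4I32.isScalableVector();     // false: fixed-length vector
      (void)V4I32.getVectorElementType(); // the EVT for i32
    }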
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.