#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64tti"
41 "sve-prefer-fixed-over-scalable-if-equal",
cl::Hidden);
59 "Penalty of calling a function that requires a change to PSTATE.SM"));
63 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
74 cl::desc(
"The cost of a histcnt instruction"));
78 cl::desc(
"The number of instructions to search for a redundant dmb"));
82 cl::desc(
"Threshold for forced unrolling of small loops in AArch64"));
class TailFoldingOption {
  bool NeedsDefault = true;

  void setNeedsDefault(bool V) { NeedsDefault = V; }

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
    Bits &= ~DisableBits;
    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              "  (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";
  void operator=(const std::string &Val) {
    setNeedsDefault(false);

    StringRef(Val).split(TailFoldTypes, '+', -1, false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);

      setInitialBits(TailFoldingOpts::Disabled);
    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
  StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
  FeatureStr.split(Features, ",");

  return F.hasFnAttribute("fmv-features");
    AArch64::FeatureExecuteOnly,
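  // Features in InlineInverseFeatures are compared with inverted polarity:
  // XOR-ing both bitsets flips those bits so the usual "callee features must
  // be a subset of caller features" check below still applies to them.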
  FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
  FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;

  return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
  auto FVTy = dyn_cast<FixedVectorType>(Ty);
         FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;

                                     unsigned DefaultCallPenalty) const {

  if (F == Call.getCaller())
    return DefaultCallPenalty;
         ST->isSVEorStreamingSVEAvailable() &&
         !ST->disableMaximizeScalableBandwidth();

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
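  // Cost each 64-bit chunk of the sign-extended immediate separately;
  // materialising a chunk takes roughly one instruction.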
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {

  return std::max<InstructionCost>(1, Cost);
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  unsigned ImmIdx = ~0U;
447 case Instruction::GetElementPtr:
452 case Instruction::Store:
455 case Instruction::Add:
456 case Instruction::Sub:
457 case Instruction::Mul:
458 case Instruction::UDiv:
459 case Instruction::SDiv:
460 case Instruction::URem:
461 case Instruction::SRem:
462 case Instruction::And:
463 case Instruction::Or:
464 case Instruction::Xor:
465 case Instruction::ICmp:
469 case Instruction::Shl:
470 case Instruction::LShr:
471 case Instruction::AShr:
475 case Instruction::Trunc:
476 case Instruction::ZExt:
477 case Instruction::SExt:
478 case Instruction::IntToPtr:
479 case Instruction::PtrToInt:
480 case Instruction::BitCast:
481 case Instruction::PHI:
482 case Instruction::Call:
483 case Instruction::Select:
484 case Instruction::Ret:
485 case Instruction::Load:
490 int NumConstants = (BitSize + 63) / 64;
503 assert(Ty->isIntegerTy());
505 unsigned BitSize = Ty->getPrimitiveSizeInBits();
514 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
520 case Intrinsic::sadd_with_overflow:
521 case Intrinsic::uadd_with_overflow:
522 case Intrinsic::ssub_with_overflow:
523 case Intrinsic::usub_with_overflow:
524 case Intrinsic::smul_with_overflow:
525 case Intrinsic::umul_with_overflow:
527 int NumConstants = (BitSize + 63) / 64;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
554 if (TyWidth == 32 || TyWidth == 64)
578 unsigned TotalHistCnts = 1;
588 unsigned EC = VTy->getElementCount().getKnownMinValue();
593 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
595 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
599 TotalHistCnts = EC / NaturalVectorWidth;
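  // (Element counts beyond the natural legal vector width above are costed as
  // multiple histcnt operations.)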
  switch (ICA.getID()) {
620 case Intrinsic::experimental_vector_histogram_add: {
627 case Intrinsic::umin:
628 case Intrinsic::umax:
629 case Intrinsic::smin:
630 case Intrinsic::smax: {
631 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
632 MVT::v8i16, MVT::v2i32, MVT::v4i32,
633 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
637 if (LT.second == MVT::v2i64)
    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
643 case Intrinsic::sadd_sat:
644 case Intrinsic::ssub_sat:
645 case Intrinsic::uadd_sat:
646 case Intrinsic::usub_sat: {
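    // Saturating add/sub on legal vector types maps to a single instruction;
    // when the legalised element size differs from the source, extra
    // extend/clamp work is needed, costed as 4 instructions below.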
647 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
648 MVT::v8i16, MVT::v2i32, MVT::v4i32,
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
      return LT.first * Instrs;
    if (ST->isSVEAvailable() && VectorSize >= 128 && isPowerOf2_64(VectorSize))
      return LT.first * Instrs;
666 case Intrinsic::abs: {
667 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
668 MVT::v8i16, MVT::v2i32, MVT::v4i32,
    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }))
675 case Intrinsic::bswap: {
676 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
677 MVT::v4i32, MVT::v2i64};
    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }) &&
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
685 case Intrinsic::fmuladd: {
        (EltTy->isHalfTy() && ST->hasFullFP16()))
694 case Intrinsic::stepvector: {
703 Cost += AddCost * (LT.first - 1);
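    // (Each additional legalisation part of the stepvector above needs its own
    // vector add, hence AddCost * (LT.first - 1).)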
707 case Intrinsic::vector_extract:
708 case Intrinsic::vector_insert: {
    bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
    EVT SubVecVT = IsExtract ? getTLI()->getValueType(DL, RetTy)
        getTLI()->getTypeConversion(C, SubVecVT);
        getTLI()->getTypeConversion(C, VecVT);
740 case Intrinsic::bitreverse: {
742 {Intrinsic::bitreverse, MVT::i32, 1},
743 {Intrinsic::bitreverse, MVT::i64, 1},
744 {Intrinsic::bitreverse, MVT::v8i8, 1},
745 {Intrinsic::bitreverse, MVT::v16i8, 1},
746 {Intrinsic::bitreverse, MVT::v4i16, 2},
747 {Intrinsic::bitreverse, MVT::v8i16, 2},
748 {Intrinsic::bitreverse, MVT::v2i32, 2},
749 {Intrinsic::bitreverse, MVT::v4i32, 2},
750 {Intrinsic::bitreverse, MVT::v1i64, 2},
751 {Intrinsic::bitreverse, MVT::v2i64, 2},
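    // i8/i16 scalars are bit-reversed in a wider register, so an extra shift
    // is needed to bring the result back down (the +1 below).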
    if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||
        TLI->getValueType(DL, RetTy, true) == MVT::i16)
      return LegalisationCost.first * Entry->Cost + 1;

    return LegalisationCost.first * Entry->Cost;
767 case Intrinsic::ctpop: {
768 if (!ST->hasNEON()) {
789 RetTy->getScalarSizeInBits()
792 return LT.first * Entry->Cost + ExtraCost;
796 case Intrinsic::sadd_with_overflow:
797 case Intrinsic::uadd_with_overflow:
798 case Intrinsic::ssub_with_overflow:
799 case Intrinsic::usub_with_overflow:
800 case Intrinsic::smul_with_overflow:
801 case Intrinsic::umul_with_overflow: {
803 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
804 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
805 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
806 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
807 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
808 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
809 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
810 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
811 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
812 {Intrinsic::usub_with_overflow, MVT::i8, 3},
813 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
814 {Intrinsic::usub_with_overflow, MVT::i16, 3},
815 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
816 {Intrinsic::usub_with_overflow, MVT::i32, 1},
817 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
818 {Intrinsic::usub_with_overflow, MVT::i64, 1},
819 {Intrinsic::smul_with_overflow, MVT::i8, 5},
820 {Intrinsic::umul_with_overflow, MVT::i8, 4},
821 {Intrinsic::smul_with_overflow, MVT::i16, 5},
822 {Intrinsic::umul_with_overflow, MVT::i16, 4},
823 {Intrinsic::smul_with_overflow, MVT::i32, 2},
824 {Intrinsic::umul_with_overflow, MVT::i32, 2},
825 {Intrinsic::smul_with_overflow, MVT::i64, 3},
826 {Intrinsic::umul_with_overflow, MVT::i64, 3},
    EVT MTy = TLI->getValueType(DL, RetTy->getContainedType(0), true);
835 case Intrinsic::fptosi_sat:
836 case Intrinsic::fptoui_sat: {
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;

    EVT MTy = TLI->getValueType(DL, RetTy);
844 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
845 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
846 LT.second == MVT::v2f64)) {
848 (LT.second == MVT::f64 && MTy == MVT::i32) ||
849 (LT.second == MVT::f32 && MTy == MVT::i64)))
858 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
865 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
866 (LT.second == MVT::f16 && MTy == MVT::i64) ||
867 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
881 if ((LT.second.getScalarType() == MVT::f32 ||
882 LT.second.getScalarType() == MVT::f64 ||
883 LT.second.getScalarType() == MVT::f16) &&
887 if (LT.second.isVector())
891 LegalTy, {LegalTy, LegalTy});
894 LegalTy, {LegalTy, LegalTy});
      return LT.first * Cost +
             ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0

    RetTy = RetTy->getScalarType();
    if (LT.second.isVector()) {

      return LT.first * Cost;
924 case Intrinsic::fshl:
925 case Intrinsic::fshr: {
    if (RetTy->isIntegerTy() && ICA.getArgs()[0] == ICA.getArgs()[1] &&
        (RetTy->getPrimitiveSizeInBits() == 32 ||
         RetTy->getPrimitiveSizeInBits() == 64)) {
949 {Intrinsic::fshl, MVT::v4i32, 2},
950 {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
951 {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
952 {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
958 return LegalisationCost.first * Entry->Cost;
962 if (!RetTy->isIntegerTy())
    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
                       RetTy->getScalarSizeInBits() < 64) ||
                      (RetTy->getScalarSizeInBits() % 64 != 0);
    unsigned ExtraCost = HigherCost ? 1 : 0;
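    // Funnel shifts on widths other than 32/64 bits need one extra instruction
    // on top of the basic shift/extract sequence.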
971 if (RetTy->getScalarSizeInBits() == 32 ||
972 RetTy->getScalarSizeInBits() == 64)
979 return TyL.first + ExtraCost;
  case Intrinsic::get_active_lane_mask: {
    EVT RetVT = getTLI()->getValueType(DL, RetTy);
985 if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
988 if (RetTy->isScalableTy()) {
989 if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
999 if (ST->hasSVE2p1() || ST->hasSME2()) {
      return Cost + (SplitCost * (Cost - 1));
1029 case Intrinsic::experimental_vector_match: {
1032 unsigned SearchSize = NeedleTy->getNumElements();
1033 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
1046 case Intrinsic::experimental_cttz_elts: {
1048 if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
1056 case Intrinsic::experimental_vector_extract_last_active:
1057 if (ST->isSVEorStreamingSVEAvailable()) {
1074 auto RequiredType =
II.getType();
1077 assert(PN &&
"Expected Phi Node!");
1080 if (!PN->hasOneUse())
1081 return std::nullopt;
1083 for (
Value *IncValPhi : PN->incoming_values()) {
1086 Reinterpret->getIntrinsicID() !=
1087 Intrinsic::aarch64_sve_convert_to_svbool ||
1088 RequiredType != Reinterpret->getArgOperand(0)->getType())
1089 return std::nullopt;
1097 for (
unsigned I = 0;
I < PN->getNumIncomingValues();
I++) {
1099 NPN->
addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(
I));
1172 return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
1177 return GoverningPredicateIdx;
1182 GoverningPredicateIdx = Index;
1200 return UndefIntrinsic;
1205 UndefIntrinsic = IID;
1227 return ResultLanes == InactiveLanesTakenFromOperand;
1232 return OperandIdxForInactiveLanes;
1236 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1237 ResultLanes = InactiveLanesTakenFromOperand;
1238 OperandIdxForInactiveLanes = Index;
1243 return ResultLanes == InactiveLanesAreNotDefined;
1247 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1248 ResultLanes = InactiveLanesAreNotDefined;
1253 return ResultLanes == InactiveLanesAreUnused;
1257 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1258 ResultLanes = InactiveLanesAreUnused;
1268 ResultIsZeroInitialized =
true;
1279 return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
1284 return OperandIdxWithNoActiveLanes;
1289 OperandIdxWithNoActiveLanes = Index;
1294 unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
1297 unsigned IROpcode = 0;
1299 enum PredicationStyle {
1301 InactiveLanesTakenFromOperand,
1302 InactiveLanesAreNotDefined,
1303 InactiveLanesAreUnused
1306 bool ResultIsZeroInitialized =
false;
1307 unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
1308 unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
1316 return !isa<ScalableVectorType>(V->getType());
1324 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1325 case Intrinsic::aarch64_sve_fcvt_f16f32:
1326 case Intrinsic::aarch64_sve_fcvt_f16f64:
1327 case Intrinsic::aarch64_sve_fcvt_f32f16:
1328 case Intrinsic::aarch64_sve_fcvt_f32f64:
1329 case Intrinsic::aarch64_sve_fcvt_f64f16:
1330 case Intrinsic::aarch64_sve_fcvt_f64f32:
1331 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1332 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1333 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1334 case Intrinsic::aarch64_sve_fcvtzs:
1335 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1336 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1337 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1338 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1339 case Intrinsic::aarch64_sve_fcvtzu:
1340 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1341 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1342 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1343 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1344 case Intrinsic::aarch64_sve_scvtf:
1345 case Intrinsic::aarch64_sve_scvtf_f16i32:
1346 case Intrinsic::aarch64_sve_scvtf_f16i64:
1347 case Intrinsic::aarch64_sve_scvtf_f32i64:
1348 case Intrinsic::aarch64_sve_scvtf_f64i32:
1349 case Intrinsic::aarch64_sve_ucvtf:
1350 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1351 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1352 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1353 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1356 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1357 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1358 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1359 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1362 case Intrinsic::aarch64_sve_fabd:
1364 case Intrinsic::aarch64_sve_fadd:
1367 case Intrinsic::aarch64_sve_fdiv:
1370 case Intrinsic::aarch64_sve_fmax:
1372 case Intrinsic::aarch64_sve_fmaxnm:
1374 case Intrinsic::aarch64_sve_fmin:
1376 case Intrinsic::aarch64_sve_fminnm:
1378 case Intrinsic::aarch64_sve_fmla:
1380 case Intrinsic::aarch64_sve_fmls:
1382 case Intrinsic::aarch64_sve_fmul:
1385 case Intrinsic::aarch64_sve_fmulx:
1387 case Intrinsic::aarch64_sve_fnmla:
1389 case Intrinsic::aarch64_sve_fnmls:
1391 case Intrinsic::aarch64_sve_fsub:
1394 case Intrinsic::aarch64_sve_add:
1397 case Intrinsic::aarch64_sve_mla:
1399 case Intrinsic::aarch64_sve_mls:
1401 case Intrinsic::aarch64_sve_mul:
1404 case Intrinsic::aarch64_sve_sabd:
1406 case Intrinsic::aarch64_sve_sdiv:
1409 case Intrinsic::aarch64_sve_smax:
1411 case Intrinsic::aarch64_sve_smin:
1413 case Intrinsic::aarch64_sve_smulh:
1415 case Intrinsic::aarch64_sve_sub:
1418 case Intrinsic::aarch64_sve_uabd:
1420 case Intrinsic::aarch64_sve_udiv:
1423 case Intrinsic::aarch64_sve_umax:
1425 case Intrinsic::aarch64_sve_umin:
1427 case Intrinsic::aarch64_sve_umulh:
1429 case Intrinsic::aarch64_sve_asr:
1432 case Intrinsic::aarch64_sve_lsl:
1435 case Intrinsic::aarch64_sve_lsr:
1438 case Intrinsic::aarch64_sve_and:
1441 case Intrinsic::aarch64_sve_bic:
1443 case Intrinsic::aarch64_sve_eor:
1446 case Intrinsic::aarch64_sve_orr:
1449 case Intrinsic::aarch64_sve_sqrshl:
1451 case Intrinsic::aarch64_sve_sqshl:
1453 case Intrinsic::aarch64_sve_sqsub:
1455 case Intrinsic::aarch64_sve_srshl:
1457 case Intrinsic::aarch64_sve_uqrshl:
1459 case Intrinsic::aarch64_sve_uqshl:
1461 case Intrinsic::aarch64_sve_uqsub:
1463 case Intrinsic::aarch64_sve_urshl:
1466 case Intrinsic::aarch64_sve_add_u:
1469 case Intrinsic::aarch64_sve_and_u:
1472 case Intrinsic::aarch64_sve_asr_u:
1475 case Intrinsic::aarch64_sve_eor_u:
1478 case Intrinsic::aarch64_sve_fadd_u:
1481 case Intrinsic::aarch64_sve_fdiv_u:
1484 case Intrinsic::aarch64_sve_fmul_u:
1487 case Intrinsic::aarch64_sve_fsub_u:
1490 case Intrinsic::aarch64_sve_lsl_u:
1493 case Intrinsic::aarch64_sve_lsr_u:
1496 case Intrinsic::aarch64_sve_mul_u:
1499 case Intrinsic::aarch64_sve_orr_u:
1502 case Intrinsic::aarch64_sve_sdiv_u:
1505 case Intrinsic::aarch64_sve_sub_u:
1508 case Intrinsic::aarch64_sve_udiv_u:
1512 case Intrinsic::aarch64_sve_addqv:
1513 case Intrinsic::aarch64_sve_and_z:
1514 case Intrinsic::aarch64_sve_bic_z:
1515 case Intrinsic::aarch64_sve_brka_z:
1516 case Intrinsic::aarch64_sve_brkb_z:
1517 case Intrinsic::aarch64_sve_brkn_z:
1518 case Intrinsic::aarch64_sve_brkpa_z:
1519 case Intrinsic::aarch64_sve_brkpb_z:
1520 case Intrinsic::aarch64_sve_cntp:
1521 case Intrinsic::aarch64_sve_compact:
1522 case Intrinsic::aarch64_sve_eor_z:
1523 case Intrinsic::aarch64_sve_eorv:
1524 case Intrinsic::aarch64_sve_eorqv:
1525 case Intrinsic::aarch64_sve_nand_z:
1526 case Intrinsic::aarch64_sve_nor_z:
1527 case Intrinsic::aarch64_sve_orn_z:
1528 case Intrinsic::aarch64_sve_orr_z:
1529 case Intrinsic::aarch64_sve_orv:
1530 case Intrinsic::aarch64_sve_orqv:
1531 case Intrinsic::aarch64_sve_pnext:
1532 case Intrinsic::aarch64_sve_rdffr_z:
1533 case Intrinsic::aarch64_sve_saddv:
1534 case Intrinsic::aarch64_sve_uaddv:
1535 case Intrinsic::aarch64_sve_umaxv:
1536 case Intrinsic::aarch64_sve_umaxqv:
1537 case Intrinsic::aarch64_sve_cmpeq:
1538 case Intrinsic::aarch64_sve_cmpeq_wide:
1539 case Intrinsic::aarch64_sve_cmpge:
1540 case Intrinsic::aarch64_sve_cmpge_wide:
1541 case Intrinsic::aarch64_sve_cmpgt:
1542 case Intrinsic::aarch64_sve_cmpgt_wide:
1543 case Intrinsic::aarch64_sve_cmphi:
1544 case Intrinsic::aarch64_sve_cmphi_wide:
1545 case Intrinsic::aarch64_sve_cmphs:
1546 case Intrinsic::aarch64_sve_cmphs_wide:
1547 case Intrinsic::aarch64_sve_cmple_wide:
1548 case Intrinsic::aarch64_sve_cmplo_wide:
1549 case Intrinsic::aarch64_sve_cmpls_wide:
1550 case Intrinsic::aarch64_sve_cmplt_wide:
1551 case Intrinsic::aarch64_sve_cmpne:
1552 case Intrinsic::aarch64_sve_cmpne_wide:
1553 case Intrinsic::aarch64_sve_facge:
1554 case Intrinsic::aarch64_sve_facgt:
1555 case Intrinsic::aarch64_sve_fcmpeq:
1556 case Intrinsic::aarch64_sve_fcmpge:
1557 case Intrinsic::aarch64_sve_fcmpgt:
1558 case Intrinsic::aarch64_sve_fcmpne:
1559 case Intrinsic::aarch64_sve_fcmpuo:
1560 case Intrinsic::aarch64_sve_ld1:
1561 case Intrinsic::aarch64_sve_ld1_gather:
1562 case Intrinsic::aarch64_sve_ld1_gather_index:
1563 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1564 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1565 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1566 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1567 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1568 case Intrinsic::aarch64_sve_ld1q_gather_index:
1569 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1570 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1571 case Intrinsic::aarch64_sve_ld1ro:
1572 case Intrinsic::aarch64_sve_ld1rq:
1573 case Intrinsic::aarch64_sve_ld1udq:
1574 case Intrinsic::aarch64_sve_ld1uwq:
1575 case Intrinsic::aarch64_sve_ld2_sret:
1576 case Intrinsic::aarch64_sve_ld2q_sret:
1577 case Intrinsic::aarch64_sve_ld3_sret:
1578 case Intrinsic::aarch64_sve_ld3q_sret:
1579 case Intrinsic::aarch64_sve_ld4_sret:
1580 case Intrinsic::aarch64_sve_ld4q_sret:
1581 case Intrinsic::aarch64_sve_ldff1:
1582 case Intrinsic::aarch64_sve_ldff1_gather:
1583 case Intrinsic::aarch64_sve_ldff1_gather_index:
1584 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1585 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1586 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1587 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1588 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1589 case Intrinsic::aarch64_sve_ldnf1:
1590 case Intrinsic::aarch64_sve_ldnt1:
1591 case Intrinsic::aarch64_sve_ldnt1_gather:
1592 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1593 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1594 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1597 case Intrinsic::aarch64_sve_prf:
1598 case Intrinsic::aarch64_sve_prfb_gather_index:
1599 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1600 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1601 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1602 case Intrinsic::aarch64_sve_prfd_gather_index:
1603 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1604 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1605 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1606 case Intrinsic::aarch64_sve_prfh_gather_index:
1607 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1608 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1609 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1610 case Intrinsic::aarch64_sve_prfw_gather_index:
1611 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1612 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1613 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1616 case Intrinsic::aarch64_sve_st1_scatter:
1617 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1618 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1619 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1620 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1621 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1622 case Intrinsic::aarch64_sve_st1dq:
1623 case Intrinsic::aarch64_sve_st1q_scatter_index:
1624 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1625 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1626 case Intrinsic::aarch64_sve_st1wq:
1627 case Intrinsic::aarch64_sve_stnt1:
1628 case Intrinsic::aarch64_sve_stnt1_scatter:
1629 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1630 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1631 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1633 case Intrinsic::aarch64_sve_st2:
1634 case Intrinsic::aarch64_sve_st2q:
1636 case Intrinsic::aarch64_sve_st3:
1637 case Intrinsic::aarch64_sve_st3q:
1639 case Intrinsic::aarch64_sve_st4:
1640 case Intrinsic::aarch64_sve_st4q:
1648 Value *UncastedPred;
1654 Pred = UncastedPred;
1660 if (OrigPredTy->getMinNumElements() <=
1662 ->getMinNumElements())
1663 Pred = UncastedPred;
1667 return C &&
C->isAllOnesValue();
1674 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
1675 Dup->getOperand(1) == Pg &&
isa<Constant>(Dup->getOperand(2)))
1683static std::optional<Instruction *>
1690 Value *Op1 =
II.getOperand(1);
1691 Value *Op2 =
II.getOperand(2);
1717 return std::nullopt;
1725 if (SimpleII == Inactive)
1735static std::optional<Instruction *>
1739 return std::nullopt;
1768 II.setCalledFunction(NewDecl);
1778 return std::nullopt;
1790static std::optional<Instruction *>
1794 return std::nullopt;
1796 auto IntrinsicID = BinOp->getIntrinsicID();
1797 switch (IntrinsicID) {
1798 case Intrinsic::aarch64_sve_and_z:
1799 case Intrinsic::aarch64_sve_bic_z:
1800 case Intrinsic::aarch64_sve_eor_z:
1801 case Intrinsic::aarch64_sve_nand_z:
1802 case Intrinsic::aarch64_sve_nor_z:
1803 case Intrinsic::aarch64_sve_orn_z:
1804 case Intrinsic::aarch64_sve_orr_z:
1807 return std::nullopt;
1810 auto BinOpPred = BinOp->getOperand(0);
1811 auto BinOpOp1 = BinOp->getOperand(1);
1812 auto BinOpOp2 = BinOp->getOperand(2);
1816 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
1817 return std::nullopt;
1819 auto PredOp = PredIntr->getOperand(0);
1821 if (PredOpTy !=
II.getType())
1822 return std::nullopt;
1826 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
1827 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1828 if (BinOpOp1 == BinOpOp2)
1829 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1832 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
1834 auto NarrowedBinOp =
1839static std::optional<Instruction *>
1846 return BinOpCombine;
1851 return std::nullopt;
1854 Value *Cursor =
II.getOperand(0), *EarliestReplacement =
nullptr;
1863 if (CursorVTy->getElementCount().getKnownMinValue() <
1864 IVTy->getElementCount().getKnownMinValue())
1868 if (Cursor->getType() == IVTy)
1869 EarliestReplacement = Cursor;
1874 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
1875 Intrinsic::aarch64_sve_convert_to_svbool ||
1876 IntrinsicCursor->getIntrinsicID() ==
1877 Intrinsic::aarch64_sve_convert_from_svbool))
1880 CandidatesForRemoval.
insert(CandidatesForRemoval.
begin(), IntrinsicCursor);
1881 Cursor = IntrinsicCursor->getOperand(0);
1886 if (!EarliestReplacement)
1887 return std::nullopt;
1895 auto *OpPredicate =
II.getOperand(0);
1912 II.getArgOperand(2));
1918 return std::nullopt;
1922 II.getArgOperand(0),
II.getArgOperand(2),
uint64_t(0));
1931 II.getArgOperand(0));
1941 return std::nullopt;
1946 if (!SplatValue || !SplatValue->isZero())
1947 return std::nullopt;
1952 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
1953 return std::nullopt;
1957 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
1958 return std::nullopt;
1961 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
1962 return std::nullopt;
1967 return std::nullopt;
1970 return std::nullopt;
1974 return std::nullopt;
1978 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
1979 return std::nullopt;
1981 unsigned NumElts = VecTy->getNumElements();
1982 unsigned PredicateBits = 0;
1985 for (
unsigned I = 0;
I < NumElts; ++
I) {
1988 return std::nullopt;
1990 PredicateBits |= 1 << (
I * (16 / NumElts));
1994 if (PredicateBits == 0) {
1996 PFalse->takeName(&
II);
2002 for (
unsigned I = 0;
I < 16; ++
I)
2003 if ((PredicateBits & (1 <<
I)) != 0)
2006 unsigned PredSize = Mask & -Mask;
2011 for (
unsigned I = 0;
I < 16;
I += PredSize)
2012 if ((PredicateBits & (1 <<
I)) == 0)
2013 return std::nullopt;
2018 {PredType}, {PTruePat});
2020 Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
2021 auto *ConvertFromSVBool =
2023 {
II.getType()}, {ConvertToSVBool});
2031 Value *Pg =
II.getArgOperand(0);
2032 Value *Vec =
II.getArgOperand(1);
2033 auto IntrinsicID =
II.getIntrinsicID();
2034 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
2046 auto OpC = OldBinOp->getOpcode();
2052 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(),
II.getIterator());
2058 if (IsAfter &&
C &&
C->isNullValue()) {
2062 Extract->insertBefore(
II.getIterator());
2063 Extract->takeName(&
II);
2069 return std::nullopt;
2071 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
2072 return std::nullopt;
2074 const auto PTruePattern =
2080 return std::nullopt;
2082 unsigned Idx = MinNumElts - 1;
2092 if (Idx >= PgVTy->getMinNumElements())
2093 return std::nullopt;
2098 Extract->insertBefore(
II.getIterator());
2099 Extract->takeName(&
II);
2112 Value *Pg =
II.getArgOperand(0);
2114 Value *Vec =
II.getArgOperand(2);
2117 if (!Ty->isIntegerTy())
2118 return std::nullopt;
2123 return std::nullopt;
2140 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
2153 {
II.getType()}, {AllPat});
2160static std::optional<Instruction *>
2164 if (
Pattern == AArch64SVEPredPattern::all) {
2173 return MinNumElts && NumElts >= MinNumElts
2175 II, ConstantInt::get(
II.getType(), MinNumElts)))
2179static std::optional<Instruction *>
2182 if (!ST->isStreaming())
2183 return std::nullopt;
2195 Value *PgVal =
II.getArgOperand(0);
2196 Value *OpVal =
II.getArgOperand(1);
2200 if (PgVal == OpVal &&
2201 (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
2202 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
2217 return std::nullopt;
2221 if (Pg->
getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
2222 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
2236 if ((Pg ==
Op) && (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
2237 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
2238 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
2239 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
2240 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
2241 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
2242 (OpIID == Intrinsic::aarch64_sve_and_z) ||
2243 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
2244 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
2245 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
2246 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
2247 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
2248 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
2258 return std::nullopt;
2261template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
2262static std::optional<Instruction *>
2264 bool MergeIntoAddendOp) {
2266 Value *MulOp0, *MulOp1, *AddendOp, *
Mul;
2267 if (MergeIntoAddendOp) {
2268 AddendOp =
II.getOperand(1);
2269 Mul =
II.getOperand(2);
2271 AddendOp =
II.getOperand(2);
2272 Mul =
II.getOperand(1);
2277 return std::nullopt;
2279 if (!
Mul->hasOneUse())
2280 return std::nullopt;
2283 if (
II.getType()->isFPOrFPVectorTy()) {
2288 return std::nullopt;
2290 return std::nullopt;
2295 if (MergeIntoAddendOp)
2305static std::optional<Instruction *>
2307 Value *Pred =
II.getOperand(0);
2308 Value *PtrOp =
II.getOperand(1);
2309 Type *VecTy =
II.getType();
2313 Load->copyMetadata(
II);
2324static std::optional<Instruction *>
2326 Value *VecOp =
II.getOperand(0);
2327 Value *Pred =
II.getOperand(1);
2328 Value *PtrOp =
II.getOperand(2);
2332 Store->copyMetadata(
II);
2344 case Intrinsic::aarch64_sve_fmul_u:
2345 return Instruction::BinaryOps::FMul;
2346 case Intrinsic::aarch64_sve_fadd_u:
2347 return Instruction::BinaryOps::FAdd;
2348 case Intrinsic::aarch64_sve_fsub_u:
2349 return Instruction::BinaryOps::FSub;
2351 return Instruction::BinaryOpsEnd;
2355static std::optional<Instruction *>
2358 if (
II.isStrictFP())
2359 return std::nullopt;
2361 auto *OpPredicate =
II.getOperand(0);
2363 if (BinOpCode == Instruction::BinaryOpsEnd ||
2365 return std::nullopt;
2367 BinOpCode,
II.getOperand(1),
II.getOperand(2),
II.getFastMathFlags());
2374 Intrinsic::aarch64_sve_mla>(
2378 Intrinsic::aarch64_sve_mad>(
2381 return std::nullopt;
2384static std::optional<Instruction *>
2388 Intrinsic::aarch64_sve_fmla>(IC,
II,
2393 Intrinsic::aarch64_sve_fmad>(IC,
II,
2398 Intrinsic::aarch64_sve_fmla>(IC,
II,
2401 return std::nullopt;
2404static std::optional<Instruction *>
2408 Intrinsic::aarch64_sve_fmla>(IC,
II,
2413 Intrinsic::aarch64_sve_fmad>(IC,
II,
2418 Intrinsic::aarch64_sve_fmla_u>(
2424static std::optional<Instruction *>
2428 Intrinsic::aarch64_sve_fmls>(IC,
II,
2433 Intrinsic::aarch64_sve_fnmsb>(
2438 Intrinsic::aarch64_sve_fmls>(IC,
II,
2441 return std::nullopt;
2444static std::optional<Instruction *>
2448 Intrinsic::aarch64_sve_fmls>(IC,
II,
2453 Intrinsic::aarch64_sve_fnmsb>(
2458 Intrinsic::aarch64_sve_fmls_u>(
2467 Intrinsic::aarch64_sve_mls>(
2470 return std::nullopt;
2475 Value *UnpackArg =
II.getArgOperand(0);
2477 bool IsSigned =
II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
2478 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
2491 return std::nullopt;
2495 auto *OpVal =
II.getOperand(0);
2496 auto *OpIndices =
II.getOperand(1);
2503 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
2504 return std::nullopt;
2519 Type *RetTy =
II.getType();
2520 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
2521 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
2525 if ((
match(
II.getArgOperand(0),
2532 if (TyA ==
B->getType() &&
2537 TyA->getMinNumElements());
2543 return std::nullopt;
2551 if (
match(
II.getArgOperand(0),
2556 II, (
II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ?
A :
B));
2558 return std::nullopt;
2561static std::optional<Instruction *>
2563 Value *Mask =
II.getOperand(0);
2564 Value *BasePtr =
II.getOperand(1);
2565 Value *Index =
II.getOperand(2);
2576 BasePtr->getPointerAlignment(
II.getDataLayout());
2579 BasePtr, IndexBase);
2586 return std::nullopt;
2589static std::optional<Instruction *>
2591 Value *Val =
II.getOperand(0);
2592 Value *Mask =
II.getOperand(1);
2593 Value *BasePtr =
II.getOperand(2);
2594 Value *Index =
II.getOperand(3);
2604 BasePtr->getPointerAlignment(
II.getDataLayout());
2607 BasePtr, IndexBase);
2613 return std::nullopt;
2619 Value *Pred =
II.getOperand(0);
2620 Value *Vec =
II.getOperand(1);
2621 Value *DivVec =
II.getOperand(2);
2625 if (!SplatConstantInt)
2626 return std::nullopt;
2630 if (DivisorValue == -1)
2631 return std::nullopt;
2632 if (DivisorValue == 1)
2638 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2645 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2647 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2651 return std::nullopt;
2655 size_t VecSize = Vec.
size();
2660 size_t HalfVecSize = VecSize / 2;
2664 if (*
LHS !=
nullptr && *
RHS !=
nullptr) {
2672 if (*
LHS ==
nullptr && *
RHS !=
nullptr)
2690 return std::nullopt;
2697 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2698 CurrentInsertElt = InsertElt->getOperand(0);
2704 return std::nullopt;
2708 for (
size_t I = 0;
I < Elts.
size();
I++) {
2709 if (Elts[
I] ==
nullptr)
2714 if (InsertEltChain ==
nullptr)
2715 return std::nullopt;
2721 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.
size();
2722 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2723 IIScalableTy->getMinNumElements() /
2728 auto *WideShuffleMaskTy =
2739 auto NarrowBitcast =
2752 return std::nullopt;
2757 Value *Pred =
II.getOperand(0);
2758 Value *Vec =
II.getOperand(1);
2759 Value *Shift =
II.getOperand(2);
2762 Value *AbsPred, *MergedValue;
2768 return std::nullopt;
2776 return std::nullopt;
2781 return std::nullopt;
2784 {
II.getType()}, {Pred, Vec, Shift});
2791 Value *Vec =
II.getOperand(0);
2796 return std::nullopt;
2802 auto *NI =
II.getNextNode();
2805 return !
I->mayReadOrWriteMemory() && !
I->mayHaveSideEffects();
2807 while (LookaheadThreshold-- && CanSkipOver(NI)) {
2808 auto *NIBB = NI->getParent();
2809 NI = NI->getNextNode();
2811 if (
auto *SuccBB = NIBB->getUniqueSuccessor())
2812 NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();
2818 if (NextII &&
II.isIdenticalTo(NextII))
2821 return std::nullopt;
2829 {II.getType(), II.getOperand(0)->getType()},
2830 {II.getOperand(0), II.getOperand(1)}));
2837 return std::nullopt;
2843 Value *Passthru =
II.getOperand(0);
2851 auto *Mask = ConstantInt::get(Ty, MaskValue);
2857 return std::nullopt;
2860static std::optional<Instruction *>
2867 return std::nullopt;
2870std::optional<Instruction *>
2881 case Intrinsic::aarch64_dmb:
2883 case Intrinsic::aarch64_neon_fmaxnm:
2884 case Intrinsic::aarch64_neon_fminnm:
2886 case Intrinsic::aarch64_sve_convert_from_svbool:
2888 case Intrinsic::aarch64_sve_dup:
2890 case Intrinsic::aarch64_sve_dup_x:
2892 case Intrinsic::aarch64_sve_cmpne:
2893 case Intrinsic::aarch64_sve_cmpne_wide:
2895 case Intrinsic::aarch64_sve_rdffr:
2897 case Intrinsic::aarch64_sve_lasta:
2898 case Intrinsic::aarch64_sve_lastb:
2900 case Intrinsic::aarch64_sve_clasta_n:
2901 case Intrinsic::aarch64_sve_clastb_n:
2903 case Intrinsic::aarch64_sve_cntd:
2905 case Intrinsic::aarch64_sve_cntw:
2907 case Intrinsic::aarch64_sve_cnth:
2909 case Intrinsic::aarch64_sve_cntb:
2911 case Intrinsic::aarch64_sme_cntsd:
2913 case Intrinsic::aarch64_sve_ptest_any:
2914 case Intrinsic::aarch64_sve_ptest_first:
2915 case Intrinsic::aarch64_sve_ptest_last:
2917 case Intrinsic::aarch64_sve_fadd:
2919 case Intrinsic::aarch64_sve_fadd_u:
2921 case Intrinsic::aarch64_sve_fmul_u:
2923 case Intrinsic::aarch64_sve_fsub:
2925 case Intrinsic::aarch64_sve_fsub_u:
2927 case Intrinsic::aarch64_sve_add:
2929 case Intrinsic::aarch64_sve_add_u:
2931 Intrinsic::aarch64_sve_mla_u>(
2933 case Intrinsic::aarch64_sve_sub:
2935 case Intrinsic::aarch64_sve_sub_u:
2937 Intrinsic::aarch64_sve_mls_u>(
2939 case Intrinsic::aarch64_sve_tbl:
2941 case Intrinsic::aarch64_sve_uunpkhi:
2942 case Intrinsic::aarch64_sve_uunpklo:
2943 case Intrinsic::aarch64_sve_sunpkhi:
2944 case Intrinsic::aarch64_sve_sunpklo:
2946 case Intrinsic::aarch64_sve_uzp1:
2948 case Intrinsic::aarch64_sve_zip1:
2949 case Intrinsic::aarch64_sve_zip2:
2951 case Intrinsic::aarch64_sve_ld1_gather_index:
2953 case Intrinsic::aarch64_sve_st1_scatter_index:
2955 case Intrinsic::aarch64_sve_ld1:
2957 case Intrinsic::aarch64_sve_st1:
2959 case Intrinsic::aarch64_sve_sdiv:
2961 case Intrinsic::aarch64_sve_sel:
2963 case Intrinsic::aarch64_sve_srshl:
2965 case Intrinsic::aarch64_sve_dupq_lane:
2967 case Intrinsic::aarch64_sve_insr:
2969 case Intrinsic::aarch64_sve_whilelo:
2971 case Intrinsic::aarch64_sve_ptrue:
2973 case Intrinsic::aarch64_sve_uxtb:
2975 case Intrinsic::aarch64_sve_uxth:
2977 case Intrinsic::aarch64_sve_uxtw:
2979 case Intrinsic::aarch64_sme_in_streaming_mode:
2983 return std::nullopt;
2990 SimplifyAndSetOp)
const {
2991 switch (
II.getIntrinsicID()) {
2994 case Intrinsic::aarch64_neon_fcvtxn:
2995 case Intrinsic::aarch64_neon_rshrn:
2996 case Intrinsic::aarch64_neon_sqrshrn:
2997 case Intrinsic::aarch64_neon_sqrshrun:
2998 case Intrinsic::aarch64_neon_sqshrn:
2999 case Intrinsic::aarch64_neon_sqshrun:
3000 case Intrinsic::aarch64_neon_sqxtn:
3001 case Intrinsic::aarch64_neon_sqxtun:
3002 case Intrinsic::aarch64_neon_uqrshrn:
3003 case Intrinsic::aarch64_neon_uqshrn:
3004 case Intrinsic::aarch64_neon_uqxtn:
3005 SimplifyAndSetOp(&
II, 0, OrigDemandedElts, UndefElts);
3009 return std::nullopt;
3013 return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3023 if (ST->useSVEForFixedLengthVectors() &&
3026 std::max(ST->getMinSVEVectorSizeInBits(), 128u));
3027 else if (ST->isNeonAvailable())
3032 if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3041bool AArch64TTIImpl::isSingleExtWideningInstruction(
3043 Type *SrcOverrideTy)
const {
3058 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3061 Type *SrcTy = SrcOverrideTy;
3063 case Instruction::Add:
3064 case Instruction::Sub: {
3073 if (Opcode == Instruction::Sub)
  assert(SrcTy && "Expected some SrcTy");
3099 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
3105 DstTyL.first * DstTyL.second.getVectorMinNumElements();
3107 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
3111 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
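// A single-extension widening instruction requires the source element size to
// be exactly half the destination's, with matching element counts.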
3114Type *AArch64TTIImpl::isBinExtWideningInstruction(
unsigned Opcode,
Type *DstTy,
3116 Type *SrcOverrideTy)
const {
3117 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
3118 Opcode != Instruction::Mul)
3128 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3131 auto getScalarSizeWithOverride = [&](
const Value *
V) {
3137 ->getScalarSizeInBits();
3140 unsigned MaxEltSize = 0;
3143 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3144 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3145 MaxEltSize = std::max(EltSize0, EltSize1);
3148 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3149 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3152 if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
3154 MaxEltSize = DstEltSize / 2;
3155 }
else if (Opcode == Instruction::Mul &&
3168 getScalarSizeWithOverride(
isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);
3172 if (MaxEltSize * 2 > DstEltSize)
3190 if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(
DL, Src)) ||
3191 (Src->isScalableTy() && !ST->hasSVE2()))
3201 if (AddUser && AddUser->getOpcode() == Instruction::Add)
3205 if (!Shr || Shr->getOpcode() != Instruction::LShr)
3209 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
3210 Src->getScalarSizeInBits() !=
3234 int ISD = TLI->InstructionOpcodeToISD(Opcode);
3238 if (
I &&
I->hasOneUser()) {
3241 if (
Type *ExtTy = isBinExtWideningInstruction(
3242 SingleUser->getOpcode(), Dst, Operands,
3243 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3256 if (isSingleExtWideningInstruction(
3257 SingleUser->getOpcode(), Dst, Operands,
3258 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3262 if (SingleUser->getOpcode() == Instruction::Add) {
3263 if (
I == SingleUser->getOperand(1) ||
3265 cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
3283 return Cost == 0 ? 0 : 1;
3287 EVT SrcTy = TLI->getValueType(
DL, Src);
3288 EVT DstTy = TLI->getValueType(
DL, Dst);
3290 if (!SrcTy.isSimple() || !DstTy.
isSimple())
3296 if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
3320 return AdjustCost(Entry->Cost);
3328 const unsigned int SVE_EXT_COST = 1;
3329 const unsigned int SVE_FCVT_COST = 1;
3330 const unsigned int SVE_UNPACK_ONCE = 4;
3331 const unsigned int SVE_UNPACK_TWICE = 16;
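  // SVE int<->fp conversion costs below are composed from these building
  // blocks: an extend, the fcvt itself, and one or two rounds of unpacking
  // when the source has more elements than the destination type can hold.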
3409 {ISD::FP_EXTEND, MVT::f64, MVT::f32, 1},
3410 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f32, 1},
3411 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 2},
3413 {ISD::FP_EXTEND, MVT::f32, MVT::f16, 1},
3414 {ISD::FP_EXTEND, MVT::f64, MVT::f16, 1},
3415 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
3416 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 2},
3417 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2},
3418 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3},
3419 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6},
3421 {ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1},
3422 {ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2},
3423 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1},
3424 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2},
3425 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2},
3426 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3},
3427 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6},
3460 SVE_EXT_COST + SVE_FCVT_COST},
3465 SVE_EXT_COST + SVE_FCVT_COST},
3472 SVE_EXT_COST + SVE_FCVT_COST},
3476 SVE_EXT_COST + SVE_FCVT_COST},
3482 SVE_EXT_COST + SVE_FCVT_COST},
3485 SVE_EXT_COST + SVE_FCVT_COST},
3490 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3492 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3502 SVE_EXT_COST + SVE_FCVT_COST},
3507 SVE_EXT_COST + SVE_FCVT_COST},
3520 SVE_EXT_COST + SVE_FCVT_COST},
3524 SVE_EXT_COST + SVE_FCVT_COST},
3536 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3538 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3540 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3542 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3546 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3548 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3564 SVE_EXT_COST + SVE_FCVT_COST},
3569 SVE_EXT_COST + SVE_FCVT_COST},
3580 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3582 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3584 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3586 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3588 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3590 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3594 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3596 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3598 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3600 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3744 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},
3745 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
3746 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
3749 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1},
3750 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1},
3751 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 4},
3754 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
3755 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
3756 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
3759 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2},
3760 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6},
3761 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14},
3764 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
3765 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
3766 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},
3769 {ISD::BITCAST, MVT::nxv2f16, MVT::nxv2i16, 0},
3770 {ISD::BITCAST, MVT::nxv4f16, MVT::nxv4i16, 0},
3771 {ISD::BITCAST, MVT::nxv2f32, MVT::nxv2i32, 0},
3774 {ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0},
3775 {ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0},
3776 {ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0},
3799 EVT WiderTy = SrcTy.
bitsGT(DstTy) ? SrcTy : DstTy;
3802 ST->useSVEForFixedLengthVectors(WiderTy)) {
3803 std::pair<InstructionCost, MVT> LT =
3805 unsigned NumElements =
3817 return AdjustCost(Entry->Cost);
3844 if (ST->hasFullFP16())
3847 return AdjustCost(Entry->Cost);
3865 ST->isSVEorStreamingSVEAvailable() &&
3866 TLI->getTypeAction(Src->getContext(), SrcTy) ==
3868 TLI->getTypeAction(Dst->getContext(), DstTy) ==
3877 Opcode, LegalTy, Src, CCH,
CostKind,
I);
3880 return Part1 + Part2;
3887 ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))
3900 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
3913 CostKind, Index,
nullptr,
nullptr);
3917 auto DstVT = TLI->getValueType(
DL, Dst);
3918 auto SrcVT = TLI->getValueType(
DL, Src);
3923 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
3929 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
3939 case Instruction::SExt:
3944 case Instruction::ZExt:
3945 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
3958 return Opcode == Instruction::PHI ? 0 : 1;
3967 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx)
const {
3975 if (!LT.second.isVector())
3980 if (LT.second.isFixedLengthVector()) {
3981 unsigned Width = LT.second.getVectorNumElements();
3982 Index = Index % Width;
  auto ExtractCanFuseWithFmul = [&]() {
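    // Check whether every user of the extracted scalar is a scalar fmul that
    // could instead use the indexed (by-element) vector FMUL form, which would
    // make the extract effectively free.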
    auto IsAllowedScalarTy = [&](const Type *T) {
      return T->isFloatTy() || T->isDoubleTy() ||
             (T->isHalfTy() && ST->hasFullFP16());

    auto IsUserFMulScalarTy = [](const Value *EEUser) {
      return BO && BO->getOpcode() == BinaryOperator::FMul &&
             !BO->getType()->isVectorTy();

    auto IsExtractLaneEquivalentToZero = [&](unsigned Idx, unsigned EltSz) {
      return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
    DenseMap<User *, unsigned> UserToExtractIdx;
    for (auto *U : Scalar->users()) {
      if (!IsUserFMulScalarTy(U))
      UserToExtractIdx[U];
    if (UserToExtractIdx.empty())
    for (auto &[S, U, L] : ScalarUserAndIdx) {
      for (auto *U : S->users()) {
        if (UserToExtractIdx.contains(U)) {
          auto *Op0 = FMul->getOperand(0);
          auto *Op1 = FMul->getOperand(1);
          if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
            UserToExtractIdx[U] = L;
    for (auto &[U, L] : UserToExtractIdx) {
4100 return !EE->users().empty() &&
all_of(EE->users(), [&](
const User *U) {
4101 if (!IsUserFMulScalarTy(U))
4106 const auto *BO = cast<BinaryOperator>(U);
4107 const auto *OtherEE = dyn_cast<ExtractElementInst>(
4108 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
4110 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
4113 return IsExtractLaneEquivalentToZero(
4114 cast<ConstantInt>(OtherEE->getIndexOperand())
4117 OtherEE->getType()->getScalarSizeInBits());
4125 if (Opcode == Instruction::ExtractElement && (
I || Scalar) &&
4126 ExtractCanFuseWithFmul())
4131 :
ST->getVectorInsertExtractBaseCost();
4138 const Value *Op1)
const {
4142 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
4145 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index);
4151 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx)
const {
4152 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr, Scalar,
4159 unsigned Index)
const {
4160 return getVectorInstrCostHelper(
I.getOpcode(), Val,
CostKind, Index, &
I);
4166 unsigned Index)
const {
4178 : ST->getVectorInsertExtractBaseCost() + 1;
4187 if (Ty->getElementType()->isFloatingPointTy())
4190 unsigned VecInstCost =
4192 return DemandedElts.
popcount() * (Insert + Extract) * VecInstCost;
4199 if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
4200 return std::nullopt;
4201 if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
4202 return std::nullopt;
4203 if (CanUseSVE && Ty->isScalableTy() && ST->hasSVEB16B16() &&
4204 ST->isNonStreamingSVEorSME2Available())
4205 return std::nullopt;
4212 Cost += InstCost(PromotedTy);
4235 Op2Info, Args, CxtI);
4239 int ISD = TLI->InstructionOpcodeToISD(Opcode);
          Ty, CostKind, Op1Info, Op2Info, true,
          [&](Type *PromotedTy) {
    return *PromotedCost;
  if (Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {
  auto VT = TLI->getValueType(DL, Ty);
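  // Division by a uniform constant is expanded by the backend into
  // multiply-high / shift / add sequences; the costs below model that
  // expansion rather than a hardware divide.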
4327 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
4331 : (3 * AsrCost + AddCost);
4333 return MulCost + AsrCost + 2 * AddCost;
  } else if (VT.isVector()) {
4345 if (Ty->isScalableTy() && ST->hasSVE())
4346 Cost += 2 * AsrCost;
4351 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
    } else if (LT.second == MVT::v2i64) {
      return VT.getVectorNumElements() *
4363 if (Ty->isScalableTy() && ST->hasSVE())
4364 return MulCost + 2 * AddCost + 2 * AsrCost;
4365 return 2 * MulCost + AddCost + AsrCost + UsraCost;
4370 LT.second.isFixedLengthVector()) {
4380 return ExtractCost + InsertCost +
4388 auto VT = TLI->getValueType(
DL, Ty);
4404 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
4405 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
4406 LT.second == MVT::nxv16i8;
4407 bool Is128bit = LT.second.is128BitVector();
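  // UDIV/UREM by a constant is expanded via a multiply-high based sequence;
  // UREM additionally needs a multiply and subtract to recover the remainder.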
4419 (HasMULH ? 0 : ShrCost) +
4420 AddCost * 2 + ShrCost;
    return DivCost + (ISD == ISD::UREM ? MulCost + AddCost : 0);
4428 if (!VT.isVector() && VT.getSizeInBits() > 64)
4432 Opcode, Ty,
CostKind, Op1Info, Op2Info);
4434 if (TLI->isOperationLegalOrCustom(
ISD, LT.second) && ST->hasSVE()) {
4438 Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
      if (nullptr != Entry)
4453 if (LT.second.getScalarType() == MVT::i8)
4455 else if (LT.second.getScalarType() == MVT::i16)
4467 Opcode, Ty->getScalarType(),
CostKind, Op1Info, Op2Info);
4468 return (4 + DivCost) * VTy->getNumElements();
4474 -1,
nullptr,
nullptr);
4488 if (LT.second == MVT::v2i64 && ST->hasSVE())
4501 if (LT.second != MVT::v2i64)
4523 if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
4524 (Ty->isHalfTy() && ST->hasFullFP16())) &&
4533 if (!Ty->getScalarType()->isFP128Ty())
4540 if (!Ty->getScalarType()->isFP128Ty())
4541 return 2 * LT.first;
4548 if (!Ty->isVectorTy())
4564 int MaxMergeDistance = 64;
4568 return NumVectorInstToHideOverhead;
4578 unsigned Opcode1,
unsigned Opcode2)
const {
4581 if (!
Sched.hasInstrSchedModel())
4585 Sched.getSchedClassDesc(
TII->get(Opcode1).getSchedClass());
4587 Sched.getSchedClassDesc(
TII->get(Opcode2).getSchedClass());
4593 "Cannot handle variant scheduling classes without an MI");
4609 const int AmortizationCost = 20;
4617 VecPred = CurrentPred;
4625 static const auto ValidMinMaxTys = {
4626 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
4627 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
4628 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
        (ST->hasFullFP16() &&
         any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))
4638 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4639 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4640 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4641 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4642 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4643 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4644 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4645 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4646 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4647 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4648 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
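  // Selects on illegal wide vector types are scalarised; the AmortizationCost
  // factor penalises them heavily so the vectorizer avoids such types.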
4650 EVT SelCondTy = TLI->getValueType(
DL, CondTy);
4651 EVT SelValTy = TLI->getValueType(
DL, ValTy);
  if (Opcode == Instruction::FCmp) {
            ValTy, CostKind, Op1Info, Op2Info, false,
            false, [&](Type *PromotedTy) {
      return *PromotedCost;
4680 if (LT.second.getScalarType() != MVT::f64 &&
4681 LT.second.getScalarType() != MVT::f32 &&
4682 LT.second.getScalarType() != MVT::f16)
4687 unsigned Factor = 1;
4702 AArch64::FCMEQv4f32))
4714 TLI->isTypeLegal(TLI->getValueType(
DL, ValTy)) &&
4733 Op1Info, Op2Info,
I);
4739 if (ST->requiresStrictAlign()) {
4744 Options.AllowOverlappingLoads =
true;
4745 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
4750 Options.LoadSizes = {8, 4, 2, 1};
4751 Options.AllowedTailExpansions = {3, 5, 6};
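  // memcmp expansion may use overlapping loads of 8/4/2/1 bytes; odd tails of
  // 3, 5 or 6 bytes are handled via the allowed tail expansions above.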
4756 return ST->hasSVE();
  switch (MICA.getID()) {
4763 case Intrinsic::masked_scatter:
4764 case Intrinsic::masked_gather:
4766 case Intrinsic::masked_load:
4767 case Intrinsic::masked_store:
4781 if (!LT.first.isValid())
4786 if (VT->getElementType()->isIntegerTy(1))
4803 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4804 "Should be called on only load or stores.");
4806 case Instruction::Load:
4809 return ST->getGatherOverhead();
4811 case Instruction::Store:
4814 return ST->getScatterOverhead();
  unsigned Opcode = (MICA.getID() == Intrinsic::masked_gather ||
                     MICA.getID() == Intrinsic::vp_gather)
                        : Instruction::Store;
4838 if (!LT.first.isValid())
4842 if (!LT.second.isVector() ||
4844 VT->getElementType()->isIntegerTy(1))
4854 ElementCount LegalVF = LT.second.getVectorElementCount();
4857 {TTI::OK_AnyValue, TTI::OP_None},
I);
4873 EVT VT = TLI->getValueType(
DL, Ty,
true);
4875 if (VT == MVT::Other)
4880 if (!LT.first.isValid())
4890 (VTy->getElementType()->isIntegerTy(1) &&
4891 !VTy->getElementCount().isKnownMultipleOf(
4902 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
4903 LT.second.is128BitVector() && Alignment <
Align(16)) {
4909 const int AmortizationCost = 6;
4911 return LT.first * 2 * AmortizationCost;
4915 if (Ty->isPtrOrPtrVectorTy())
4920 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
4922 if (VT == MVT::v4i8)
4929 if (!
isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
4944 while (!TypeWorklist.
empty()) {
4966 bool UseMaskForCond,
bool UseMaskForGaps)
const {
4967 assert(Factor >= 2 &&
"Invalid interleave factor");
4982 if (!VecTy->
isScalableTy() && (UseMaskForCond || UseMaskForGaps))
4985 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
4986 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
4989 VecVTy->getElementCount().divideCoefficientBy(Factor));
4995 if (MinElts % Factor == 0 &&
4996 TLI->isLegalInterleavedAccessType(SubVecTy,
DL, UseScalable))
4997 return Factor * TLI->getNumInterleavedAccesses(SubVecTy,
DL, UseScalable);
5002 UseMaskForCond, UseMaskForGaps);
5009 for (auto *I : Tys) {
5010 if (!I->isVectorTy())
5021 return ST->getMaxInterleaveFactor();
5031 enum { MaxStridedLoads = 7 };
5033 int StridedLoads = 0;
5036 for (const auto BB : L->blocks()) {
5037 for (auto &I : *BB) {
5043 if (L->isLoopInvariant(PtrValue))
5048 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
5057 if (StridedLoads > MaxStridedLoads / 2)
5058 return StridedLoads;
5061 return StridedLoads;
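Note that the counting loop above gives up early: once the count passes MaxStridedLoads / 2 the exact total no longer matters to the caller, so scanning stops. The same early-cutoff pattern in isolation (a generic sketch, with the strided-load predicate stubbed out):

#include <vector>

// Count elements satisfying Pred, but stop as soon as the count crosses
// Limit / 2 -- the caller only compares the result against that bound.
template <typename T, typename PredT>
int countWithCutoff(const std::vector<T> &Items, PredT Pred, int Limit) {
  int Count = 0;
  for (const T &Item : Items)
    if (Pred(Item) && ++Count > Limit / 2)
      return Count;
  return Count;
}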
5064 int StridedLoads = countStridedLoads(L, SE);
5066 << " strided loads\n");
5082 unsigned *FinalSize) {
5086 for (auto *BB : L->getBlocks()) {
5087 for (auto &I : *BB) {
5093 if (!Cost.isValid())
5097 if (LoopCost > Budget)
5119 if (MaxTC > 0 && MaxTC <= 32)
5130 if (Blocks.size() != 2)
5152 if (!L->isInnermost() || L->getNumBlocks() > 8)
5156 if (!L->getExitBlock())
5162 bool HasParellelizableReductions =
5163 L->getNumBlocks() == 1 &&
5164 any_of(L->getHeader()->phis(),
5166 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5169 if (HasParellelizableReductions &&
5191 if (HasParellelizableReductions) {
5202 if (Header == Latch) {
5205 unsigned Width = 10;
5211 unsigned MaxInstsPerLine = 16;
5213 unsigned BestUC = 1;
5214 unsigned SizeWithBestUC = BestUC * Size;
5216 unsigned SizeWithUC = UC * Size;
5217 if (SizeWithUC > 48)
5219 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5220 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5222 SizeWithBestUC = BestUC * Size;
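The search above tries successive unroll counts and keeps the one whose unrolled size either fills whole 16-instruction lines or leaves the final line as full as possible, without exceeding 48 instructions. Pulled out as a standalone sketch (the surrounding conditions that gate this path are not reproduced):

// Hypothetical standalone version of the unroll-count search visible above.
inline unsigned pickUnrollCount(unsigned Size) {
  const unsigned MaxInstsPerLine = 16;
  const unsigned SizeLimit = 48;
  unsigned BestUC = 1;
  unsigned SizeWithBestUC = BestUC * Size;
  for (unsigned UC = 2; Size != 0 && UC * Size <= SizeLimit; ++UC) {
    unsigned SizeWithUC = UC * Size;
    if ((SizeWithUC % MaxInstsPerLine) == 0 ||
        (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
      BestUC = UC;
      SizeWithBestUC = SizeWithUC;
    }
  }
  return BestUC;
}
// e.g. Size == 10: candidates 20, 30 and 40; 30 fills the last line best
// (remainder 14 of 16), so BestUC == 3.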
5232 for (auto *BB : L->blocks()) {
5233 for (auto &I : *BB) {
5243 for (auto *U : I.users())
5245 LoadedValuesPlus.insert(U);
5252 return LoadedValuesPlus.contains(SI->getOperand(0));
5265 if (!Term || !Term->isConditional() || Preds.size() == 1 ||
5279 auto *I = dyn_cast<Instruction>(V);
5280 return I && DependsOnLoopLoad(I, Depth + 1);
5287 DependsOnLoopLoad(I, 0)) {
5303 if (L->getLoopDepth() > 1)
5314 for (auto *BB : L->getBlocks()) {
5315 for (auto &I : *BB) {
5319 if (IsVectorized && I.getType()->isVectorTy())
5336 if (ST->isAppleMLike())
5338 else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
5360 !ST->getSchedModel().isOutOfOrder()) {
5383 bool CanCreate) const {
5387 case Intrinsic::aarch64_neon_st2:
5388 case Intrinsic::aarch64_neon_st3:
5389 case Intrinsic::aarch64_neon_st4: {
5392 if (!CanCreate || !ST)
5394 unsigned NumElts = Inst->arg_size() - 1;
5395 if (ST->getNumElements() != NumElts)
5397 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5403 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5405 Res = Builder.CreateInsertValue(Res, L, i);
5409 case Intrinsic::aarch64_neon_ld2:
5410 case Intrinsic::aarch64_neon_ld3:
5411 case Intrinsic::aarch64_neon_ld4:
5412 if (Inst->getType() == ExpectedType)
5423 case Intrinsic::aarch64_neon_ld2:
5424 case Intrinsic::aarch64_neon_ld3:
5425 case Intrinsic::aarch64_neon_ld4:
5426 Info.ReadMem = true;
5427 Info.WriteMem = false;
5430 case Intrinsic::aarch64_neon_st2:
5431 case Intrinsic::aarch64_neon_st3:
5432 case Intrinsic::aarch64_neon_st4:
5433 Info.ReadMem = false;
5434 Info.WriteMem = true;
5442 case Intrinsic::aarch64_neon_ld2:
5443 case Intrinsic::aarch64_neon_st2:
5444 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5446 case Intrinsic::aarch64_neon_ld3:
5447 case Intrinsic::aarch64_neon_st3:
5448 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5450 case Intrinsic::aarch64_neon_ld4:
5451 case Intrinsic::aarch64_neon_st4:
5452 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
5464 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
5465 bool Considerable = false;
5466 AllowPromotionWithoutCommonHeader = false;
5469 Type *ConsideredSExtType =
5471 if (I.getType() != ConsideredSExtType)
5475 for (const User *U : I.users()) {
5477 Considerable = true;
5481 if (GEPInst->getNumOperands() > 2) {
5482 AllowPromotionWithoutCommonHeader = true;
5487 return Considerable;
5535 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5545 return LegalizationCost + 2;
5555 LegalizationCost *= LT.first - 1;
5558 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5567 return LegalizationCost + 2;
5575 std::optional<FastMathFlags> FMF,
5591 return BaseCost + FixedVTy->getNumElements();
5594 if (Opcode != Instruction::FAdd)
5608 MVT MTy = LT.second;
5609 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5657 MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5658 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5660 if (ValTy->getElementCount().getFixedValue() >= 2 && NElts >= 2 &&
5670 return (LT.first - 1) + Log2_32(NElts);
5675 return (LT.first - 1) + Entry->Cost;
5687 if (LT.first != 1) {
5693 ExtraCost *= LT.first - 1;
5696 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5697 return Cost + ExtraCost;
5705 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *VecTy,
5707 EVT VecVT = TLI->getValueType(DL, VecTy);
5708 EVT ResVT = TLI->getValueType(DL, ResTy);
5718 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5720 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5722 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5724 return (LT.first - 1) * 2 + 2;
5735 EVT VecVT = TLI->getValueType(DL, VecTy);
5736 EVT ResVT = TLI->getValueType(DL, ResTy);
5739 RedOpcode == Instruction::Add) {
5745 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5747 return LT.first + 2;
5782 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5783 ? TLI->getPromotedVTForPredicate(EVT(LT.second))
5797 if (LT.second.getScalarType() == MVT::i1) {
5806 assert(Entry && "Illegal Type for Splice");
5807 LegalizationCost += Entry->Cost;
5808 return LegalizationCost * LT.first;
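As in several of the cost functions above, the final figure scales with LT.first, the number of legal registers the type is split into during legalization. A toy model of that scaling, assuming 128-bit legal vectors purely for illustration:

// Hypothetical split-count model: how many 128-bit pieces a non-empty
// fixed-width vector breaks into, and how a per-piece cost scales with it.
inline unsigned numLegalParts(unsigned NumElts, unsigned EltBits) {
  unsigned TotalBits = NumElts * EltBits;
  return (TotalBits + 127) / 128;
}

inline unsigned scaledCost(unsigned PerPartCost, unsigned NumElts,
                           unsigned EltBits) {
  return PerPartCost * numLegalParts(NumElts, EltBits);
}
// e.g. an 8 x i64 vector (512 bits) splits into 4 parts, so a per-part
// cost of 1 becomes 4 -- the same shape as the LT.first factors above.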
5812 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
5821 if (VF.isFixed() && !ST->isSVEorStreamingSVEAvailable() &&
5822 (!ST->isNeonAvailable() || !ST->hasDotProd()))
5825 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub) ||
5830 (!BinOp || (OpBExtend != TTI::PR_None && InputTypeB)) &&
5831 "Unexpected values for OpBExtend or InputTypeB");
5835 if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB))
5838 bool IsUSDot = OpBExtend != TTI::PR_None && OpAExtend != OpBExtend;
5839 if (IsUSDot && !ST->hasMatMulInt8())
5851 auto TC = TLI->getTypeConversion(AccumVectorType->getContext(),
5860 if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) !=
5866 std::pair<InstructionCost, MVT> AccumLT =
5868 std::pair<InstructionCost, MVT> InputLT =
5881 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
5883 if (AccumLT.second.getScalarType() == MVT::i64 &&
5884 InputLT.second.getScalarType() == MVT::i16)
5887 if (AccumLT.second.getScalarType() == MVT::i64 &&
5888 InputLT.second.getScalarType() == MVT::i8)
5898 if (ST->isSVEorStreamingSVEAvailable() ||
5899 (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() &&
5900 ST->hasDotProd())) {
5901 if (AccumLT.second.getScalarType() == MVT::i32 &&
5902 InputLT.second.getScalarType() == MVT::i8)
5918 "Expected the Mask to match the return size if given");
5920 "Expected the same scalar types");
5926 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
5927 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
5928 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
5936 return std::max<InstructionCost>(1, LT.first / 4);
5944 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
5946 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
5949 unsigned TpNumElts = Mask.size();
5950 unsigned LTNumElts = LT.second.getVectorNumElements();
5951 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
5953 LT.second.getVectorElementCount());
5955 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>, InstructionCost>
5957 for (unsigned N = 0; N < NumVecs; N++) {
5961 unsigned Source1 = -1U, Source2 = -1U;
5962 unsigned NumSources = 0;
5963 for (unsigned E = 0; E < LTNumElts; E++) {
5964 int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
5973 unsigned Source = MaskElt / LTNumElts;
5974 if (NumSources == 0) {
5977 } else if (NumSources == 1 && Source != Source1) {
5980 } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
5986 if (Source == Source1)
5988 else if (Source == Source2)
5989 NMask.push_back(MaskElt % LTNumElts + LTNumElts);
5998 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
6009 NTp, NTp, NMask, CostKind, 0, nullptr, Args,
6012 Result.first->second = NCost;
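The loop above splits an over-wide shuffle into legal-width chunks: each run of LTNumElts mask elements is rewritten against the (at most two) source sub-vectors it actually reads, and the per-chunk cost is cached by (Source1, Source2, sub-mask). A compact sketch of just the per-chunk remapping, with costing and caching omitted:

#include <vector>

// Rewrite one chunk of a wide shuffle mask so it indexes the concatenation
// of at most two legal-width source sub-vectors. Returns false if the chunk
// touches more than two sub-vectors.
inline bool remapMaskChunk(const std::vector<int> &Mask, unsigned ChunkIdx,
                           unsigned LTNumElts, std::vector<int> &NMask,
                           unsigned &Source1, unsigned &Source2) {
  unsigned NumSources = 0;
  Source1 = Source2 = ~0u;
  NMask.clear();
  for (unsigned E = 0; E != LTNumElts; ++E) {
    unsigned Idx = ChunkIdx * LTNumElts + E;
    int MaskElt = Idx < Mask.size() ? Mask[Idx] : -1;
    if (MaskElt < 0) {                       // undef lane, keep as undef
      NMask.push_back(-1);
      continue;
    }
    unsigned Source = unsigned(MaskElt) / LTNumElts; // sub-vector this lane reads
    if (NumSources == 0) {
      Source1 = Source;
      NumSources = 1;
    } else if (NumSources == 1 && Source != Source1) {
      Source2 = Source;
      NumSources = 2;
    } else if (Source != Source1 && Source != Source2) {
      return false;                          // needs more than two inputs
    }
    NMask.push_back(int(unsigned(MaskElt) % LTNumElts) +
                    (Source == Source1 ? 0 : int(LTNumElts)));
  }
  return true;
}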
6026 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
6027 if (LT.second.getFixedSizeInBits() >= 128 &&
6029 LT.second.getVectorNumElements() / 2) {
6032 if (Index == (int)LT.second.getVectorNumElements() / 2)
6046 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6049 return M.value() < 0 || M.value() == (int)M.index();
6055 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6056 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6065 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6066 ST->isSVEorStreamingSVEAvailable() &&
6071 if (ST->isSVEorStreamingSVEAvailable() &&
6085 if (IsLoad && LT.second.isVector() &&
6087 LT.second.getVectorElementCount()))
6093 if (Mask.size() == 4 &&
6095 (SrcTy->getScalarSizeInBits() == 16 ||
6096 SrcTy->getScalarSizeInBits() == 32) &&
6097 all_of(Mask, [](
int E) {
return E < 8; }))
6103 if (LT.second.isFixedLengthVector() &&
6104 LT.second.getVectorNumElements() == Mask.size() &&
6106 (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6107 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6108 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6109 LT.second.getVectorNumElements(), 16) ||
6110 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6111 LT.second.getVectorNumElements(), 32) ||
6112 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6113 LT.second.getVectorNumElements(), 64) ||
6116 [&Mask](int M) { return M < 0 || M == Mask[0]; })))
6245 return LT.first * Entry->Cost;
6254 LT.second.getSizeInBits() <= 128 && SubTp) {
6256 if (SubLT.second.isVector()) {
6257 int NumElts = LT.second.getVectorNumElements();
6258 int NumSubElts = SubLT.second.getVectorNumElements();
6259 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6265 if (IsExtractSubvector)
6282 if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
6301 return ST->useFixedOverScalableIfEqualCost();
6305 return ST->getEpilogueVectorizationMinVF();
6340 unsigned NumInsns = 0;
6342 NumInsns += BB->sizeWithoutDebug();
6352 int64_t Scale, unsigned AddrSpace) const {
6380 if (I->getOpcode() == Instruction::Or &&
6385 if (I->getOpcode() == Instruction::Add ||
6386 I->getOpcode() == Instruction::Sub)
6411 return all_equal(Shuf->getShuffleMask());
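A shuffle counts as a splat here when every element of its mask is identical, i.e. the instruction broadcasts one source lane. For reference, a freestanding version of that check (equivalent in spirit to the all_equal call above):

#include <algorithm>
#include <functional>
#include <vector>

// True if all mask elements are equal (an empty mask is trivially a splat),
// meaning the shuffle replicates a single source lane.
inline bool isSplatMask(const std::vector<int> &Mask) {
  return std::adjacent_find(Mask.begin(), Mask.end(),
                            std::not_equal_to<int>()) == Mask.end();
}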
6418 bool AllowSplat = false) {
6423 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
6424 auto *FullTy = FullV->getType();
6425 auto *HalfTy = HalfV->getType();
6427 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6430 auto extractHalf = [](Value *FullV, Value *HalfV) {
6433 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6437 Value *S1Op1 = nullptr, *S2Op1 = nullptr;
6451 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6452 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6466 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6467 (M2Start != 0 && M2Start != (NumElements / 2)))
6469 if (S1Op1 && S2Op1 && M1Start != M2Start)
6479 return Ext->getType()->getScalarSizeInBits() ==
6480 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6494 Value *VectorOperand = nullptr;
6511 if (!GEP || GEP->getNumOperands() != 2)
6515 Value *Offsets = GEP->getOperand(1);
6518 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6524 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6525 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6526 Ops.push_back(&GEP->getOperandUse(1));
6560 switch (II->getIntrinsicID()) {
6561 case Intrinsic::aarch64_neon_smull:
6562 case Intrinsic::aarch64_neon_umull:
6565 Ops.push_back(&II->getOperandUse(0));
6566 Ops.push_back(&II->getOperandUse(1));
6571 case Intrinsic::fma:
6572 case Intrinsic::fmuladd:
6578 case Intrinsic::aarch64_neon_sqdmull:
6579 case Intrinsic::aarch64_neon_sqdmulh:
6580 case Intrinsic::aarch64_neon_sqrdmulh:
6583 Ops.push_back(&II->getOperandUse(0));
6585 Ops.push_back(&II->getOperandUse(1));
6586 return !Ops.empty();
6587 case Intrinsic::aarch64_neon_fmlal:
6588 case Intrinsic::aarch64_neon_fmlal2:
6589 case Intrinsic::aarch64_neon_fmlsl:
6590 case Intrinsic::aarch64_neon_fmlsl2:
6593 Ops.push_back(&II->getOperandUse(1));
6595 Ops.push_back(&II->getOperandUse(2));
6596 return !Ops.empty();
6597 case Intrinsic::aarch64_sve_ptest_first:
6598 case Intrinsic::aarch64_sve_ptest_last:
6600 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6601 Ops.push_back(&II->getOperandUse(0));
6602 return !Ops.empty();
6603 case Intrinsic::aarch64_sme_write_horiz:
6604 case Intrinsic::aarch64_sme_write_vert:
6605 case Intrinsic::aarch64_sme_writeq_horiz:
6606 case Intrinsic::aarch64_sme_writeq_vert: {
6608 if (!Idx || Idx->getOpcode() != Instruction::Add)
6610 Ops.push_back(&II->getOperandUse(1));
6613 case Intrinsic::aarch64_sme_read_horiz:
6614 case Intrinsic::aarch64_sme_read_vert:
6615 case Intrinsic::aarch64_sme_readq_horiz:
6616 case Intrinsic::aarch64_sme_readq_vert:
6617 case Intrinsic::aarch64_sme_ld1b_vert:
6618 case Intrinsic::aarch64_sme_ld1h_vert:
6619 case Intrinsic::aarch64_sme_ld1w_vert:
6620 case Intrinsic::aarch64_sme_ld1d_vert:
6621 case Intrinsic::aarch64_sme_ld1q_vert:
6622 case Intrinsic::aarch64_sme_st1b_vert:
6623 case Intrinsic::aarch64_sme_st1h_vert:
6624 case Intrinsic::aarch64_sme_st1w_vert:
6625 case Intrinsic::aarch64_sme_st1d_vert:
6626 case Intrinsic::aarch64_sme_st1q_vert:
6627 case Intrinsic::aarch64_sme_ld1b_horiz:
6628 case Intrinsic::aarch64_sme_ld1h_horiz:
6629 case Intrinsic::aarch64_sme_ld1w_horiz:
6630 case Intrinsic::aarch64_sme_ld1d_horiz:
6631 case Intrinsic::aarch64_sme_ld1q_horiz:
6632 case Intrinsic::aarch64_sme_st1b_horiz:
6633 case Intrinsic::aarch64_sme_st1h_horiz:
6634 case Intrinsic::aarch64_sme_st1w_horiz:
6635 case Intrinsic::aarch64_sme_st1d_horiz:
6636 case Intrinsic::aarch64_sme_st1q_horiz: {
6638 if (!Idx || Idx->getOpcode() != Instruction::Add)
6640 Ops.push_back(&II->getOperandUse(3));
6643 case Intrinsic::aarch64_neon_pmull:
6646 Ops.push_back(&II->getOperandUse(0));
6647 Ops.push_back(&II->getOperandUse(1));
6649 case Intrinsic::aarch64_neon_pmull64:
6651 II->getArgOperand(1)))
6653 Ops.push_back(&II->getArgOperandUse(0));
6654 Ops.push_back(&II->getArgOperandUse(1));
6656 case Intrinsic::masked_gather:
6659 Ops.push_back(&II->getArgOperandUse(0));
6661 case Intrinsic::masked_scatter:
6664 Ops.push_back(&II->getArgOperandUse(1));
6671 auto ShouldSinkCondition = [](Value *Cond,
6676 if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6680 Ops.push_back(&II->getOperandUse(0));
6684 switch (I->getOpcode()) {
6685 case Instruction::GetElementPtr:
6686 case Instruction::Add:
6687 case Instruction::Sub:
6689 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
6691 Ops.push_back(&I->getOperandUse(Op));
6696 case Instruction::Select: {
6697 if (!ShouldSinkCondition(I->getOperand(0), Ops))
6700 Ops.push_back(&I->getOperandUse(0));
6703 case Instruction::Br: {
6710 Ops.push_back(&I->getOperandUse(0));
6717 if (!I->getType()->isVectorTy())
6720 switch (I->getOpcode()) {
6721 case Instruction::Sub:
6722 case Instruction::Add: {
6731 Ops.push_back(&Ext1->getOperandUse(0));
6732 Ops.push_back(&Ext2->getOperandUse(0));
6735 Ops.push_back(&I->getOperandUse(0));
6736 Ops.push_back(&I->getOperandUse(1));
6740 case Instruction::Or: {
6743 if (ST->hasNEON()) {
6757 if (I->getParent() != MainAnd->getParent() ||
6762 if (I->getParent() != IA->getParent() ||
6763 I->getParent() != IB->getParent())
6768 Ops.push_back(&I->getOperandUse(0));
6769 Ops.push_back(&I->getOperandUse(1));
6778 case Instruction::Mul: {
6779 auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
6782 if (Ty->isScalableTy())
6786 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
6789 int NumZExts = 0, NumSExts = 0;
6790 for (auto &Op : I->operands()) {
6797 auto *ExtOp = Ext->getOperand(0);
6798 if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
6799 Ops.push_back(&Ext->getOperandUse(0));
6807 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
6808 I->getType()->getScalarSizeInBits())
6845 if (!ElementConstant || !ElementConstant->isZero())
6848 unsigned Opcode = OperandInstr->getOpcode();
6849 if (Opcode == Instruction::SExt)
6851 else if (Opcode == Instruction::ZExt)
6856 unsigned Bitwidth = I->getType()->getScalarSizeInBits();
6866 Ops.push_back(&Insert->getOperandUse(1));
6872 if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
6876 if (!ShouldSinkSplatForIndexedVariant(I))
6881 Ops.push_back(&I->getOperandUse(0));
6883 Ops.push_back(&I->getOperandUse(1));
6885 return !Ops.empty();
6887 case Instruction::FMul: {
6889 if (I->getType()->isScalableTy())
6898 Ops.push_back(&I->getOperandUse(0));
6900 Ops.push_back(&I->getOperandUse(1));
6901 return !Ops.empty();
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static unsigned getNumElements(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the architecture features are not...
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
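computeKnownBits and MaskedValueIsZero are the usual entry points into value tracking. A minimal sketch, assuming V has integer or integer-vector type so its scalar size is non-zero; lowByteIsKnownZero is a hypothetical helper:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/KnownBits.h"

static bool lowByteIsKnownZero(const llvm::Value *V, const llvm::DataLayout &DL) {
  unsigned BitWidth = V->getType()->getScalarSizeInBits();
  llvm::KnownBits Known(BitWidth);
  llvm::computeKnownBits(V, Known, DL);
  // MaskedValueIsZero(V, APInt::getLowBitsSet(BitWidth, 8), SQ) would answer
  // the same question through a SimplifyQuery.
  return Known.countMinTrailingZeros() >= 8;
}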
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
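isREVMask, isZIPMask and isUZPMask classify fixed shuffle masks as the corresponding AArch64 permute instructions. A minimal sketch, assuming these predicates are visible through the backend-local AArch64PerfectShuffle.h header alongside getPerfectShuffleCost; the element and block sizes shown are illustrative:

#include "AArch64PerfectShuffle.h" // backend-local header (assumed location)
#include "llvm/ADT/ArrayRef.h"

static const char *classifyMask(llvm::ArrayRef<int> Mask, unsigned NumElts) {
  unsigned WhichResult, OperandOrder;
  if (llvm::isZIPMask(Mask, NumElts, WhichResult, OperandOrder))
    return WhichResult == 0 ? "zip1" : "zip2";
  if (llvm::isUZPMask(Mask, NumElts, WhichResult))
    return WhichResult == 0 ? "uzp1" : "uzp2";
  // e.g. REV64 of 8-bit lanes: EltSize = 8, BlockSize = 64.
  if (llvm::isREVMask(Mask, /*EltSize=*/8, NumElts, /*BlockSize=*/64))
    return "rev64";
  return "unknown";
}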
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
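The enumerators above belong to llvm::RecurKind (llvm/Analysis/IVDescriptors.h), which names the kind of reduction a loop performs. A minimal sketch of dispatching on it; the predicate itself is hypothetical:

#include "llvm/Analysis/IVDescriptors.h"

static bool isIntegerMinMaxRecurrence(llvm::RecurKind Kind) {
  switch (Kind) {
  case llvm::RecurKind::SMin:
  case llvm::RecurKind::SMax:
  case llvm::RecurKind::UMin:
  case llvm::RecurKind::UMax:
    return true;
  default:
    return false;
  }
}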
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
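isa<>, cast<> and dyn_cast<> (llvm/Support/Casting.h) cover the three common cases: query only, checked cast, and query plus cast. A minimal sketch over loads and stores; the helper name is hypothetical:

#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

static llvm::Value *interestingOperand(llvm::Value *V) {
  // dyn_cast<> gives a typed pointer or null.
  if (auto *SI = llvm::dyn_cast<llvm::StoreInst>(V))
    return SI->getValueOperand();
  // cast<> asserts the type is right, so pair it with an isa<> check.
  if (llvm::isa<llvm::LoadInst>(V))
    return llvm::cast<llvm::LoadInst>(V)->getPointerOperand();
  return nullptr;
}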
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
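getLoadStoreType and getLoadStorePointerOperand avoid branching on LoadInst vs StoreInst by hand. A minimal sketch; the 128-bit threshold is illustrative only and the helper name is hypothetical:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"

static bool isWideAccess(const llvm::Instruction *I, const llvm::DataLayout &DL) {
  // A null pointer operand means I is neither a load nor a store.
  if (!llvm::getLoadStorePointerOperand(I))
    return false;
  llvm::Type *AccessTy = llvm::getLoadStoreType(I);
  return DL.getTypeSizeInBits(AccessTy).getKnownMinValue() > 128;
}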
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
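The MathExtras helpers referenced above (isPowerOf2_32/64, Log2_32, NextPowerOf2, isInt) are small bit-twiddling utilities. A minimal sketch; both helper names are hypothetical:

#include <cstdint>
#include "llvm/Support/MathExtras.h"

static uint64_t roundUpToPowerOf2(uint64_t N) {
  if (N != 0 && llvm::isPowerOf2_64(N))
    return N;                   // already a power of two > 0
  return llvm::NextPowerOf2(N); // strictly greater power of two
}

static unsigned exactLog2OrZero(uint32_t N) {
  // Log2_32 is the floor log2, so it is exact only for powers of two.
  return llvm::isPowerOf2_32(N) ? llvm::Log2_32(N) : 0;
}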
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
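The SVEIntrinsicInfo setters above return a reference to the object, so descriptions can be built in a fluent style, presumably as the default* factory helpers do. A minimal sketch of an assumed, purely illustrative description, not tied to any particular intrinsic handled in this file:

static SVEIntrinsicInfo describeHypotheticalMergingAdd() {
  // Assumes SVEIntrinsicInfo is default-constructible, as the factories imply.
  return SVEIntrinsicInfo()
      .setGoverningPredicateOperandIdx(0)     // operand 0 is the governing predicate
      .setOperandIdxInactiveLanesTakenFrom(1) // inactive lanes pass through operand 1
      .setMatchingIROpcode(Instruction::Add); // folds onto plain IR 'add' when safe
}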
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
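The EVT queries above are typically combined to decide how an IR type maps onto machine registers. A minimal sketch, assuming a plain IR Type* is available; the 128-bit Q-register threshold and the helper name are illustrative:

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Type.h"

static bool fitsInOneQRegister(llvm::Type *Ty) {
  llvm::EVT VT = llvm::EVT::getEVT(Ty, /*HandleUnknown=*/true);
  if (!VT.isSimple() || VT.isScalableVector())
    return false;
  // Fixed-length vectors of at most 128 bits fit in a single NEON register.
  return VT.isFixedLengthVector() && VT.getSizeInBits().getFixedValue() <= 128;
}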
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...