23#include "llvm/IR/IntrinsicsAArch64.h"
35#define DEBUG_TYPE "aarch64tti"
41 "sve-prefer-fixed-over-scalable-if-equal",
cl::Hidden);
59 "Penalty of calling a function that requires a change to PSTATE.SM"));
63 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
74 cl::desc(
"The cost of a histcnt instruction"));
78 cl::desc(
"The number of instructions to search for a redundant dmb"));
82 cl::desc(
"Threshold for forced unrolling of small loops in AArch64"));
class TailFoldingOption {

  bool NeedsDefault = true;

  void setNeedsDefault(bool V) { NeedsDefault = V; }

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
    Bits &= ~DisableBits;
    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              " (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";
  void operator=(const std::string &Val) {

    setNeedsDefault(false);

    StringRef(Val).split(TailFoldTypes, '+', -1, false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);

      setInitialBits(TailFoldingOpts::Disabled);
    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
  StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
  FeatureStr.split(Features, ",");

  return F.hasFnAttribute("fmv-features");

      AArch64::FeatureExecuteOnly,

  FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
  FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;

  return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
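  // The XOR above flips the polarity of the features listed in
  // InlineInverseFeatures in both bitsets, so the single subset test
  // (callee bits contained in caller bits) covers both "caller must have X"
  // and "caller must not have X" style requirements. This reading is
  // inferred from the surrounding code rather than stated elsewhere.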
  auto FVTy = dyn_cast<FixedVectorType>(Ty);
         FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;

                                            unsigned DefaultCallPenalty) const {

  if (F == Call.getCaller())

  return DefaultCallPenalty;

         ST->isSVEorStreamingSVEAvailable() &&
         !ST->disableMaximizeScalableBandwidth();
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {

  return std::max<InstructionCost>(1, Cost);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  unsigned ImmIdx = ~0U;
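  // Note on the cost model above: the immediate is sign-extended to a
  // multiple of 64 bits and each 64-bit chunk is priced by the loop, which
  // roughly matches how large constants are materialised piecewise on
  // AArch64. ImmIdx (declared just above) records which operand index of the
  // user instruction may absorb the immediate for free; ~0U means none yet.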
  case Instruction::GetElementPtr:

  case Instruction::Store:

  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:

  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:

  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:

    int NumConstants = (BitSize + 63) / 64;
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)

  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:

    int NumConstants = (BitSize + 63) / 64;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  if (TyWidth == 32 || TyWidth == 64)

  unsigned TotalHistCnts = 1;

  unsigned EC = VTy->getElementCount().getKnownMinValue();

  unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;

  if (EC == 2 || (LegalEltSize == 32 && EC == 4))

  TotalHistCnts = EC / NaturalVectorWidth;
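  // Cost-model note (inferred from the computation above): a histogram on a
  // vector wider than one legal register is priced as TotalHistCnts
  // independent histcnt operations, where TotalHistCnts is the element count
  // divided by the natural per-register width for the (promoted) element
  // size, and each part uses the histcnt cost configured near the top of the
  // file.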
  switch (ICA.getID()) {
  case Intrinsic::experimental_vector_histogram_add: {

  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
                                        MVT::v8i16, MVT::v2i32, MVT::v4i32,
                                        MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,

    if (LT.second == MVT::v2i64)
    if (any_of(ValidMinMaxTys, [&](MVT M) { return M == LT.second; }))

  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,

        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&](MVT M) { return M == LT.second; }))
      return LT.first * Instrs;

    if (ST->isSVEAvailable() && VectorSize >= 128 && isPowerOf2_64(VectorSize))
      return LT.first * Instrs;
  case Intrinsic::abs: {
    static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,

    if (any_of(ValidAbsTys, [&](MVT M) { return M == LT.second; }))

  case Intrinsic::bswap: {
    static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
                                     MVT::v4i32, MVT::v2i64};

    if (any_of(ValidAbsTys, [&](MVT M) { return M == LT.second; }) &&
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
  case Intrinsic::fmuladd: {

        (EltTy->isHalfTy() && ST->hasFullFP16()))

  case Intrinsic::stepvector: {

    Cost += AddCost * (LT.first - 1);

  case Intrinsic::vector_extract:
  case Intrinsic::vector_insert: {

    bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
    EVT SubVecVT = IsExtract ? getTLI()->getValueType(DL, RetTy)

        getTLI()->getTypeConversion(C, SubVecVT);

        getTLI()->getTypeConversion(C, VecVT);
  case Intrinsic::bitreverse: {
        {Intrinsic::bitreverse, MVT::i32, 1},
        {Intrinsic::bitreverse, MVT::i64, 1},
        {Intrinsic::bitreverse, MVT::v8i8, 1},
        {Intrinsic::bitreverse, MVT::v16i8, 1},
        {Intrinsic::bitreverse, MVT::v4i16, 2},
        {Intrinsic::bitreverse, MVT::v8i16, 2},
        {Intrinsic::bitreverse, MVT::v2i32, 2},
        {Intrinsic::bitreverse, MVT::v4i32, 2},
        {Intrinsic::bitreverse, MVT::v1i64, 2},
        {Intrinsic::bitreverse, MVT::v2i64, 2},

      if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||
          TLI->getValueType(DL, RetTy, true) == MVT::i16)
        return LegalisationCost.first * Entry->Cost + 1;

      return LegalisationCost.first * Entry->Cost;
  case Intrinsic::ctpop: {
    if (!ST->hasNEON()) {

          RetTy->getScalarSizeInBits()

    return LT.first * Entry->Cost + ExtraCost;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
        {Intrinsic::sadd_with_overflow, MVT::i8, 3},
        {Intrinsic::uadd_with_overflow, MVT::i8, 3},
        {Intrinsic::sadd_with_overflow, MVT::i16, 3},
        {Intrinsic::uadd_with_overflow, MVT::i16, 3},
        {Intrinsic::sadd_with_overflow, MVT::i32, 1},
        {Intrinsic::uadd_with_overflow, MVT::i32, 1},
        {Intrinsic::sadd_with_overflow, MVT::i64, 1},
        {Intrinsic::uadd_with_overflow, MVT::i64, 1},
        {Intrinsic::ssub_with_overflow, MVT::i8, 3},
        {Intrinsic::usub_with_overflow, MVT::i8, 3},
        {Intrinsic::ssub_with_overflow, MVT::i16, 3},
        {Intrinsic::usub_with_overflow, MVT::i16, 3},
        {Intrinsic::ssub_with_overflow, MVT::i32, 1},
        {Intrinsic::usub_with_overflow, MVT::i32, 1},
        {Intrinsic::ssub_with_overflow, MVT::i64, 1},
        {Intrinsic::usub_with_overflow, MVT::i64, 1},
        {Intrinsic::smul_with_overflow, MVT::i8, 5},
        {Intrinsic::umul_with_overflow, MVT::i8, 4},
        {Intrinsic::smul_with_overflow, MVT::i16, 5},
        {Intrinsic::umul_with_overflow, MVT::i16, 4},
        {Intrinsic::smul_with_overflow, MVT::i32, 2},
        {Intrinsic::umul_with_overflow, MVT::i32, 2},
        {Intrinsic::smul_with_overflow, MVT::i64, 3},
        {Intrinsic::umul_with_overflow, MVT::i64, 3},

    EVT MTy = TLI->getValueType(DL, RetTy->getContainedType(0), true);
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {

    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;

    EVT MTy = TLI->getValueType(DL, RetTy);

    if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
         LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
         LT.second == MVT::v2f64)) {
          (LT.second == MVT::f64 && MTy == MVT::i32) ||
          (LT.second == MVT::f32 && MTy == MVT::i64)))

    if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())

    if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
        (LT.second == MVT::f16 && MTy == MVT::i64) ||
        ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&

    if ((LT.second.getScalarType() == MVT::f32 ||
         LT.second.getScalarType() == MVT::f64 ||
         LT.second.getScalarType() == MVT::f16) &&
      if (LT.second.isVector())
          LegalTy, {LegalTy, LegalTy});
          LegalTy, {LegalTy, LegalTy});
      return LT.first * Cost +
             ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0

    RetTy = RetTy->getScalarType();
    if (LT.second.isVector()) {

    return LT.first * Cost;
  case Intrinsic::fshl:
  case Intrinsic::fshr: {

    if (RetTy->isIntegerTy() && ICA.getArgs()[0] == ICA.getArgs()[1] &&
        (RetTy->getPrimitiveSizeInBits() == 32 ||
         RetTy->getPrimitiveSizeInBits() == 64)) {

        {Intrinsic::fshl, MVT::v4i32, 2},
        {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
        {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
        {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};

      return LegalisationCost.first * Entry->Cost;

    if (!RetTy->isIntegerTy())

    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
                       RetTy->getScalarSizeInBits() < 64) ||
                      (RetTy->getScalarSizeInBits() % 64 != 0);
    unsigned ExtraCost = HigherCost ? 1 : 0;
    if (RetTy->getScalarSizeInBits() == 32 ||
        RetTy->getScalarSizeInBits() == 64)

    return TyL.first + ExtraCost;
  case Intrinsic::get_active_lane_mask: {
    EVT RetVT = getTLI()->getValueType(DL, RetTy);

    if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))

    if (RetTy->isScalableTy()) {
      if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=

      if (ST->hasSVE2p1() || ST->hasSME2()) {

      return Cost + (SplitCost * (Cost - 1));

  case Intrinsic::experimental_vector_match: {
    unsigned SearchSize = NeedleTy->getNumElements();
    if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {

  case Intrinsic::experimental_cttz_elts: {
    if (!getTLI()->shouldExpandCttzElements(ArgVT)) {

  case Intrinsic::experimental_vector_extract_last_active:
    if (ST->isSVEorStreamingSVEAvailable()) {
  auto RequiredType = II.getType();

  assert(PN && "Expected Phi Node!");

  if (!PN->hasOneUse())
    return std::nullopt;

  for (Value *IncValPhi : PN->incoming_values()) {
        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())
      return std::nullopt;

  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
    return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();

    return GoverningPredicateIdx;

    GoverningPredicateIdx = Index;

    return UndefIntrinsic;

    UndefIntrinsic = IID;

    return ResultLanes == InactiveLanesTakenFromOperand;

    return OperandIdxForInactiveLanes;

    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesTakenFromOperand;
    OperandIdxForInactiveLanes = Index;

    return ResultLanes == InactiveLanesAreNotDefined;

    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesAreNotDefined;

    return ResultLanes == InactiveLanesAreUnused;

    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesAreUnused;

    ResultIsZeroInitialized = true;

    return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();

    return OperandIdxWithNoActiveLanes;

    OperandIdxWithNoActiveLanes = Index;

  unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();

  unsigned IROpcode = 0;

  enum PredicationStyle {
    InactiveLanesTakenFromOperand,
    InactiveLanesAreNotDefined,
    InactiveLanesAreUnused

  bool ResultIsZeroInitialized = false;
  unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
  unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
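  // Summary of the three predication policies modelled above (a paraphrase of
  // the enum and its accessors, not taken from separate documentation):
  //  - InactiveLanesTakenFromOperand: inactive result lanes are copied from
  //    the operand recorded in OperandIdxForInactiveLanes (merging forms).
  //  - InactiveLanesAreNotDefined: inactive lanes hold an unspecified value.
  //  - InactiveLanesAreUnused: callers never read the inactive lanes, so the
  //    combiner is free to pick the cheapest instruction form.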
  return !isa<ScalableVectorType>(V->getType());
1324 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1325 case Intrinsic::aarch64_sve_fcvt_f16f32:
1326 case Intrinsic::aarch64_sve_fcvt_f16f64:
1327 case Intrinsic::aarch64_sve_fcvt_f32f16:
1328 case Intrinsic::aarch64_sve_fcvt_f32f64:
1329 case Intrinsic::aarch64_sve_fcvt_f64f16:
1330 case Intrinsic::aarch64_sve_fcvt_f64f32:
1331 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1332 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1333 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1334 case Intrinsic::aarch64_sve_fcvtzs:
1335 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1336 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1337 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1338 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1339 case Intrinsic::aarch64_sve_fcvtzu:
1340 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1341 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1342 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1343 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1344 case Intrinsic::aarch64_sve_scvtf:
1345 case Intrinsic::aarch64_sve_scvtf_f16i32:
1346 case Intrinsic::aarch64_sve_scvtf_f16i64:
1347 case Intrinsic::aarch64_sve_scvtf_f32i64:
1348 case Intrinsic::aarch64_sve_scvtf_f64i32:
1349 case Intrinsic::aarch64_sve_ucvtf:
1350 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1351 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1352 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1353 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1356 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1357 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1358 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1359 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1362 case Intrinsic::aarch64_sve_fabd:
1364 case Intrinsic::aarch64_sve_fadd:
1367 case Intrinsic::aarch64_sve_fdiv:
1370 case Intrinsic::aarch64_sve_fmax:
1372 case Intrinsic::aarch64_sve_fmaxnm:
1374 case Intrinsic::aarch64_sve_fmin:
1376 case Intrinsic::aarch64_sve_fminnm:
1378 case Intrinsic::aarch64_sve_fmla:
1380 case Intrinsic::aarch64_sve_fmls:
1382 case Intrinsic::aarch64_sve_fmul:
1385 case Intrinsic::aarch64_sve_fmulx:
1387 case Intrinsic::aarch64_sve_fnmla:
1389 case Intrinsic::aarch64_sve_fnmls:
1391 case Intrinsic::aarch64_sve_fsub:
1394 case Intrinsic::aarch64_sve_add:
1397 case Intrinsic::aarch64_sve_mla:
1399 case Intrinsic::aarch64_sve_mls:
1401 case Intrinsic::aarch64_sve_mul:
1404 case Intrinsic::aarch64_sve_sabd:
1406 case Intrinsic::aarch64_sve_sdiv:
1409 case Intrinsic::aarch64_sve_smax:
1411 case Intrinsic::aarch64_sve_smin:
1413 case Intrinsic::aarch64_sve_smulh:
1415 case Intrinsic::aarch64_sve_sub:
1418 case Intrinsic::aarch64_sve_uabd:
1420 case Intrinsic::aarch64_sve_udiv:
1423 case Intrinsic::aarch64_sve_umax:
1425 case Intrinsic::aarch64_sve_umin:
1427 case Intrinsic::aarch64_sve_umulh:
1429 case Intrinsic::aarch64_sve_asr:
1432 case Intrinsic::aarch64_sve_lsl:
1435 case Intrinsic::aarch64_sve_lsr:
1438 case Intrinsic::aarch64_sve_and:
1441 case Intrinsic::aarch64_sve_bic:
1443 case Intrinsic::aarch64_sve_eor:
1446 case Intrinsic::aarch64_sve_orr:
1449 case Intrinsic::aarch64_sve_sqsub:
1451 case Intrinsic::aarch64_sve_uqsub:
1454 case Intrinsic::aarch64_sve_add_u:
1457 case Intrinsic::aarch64_sve_and_u:
1460 case Intrinsic::aarch64_sve_asr_u:
1463 case Intrinsic::aarch64_sve_eor_u:
1466 case Intrinsic::aarch64_sve_fadd_u:
1469 case Intrinsic::aarch64_sve_fdiv_u:
1472 case Intrinsic::aarch64_sve_fmul_u:
1475 case Intrinsic::aarch64_sve_fsub_u:
1478 case Intrinsic::aarch64_sve_lsl_u:
1481 case Intrinsic::aarch64_sve_lsr_u:
1484 case Intrinsic::aarch64_sve_mul_u:
1487 case Intrinsic::aarch64_sve_orr_u:
1490 case Intrinsic::aarch64_sve_sdiv_u:
1493 case Intrinsic::aarch64_sve_sub_u:
1496 case Intrinsic::aarch64_sve_udiv_u:
1500 case Intrinsic::aarch64_sve_addqv:
1501 case Intrinsic::aarch64_sve_and_z:
1502 case Intrinsic::aarch64_sve_bic_z:
1503 case Intrinsic::aarch64_sve_brka_z:
1504 case Intrinsic::aarch64_sve_brkb_z:
1505 case Intrinsic::aarch64_sve_brkn_z:
1506 case Intrinsic::aarch64_sve_brkpa_z:
1507 case Intrinsic::aarch64_sve_brkpb_z:
1508 case Intrinsic::aarch64_sve_cntp:
1509 case Intrinsic::aarch64_sve_compact:
1510 case Intrinsic::aarch64_sve_eor_z:
1511 case Intrinsic::aarch64_sve_eorv:
1512 case Intrinsic::aarch64_sve_eorqv:
1513 case Intrinsic::aarch64_sve_nand_z:
1514 case Intrinsic::aarch64_sve_nor_z:
1515 case Intrinsic::aarch64_sve_orn_z:
1516 case Intrinsic::aarch64_sve_orr_z:
1517 case Intrinsic::aarch64_sve_orv:
1518 case Intrinsic::aarch64_sve_orqv:
1519 case Intrinsic::aarch64_sve_pnext:
1520 case Intrinsic::aarch64_sve_rdffr_z:
1521 case Intrinsic::aarch64_sve_saddv:
1522 case Intrinsic::aarch64_sve_uaddv:
1523 case Intrinsic::aarch64_sve_umaxv:
1524 case Intrinsic::aarch64_sve_umaxqv:
1525 case Intrinsic::aarch64_sve_cmpeq:
1526 case Intrinsic::aarch64_sve_cmpeq_wide:
1527 case Intrinsic::aarch64_sve_cmpge:
1528 case Intrinsic::aarch64_sve_cmpge_wide:
1529 case Intrinsic::aarch64_sve_cmpgt:
1530 case Intrinsic::aarch64_sve_cmpgt_wide:
1531 case Intrinsic::aarch64_sve_cmphi:
1532 case Intrinsic::aarch64_sve_cmphi_wide:
1533 case Intrinsic::aarch64_sve_cmphs:
1534 case Intrinsic::aarch64_sve_cmphs_wide:
1535 case Intrinsic::aarch64_sve_cmple_wide:
1536 case Intrinsic::aarch64_sve_cmplo_wide:
1537 case Intrinsic::aarch64_sve_cmpls_wide:
1538 case Intrinsic::aarch64_sve_cmplt_wide:
1539 case Intrinsic::aarch64_sve_cmpne:
1540 case Intrinsic::aarch64_sve_cmpne_wide:
1541 case Intrinsic::aarch64_sve_facge:
1542 case Intrinsic::aarch64_sve_facgt:
1543 case Intrinsic::aarch64_sve_fcmpeq:
1544 case Intrinsic::aarch64_sve_fcmpge:
1545 case Intrinsic::aarch64_sve_fcmpgt:
1546 case Intrinsic::aarch64_sve_fcmpne:
1547 case Intrinsic::aarch64_sve_fcmpuo:
1548 case Intrinsic::aarch64_sve_ld1:
1549 case Intrinsic::aarch64_sve_ld1_gather:
1550 case Intrinsic::aarch64_sve_ld1_gather_index:
1551 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1552 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1553 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1554 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1555 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1556 case Intrinsic::aarch64_sve_ld1q_gather_index:
1557 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1558 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1559 case Intrinsic::aarch64_sve_ld1ro:
1560 case Intrinsic::aarch64_sve_ld1rq:
1561 case Intrinsic::aarch64_sve_ld1udq:
1562 case Intrinsic::aarch64_sve_ld1uwq:
1563 case Intrinsic::aarch64_sve_ld2_sret:
1564 case Intrinsic::aarch64_sve_ld2q_sret:
1565 case Intrinsic::aarch64_sve_ld3_sret:
1566 case Intrinsic::aarch64_sve_ld3q_sret:
1567 case Intrinsic::aarch64_sve_ld4_sret:
1568 case Intrinsic::aarch64_sve_ld4q_sret:
1569 case Intrinsic::aarch64_sve_ldff1:
1570 case Intrinsic::aarch64_sve_ldff1_gather:
1571 case Intrinsic::aarch64_sve_ldff1_gather_index:
1572 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1573 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1574 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1575 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1576 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1577 case Intrinsic::aarch64_sve_ldnf1:
1578 case Intrinsic::aarch64_sve_ldnt1:
1579 case Intrinsic::aarch64_sve_ldnt1_gather:
1580 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1581 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1582 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1585 case Intrinsic::aarch64_sve_prf:
1586 case Intrinsic::aarch64_sve_prfb_gather_index:
1587 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1588 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1589 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1590 case Intrinsic::aarch64_sve_prfd_gather_index:
1591 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1592 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1593 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1594 case Intrinsic::aarch64_sve_prfh_gather_index:
1595 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1596 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1597 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1598 case Intrinsic::aarch64_sve_prfw_gather_index:
1599 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1600 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1601 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1604 case Intrinsic::aarch64_sve_st1_scatter:
1605 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1606 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1607 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1608 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1609 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1610 case Intrinsic::aarch64_sve_st1dq:
1611 case Intrinsic::aarch64_sve_st1q_scatter_index:
1612 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1613 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1614 case Intrinsic::aarch64_sve_st1wq:
1615 case Intrinsic::aarch64_sve_stnt1:
1616 case Intrinsic::aarch64_sve_stnt1_scatter:
1617 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1618 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1619 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1621 case Intrinsic::aarch64_sve_st2:
1622 case Intrinsic::aarch64_sve_st2q:
1624 case Intrinsic::aarch64_sve_st3:
1625 case Intrinsic::aarch64_sve_st3q:
1627 case Intrinsic::aarch64_sve_st4:
1628 case Intrinsic::aarch64_sve_st4q:
  Value *UncastedPred;

    Pred = UncastedPred;

    if (OrigPredTy->getMinNumElements() <=
            ->getMinNumElements())
      Pred = UncastedPred;

  return C && C->isAllOnesValue();

  if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
      Dup->getOperand(1) == Pg && isa<Constant>(Dup->getOperand(2)))
static std::optional<Instruction *>

  Value *Op1 = II.getOperand(1);
  Value *Op2 = II.getOperand(2);

    return std::nullopt;

  if (SimpleII == Inactive)

static std::optional<Instruction *>

    return std::nullopt;

  II.setCalledFunction(NewDecl);

  return std::nullopt;
static std::optional<Instruction *>

    return std::nullopt;

  auto IntrinsicID = BinOp->getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:

    return std::nullopt;

  auto BinOpPred = BinOp->getOperand(0);
  auto BinOpOp1 = BinOp->getOperand(1);
  auto BinOpOp2 = BinOp->getOperand(2);

      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
    return std::nullopt;

  auto PredOp = PredIntr->getOperand(0);
  if (PredOpTy != II.getType())
    return std::nullopt;

      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
  if (BinOpOp1 == BinOpOp2)
    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));

  auto NarrowedBinOp =
static std::optional<Instruction *>

    return BinOpCombine;

    return std::nullopt;

  Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;

    if (CursorVTy->getElementCount().getKnownMinValue() <
        IVTy->getElementCount().getKnownMinValue())

    if (Cursor->getType() == IVTy)
      EarliestReplacement = Cursor;

    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_to_svbool ||
                              IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_from_svbool))

    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
    Cursor = IntrinsicCursor->getOperand(0);

  if (!EarliestReplacement)
    return std::nullopt;
  auto *OpPredicate = II.getOperand(0);

    return std::nullopt;

    return std::nullopt;

  const auto PTruePattern =
  if (PTruePattern != AArch64SVEPredPattern::vl1)
    return std::nullopt;

      II.getArgOperand(0), II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
  Insert->insertBefore(II.getIterator());
  Insert->takeName(&II);

      II.getArgOperand(0));

    return std::nullopt;

  if (!SplatValue || !SplatValue->isZero())
    return std::nullopt;

      DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
    return std::nullopt;

  if (!DupQLaneIdx || !DupQLaneIdx->isZero())
    return std::nullopt;
  if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

  if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
    return std::nullopt;

  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;

  for (unsigned I = 0; I < NumElts; ++I) {
      return std::nullopt;
    PredicateBits |= 1 << (I * (16 / NumElts));

  if (PredicateBits == 0) {
    PFalse->takeName(&II);

  for (unsigned I = 0; I < 16; ++I)
    if ((PredicateBits & (1 << I)) != 0)

  unsigned PredSize = Mask & -Mask;

  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
      return std::nullopt;

      {PredType}, {PTruePat});
      Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
  auto *ConvertFromSVBool =
      {II.getType()}, {ConvertToSVBool});
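  // Net effect of the sequence above: a constant fixed-width predicate that
  // was built up via vector.insert/dupq_lane is replaced by an equivalent
  // ptrue with a fixed element pattern. PredSize (Mask & -Mask isolates the
  // lowest set bit) is the stride of the repeating bit pattern, and the loop
  // checking every PredSize-th bit guarantees the pattern really is uniform
  // before the ptrue/convert_to_svbool/convert_from_svbool chain is emitted.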
  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(1);
  auto IntrinsicID = II.getIntrinsicID();
  bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;

    auto OpC = OldBinOp->getOpcode();
        OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());

  if (IsAfter && C && C->isNullValue()) {
    Extract->insertBefore(II.getIterator());
    Extract->takeName(&II);

    return std::nullopt;

  if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
    return std::nullopt;

  unsigned Idx = MinNumElts - 1;

  if (Idx >= PgVTy->getMinNumElements())
    return std::nullopt;

  Extract->insertBefore(II.getIterator());
  Extract->takeName(&II);

  Value *Pg = II.getArgOperand(0);

  Value *Vec = II.getArgOperand(2);

  if (!Ty->isIntegerTy())
    return std::nullopt;

    return std::nullopt;

      II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});

      {II.getType()}, {AllPat});
static std::optional<Instruction *>

  if (Pattern == AArch64SVEPredPattern::all) {

  return MinNumElts && NumElts >= MinNumElts
                 II, ConstantInt::get(II.getType(), MinNumElts)))

static std::optional<Instruction *>

  if (!ST->isStreaming())
    return std::nullopt;

  Value *PgVal = II.getArgOperand(0);
  Value *OpVal = II.getArgOperand(1);

  if (PgVal == OpVal &&
      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
       II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {

    return std::nullopt;

  if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&

  if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
      ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
       (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
       (OpIID == Intrinsic::aarch64_sve_and_z) ||
       (OpIID == Intrinsic::aarch64_sve_bic_z) ||
       (OpIID == Intrinsic::aarch64_sve_eor_z) ||
       (OpIID == Intrinsic::aarch64_sve_nand_z) ||
       (OpIID == Intrinsic::aarch64_sve_nor_z) ||
       (OpIID == Intrinsic::aarch64_sve_orn_z) ||
       (OpIID == Intrinsic::aarch64_sve_orr_z))) {

  return std::nullopt;
template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
                     bool MergeIntoAddendOp) {
  Value *MulOp0, *MulOp1, *AddendOp, *Mul;
  if (MergeIntoAddendOp) {
    AddendOp = II.getOperand(1);
    Mul = II.getOperand(2);
    AddendOp = II.getOperand(2);
    Mul = II.getOperand(1);

    return std::nullopt;

  if (!Mul->hasOneUse())
    return std::nullopt;

  if (II.getType()->isFPOrFPVectorTy()) {
      return std::nullopt;
      return std::nullopt;

  if (MergeIntoAddendOp)
static std::optional<Instruction *>
  Value *Pred = II.getOperand(0);
  Value *PtrOp = II.getOperand(1);
  Type *VecTy = II.getType();

  Load->copyMetadata(II);

static std::optional<Instruction *>
  Value *VecOp = II.getOperand(0);
  Value *Pred = II.getOperand(1);
  Value *PtrOp = II.getOperand(2);

  Store->copyMetadata(II);

  case Intrinsic::aarch64_sve_fmul_u:
    return Instruction::BinaryOps::FMul;
  case Intrinsic::aarch64_sve_fadd_u:
    return Instruction::BinaryOps::FAdd;
  case Intrinsic::aarch64_sve_fsub_u:
    return Instruction::BinaryOps::FSub;
    return Instruction::BinaryOpsEnd;
static std::optional<Instruction *>
  if (II.isStrictFP())
    return std::nullopt;

  auto *OpPredicate = II.getOperand(0);
  if (BinOpCode == Instruction::BinaryOpsEnd ||
    return std::nullopt;
      BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());

                             Intrinsic::aarch64_sve_mla>(
                             Intrinsic::aarch64_sve_mad>(
  return std::nullopt;

static std::optional<Instruction *>
                             Intrinsic::aarch64_sve_fmla>(IC, II,
                             Intrinsic::aarch64_sve_fmad>(IC, II,
                             Intrinsic::aarch64_sve_fmla>(IC, II,
  return std::nullopt;

static std::optional<Instruction *>
                             Intrinsic::aarch64_sve_fmla>(IC, II,
                             Intrinsic::aarch64_sve_fmad>(IC, II,
                             Intrinsic::aarch64_sve_fmla_u>(

static std::optional<Instruction *>
                             Intrinsic::aarch64_sve_fmls>(IC, II,
                             Intrinsic::aarch64_sve_fnmsb>(
                             Intrinsic::aarch64_sve_fmls>(IC, II,
  return std::nullopt;

static std::optional<Instruction *>
                             Intrinsic::aarch64_sve_fmls>(IC, II,
                             Intrinsic::aarch64_sve_fnmsb>(
                             Intrinsic::aarch64_sve_fmls_u>(
                             Intrinsic::aarch64_sve_mls>(
  return std::nullopt;
  Value *UnpackArg = II.getArgOperand(0);
  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
                  II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;

  return std::nullopt;

  auto *OpVal = II.getOperand(0);
  auto *OpIndices = II.getOperand(1);

      SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
    return std::nullopt;

  Type *RetTy = II.getType();
  constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
  constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;

  if ((match(II.getArgOperand(0),
    if (TyA == B->getType() &&
                            TyA->getMinNumElements());

  return std::nullopt;

  if (match(II.getArgOperand(0),
        II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));

  return std::nullopt;
static std::optional<Instruction *>
  Value *Mask = II.getOperand(0);
  Value *BasePtr = II.getOperand(1);
  Value *Index = II.getOperand(2);

        BasePtr->getPointerAlignment(II.getDataLayout());
                                       BasePtr, IndexBase);

  return std::nullopt;

static std::optional<Instruction *>
  Value *Val = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *BasePtr = II.getOperand(2);
  Value *Index = II.getOperand(3);

        BasePtr->getPointerAlignment(II.getDataLayout());
                                       BasePtr, IndexBase);

  return std::nullopt;
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *DivVec = II.getOperand(2);

  if (!SplatConstantInt)
    return std::nullopt;

  if (DivisorValue == -1)
    return std::nullopt;
  if (DivisorValue == 1)

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
        Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});

  return std::nullopt;
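  // The combine above turns a predicated signed divide by a power-of-two
  // splat into a single SVE ASRD (arithmetic shift right for divide); for a
  // negative power-of-two divisor the ASRD result is additionally negated.
  // A divisor of -1 is left alone and a divisor of 1 is handled separately
  // before this point.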
  size_t VecSize = Vec.size();

  size_t HalfVecSize = VecSize / 2;

    if (*LHS != nullptr && *RHS != nullptr) {

    if (*LHS == nullptr && *RHS != nullptr)

    return std::nullopt;

    Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
    CurrentInsertElt = InsertElt->getOperand(0);

    return std::nullopt;

  for (size_t I = 0; I < Elts.size(); I++) {
    if (Elts[I] == nullptr)

  if (InsertEltChain == nullptr)
    return std::nullopt;
  unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
  unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
                                 IIScalableTy->getMinNumElements() /

  auto *WideShuffleMaskTy =

  auto NarrowBitcast =

  return std::nullopt;

  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *Shift = II.getOperand(2);

  Value *AbsPred, *MergedValue;

    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

      {II.getType()}, {Pred, Vec, Shift});
  Value *Vec = II.getOperand(0);

    return std::nullopt;

  auto *NI = II.getNextNode();
    return !I->mayReadOrWriteMemory() && !I->mayHaveSideEffects();
  while (LookaheadThreshold-- && CanSkipOver(NI)) {
    auto *NIBB = NI->getParent();
    NI = NI->getNextNode();
      if (auto *SuccBB = NIBB->getUniqueSuccessor())
        NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();

  if (NextII && II.isIdenticalTo(NextII))

  return std::nullopt;
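  // The walk above scans forward from a DMB intrinsic, skipping instructions
  // that neither touch memory nor have side effects (and following a unique
  // successor across a block boundary), bounded by the dmb-lookahead-threshold
  // option declared near the top of the file. If an identical DMB is found
  // within that window, one of the two barriers is redundant and can be
  // dropped.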
                            {II.getType(), II.getOperand(0)->getType()},
                            {II.getOperand(0), II.getOperand(1)}));

  return std::nullopt;

  Value *Passthru = II.getOperand(0);

  auto *Mask = ConstantInt::get(Ty, MaskValue);

  return std::nullopt;

static std::optional<Instruction *>

  return std::nullopt;

std::optional<Instruction *>
2871 case Intrinsic::aarch64_dmb:
2873 case Intrinsic::aarch64_neon_fmaxnm:
2874 case Intrinsic::aarch64_neon_fminnm:
2876 case Intrinsic::aarch64_sve_convert_from_svbool:
2878 case Intrinsic::aarch64_sve_dup:
2880 case Intrinsic::aarch64_sve_dup_x:
2882 case Intrinsic::aarch64_sve_cmpne:
2883 case Intrinsic::aarch64_sve_cmpne_wide:
2885 case Intrinsic::aarch64_sve_rdffr:
2887 case Intrinsic::aarch64_sve_lasta:
2888 case Intrinsic::aarch64_sve_lastb:
2890 case Intrinsic::aarch64_sve_clasta_n:
2891 case Intrinsic::aarch64_sve_clastb_n:
2893 case Intrinsic::aarch64_sve_cntd:
2895 case Intrinsic::aarch64_sve_cntw:
2897 case Intrinsic::aarch64_sve_cnth:
2899 case Intrinsic::aarch64_sve_cntb:
2901 case Intrinsic::aarch64_sme_cntsd:
2903 case Intrinsic::aarch64_sve_ptest_any:
2904 case Intrinsic::aarch64_sve_ptest_first:
2905 case Intrinsic::aarch64_sve_ptest_last:
2907 case Intrinsic::aarch64_sve_fadd:
2909 case Intrinsic::aarch64_sve_fadd_u:
2911 case Intrinsic::aarch64_sve_fmul_u:
2913 case Intrinsic::aarch64_sve_fsub:
2915 case Intrinsic::aarch64_sve_fsub_u:
2917 case Intrinsic::aarch64_sve_add:
2919 case Intrinsic::aarch64_sve_add_u:
2921 Intrinsic::aarch64_sve_mla_u>(
2923 case Intrinsic::aarch64_sve_sub:
2925 case Intrinsic::aarch64_sve_sub_u:
2927 Intrinsic::aarch64_sve_mls_u>(
2929 case Intrinsic::aarch64_sve_tbl:
2931 case Intrinsic::aarch64_sve_uunpkhi:
2932 case Intrinsic::aarch64_sve_uunpklo:
2933 case Intrinsic::aarch64_sve_sunpkhi:
2934 case Intrinsic::aarch64_sve_sunpklo:
2936 case Intrinsic::aarch64_sve_uzp1:
2938 case Intrinsic::aarch64_sve_zip1:
2939 case Intrinsic::aarch64_sve_zip2:
2941 case Intrinsic::aarch64_sve_ld1_gather_index:
2943 case Intrinsic::aarch64_sve_st1_scatter_index:
2945 case Intrinsic::aarch64_sve_ld1:
2947 case Intrinsic::aarch64_sve_st1:
2949 case Intrinsic::aarch64_sve_sdiv:
2951 case Intrinsic::aarch64_sve_sel:
2953 case Intrinsic::aarch64_sve_srshl:
2955 case Intrinsic::aarch64_sve_dupq_lane:
2957 case Intrinsic::aarch64_sve_insr:
2959 case Intrinsic::aarch64_sve_whilelo:
2961 case Intrinsic::aarch64_sve_ptrue:
2963 case Intrinsic::aarch64_sve_uxtb:
2965 case Intrinsic::aarch64_sve_uxth:
2967 case Intrinsic::aarch64_sve_uxtw:
  case Intrinsic::aarch64_sme_in_streaming_mode:

  return std::nullopt;

    SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {

  case Intrinsic::aarch64_neon_fcvtxn:
  case Intrinsic::aarch64_neon_rshrn:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqxtn:
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);

  return std::nullopt;
  return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&

  if (ST->useSVEForFixedLengthVectors() &&
          std::max(ST->getMinSVEVectorSizeInBits(), 128u));
  else if (ST->isNeonAvailable())

  if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&

bool AArch64TTIImpl::isSingleExtWideningInstruction(
                                                    Type *SrcOverrideTy) const {
      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  Type *SrcTy = SrcOverrideTy;
  case Instruction::Add:
  case Instruction::Sub: {

    if (Opcode == Instruction::Sub)

  assert(SrcTy && "Expected some SrcTy");

  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();

      DstTyL.first * DstTyL.second.getVectorMinNumElements();
      SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
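// Context for the check above (paraphrasing the code, not separate docs): an
// extend feeding an add/sub counts as a "single extend widening" instruction
// when the legalised source and destination have the same number of elements
// and the destination element is exactly twice as wide, i.e. the shape that a
// widening vector add or subtract can absorb, making the extend itself
// effectively free.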
Type *AArch64TTIImpl::isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,
                                                  Type *SrcOverrideTy) const {
  if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
      Opcode != Instruction::Mul)

      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  auto getScalarSizeWithOverride = [&](const Value *V) {
        ->getScalarSizeInBits();

  unsigned MaxEltSize = 0;
    unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
    unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
    MaxEltSize = std::max(EltSize0, EltSize1);
    unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
    unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
    if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
    MaxEltSize = DstEltSize / 2;
  } else if (Opcode == Instruction::Mul &&
        getScalarSizeWithOverride(isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);

  if (MaxEltSize * 2 > DstEltSize)
  if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(DL, Src)) ||
      (Src->isScalableTy() && !ST->hasSVE2()))

  if (AddUser && AddUser->getOpcode() == Instruction::Add)

  if (!Shr || Shr->getOpcode() != Instruction::LShr)

  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
      Src->getScalarSizeInBits() !=

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (I && I->hasOneUser()) {
    if (Type *ExtTy = isBinExtWideningInstruction(
            SingleUser->getOpcode(), Dst, Operands,
            Src != I->getOperand(0)->getType() ? Src : nullptr)) {

    if (isSingleExtWideningInstruction(
            SingleUser->getOpcode(), Dst, Operands,
            Src != I->getOperand(0)->getType() ? Src : nullptr)) {

      if (SingleUser->getOpcode() == Instruction::Add) {
        if (I == SingleUser->getOperand(1) ||
            cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))

  return Cost == 0 ? 0 : 1;
  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())

  if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&

    return AdjustCost(Entry->Cost);
  const unsigned int SVE_EXT_COST = 1;
  const unsigned int SVE_FCVT_COST = 1;
  const unsigned int SVE_UNPACK_ONCE = 4;
  const unsigned int SVE_UNPACK_TWICE = 16;
3399 {ISD::FP_EXTEND, MVT::f64, MVT::f32, 1},
3400 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f32, 1},
3401 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 2},
3403 {ISD::FP_EXTEND, MVT::f32, MVT::f16, 1},
3404 {ISD::FP_EXTEND, MVT::f64, MVT::f16, 1},
3405 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
3406 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 2},
3407 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2},
3408 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3},
3409 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6},
3411 {ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1},
3412 {ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2},
3413 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1},
3414 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2},
3415 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2},
3416 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3},
3417 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6},
3450 SVE_EXT_COST + SVE_FCVT_COST},
3455 SVE_EXT_COST + SVE_FCVT_COST},
3462 SVE_EXT_COST + SVE_FCVT_COST},
3466 SVE_EXT_COST + SVE_FCVT_COST},
3472 SVE_EXT_COST + SVE_FCVT_COST},
3475 SVE_EXT_COST + SVE_FCVT_COST},
3480 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3482 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3492 SVE_EXT_COST + SVE_FCVT_COST},
3497 SVE_EXT_COST + SVE_FCVT_COST},
3510 SVE_EXT_COST + SVE_FCVT_COST},
3514 SVE_EXT_COST + SVE_FCVT_COST},
3526 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3528 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3530 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3532 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3536 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3538 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3554 SVE_EXT_COST + SVE_FCVT_COST},
3559 SVE_EXT_COST + SVE_FCVT_COST},
3570 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3572 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3574 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3576 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3578 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3580 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3584 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3586 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3588 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3590 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3734 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},
3735 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
3736 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
3739 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1},
3740 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1},
3741 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 4},
3744 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
3745 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
3746 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
3749 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2},
3750 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6},
3751 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14},
3754 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
3755 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
3756 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},
3759 {ISD::BITCAST, MVT::nxv2f16, MVT::nxv2i16, 0},
3760 {ISD::BITCAST, MVT::nxv4f16, MVT::nxv4i16, 0},
3761 {ISD::BITCAST, MVT::nxv2f32, MVT::nxv2i32, 0},
3764 {ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0},
3765 {ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0},
3766 {ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0},
  EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;
      ST->useSVEForFixedLengthVectors(WiderTy)) {
    std::pair<InstructionCost, MVT> LT =
    unsigned NumElements =

    return AdjustCost(Entry->Cost);

  if (ST->hasFullFP16())
      return AdjustCost(Entry->Cost);

      ST->isSVEorStreamingSVEAvailable() &&
      TLI->getTypeAction(Src->getContext(), SrcTy) ==
      TLI->getTypeAction(Dst->getContext(), DstTy) ==
        Opcode, LegalTy, Src, CCH, CostKind, I);
    return Part1 + Part2;

      ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))

  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&

                             CostKind, Index, nullptr, nullptr);

  auto DstVT = TLI->getValueType(DL, Dst);
  auto SrcVT = TLI->getValueType(DL, Src);

  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))

  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())

  case Instruction::SExt:

  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)

  return Opcode == Instruction::PHI ? 0 : 1;
    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {

  if (!LT.second.isVector())

  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;

  auto ExtractCanFuseWithFmul = [&]() {

    auto IsAllowedScalarTy = [&](const Type *T) {
      return T->isFloatTy() || T->isDoubleTy() ||
             (T->isHalfTy() && ST->hasFullFP16());

    auto IsUserFMulScalarTy = [](const Value *EEUser) {
      return BO && BO->getOpcode() == BinaryOperator::FMul &&
             !BO->getType()->isVectorTy();

    auto IsExtractLaneEquivalentToZero = [&](unsigned Idx, unsigned EltSz) {
      return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
      DenseMap<User *, unsigned> UserToExtractIdx;
      for (auto *U : Scalar->users()) {
        if (!IsUserFMulScalarTy(U))
        UserToExtractIdx[U];
      if (UserToExtractIdx.empty())
      for (auto &[S, U, L] : ScalarUserAndIdx) {
        for (auto *U : S->users()) {
          if (UserToExtractIdx.contains(U)) {
            auto *Op0 = FMul->getOperand(0);
            auto *Op1 = FMul->getOperand(1);
            if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
              UserToExtractIdx[U] = L;
      for (auto &[U, L] : UserToExtractIdx) {

    return !EE->users().empty() &&
           all_of(EE->users(), [&](const User *U) {
             if (!IsUserFMulScalarTy(U))

             const auto *BO = cast<BinaryOperator>(U);
             const auto *OtherEE = dyn_cast<ExtractElementInst>(
                 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
             const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
             return IsExtractLaneEquivalentToZero(
                 cast<ConstantInt>(OtherEE->getIndexOperand())
                 OtherEE->getType()->getScalarSizeInBits());
  if (Opcode == Instruction::ExtractElement && (I || Scalar) &&
      ExtractCanFuseWithFmul())

             : ST->getVectorInsertExtractBaseCost();

                                                   const Value *Op1) const {
  if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index);

    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, nullptr, Scalar,

                                                   unsigned Index) const {
  return getVectorInstrCostHelper(I.getOpcode(), Val, CostKind, Index, &I);

                                                   unsigned Index) const {

             : ST->getVectorInsertExtractBaseCost() + 1;
  if (Ty->getElementType()->isFloatingPointTy())

  unsigned VecInstCost =
  return DemandedElts.popcount() * (Insert + Extract) * VecInstCost;
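  // Scalarisation is costed above as one insert and/or extract per demanded
  // vector lane, each priced at the target's base vector insert/extract cost;
  // the popcount of DemandedElts gives the number of lanes that actually
  // need to be moved between vector and scalar registers.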
  if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
    return std::nullopt;
  if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
    return std::nullopt;
  if (CanUseSVE && Ty->isScalableTy() && ST->hasSVEB16B16() &&
      ST->isNonStreamingSVEorSME2Available())
    return std::nullopt;

  Cost += InstCost(PromotedTy);
                                       Op2Info, Args, CxtI);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

          Ty, CostKind, Op1Info, Op2Info, true,
          [&](Type *PromotedTy) {
    return *PromotedCost;

  if (Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {

    auto VT = TLI->getValueType(DL, Ty);
    if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
                 : (3 * AsrCost + AddCost);
      return MulCost + AsrCost + 2 * AddCost;
    } else if (VT.isVector()) {
        if (Ty->isScalableTy() && ST->hasSVE())
          Cost += 2 * AsrCost;
                ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
      } else if (LT.second == MVT::v2i64) {
        return VT.getVectorNumElements() *
        if (Ty->isScalableTy() && ST->hasSVE())
          return MulCost + 2 * AddCost + 2 * AsrCost;
        return 2 * MulCost + AddCost + AsrCost + UsraCost;
        LT.second.isFixedLengthVector()) {
      return ExtractCost + InsertCost +

    auto VT = TLI->getValueType(DL, Ty);

    bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
                   LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
                   LT.second == MVT::nxv16i8;
    bool Is128bit = LT.second.is128BitVector();

                       (HasMULH ? 0 : ShrCost) +
                       AddCost * 2 + ShrCost;
    return DivCost + (ISD == ISD::UREM ? MulCost + AddCost : 0);

    if (!VT.isVector() && VT.getSizeInBits() > 64)

                                 Opcode, Ty, CostKind, Op1Info, Op2Info);
    if (TLI->isOperationLegalOrCustom(ISD, LT.second) && ST->hasSVE()) {
        Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
      if (nullptr != Entry)
      if (LT.second.getScalarType() == MVT::i8)
      else if (LT.second.getScalarType() == MVT::i16)
            Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info);
      return (4 + DivCost) * VTy->getNumElements();

                                  -1, nullptr, nullptr);
    if (LT.second == MVT::v2i64 && ST->hasSVE())

    if (LT.second != MVT::v2i64)

    if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
         (Ty->isHalfTy() && ST->hasFullFP16())) &&

    if (!Ty->getScalarType()->isFP128Ty())

    if (!Ty->getScalarType()->isFP128Ty())
      return 2 * LT.first;

  if (!Ty->isVectorTy())

  int MaxMergeDistance = 64;

    return NumVectorInstToHideOverhead;

                                                 unsigned Opcode1,
                                                 unsigned Opcode2) const {
  if (!Sched.hasInstrSchedModel())

      Sched.getSchedClassDesc(TII->get(Opcode1).getSchedClass());
      Sched.getSchedClassDesc(TII->get(Opcode2).getSchedClass());
         "Cannot handle variant scheduling classes without an MI");
  const int AmortizationCost = 20;
      VecPred = CurrentPred;

    static const auto ValidMinMaxTys = {
        MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
        MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
    static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};

    if (any_of(ValidMinMaxTys, [&](MVT M) { return M == LT.second; }) ||
        (ST->hasFullFP16() &&
         any_of(ValidFP16MinMaxTys, [&](MVT M) { return M == LT.second; })))

      {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
      {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
      {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
      {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
      {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
      {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
      {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
      {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
      {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
      {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
      {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);

  if (Opcode == Instruction::FCmp) {
            ValTy, CostKind, Op1Info, Op2Info, false,
            false, [&](Type *PromotedTy) {
      return *PromotedCost;

    if (LT.second.getScalarType() != MVT::f64 &&
        LT.second.getScalarType() != MVT::f32 &&
        LT.second.getScalarType() != MVT::f16)

    unsigned Factor = 1;
                                  AArch64::FCMEQv4f32))

      TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&

                                Op1Info, Op2Info, I);

  if (ST->requiresStrictAlign()) {

  Options.AllowOverlappingLoads = true;
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);

  Options.LoadSizes = {8, 4, 2, 1};
  Options.AllowedTailExpansions = {3, 5, 6};

  return ST->hasSVE();

  if (!LT.first.isValid())

  if (VT->getElementType()->isIntegerTy(1))

  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Should be called on only load or stores.");
  case Instruction::Load:
      return ST->getGatherOverhead();
  case Instruction::Store:
      return ST->getScatterOverhead();
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,

  if (!LT.first.isValid())

  if (!LT.second.isVector() ||
      VT->getElementType()->isIntegerTy(1))

  ElementCount LegalVF = LT.second.getVectorElementCount();
      {TTI::OK_AnyValue, TTI::OP_None}, I);

  EVT VT = TLI->getValueType(DL, Ty, true);

  if (VT == MVT::Other)

  if (!LT.first.isValid())

      (VTy->getElementType()->isIntegerTy(1) &&
       !VTy->getElementCount().isKnownMultipleOf(

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && Alignment < Align(16)) {
    const int AmortizationCost = 6;
    return LT.first * 2 * AmortizationCost;

  if (Ty->isPtrOrPtrVectorTy())

  if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
    if (VT == MVT::v4i8)

  if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||

  while (!TypeWorklist.empty()) {

    bool UseMaskForCond, bool UseMaskForGaps) const {
  assert(Factor >= 2 && "Invalid interleave factor");

  if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))

  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
        VecVTy->getElementCount().divideCoefficientBy(Factor));

    if (MinElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);

                                UseMaskForCond, UseMaskForGaps);

  for (auto *I : Tys) {
    if (!I->isVectorTy())

  return ST->getMaxInterleaveFactor();
4998 enum { MaxStridedLoads = 7 };
5000 int StridedLoads = 0;
5003 for (
const auto BB : L->blocks()) {
5004 for (
auto &
I : *BB) {
5010 if (L->isLoopInvariant(PtrValue))
5015 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
5024 if (StridedLoads > MaxStridedLoads / 2)
5025 return StridedLoads;
5028 return StridedLoads;
5031 int StridedLoads = countStridedLoads(L, SE);
5033 << " strided loads\n");
5049 unsigned *FinalSize) {
5053 for (
auto *BB : L->getBlocks()) {
5054 for (
auto &
I : *BB) {
5060 if (!Cost.isValid())
5064 if (LoopCost > Budget)
5086 if (MaxTC > 0 && MaxTC <= 32)
5097 if (Blocks.size() != 2)
5119 if (!L->isInnermost() || L->getNumBlocks() > 8)
5123 if (!L->getExitBlock())
5129 bool HasParallelizableReductions =
5130 L->getNumBlocks() == 1 &&
5131 any_of(L->getHeader()->phis(),
5133 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5136 if (HasParallelizableReductions &&
5158 if (HasParallelizableReductions) {
5169 if (Header == Latch) {
5172 unsigned Width = 10;
5178 unsigned MaxInstsPerLine = 16;
5180 unsigned BestUC = 1;
5181 unsigned SizeWithBestUC = BestUC * Size;
5183 unsigned SizeWithUC = UC * Size;
5184 if (SizeWithUC > 48)
5186 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5187 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5189 SizeWithBestUC = BestUC * Size;
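The fragment above searches for the unroll count whose unrolled body best fills 16-instruction fetch lines without exceeding 48 instructions. A standalone sketch of that selection loop; the budget of 48 and the 16-instruction line width come from the code above, while the candidate range is an assumption:

// Pick the unroll count UC whose unrolled size (UC * Size) stays within the
// budget and either exactly fills fetch lines or wastes less of the last
// line (a larger remainder modulo the line width).
unsigned pickUnrollCountSketch(unsigned Size, unsigned MaxUC = 8,
                               unsigned MaxInstsPerLine = 16,
                               unsigned Budget = 48) {
  unsigned BestUC = 1;
  unsigned SizeWithBestUC = BestUC * Size;
  for (unsigned UC = 2; UC <= MaxUC; ++UC) {
    unsigned SizeWithUC = UC * Size;
    if (SizeWithUC > Budget)
      break;
    if ((SizeWithUC % MaxInstsPerLine) == 0 ||
        (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
      BestUC = UC;
      SizeWithBestUC = BestUC * Size;
    }
  }
  return BestUC;
}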
5199 for (auto *BB : L->blocks()) {
5200 for (auto &I : *BB) {
5210 for (auto *U : I.users())
5212 LoadedValuesPlus.insert(U);
5219 return LoadedValuesPlus.contains(SI->getOperand(0));
5232 if (!Term || !Term->isConditional() || Preds.size() == 1 ||
5246 auto *I = dyn_cast<Instruction>(V);
5247 return I && DependsOnLoopLoad(I, Depth + 1);
5254 DependsOnLoopLoad(I, 0)) {
5270 if (L->getLoopDepth() > 1)
5281 for (auto *BB : L->getBlocks()) {
5282 for (auto &I : *BB) {
5286 if (IsVectorized && I.getType()->isVectorTy())
5303 switch (ST->getProcFamily()) {
5304 case AArch64Subtarget::AppleA14:
5305 case AArch64Subtarget::AppleA15:
5306 case AArch64Subtarget::AppleA16:
5307 case AArch64Subtarget::AppleM4:
5310 case AArch64Subtarget::Falkor:
5336 !ST->getSchedModel().isOutOfOrder()) {
5359 bool CanCreate) const {
5363 case Intrinsic::aarch64_neon_st2:
5364 case Intrinsic::aarch64_neon_st3:
5365 case Intrinsic::aarch64_neon_st4: {
5368 if (!CanCreate || !ST)
5370 unsigned NumElts = Inst->arg_size() - 1;
5371 if (ST->getNumElements() != NumElts)
5373 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5379 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5381 Res = Builder.CreateInsertValue(Res, L, i);
5385 case Intrinsic::aarch64_neon_ld2:
5386 case Intrinsic::aarch64_neon_ld3:
5387 case Intrinsic::aarch64_neon_ld4:
5388 if (Inst->getType() == ExpectedType)
5399 case Intrinsic::aarch64_neon_ld2:
5400 case Intrinsic::aarch64_neon_ld3:
5401 case Intrinsic::aarch64_neon_ld4:
5402 Info.ReadMem = true;
5403 Info.WriteMem = false;
5406 case Intrinsic::aarch64_neon_st2:
5407 case Intrinsic::aarch64_neon_st3:
5408 case Intrinsic::aarch64_neon_st4:
5409 Info.ReadMem = false;
5410 Info.WriteMem = true;
5418 case Intrinsic::aarch64_neon_ld2:
5419 case Intrinsic::aarch64_neon_st2:
5420 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5422 case Intrinsic::aarch64_neon_ld3:
5423 case Intrinsic::aarch64_neon_st3:
5424 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5426 case Intrinsic::aarch64_neon_ld4:
5427 case Intrinsic::aarch64_neon_st4:
5428 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
5440 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
5441 bool Considerable = false;
5442 AllowPromotionWithoutCommonHeader = false;
5445 Type *ConsideredSExtType =
5447 if (I.getType() != ConsideredSExtType)
5451 for (const User *U : I.users()) {
5453 Considerable = true;
5457 if (GEPInst->getNumOperands() > 2) {
5458 AllowPromotionWithoutCommonHeader = true;
5463 return Considerable;
5511 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5521 return LegalizationCost + 2;
5531 LegalizationCost *= LT.first - 1;
5534 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5543 return LegalizationCost + 2;
5551 std::optional<FastMathFlags> FMF,
5567 return BaseCost + FixedVTy->getNumElements();
5570 if (Opcode != Instruction::FAdd)
5584 MVT MTy = LT.second;
5585 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5633 MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5634 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5636 if (ValTy->getElementCount().getFixedValue() >= 2 && NElts >= 2 &&
5646 return (LT.first - 1) + Log2_32(NElts);
5651 return (LT.first - 1) + Entry->Cost;
5663 if (LT.first != 1) {
5669 ExtraCost *= LT.first - 1;
5672 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5673 return Cost + ExtraCost;
5681 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *VecTy,
5683 EVT VecVT = TLI->getValueType(DL, VecTy);
5684 EVT ResVT = TLI->getValueType(DL, ResTy);
5694 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5696 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5698 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5700 return (LT.first - 1) * 2 + 2;
5711 EVT VecVT = TLI->getValueType(DL, VecTy);
5712 EVT ResVT = TLI->getValueType(DL, ResTy);
5715 RedOpcode == Instruction::Add) {
5721 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5723 return LT.first + 2;
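The fragment above returns (LT.first - 1) * 2 + 2 for extending add-reductions and LT.first + 2 when a multiply-accumulate reduction can map onto a dot-product instruction. A worked illustration of the second formula, where LT.first is the number of legal vector parts after type legalization (the "final reduction" reading is my interpretation, not something stated in the code):

// One dot-product per legalized part, plus 2 which I read as the final
// horizontal reduction. Sketch only, not the authoritative model.
unsigned dotReductionCostSketch(unsigned NumLegalParts) {
  return NumLegalParts + 2;
}
// Example: a v32i8 source legalized into two v16i8 parts -> 2 + 2 = 4.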
5758 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5759 ? TLI->getPromotedVTForPredicate(EVT(LT.second))
5773 if (LT.second.getScalarType() == MVT::i1) {
5782 assert(Entry && "Illegal Type for Splice");
5783 LegalizationCost += Entry->Cost;
5784 return LegalizationCost * LT.first;
5788 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
5797 if (VF.isFixed() && !ST->isSVEorStreamingSVEAvailable() &&
5798 (!ST->isNeonAvailable() || !ST->hasDotProd()))
5801 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub) ||
5806 (!BinOp || (OpBExtend != TTI::PR_None && InputTypeB)) &&
5807 "Unexpected values for OpBExtend or InputTypeB");
5811 if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB))
5814 bool IsUSDot = OpBExtend != TTI::PR_None && OpAExtend != OpBExtend;
5815 if (IsUSDot && !ST->hasMatMulInt8())
5827 auto TC = TLI->getTypeConversion(AccumVectorType->getContext(),
5836 if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) !=
5842 std::pair<InstructionCost, MVT> AccumLT =
5844 std::pair<InstructionCost, MVT> InputLT =
5857 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
5859 if (AccumLT.second.getScalarType() == MVT::i64 &&
5860 InputLT.second.getScalarType() == MVT::i16)
5863 if (AccumLT.second.getScalarType() == MVT::i64 &&
5864 InputLT.second.getScalarType() == MVT::i8)
5874 if (ST->isSVEorStreamingSVEAvailable() ||
5875 (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() &&
5876 ST->hasDotProd())) {
5877 if (AccumLT.second.getScalarType() == MVT::i32 &&
5878 InputLT.second.getScalarType() == MVT::i8)
5894 "Expected the Mask to match the return size if given");
5896 "Expected the same scalar types");
5902 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
5903 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
5904 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
5912 return std::max<InstructionCost>(1, LT.first / 4);
5920 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
5922 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
5925 unsigned TpNumElts = Mask.size();
5926 unsigned LTNumElts = LT.second.getVectorNumElements();
5927 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
5929 LT.second.getVectorElementCount());
5931 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>, InstructionCost>
5933 for (unsigned N = 0; N < NumVecs; N++) {
5937 unsigned Source1 = -1U, Source2 = -1U;
5938 unsigned NumSources = 0;
5939 for (unsigned E = 0; E < LTNumElts; E++) {
5940 int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
5949 unsigned Source = MaskElt / LTNumElts;
5950 if (NumSources == 0) {
5953 } else if (NumSources == 1 && Source != Source1) {
5956 } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
5962 if (Source == Source1)
5964 else if (Source == Source2)
5965 NMask.push_back(MaskElt % LTNumElts + LTNumElts);
5974 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
5985 NTp, NTp, NMask, CostKind, 0, nullptr, Args,
5988 Result.first->second = NCost;
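The loop above splits a wide shuffle mask into LTNumElts-sized chunks, renumbers each chunk's elements relative to at most two source sub-vectors, and memoizes the per-chunk cost. A standalone sketch of the chunking/classification step over a plain std::vector, ignoring the renumbering and memoization:

#include <vector>

// For one chunk of a wide shuffle mask, count how many distinct source
// sub-vectors of width LTNumElts it pulls from (0, 1, 2, or more, where
// "more" forces the generic multi-source handling in the real code).
unsigned countChunkSources(const std::vector<int> &Mask, unsigned ChunkIdx,
                           unsigned LTNumElts) {
  int Source1 = -1, Source2 = -1;
  unsigned NumSources = 0;
  for (unsigned E = 0; E < LTNumElts; ++E) {
    size_t Idx = (size_t)ChunkIdx * LTNumElts + E;
    int MaskElt = Idx < Mask.size() ? Mask[Idx] : -1;
    if (MaskElt < 0)
      continue; // undef lane needs no source
    int Source = MaskElt / (int)LTNumElts;
    if (NumSources == 0) {
      Source1 = Source;
      NumSources = 1;
    } else if (NumSources == 1 && Source != Source1) {
      Source2 = Source;
      NumSources = 2;
    } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
      ++NumSources;
    }
  }
  return NumSources;
}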
6002 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
6003 if (LT.second.getFixedSizeInBits() >= 128 &&
6005 LT.second.getVectorNumElements() / 2) {
6008 if (Index == (int)LT.second.getVectorNumElements() / 2)
6022 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6025 return M.value() < 0 || M.value() == (int)M.index();
6031 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6032 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6041 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6042 ST->isSVEorStreamingSVEAvailable() &&
6047 if (ST->isSVEorStreamingSVEAvailable() &&
6061 if (IsLoad && LT.second.isVector() &&
6063 LT.second.getVectorElementCount()))
6069 if (Mask.size() == 4 &&
6071 (SrcTy->getScalarSizeInBits() == 16 ||
6072 SrcTy->getScalarSizeInBits() == 32) &&
6073 all_of(Mask, [](
int E) {
return E < 8; }))
6079 if (LT.second.isFixedLengthVector() &&
6080 LT.second.getVectorNumElements() == Mask.size() &&
6082 (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6083 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6084 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6085 LT.second.getVectorNumElements(), 16) ||
6086 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6087 LT.second.getVectorNumElements(), 32) ||
6088 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6089 LT.second.getVectorNumElements(), 64) ||
6092 [&Mask](int M) { return M < 0 || M == Mask[0]; })))
6221 return LT.first * Entry->Cost;
6230 LT.second.getSizeInBits() <= 128 && SubTp) {
6232 if (SubLT.second.isVector()) {
6233 int NumElts = LT.second.getVectorNumElements();
6234 int NumSubElts = SubLT.second.getVectorNumElements();
6235 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6241 if (IsExtractSubvector)
6258 if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
6277 return ST->useFixedOverScalableIfEqualCost();
6281 return ST->getEpilogueVectorizationMinVF();
6316 unsigned NumInsns = 0;
6318 NumInsns += BB->sizeWithoutDebug();
6328 int64_t Scale, unsigned AddrSpace) const {
6356 if (I->getOpcode() == Instruction::Or &&
6361 if (I->getOpcode() == Instruction::Add ||
6362 I->getOpcode() == Instruction::Sub)
6387 return all_equal(Shuf->getShuffleMask());
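The splat check ending above reduces to "every shuffle mask element is the same". For clarity, the mask-level check in isolation, using a plain vector instead of a ShuffleVectorInst (the non-empty guard is an addition of this sketch):

#include <algorithm>
#include <vector>

// A shuffle is a splat if all mask elements select the same source lane.
bool isSplatMaskSketch(const std::vector<int> &Mask) {
  return !Mask.empty() &&
         std::all_of(Mask.begin(), Mask.end(),
                     [&](int M) { return M == Mask.front(); });
}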
6394 bool AllowSplat = false) {
6399 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
6400 auto *FullTy = FullV->getType();
6401 auto *HalfTy = HalfV->getType();
6403 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6406 auto extractHalf = [](Value *FullV, Value *HalfV) {
6409 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6413 Value *S1Op1 = nullptr, *S2Op1 = nullptr;
6427 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6428 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6442 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6443 (M2Start != 0 && M2Start != (NumElements / 2)))
6445 if (S1Op1 && S2Op1 && M1Start != M2Start)
6455 return Ext->getType()->getScalarSizeInBits() ==
6456 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6470 Value *VectorOperand = nullptr;
6487 if (!GEP || GEP->getNumOperands() != 2)
6491 Value *Offsets = GEP->getOperand(1);
6494 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6500 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6501 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6502 Ops.push_back(&GEP->getOperandUse(1));
6536 switch (II->getIntrinsicID()) {
6537 case Intrinsic::aarch64_neon_smull:
6538 case Intrinsic::aarch64_neon_umull:
6541 Ops.push_back(&II->getOperandUse(0));
6542 Ops.push_back(&II->getOperandUse(1));
6547 case Intrinsic::fma:
6548 case Intrinsic::fmuladd:
6554 case Intrinsic::aarch64_neon_sqdmull:
6555 case Intrinsic::aarch64_neon_sqdmulh:
6556 case Intrinsic::aarch64_neon_sqrdmulh:
6559 Ops.push_back(&II->getOperandUse(0));
6561 Ops.push_back(&II->getOperandUse(1));
6562 return !Ops.empty();
6563 case Intrinsic::aarch64_neon_fmlal:
6564 case Intrinsic::aarch64_neon_fmlal2:
6565 case Intrinsic::aarch64_neon_fmlsl:
6566 case Intrinsic::aarch64_neon_fmlsl2:
6569 Ops.push_back(&II->getOperandUse(1));
6571 Ops.push_back(&II->getOperandUse(2));
6572 return !Ops.empty();
6573 case Intrinsic::aarch64_sve_ptest_first:
6574 case Intrinsic::aarch64_sve_ptest_last:
6576 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6577 Ops.push_back(&II->getOperandUse(0));
6578 return !Ops.empty();
6579 case Intrinsic::aarch64_sme_write_horiz:
6580 case Intrinsic::aarch64_sme_write_vert:
6581 case Intrinsic::aarch64_sme_writeq_horiz:
6582 case Intrinsic::aarch64_sme_writeq_vert: {
6584 if (!Idx || Idx->getOpcode() != Instruction::Add)
6586 Ops.push_back(&II->getOperandUse(1));
6589 case Intrinsic::aarch64_sme_read_horiz:
6590 case Intrinsic::aarch64_sme_read_vert:
6591 case Intrinsic::aarch64_sme_readq_horiz:
6592 case Intrinsic::aarch64_sme_readq_vert:
6593 case Intrinsic::aarch64_sme_ld1b_vert:
6594 case Intrinsic::aarch64_sme_ld1h_vert:
6595 case Intrinsic::aarch64_sme_ld1w_vert:
6596 case Intrinsic::aarch64_sme_ld1d_vert:
6597 case Intrinsic::aarch64_sme_ld1q_vert:
6598 case Intrinsic::aarch64_sme_st1b_vert:
6599 case Intrinsic::aarch64_sme_st1h_vert:
6600 case Intrinsic::aarch64_sme_st1w_vert:
6601 case Intrinsic::aarch64_sme_st1d_vert:
6602 case Intrinsic::aarch64_sme_st1q_vert:
6603 case Intrinsic::aarch64_sme_ld1b_horiz:
6604 case Intrinsic::aarch64_sme_ld1h_horiz:
6605 case Intrinsic::aarch64_sme_ld1w_horiz:
6606 case Intrinsic::aarch64_sme_ld1d_horiz:
6607 case Intrinsic::aarch64_sme_ld1q_horiz:
6608 case Intrinsic::aarch64_sme_st1b_horiz:
6609 case Intrinsic::aarch64_sme_st1h_horiz:
6610 case Intrinsic::aarch64_sme_st1w_horiz:
6611 case Intrinsic::aarch64_sme_st1d_horiz:
6612 case Intrinsic::aarch64_sme_st1q_horiz: {
6614 if (!Idx || Idx->getOpcode() != Instruction::Add)
6616 Ops.push_back(&II->getOperandUse(3));
6619 case Intrinsic::aarch64_neon_pmull:
6622 Ops.push_back(&II->getOperandUse(0));
6623 Ops.push_back(&II->getOperandUse(1));
6625 case Intrinsic::aarch64_neon_pmull64:
6627 II->getArgOperand(1)))
6629 Ops.push_back(&II->getArgOperandUse(0));
6630 Ops.push_back(&II->getArgOperandUse(1));
6632 case Intrinsic::masked_gather:
6635 Ops.push_back(&II->getArgOperandUse(0));
6637 case Intrinsic::masked_scatter:
6640 Ops.push_back(&II->getArgOperandUse(1));
6647 auto ShouldSinkCondition = [](Value *Cond,
6652 if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6656 Ops.push_back(&II->getOperandUse(0));
6660 switch (I->getOpcode()) {
6661 case Instruction::GetElementPtr:
6662 case Instruction::Add:
6663 case Instruction::Sub:
6665 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
6667 Ops.push_back(&I->getOperandUse(Op));
6672 case Instruction::Select: {
6673 if (!ShouldSinkCondition(I->getOperand(0), Ops))
6676 Ops.push_back(&I->getOperandUse(0));
6679 case Instruction::Br: {
6686 Ops.push_back(&I->getOperandUse(0));
6693 if (!I->getType()->isVectorTy())
6696 switch (I->getOpcode()) {
6697 case Instruction::Sub:
6698 case Instruction::Add: {
6707 Ops.push_back(&Ext1->getOperandUse(0));
6708 Ops.push_back(&Ext2->getOperandUse(0));
6711 Ops.push_back(&I->getOperandUse(0));
6712 Ops.push_back(&I->getOperandUse(1));
6716 case Instruction::Or: {
6719 if (ST->hasNEON()) {
6733 if (I->getParent() != MainAnd->getParent() ||
6738 if (I->getParent() != IA->getParent() ||
6739 I->getParent() != IB->getParent())
6744 Ops.push_back(&I->getOperandUse(0));
6745 Ops.push_back(&I->getOperandUse(1));
6754 case Instruction::Mul: {
6755 auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
6758 if (Ty->isScalableTy())
6762 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
6765 int NumZExts = 0, NumSExts = 0;
6766 for (auto &Op : I->operands()) {
6773 auto *ExtOp = Ext->getOperand(0);
6774 if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
6775 Ops.push_back(&Ext->getOperandUse(0));
6783 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
6784 I->getType()->getScalarSizeInBits())
6821 if (!ElementConstant || !ElementConstant->isZero())
6824 unsigned Opcode = OperandInstr->getOpcode();
6825 if (Opcode == Instruction::SExt)
6827 else if (Opcode == Instruction::ZExt)
6832 unsigned Bitwidth = I->getType()->getScalarSizeInBits();
6842 Ops.push_back(&Insert->getOperandUse(1));
6848 if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
6852 if (!ShouldSinkSplatForIndexedVariant(I))
6857 Ops.push_back(&I->getOperandUse(0));
6859 Ops.push_back(&I->getOperandUse(1));
6861 return !Ops.empty();
6863 case Instruction::FMul: {
6865 if (I->getType()->isScalableTy())
6874 Ops.push_back(&I->getOperandUse(0));
6876 Ops.push_back(&I->getOperandUse(1));
6877 return !Ops.empty();
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
Information for memory intrinsic cost model.
Type * getDataType() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
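A hedged sketch of the ValueTracking queries above, using the convenience overload of computeKnownBits that returns a KnownBits value (the helper name is invented):
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/KnownBits.h"

// Returns true if V is provably even, i.e. its lowest bit is known to be zero.
static bool isKnownEven(const llvm::Value *V, const llvm::DataLayout &DL) {
  llvm::KnownBits Known = llvm::computeKnownBits(V, DL);
  return Known.Zero[0];
}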
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
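A hedged sketch of the shuffle-mask classifiers above; isZIPMask and friends live in the AArch64 target's own headers, so this only compiles inside the backend:
#include "llvm/ADT/ArrayRef.h"

// For a mask like <0, 8, 1, 9, 2, 10, 3, 11> with NumElts == 8, this reports
// a zip1 pattern (WhichResult == 0), per the documentation above.
static bool isZip1Mask(llvm::ArrayRef<int> Mask, unsigned NumElts) {
  unsigned WhichResult = 0, OperandOrder = 0;
  return llvm::isZIPMask(Mask, NumElts, WhichResult, OperandOrder) &&
         WhichResult == 0;
}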
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
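The isa/cast/dyn_cast entries scattered through this list follow the standard LLVM casting idiom; a small self-contained sketch (helper name invented):
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

// Returns the pointer operand if V is a load, otherwise nullptr.
static const llvm::Value *loadPointerOrNull(const llvm::Value *V) {
  // dyn_cast returns nullptr on a type mismatch; cast would assert instead.
  if (const auto *LI = llvm::dyn_cast<llvm::LoadInst>(V))
    return LI->getPointerOperand();
  return nullptr;
}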
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
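The MathExtras helpers in this list (isPowerOf2_32/64, Log2_32, NextPowerOf2, isInt) behave as shown in this small illustrative function:
#include "llvm/Support/MathExtras.h"
#include <cassert>

static void bitMathExamples() {
  assert(llvm::isPowerOf2_32(64u));        // 64 is a power of two > 0
  assert(llvm::Log2_32(64u) == 6u);        // floor(log2(64)) == 6
  assert(llvm::NextPowerOf2(64u) == 128u); // strictly greater than the input
  assert(llvm::isInt<8>(127));             // fits in a signed 8-bit integer
}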
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
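A hedged sketch of the EVT queries above, building a fixed 4 x i32 vector type (function name invented):
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

static void evtExample(llvm::LLVMContext &Ctx) {
  llvm::EVT EltVT = llvm::EVT::getEVT(llvm::Type::getInt32Ty(Ctx));
  llvm::EVT VecVT = llvm::EVT::getVectorVT(Ctx, EltVT, /*NumElements=*/4);
  (void)VecVT.isSimple();             // true: this corresponds to MVT::v4i32
  (void)VecVT.isFixedLengthVector();  // true: not a scalable vector
  (void)VecVT.getVectorNumElements(); // 4
  (void)VecVT.getScalarSizeInBits();  // 32
}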
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...