#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64tti"
class TailFoldingKind {
private:
  uint8_t Bits = 0; // Currently defaults to disabled.

public:
  enum TailFoldingOpts {
    // ... individual TFReductions / TFRecurrences / TFSimple bits ...
    TFAll = TFReductions | TFRecurrences | TFSimple
  };

  void operator=(const std::string &Val) {
    // Val has already been split into its individual elements at this point.
    for (auto TailFoldType : TailFoldTypes) {
      if (TailFoldType == "disabled")
        Bits = 0;
      else if (TailFoldType == "all")
        Bits = TFAll;
      else if (TailFoldType == "default")
        Bits = 0; // Currently defaults to never tail-folding.
      else if (TailFoldType == "simple")
        add(TFSimple);
      else if (TailFoldType == "reductions")
        add(TFReductions);
      else if (TailFoldType == "recurrences")
        add(TFRecurrences);
      else if (TailFoldType == "noreductions")
        remove(TFReductions);
      else if (TailFoldType == "norecurrences")
        remove(TFRecurrences);
      else
        errs() << "invalid argument " << TailFoldType.str()
               << " to -sve-tail-folding=; each element must be one of: disabled, "
                  "all, default, simple, reductions, noreductions, recurrences, "
                  "norecurrences\n";
    }
  }

  operator uint8_t() const { return Bits; }
};
100 "Control the use of vectorisation using tail-folding for SVE:"
101 "\ndisabled No loop types will vectorize using tail-folding"
102 "\ndefault Uses the default tail-folding settings for the target "
104 "\nall All legal loop types will vectorize using tail-folding"
105 "\nsimple Use tail-folding for simple loops (not reductions or "
107 "\nreductions Use tail-folding for loops containing reductions"
108 "\nrecurrences Use tail-folding for loops containing fixed order "
  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();
  // Inline a callee only if its target features are a subset of the caller's.
  return (CallerBits & CalleeBits) == CalleeBits;
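// Worked example of the subset test (hypothetical feature sets): a caller
// built with {+neon, +sve, +fullfp16} may inline a callee that only requires
// {+neon, +sve}, because ANDing the two feature sets yields the callee's set
// unchanged; a callee requiring +sve2 into that same caller is rejected.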
  // Sign-extend the immediate to a multiple of 64 bits.
  ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  // Cost each 64-bit chunk via the move-immediate (expandMOVImm) expansion.
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
    // ... accumulate the number of instructions needed for this chunk ...
  }

  return std::max<InstructionCost>(1, Cost);
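// Rough illustration (assuming the usual MOVZ/MOVK expansion): an arbitrary
// 64-bit constant such as 0x12345678ABCDEF00 needs four 16-bit pieces and so
// costs 4, whereas 0x42 is a single MOVZ and costs 1; the std::max above keeps
// the result at least 1 even for immediates that fold away entirely.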
  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  case Instruction::GetElementPtr:
    // ...
  case Instruction::Store:
    // ...
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
    // ... record which operand index may hold a costed immediate ...
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    // ... shift amounts are free ...
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    break;
  }

  if (Idx == ImmIdx) {
    int NumConstants = (BitSize + 63) / 64;
    // ... an immediate cheaper than the equivalent loads is considered free ...
  }
  // AArch64-specific intrinsics generally cannot fold an immediate into the
  // selected instruction, so cost the materialization of the constant itself.
  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
    return getIntImmCost(Imm, Ty, CostKind);

  switch (IID) {
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    if (Idx == 1) {
      int NumConstants = (BitSize + 63) / 64;
      // ...
    }
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  if (TyWidth == 32 || TyWidth == 64)
    return TTI::PSK_FastHardware;
  switch (ICA.getID()) {
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    // ...
    if (any_of(ValidMinMaxTys, [&](MVT M) { return M == LT.second; }))
      return LT.first;
    break;
  }
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    // A saturating operation on a legal type is one instruction; a promoted
    // type needs roughly four.
    unsigned Instrs =
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&](MVT M) { return M == LT.second; }))
      return LT.first * Instrs;
    break;
  }
  case Intrinsic::abs: {
    // ...
    if (any_of(ValidAbsTys, [&](MVT M) { return M == LT.second; }))
      return LT.first;
    break;
  }
  case Intrinsic::experimental_stepvector: {
    // One INDEX instruction, plus an ADD for every additional legalized part.
    Cost += AddCost * (LT.first - 1);
    return Cost;
  }
  case Intrinsic::bitreverse: {
    static const CostTblEntry BitreverseTbl[] = {
        {Intrinsic::bitreverse, MVT::i32, 1},
        {Intrinsic::bitreverse, MVT::i64, 1},
        // ...
    };
    // ...
    // Narrow scalar types are first widened, costing one extra instruction:
    return LegalisationCost.first * Entry->Cost + 1;
    // Otherwise:
    return LegalisationCost.first * Entry->Cost;
  }
  case Intrinsic::ctpop: {
    if (!ST->hasNEON()) {
      // Without NEON there is no CNT instruction; the expansion is costed
      // separately.
      // ...
    }
    // ... table lookup keyed on RetTy->getScalarSizeInBits() ...
    return LT.first * Entry->Cost + ExtraCost;
  }
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    static const CostTblEntry WithOverflowCostTbl[] = {
        {Intrinsic::sadd_with_overflow, MVT::i8, 3},
        {Intrinsic::uadd_with_overflow, MVT::i8, 3},
        {Intrinsic::sadd_with_overflow, MVT::i16, 3},
        {Intrinsic::uadd_with_overflow, MVT::i16, 3},
        {Intrinsic::sadd_with_overflow, MVT::i32, 1},
        {Intrinsic::uadd_with_overflow, MVT::i32, 1},
        {Intrinsic::sadd_with_overflow, MVT::i64, 1},
        {Intrinsic::uadd_with_overflow, MVT::i64, 1},
        {Intrinsic::ssub_with_overflow, MVT::i8, 3},
        {Intrinsic::usub_with_overflow, MVT::i8, 3},
        {Intrinsic::ssub_with_overflow, MVT::i16, 3},
        {Intrinsic::usub_with_overflow, MVT::i16, 3},
        {Intrinsic::ssub_with_overflow, MVT::i32, 1},
        {Intrinsic::usub_with_overflow, MVT::i32, 1},
        {Intrinsic::ssub_with_overflow, MVT::i64, 1},
        {Intrinsic::usub_with_overflow, MVT::i64, 1},
        {Intrinsic::smul_with_overflow, MVT::i8, 5},
        {Intrinsic::umul_with_overflow, MVT::i8, 4},
        {Intrinsic::smul_with_overflow, MVT::i16, 5},
        {Intrinsic::umul_with_overflow, MVT::i16, 4},
        {Intrinsic::smul_with_overflow, MVT::i32, 2},
        {Intrinsic::umul_with_overflow, MVT::i32, 2},
        {Intrinsic::smul_with_overflow, MVT::i64, 3},
        {Intrinsic::umul_with_overflow, MVT::i64, 3},
    };
    // ...
    break;
  }
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {
    // ...
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
    // ...
    // Half-precision sources are handled natively with full fp16 support:
    if (ST->hasFullFP16() && /* ... */)
      return LT.first;
    // Otherwise: a legal convert followed by an integer min+max clamp.
    if ((LT.second.getScalarType() == MVT::f32 ||
         LT.second.getScalarType() == MVT::f64 ||
         (ST->hasFullFP16() && LT.second.getScalarType() == MVT::f16)) &&
        /* ... */) {
      if (LT.second.isVector())
        // ...
      // Cost of the clamp: a {s,u}min and a {s,u}max, each of the form
      //   (MinOrMaxIntrinsic, LegalTy, {LegalTy, LegalTy})
      // on top of the conversion itself.
      return LT.first * Cost;
    }
    break;
  }
  // Canonicalise convert.from.svbool(phi(convert.to.svbool(x), ...)) into a
  // phi over the original, narrower predicates.
  auto RequiredType = II.getType();

  auto *PN = dyn_cast<PHINode>(II.getArgOperand(0));
  assert(PN && "Expected Phi Node!");

  // Don't create a new Phi unless we can remove the old one.
  if (!PN->hasOneUse())
    return std::nullopt;

  // Every incoming value must be a convert.to.svbool of a value that already
  // has the required (narrower) type.
  for (Value *IncValPhi : PN->incoming_values()) {
    auto *Reinterpret = dyn_cast<IntrinsicInst>(IncValPhi);
    if (!Reinterpret ||
        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())
      return std::nullopt;
  }

  // Create the new Phi directly over the unconverted values.
  PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues());

  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I));
    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
  }
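// Sketch of the rewrite above (illustrative IR, value names invented):
//   %pa.wide = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool(%pa)
//   %pb.wide = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool(%pb)
//   %phi     = phi <vscale x 16 x i1> [ %pa.wide, %bb0 ], [ %pb.wide, %bb1 ]
//   %narrow  = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool(%phi)
// becomes a phi over %pa/%pb directly, and both conversions disappear.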
static std::optional<Instruction *>
tryCombineFromSVBoolBinOp(InstCombiner &IC, IntrinsicInst &II) {
  auto BinOp = dyn_cast<IntrinsicInst>(II.getOperand(0));
  if (!BinOp)
    return std::nullopt;

  auto IntrinsicID = BinOp->getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:
    break;
  default:
    return std::nullopt;
  }

  auto BinOpPred = BinOp->getOperand(0);
  auto BinOpOp1 = BinOp->getOperand(1);
  auto BinOpOp2 = BinOp->getOperand(2);

  auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);
  if (!PredIntr ||
      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
    return std::nullopt;

  auto PredOp = PredIntr->getOperand(0);
  auto PredOpTy = cast<VectorType>(PredOp->getType());

  // Narrow the binop's operands to the predicate's width and redo the binop
  // there, so the outer convert.from.svbool becomes unnecessary.
  SmallVector<Value *> NarrowedBinOpArgs = {PredOp};
  auto NarrowBinOpOp1 = Builder.CreateIntrinsic(
      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
  if (BinOpOp1 == BinOpOp2)
    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
  else
    NarrowedBinOpArgs.push_back(Builder.CreateIntrinsic(
        Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));

  auto NarrowedBinOp =
      Builder.CreateIntrinsic(IntrinsicID, {PredOpTy}, NarrowedBinOpArgs);
  return IC.replaceInstUsesWith(II, NarrowedBinOp);
}
static std::optional<Instruction *>
instCombineConvertFromSVBool(InstCombiner &IC, IntrinsicInst &II) {
  // ...
  const auto *IVTy = cast<VectorType>(II.getType());

  // Walk the chain of conversions looking for a value with the same predicate
  // width as the result.
  while (Cursor) {
    // If the cursor has fewer lanes than the final result, zeroing must take
    // place, which breaks the equivalence chain.
    const auto *CursorVTy = cast<VectorType>(Cursor->getType());
    if (CursorVTy->getElementCount().getKnownMinValue() <
        IVTy->getElementCount().getKnownMinValue())
      break;

    // A cursor with the same type as II is a viable replacement.
    if (Cursor->getType() == IVTy)
      EarliestReplacement = Cursor;

    auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);

    // Anything that is not an SVE conversion intrinsic ends the chain.
    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_to_svbool ||
                              IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_from_svbool))
      break;

    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
    Cursor = IntrinsicCursor->getOperand(0);
  }

  // If no viable replacement was found in the chain, there is nothing to do.
  if (!EarliestReplacement)
    return std::nullopt;
  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::vl1)
    return std::nullopt;

  // A dup governed by a ptrue with pattern VL1 only writes lane 0, so it is
  // just an insertelement into the passthru vector.
  Insert->insertBefore(&II);
  Insert->takeName(&II);
  // The predicate must be an all-active PTRUE.
  if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::all)
    return std::nullopt;

  // The comparison must be against zero ...
  if (!SplatValue || !SplatValue->isZero())
    return std::nullopt;

  // ... of a lane-0 DUPQ of a constant vector inserted into undef at index 0.
  auto *DupQLane = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
  if (!DupQLane ||
      DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
    return std::nullopt;

  if (!cast<ConstantInt>(DupQLane->getArgOperand(1))->isZero())
    return std::nullopt;

  auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
  if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
    return std::nullopt;

  if (!isa<UndefValue>(VecIns->getArgOperand(0)))
    return std::nullopt;

  if (!cast<ConstantInt>(VecIns->getArgOperand(2))->isZero())
    return std::nullopt;

  auto *ConstVec = dyn_cast<Constant>(VecIns->getArgOperand(1));
  if (!ConstVec)
    return std::nullopt;

  auto *VecTy = dyn_cast<FixedVectorType>(ConstVec->getType());
  auto *OutTy = dyn_cast<ScalableVectorType>(II.getType());
  if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
    return std::nullopt;

  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;

  // Expand the constant operand into a 16-bit, byte-level predicate mask.
  for (unsigned I = 0; I < NumElts; ++I) {
    auto *Arg = dyn_cast<ConstantInt>(ConstVec->getAggregateElement(I));
    if (!Arg)
      return std::nullopt;
    if (!Arg->isZero())
      PredicateBits |= 1 << (I * (16 / NumElts));
  }

  // An all-zero mask folds to an all-false predicate.
  if (PredicateBits == 0) {
    auto *PFalse = Constant::getNullValue(II.getType());
    PFalse->takeName(&II);
    return IC.replaceInstUsesWith(II, PFalse);
  }

  // Find the smallest predicate element size that covers every set bit.
  unsigned Mask = 8;
  for (unsigned I = 0; I < 16; ++I)
    if ((PredicateBits & (1 << I)) != 0)
      Mask |= (I % 8);

  unsigned PredSize = Mask & -Mask;
  // ...

  // Ensure every lane of that element size is actually set.
  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
      return std::nullopt;

  // The compare is then equivalent to an all-active predicate of that width.
  auto *PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue,
                                        {PredType}, {PTruePat});
  auto *ConvertToSVBool = Builder.CreateIntrinsic(
      Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
  auto *ConvertFromSVBool =
      Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
                              {II.getType()}, {ConvertToSVBool});

  ConvertFromSVBool->takeName(&II);
  return IC.replaceInstUsesWith(II, ConvertFromSVBool);
  // instCombineSVELast (lasta/lastb):
  bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;

  // If the extracted vector is a binary op, rebuild the op on the extracted
  // scalars instead, preserving the original flags.
  auto *OldBinOp = cast<BinaryOperator>(Vec);
  auto OpC = OldBinOp->getOpcode();
  auto *NewBinOp = BinaryOperator::CreateWithCopiedFlags(
      OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), &II);

  // lasta with an all-false predicate extracts element 0.
  auto *C = dyn_cast<Constant>(Pg);
  if (IsAfter && C && C->isNullValue()) {
    // ...
    Extract->insertBefore(&II);
    Extract->takeName(&II);
    return IC.replaceInstUsesWith(II, Extract);
  }

  // A ptrue predicate with a known VL pattern selects a fixed lane.
  auto *IntrPG = dyn_cast<IntrinsicInst>(Pg);
  if (!IntrPG)
    return std::nullopt;
  if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(IntrPG->getOperand(0))->getZExtValue();
  // ...
  unsigned Idx = MinNumElts - 1;
  // ...
  auto *PgVTy = cast<ScalableVectorType>(Pg->getType());
  if (Idx >= PgVTy->getMinNumElements())
    return std::nullopt;

  Extract->insertBefore(&II);
  Extract->takeName(&II);
  return IC.replaceInstUsesWith(II, Extract);

  // instCombineSVECondLast (clasta_n/clastb_n): the SIMD&FP form is faster
  // than the scalar-integer form, so bitcast the operands to an equivalent
  // floating-point vector type and back.
  auto *FPVTy = VectorType::get(
      FPTy, cast<VectorType>(Vec->getType())->getElementCount());
  // ...
  auto *FPII = Builder.CreateIntrinsic(II.getIntrinsicID(), {FPVTy},
                                       {Pg, FPFallBack, FPVec});

  // instCombineRDFFR: rewrite rdffr as the explicitly predicated rdffr.z with
  // an all-active ptrue, which later ptest optimisations understand.
  auto *PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue,
                                        {II.getType()}, {AllPat});
  auto *RDFFR =
      Builder.CreateIntrinsic(Intrinsic::aarch64_sve_rdffr_z, {}, {PTrue});
  RDFFR->takeName(&II);
  return IC.replaceInstUsesWith(II, RDFFR);
static std::optional<Instruction *>
instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) {
  // ...
  if (Pattern == AArch64SVEPredPattern::all) {
    // With the "all" pattern the count is just vscale * (elements per granule).
    Constant *StepVal = ConstantInt::get(II.getType(), NumElts);
    auto *VScale = Builder.CreateVScale(StepVal);
    VScale->takeName(&II);
    return IC.replaceInstUsesWith(II, VScale);
  }

  // Fixed-VL patterns fold to a constant when the minimum vector length is
  // known to be at least that wide.
  return MinNumElts && NumElts >= MinNumElts
             ? std::optional<Instruction *>(IC.replaceInstUsesWith(
                   II, ConstantInt::get(II.getType(), MinNumElts)))
             : std::nullopt;
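// Concrete effect (illustrative): for
//   %n = call i64 @llvm.aarch64.sve.cntd(i32 31)   ; pattern 31 == "all"
// NumElts is 2 (doublewords per 128-bit granule), so the call becomes the
// equivalent of 2 * vscale; cntw/cnth/cntb use 4/8/16 respectively.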
  // When the governing predicate and the tested value are the same, the
  // stronger ptest forms reduce to a plain ptest_any of that value.
  if (PgVal == OpVal &&
      /* II is ptest_first or ptest_last */) {
    Value *Ops[] = {PgVal, OpVal};
    Type *Tys[] = {PgVal->getType()};

    auto *PTest =
        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptest_any, Tys, Ops);
    PTest->takeName(&II);
    return IC.replaceInstUsesWith(II, PTest);
  }

  if (!Pg || !Op)
    return std::nullopt;

  // If both operands are convert.to.svbool of equally wide predicates, run
  // the test directly on the unconverted predicates.
  if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
      /* ... matching element counts ... */) {
    // ...
    PTest->takeName(&II);
    return IC.replaceInstUsesWith(II, PTest);
  }

  // ptest_any(pg, op(pg, ...)) can reuse op's flag-setting result when op is
  // one of the flag-setting predicate operations below.
  if ((Pg == Op) &&
      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
      ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
       (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
       (OpIID == Intrinsic::aarch64_sve_and_z) ||
       (OpIID == Intrinsic::aarch64_sve_bic_z) ||
       (OpIID == Intrinsic::aarch64_sve_eor_z) ||
       (OpIID == Intrinsic::aarch64_sve_nand_z) ||
       (OpIID == Intrinsic::aarch64_sve_nor_z) ||
       (OpIID == Intrinsic::aarch64_sve_orn_z) ||
       (OpIID == Intrinsic::aarch64_sve_orr_z))) {
    // ...
    PTest->takeName(&II);
    return IC.replaceInstUsesWith(II, PTest);
  }

  return std::nullopt;
template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II,
                                  bool MergeIntoAddendOp) {
  Value *P = II.getOperand(0);
  Value *MulOp0, *MulOp1, *AddendOp, *Mul;
  if (MergeIntoAddendOp) {
    // Addend is operand 1, the multiply is operand 2 (fmla/mla form).
    // ...
  } else {
    // Multiply is operand 1, the addend is operand 2 (fmad/mad form).
    // ...
  }
  if (!match(Mul, m_Intrinsic<MulOpc>(m_Specific(P), m_Value(MulOp0),
                                      m_Value(MulOp1))))
    return std::nullopt;

  if (!Mul->hasOneUse())
    return std::nullopt;

  // For FP, only fuse when the fast-math flags of the mul and add/sub agree
  // and contraction is allowed.
  if (FAddFlags != cast<CallInst>(Mul)->getFastMathFlags())
    return std::nullopt;
  // ...
    return std::nullopt;

  if (MergeIntoAddendOp)
    Res = Builder.CreateIntrinsic(FuseOpc, {II.getType()},
                                  {P, AddendOp, MulOp0, MulOp1}, FMFSource);
  else
    Res = Builder.CreateIntrinsic(FuseOpc, {II.getType()},
                                  {P, MulOp0, MulOp1, AddendOp}, FMFSource);

  return IC.replaceInstUsesWith(II, Res);
}
static bool isAllActivePredicate(Value *Pred) {
  // Look through convert.from.svbool(convert.to.svbool(...)) chains, as long
  // as the reinterpret does not drop any lanes.
  Value *UncastedPred;
  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
                      m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
                          m_Value(UncastedPred)))))
    if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
        cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
      Pred = UncastedPred;

  return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                         m_ConstantInt<AArch64SVEPredPattern::all>()));
}
static std::optional<Instruction *>
instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
  // Lower sve.ld1 to a plain load when the predicate is all active, otherwise
  // to a masked load; either way the original metadata is preserved:
  // ...
  Load->copyMetadata(II);
  // ...
}

static std::optional<Instruction *>
instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
  // The same idea for sve.st1 and stores:
  // ...
  Store->copyMetadata(II);
  // ...
}

static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
  switch (Intrinsic) {
  case Intrinsic::aarch64_sve_fmul:
    return Instruction::BinaryOps::FMul;
  case Intrinsic::aarch64_sve_fadd:
    return Instruction::BinaryOps::FAdd;
  case Intrinsic::aarch64_sve_fsub:
    return Instruction::BinaryOps::FSub;
  default:
    return Instruction::BinaryOpsEnd;
  }
}
static std::optional<Instruction *>
instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) {
  // Replace a predicated fadd/fsub/fmul whose predicate is all-active with the
  // equivalent unpredicated LLVM IR binary operator.
  auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID());
  if (BinOpCode == Instruction::BinaryOpsEnd ||
      !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
    return std::nullopt;
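// Illustrative IR effect (value names invented): with an all-active predicate,
//   %r = call <vscale x 4 x float> @llvm.aarch64.sve.fmul(%pg, %a, %b)
// becomes the plain IR instruction
//   %r = fmul <vscale x 4 x float> %a, %b
// which later passes can fold and reassociate like any other fmul.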
// instCombineSVEVectorAdd: try to fuse the add with a feeding multiply, via
//   instCombineSVEVectorFuseMulAddSub<..., Intrinsic::aarch64_sve_fmla>(IC, II, ...)
//   instCombineSVEVectorFuseMulAddSub<..., Intrinsic::aarch64_sve_mla>(IC, II, ...)
//   instCombineSVEVectorFuseMulAddSub<..., Intrinsic::aarch64_sve_fmad>(IC, II, ...)
//   instCombineSVEVectorFuseMulAddSub<..., Intrinsic::aarch64_sve_mad>(IC, II, ...)
// instCombineSVEVectorSub: likewise fuse a subtract into
//   instCombineSVEVectorFuseMulAddSub<..., Intrinsic::aarch64_sve_fmls>(IC, II, ...)
//   instCombineSVEVectorFuseMulAddSub<..., Intrinsic::aarch64_sve_mls>(IC, II, ...)
//   instCombineSVEVectorFuseMulAddSub<..., Intrinsic::aarch64_sve_fnmsb>(IC, II, ...)
// falling back to instCombineSVEVectorBinOp when no fusion applies.
  // A multiply by a splat/dup of 1.0 (or 1) is just the other operand.
  auto IsUnitSplat = [](auto *I) {
    auto *SplatValue = getSplatValue(I);
    if (!SplatValue)
      return false;
    return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
  };

  auto IsUnitDup = [](auto *I) {
    auto *IntrI = dyn_cast<IntrinsicInst>(I);
    if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup)
      return false;

    auto *SplatValue = IntrI->getOperand(2);
    return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
  };

  if (IsUnitSplat(OpMultiplier)) {
    // [f]mul pg (dupx 1) %n => %n
    OpMultiplicand->takeName(&II);
    return IC.replaceInstUsesWith(II, OpMultiplicand);
  } else if (IsUnitDup(OpMultiplier)) {
    // [f]mul pg (dup pg 1) %n => %n, but only if the dup is governed by the
    // same predicate as the multiply.
    auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
    auto *DupPg = DupInst->getOperand(1);
    if (OpPredicate == DupPg) {
      OpMultiplicand->takeName(&II);
      return IC.replaceInstUsesWith(II, OpMultiplicand);
    }
  }
  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
                  II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
  // uunpk*/sunpk* of a splatted scalar is just a splat of the extended scalar.
  if (/* operand is a vector splat of ScalarArg */) {
    ScalarArg =
        Builder.CreateIntCast(ScalarArg, RetTy->getScalarType(), IsSigned);
    Value *NewVal =
        Builder.CreateVectorSplat(RetTy->getElementCount(), ScalarArg);
    NewVal->takeName(&II);
    return IC.replaceInstUsesWith(II, NewVal);
  }

  return std::nullopt;
  // A TBL whose index vector is a splat of a constant in-range lane selects a
  // single element, so it can be rewritten as extract + splat.
  auto *SplatValue = dyn_cast_or_null<ConstantInt>(getSplatValue(OpIndices));
  if (!SplatValue ||
      SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
    return std::nullopt;

  auto *Extract = Builder.CreateExtractElement(OpVal, SplatValue);
  auto *VectorSplat =
      Builder.CreateVectorSplat(VTy->getElementCount(), Extract);

  VectorSplat->takeName(&II);
  return IC.replaceInstUsesWith(II, VectorSplat);

  return std::nullopt;
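// Illustrative IR (names invented): in
//   %idx = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 2)
//   %r   = call <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(%v, %idx)
// every lane of %r is element 2 of %v, so the pair is replaced by an
// extractelement of lane 2 followed by a splat of that scalar.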
static std::optional<Instruction *>
instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II) {
  // ...
  // A gather whose index vector is a contiguous sve.index(IndexBase, 1) series
  // is really a unit-stride access, so turn it into a masked load from
  // BasePtr + IndexBase.
  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
                       m_Value(IndexBase), m_SpecificInt(1)))) {
    // ...
    Value *Ptr = Builder.CreateGEP(cast<VectorType>(Ty)->getElementType(),
                                   BasePtr, IndexBase);
    // ...
    CallInst *MaskedLoad =
        Builder.CreateMaskedLoad(Ty, Ptr, Alignment, Mask, PassThru);
    MaskedLoad->takeName(&II);
    return IC.replaceInstUsesWith(II, MaskedLoad);
  }

  return std::nullopt;
}
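// Sketch of the idea in scalar terms (hypothetical example): a gather of
//   base[index_base + 0], base[index_base + 1], ..., base[index_base + VL-1]
// touches consecutive elements, so it is equivalent to one contiguous masked
// vector load starting at &base[index_base]; the scatter combine below applies
// the same reasoning to stores.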
static std::optional<Instruction *>
instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II) {
  // ...
  // Same pattern as the gather case: consecutive indices mean a unit-stride
  // masked store.
  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
                       m_Value(IndexBase), m_SpecificInt(1)))) {
    // ...
    Value *Ptr = Builder.CreateGEP(cast<VectorType>(Ty)->getElementType(),
                                   BasePtr, IndexBase);
    // ...
    (void)Builder.CreateMaskedStore(Val, Ptr, Alignment, Mask);

    return IC.eraseInstFromFunction(II);
  }

  return std::nullopt;
}
  // An sdiv by a power-of-two splat can be done with ASRD (arithmetic shift
  // right for divide); a negative power of two additionally needs a negate.
  ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
  if (!SplatConstantInt)
    return std::nullopt;
  APInt Divisor = SplatConstantInt->getValue();

  if (Divisor.isPowerOf2()) {
    // DivisorLog2 holds log2(Divisor) as an i32 constant.
    auto ASRD = Builder.CreateIntrinsic(
        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
    return IC.replaceInstUsesWith(II, ASRD);
  }
  if (Divisor.isNegatedPowerOf2()) {
    Divisor.negate();
    auto ASRD = Builder.CreateIntrinsic(
        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
    auto NEG = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_neg,
                                       {ASRD->getType()}, {ASRD, Pred, ASRD});
    return IC.replaceInstUsesWith(II, NEG);
  }

  return std::nullopt;
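// Worked example (illustrative assembly): an sdiv by a splat of 8 becomes
//   asrd z0.s, p0/m, z0.s, #3
// i.e. a rounding arithmetic shift right by log2(8), and an sdiv by -8 becomes
// the same asrd followed by a predicated negate of the result.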
  // Helper for instCombineSVEDupqLane: decide whether the collected element
  // list consists of a repeating half-width pattern.
  size_t VecSize = Vec.size();
  // ...
  size_t HalfVecSize = VecSize / 2;
  // Compare / merge the two halves element by element:
  if (*LHS != nullptr && *RHS != nullptr) {
    // ...
  }
  // ...
  if (*LHS == nullptr && *RHS != nullptr)
    *LHS = *RHS;

  // instCombineSVEDupqLane: the dupq operand must be an insertelement chain
  // into a fixed-length vector that was vector_insert'ed at index 0.
  if (!match(CurrentInsertElt,
             m_Intrinsic<Intrinsic::vector_insert>(/* ... */)) ||
      !isa<FixedVectorType>(CurrentInsertElt->getType()))
    return std::nullopt;
  auto IIScalableTy = cast<ScalableVectorType>(II.getType());

  // Record which lane each inserted value lands in.
  while (auto InsertElt = dyn_cast<InsertElementInst>(CurrentInsertElt)) {
    auto Idx = cast<ConstantInt>(InsertElt->getOperand(2));
    Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
    CurrentInsertElt = InsertElt->getOperand(0);
  }

  bool AllowPoison =
      isa<PoisonValue>(CurrentInsertElt) && isa<PoisonValue>(Default);
  // ...
    return std::nullopt;

  // Rebuild the (possibly shortened) pattern as a fresh insertelement chain.
  for (size_t I = 0; I < Elts.size(); I++) {
    if (Elts[I] == nullptr)
      continue;
    InsertEltChain =
        Builder.CreateInsertElement(InsertEltChain, Elts[I], /* lane I */);
  }
  if (InsertEltChain == nullptr)
    return std::nullopt;

  // Splat the packed pattern as one wide element, then shuffle and bitcast
  // back to the original element type.
  unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
  unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
                                 IIScalableTy->getMinNumElements() /
                                 PatternWidth;
  // ...
  auto *WideShuffleMaskTy =
      /* scalable vector of PatternElementCount integers of PatternWidth bits */;
  // ...
  auto InsertSubvector = Builder.CreateInsertVector(/* ... */);
  auto WideBitcast =
      Builder.CreateBitOrPointerCast(InsertSubvector, WideScalableTy);
  auto WideShuffle = Builder.CreateShuffleVector(/* ... */);
  auto NarrowBitcast =
      Builder.CreateBitOrPointerCast(WideShuffle, II.getType());
  // ...

  return std::nullopt;
  // srshl can be folded to a plain lsl when the shifted value is known
  // non-negative (proven here by a feeding abs/sqabs) and the shift amount is
  // a non-negative splat.
  Value *AbsPred, *MergedValue;
  if (!match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_sqabs>(
                      m_Value(MergedValue), m_Value(AbsPred), m_Value())) &&
      !match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(
                      m_Value(MergedValue), m_Value(AbsPred), m_Value())))
    return std::nullopt;

  // ...
    return std::nullopt;
  // ...
    return std::nullopt;

  auto LSL = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_lsl, {II.getType()},
                                     {Pred, Vec, Shift});
  return IC.replaceInstUsesWith(II, LSL);
std::optional<Instruction *>
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  switch (II.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::aarch64_neon_fmaxnm:
  case Intrinsic::aarch64_neon_fminnm:
    return instCombineMaxMinNM(IC, II);
  case Intrinsic::aarch64_sve_convert_from_svbool:
    return instCombineConvertFromSVBool(IC, II);
  case Intrinsic::aarch64_sve_dup:
    return instCombineSVEDup(IC, II);
  case Intrinsic::aarch64_sve_dup_x:
    return instCombineSVEDupX(IC, II);
  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpne_wide:
    return instCombineSVECmpNE(IC, II);
  case Intrinsic::aarch64_sve_rdffr:
    return instCombineRDFFR(IC, II);
  case Intrinsic::aarch64_sve_lasta:
  case Intrinsic::aarch64_sve_lastb:
    return instCombineSVELast(IC, II);
  case Intrinsic::aarch64_sve_clasta_n:
  case Intrinsic::aarch64_sve_clastb_n:
    return instCombineSVECondLast(IC, II);
  case Intrinsic::aarch64_sve_cntd:
    return instCombineSVECntElts(IC, II, 2);
  case Intrinsic::aarch64_sve_cntw:
    return instCombineSVECntElts(IC, II, 4);
  case Intrinsic::aarch64_sve_cnth:
    return instCombineSVECntElts(IC, II, 8);
  case Intrinsic::aarch64_sve_cntb:
    return instCombineSVECntElts(IC, II, 16);
  case Intrinsic::aarch64_sve_ptest_any:
  case Intrinsic::aarch64_sve_ptest_first:
  case Intrinsic::aarch64_sve_ptest_last:
    return instCombineSVEPTest(IC, II);
  case Intrinsic::aarch64_sve_mul:
  case Intrinsic::aarch64_sve_fmul:
    return instCombineSVEVectorMul(IC, II);
  case Intrinsic::aarch64_sve_fadd:
  case Intrinsic::aarch64_sve_add:
    return instCombineSVEVectorAdd(IC, II);
  case Intrinsic::aarch64_sve_fsub:
  case Intrinsic::aarch64_sve_sub:
    return instCombineSVEVectorSub(IC, II);
  case Intrinsic::aarch64_sve_tbl:
    return instCombineSVETBL(IC, II);
  case Intrinsic::aarch64_sve_uunpkhi:
  case Intrinsic::aarch64_sve_uunpklo:
  case Intrinsic::aarch64_sve_sunpkhi:
  case Intrinsic::aarch64_sve_sunpklo:
    return instCombineSVEUnpack(IC, II);
  case Intrinsic::aarch64_sve_zip1:
  case Intrinsic::aarch64_sve_zip2:
    return instCombineSVEZip(IC, II);
  case Intrinsic::aarch64_sve_ld1_gather_index:
    return instCombineLD1GatherIndex(IC, II);
  case Intrinsic::aarch64_sve_st1_scatter_index:
    return instCombineST1ScatterIndex(IC, II);
  case Intrinsic::aarch64_sve_ld1:
    return instCombineSVELD1(IC, II, DL);
  case Intrinsic::aarch64_sve_st1:
    return instCombineSVEST1(IC, II, DL);
  case Intrinsic::aarch64_sve_sdiv:
    return instCombineSVESDIV(IC, II);
  case Intrinsic::aarch64_sve_sel:
    return instCombineSVESel(IC, II);
  case Intrinsic::aarch64_sve_srshl:
    return instCombineSVESrshl(IC, II);
  case Intrinsic::aarch64_sve_dupq_lane:
    return instCombineSVEDupqLane(IC, II);
  }

  return std::nullopt;
}
std::optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts,
    APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::aarch64_neon_fcvtxn:
  case Intrinsic::aarch64_neon_rshrn:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqxtn:
    // Narrowing intrinsics only demand the corresponding source elements.
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);
    break;
  }

  return std::nullopt;
}
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
                                           ArrayRef<const Value *> Args) {
  // Build a vector type with the same element count as the destination.
  auto toVectorTy = [&](Type *ArgTy) {
    return VectorType::get(ArgTy->getScalarType(),
                           cast<VectorType>(DstTy)->getElementCount());
  };
  // ...
  switch (Opcode) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    break;
  default:
    return false;
  }
  // The second operand must be a sign- or zero-extend.
  if (Args.size() != 2 ||
      (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])))
    return false;
  auto *Extend = cast<CastInst>(Args[1]);
  auto *Arg0 = dyn_cast<CastInst>(Args[0]);
  // A mul only widens (SMULL/UMULL) when both operands are the same kind of
  // extend from the same source type.
  if (Opcode == Instruction::Mul &&
      (!Arg0 || Arg0->getOpcode() != Extend->getOpcode() ||
       Arg0->getOperand(0)->getType() != Extend->getOperand(0)->getType()))
    return false;
  // Legalize the destination and source types.
  unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
  // ...
  auto *SrcTy = toVectorTy(Extend->getSrcTy());
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
  if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
    return false;
  // Widening applies when the legalized element counts match and the
  // destination elements are exactly twice as wide as the source elements.
  InstructionCost NumDstEls =
      DstTyL.first * DstTyL.second.getVectorMinNumElements();
  InstructionCost NumSrcEls =
      SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
}
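// Concrete case (illustrative IR): in
//   %e = zext <4 x i16> %b to <4 x i32>
//   %r = add <4 x i32> %a, %e
// the extend feeds a widening NEON add (UADDW-style), so the zext is treated
// as free and the add is costed as a single widening instruction.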
  assert(ISD && "Invalid opcode");

  // If the cast feeds a single widening instruction, it is usually folded into
  // that instruction and therefore free.
  if (I && I->hasOneUser()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
    if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
      // The second operand of a widening op is always folded.
      if (I == SingleUser->getOperand(1))
        return 0;
      // Otherwise it still folds when the other operand is a matching extend
      // from the same source type.
      if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
        if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
            cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
          return 0;
    }
  }

  // Non-throughput cost kinds are reported as a simple 0/1.
  auto AdjustCost = [&CostKind](InstructionCost Cost) -> InstructionCost {
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost == 0 ? 0 : 1;
    return Cost;
  };

  // ... conversion cost tables ...
  return AdjustCost(Entry->Cost);

  // FP16 conversions have their own table when full fp16 is available:
  if (ST->hasFullFP16())
    // ...
    return AdjustCost(Entry->Cost);

// getExtractWithExtendCost():
  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
         "Invalid opcode");
  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");

  // The extract+extend pair is only free for legal vector/destination pairs.
  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
    // ... fall back to the generic extend cost ...
  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
    // ... fall back to the generic extend cost ...

  switch (Opcode) {
  case Instruction::SExt:
    // SMOV sign-extends directly from the lane.
    // ...
  case Instruction::ZExt:
    // UMOV already zero-extends, except when extending a sub-32-bit element
    // all the way to 64 bits.
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
      return 0;
  }

// getCFInstrCost():
  return Opcode == Instruction::PHI ? 0 : 1;

// getVectorInstrCostHelper():
  if (!LT.second.isVector())
    return 0;
  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
    // ...
  }

  // getVectorInstrCost() overloads, with and without a real use of the scalar:
  return getVectorInstrCostHelper(Val, Index, false /* HasRealUse */);
  // ...
  return getVectorInstrCostHelper(Val, Index, true /* HasRealUse */);
  // getArithmeticInstrCost(): start from the base implementation,
  //   BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
  //                                 Op2Info, Args, CxtI);

  // Division by a non-power-of-two constant lowers to a multiply-high based
  // sequence:
  return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
  // ... otherwise use the base cost:
  //   BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info);

  // Vectors narrower than 128 bits are widened before dividing:
  if (isa<FixedVectorType>(Ty) && cast<FixedVectorType>(Ty)
                                          ->getPrimitiveSizeInBits()
                                          .getFixedValue() < 128) {
    // ...
    if (nullptr != Entry) {
      // ...
    }
  }

  // Narrow element types pay for the extra extends:
  if (LT.second.getScalarType() == MVT::i8)
    /* ... */;
  else if (LT.second.getScalarType() == MVT::i16)
    /* ... */;

  // Scalarized integer vector division:
  if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
    // ...
    return (4 + DivCost) * VTy->getNumElements();
  }

  // v2i64 multiplies that are not really widening operations are expensive:
  if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
    return LT.first;
  return LT.first * 14;

  // Simple FP operations:
  return 2 * LT.first;

// getAddressComputationCost(): non-consecutive vector addresses cost extra
// micro-ops that only amortize over several vector instructions.
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;
  // ...
  return NumVectorInstToHideOverhead;

// getCmpSelInstrCost(): vector selects that must be expanded pay a heavy
// amortization cost.
  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
    const int AmortizationCost = 20;
    // ...
  }

  // Selects fed by a compare reuse the compare's predicate:
  VecPred = CurrentPred;

  // Min/max-style selects on supported vector types are a single instruction:
  static const auto ValidMinMaxTys = {
      MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
      MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
  static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
  if (any_of(ValidMinMaxTys, [&](MVT M) { return M == LT.second; }) ||
      (ST->hasFullFP16() &&
       any_of(ValidFP16MinMaxTys, [&](MVT M) { return M == LT.second; })))
    return LT.first;

  static const TypeConversionCostTblEntry VectorSelectTbl[] = {
      // ... select expansion costs keyed on (value type, condition type) ...
  };
  // enableMemCmpExpansion(): no inline expansion under strict alignment;
  // otherwise allow overlapping 8/4/2/1-byte loads.
  if (ST->requiresStrictAlign())
    return Options;
  Options.AllowOverlappingLoads = true;
  // ...
  Options.LoadSizes = {8, 4, 2, 1};
  return Options;
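// Illustrative consequence: a memcmp(a, b, 15) == 0 check can be expanded into
// two overlapping 8-byte loads per buffer (bytes 0-7 and 7-14) that are XORed
// and ORed together instead of a libc call; {8, 4, 2, 1} lists the load sizes
// the expansion is allowed to use.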
  // One of the SVE-gated hooks simply reports subtarget support:
  return ST->hasSVE();

// getMaskedMemoryOpCost(): bail out on types that cannot be legalized.
  if (!LT.first.isValid())
    return InstructionCost::getInvalid();

// getGatherScatterOpCost():
  auto *VT = cast<VectorType>(DataTy);
  if (!LT.first.isValid())
    return InstructionCost::getInvalid();

  // A gather/scatter of a single scalable element is just a scalar access.
  if (cast<VectorType>(DataTy)->getElementCount() ==
      ElementCount::getScalable(1))
    // ...
  ElementCount LegalVF = LT.second.getVectorElementCount();
  // Cost one memory operation per (legalized) lane, e.g.
  //   ... getMemoryOpCost(Opcode, ..., {TTI::OK_AnyValue, TTI::OP_None}, I);

// getMemoryOpCost():
  if (!LT.first.isValid())
    return InstructionCost::getInvalid();

  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
    // ...

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
    // Unaligned 128-bit stores are extremely inefficient on some cores, so
    // penalize them heavily.
    const int AmortizationCost = 6;
    return LT.first * 2 * AmortizationCost;
  }
  // ...
  return cast<FixedVectorType>(Ty)->getNumElements() * 2;

InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  auto *VecVTy = cast<FixedVectorType>(VecTy);

  // Interleave factors that map onto LD2/LD3/LD4 (or ST2/ST3/ST4) are cheap.
  if (!UseMaskForCond && !UseMaskForGaps &&
      Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned NumElts = VecVTy->getNumElements();
    // ...
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
}
// getCostOfKeepingLiveOverCall(): 128-bit vectors live across a call must be
// spilled, because only the low 64 bits of the vector registers are preserved
// by the calling convention.
  for (auto *I : Tys) {
    if (!I->isVectorTy())
      continue;
    if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() ==
        128)
      // ... add the cost of spilling and refilling the 128-bit register ...
  }

// Falkor HW-prefetcher tuning: count the strided loads in the loop.
  enum { MaxStridedLoads = 7 };
  auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
    int StridedLoads = 0;
    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {
        LoadInst *LMemI = dyn_cast<LoadInst>(&I);
        if (!LMemI)
          continue;
        Value *PtrValue = LMemI->getPointerOperand();
        if (L->isLoopInvariant(PtrValue))
          continue;
        // Only affine add-recurrence addresses count as strided.
        const SCEV *LSCEV = SE.getSCEV(PtrValue);
        const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
          continue;
        ++StridedLoads;
        // Stop early once it is clear the threshold will be crossed.
        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;
      }
    }
    return StridedLoads;
  };

  int StridedLoads = countStridedLoads(L, SE);
  LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
                    << " strided loads\n");
  // Don't unroll vectorised loops or loops containing calls.
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (I.getType()->isVectorTy())
        return;

      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
        // ...
        return;
      }
    }
  }

  // Enable runtime/partial unrolling on in-order cores, where it pays off.
  if (/* ... */ &&
      !ST->getSchedModel().isOutOfOrder()) {
    // ...
  }
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                         Type *ExpectedType) {
  switch (Inst->getIntrinsicID()) {
  default:
    return nullptr;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
    // Rebuild the stored struct value from the intrinsic's value operands.
    StructType *ST = dyn_cast<StructType>(ExpectedType);
    if (!ST)
      return nullptr;
    unsigned NumElts = Inst->arg_size() - 1;
    if (ST->getNumElements() != NumElts)
      return nullptr;
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
        return nullptr;
    }
    Value *Res = PoisonValue::get(ExpectedType);
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      Value *L = Inst->getArgOperand(i);
      Res = Builder.CreateInsertValue(Res, L, i);
    }
    return Res;
  }
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)
      return Inst;
    return nullptr;
  }
}

bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                        MemIntrinsicInfo &Info) {
  switch (Inst->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;
    break;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;
    break;
  }

  switch (Inst->getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
    break;
  }
  return true;
}
bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))
    return false;
  Type *ConsideredSExtType =
      Type::getInt64Ty(I.getParent()->getParent()->getContext());
  if (I.getType() != ConsideredSExtType)
    return false;
  // The sext is worth promoting if it feeds GEPs; a GEP with more than two
  // operands can absorb the widened index without a common header.
  for (const User *U : I.users()) {
    if (const auto *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
      Considerable = true;
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;
        break;
      }
    }
  }
  return Considerable;
}
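// Illustrative motivation (hypothetical IR): in
//   %idx64 = sext i32 %idx to i64
//   %p     = getelementptr i32, ptr %base, i64 %idx64
//   %v     = load i32, ptr %p
// keeping the sext next to the GEP lets the backend fold it into the load's
// addressing mode (an LDR with a sign-extended W-register index).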
// getMinMaxReductionCost():
  // f16 without full fp16 support has to be promoted first.
  if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
    // ...
  assert((isa<ScalableVectorType>(Ty) == isa<ScalableVectorType>(CondTy)) &&
         "Both vector needs to be equally scalable");

  unsigned MinMaxOpcode =
      /* FP types use the fmin/fmax intrinsics */
      (IsUnsigned ? Intrinsic::umin : Intrinsic::smin);
  // The reduction itself plus extracting the result:
  return LegalizationCost + 2;

// getArithmeticReductionCostSVE():
  // One extra operation per additional legalized register, plus the reduce.
  LegalizationCost *= LT.first - 1;
  assert(ISD && "Invalid opcode");
  // ...
  return LegalizationCost + 2;

// getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
//                            std::optional<FastMathFlags> FMF, ...):
  // Ordered (strict FP) reductions on fixed vectors scalarize:
  if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
    // ...
    return BaseCost + FixedVTy->getNumElements();
  }

  if (Opcode != Instruction::FAdd)
    return InstructionCost::getInvalid();

  auto *VTy = cast<ScalableVectorType>(ValTy);
  // ...
  if (isa<ScalableVectorType>(ValTy))
    return getArithmeticReductionCostSVE(Opcode, ValTy, CostKind);

  MVT MTy = LT.second;
  assert(ISD && "Invalid opcode");

  // Horizontal adds can use the ADDV instruction:
  return (LT.first - 1) + Entry->Cost;

  auto *ValVTy = cast<FixedVectorType>(ValTy);
  if (!ValVTy->getElementType()->isIntegerTy(1) &&
      /* ... */) {
    if (LT.first != 1) {
      // Additional per-register reductions before the final one:
      ExtraCost *= LT.first - 1;
    }
    return Entry->Cost + ExtraCost;
  }

// getSpliceCost():
  EVT PromotedVT = LT.second.getScalarType() == MVT::i1
                       ? TLI->getPromotedVTForPredicate(EVT(LT.second))
                       : LT.second;
  // Predicate splices are performed on a promoted integer type:
  if (LT.second.getScalarType() == MVT::i1) {
    // ...
  }
  assert(Entry && "Illegal Type for Splice");
  LegalizationCost += Entry->Cost;
  return LegalizationCost * LT.first;
  // If the mask is bigger than a single legalized vector, split it into
  // legal-width chunks and cost each chunk on its own.
  if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
      /* ... */
      cast<FixedVectorType>(Tp)->getNumElements() >
          LT.second.getVectorNumElements() &&
      /* ... */) {
    unsigned TpNumElts = cast<FixedVectorType>(Tp)->getNumElements();
    assert(Mask.size() == TpNumElts && "Expected Mask and Tp size to match!");
    unsigned LTNumElts = LT.second.getVectorNumElements();
    unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
    // ...
    for (unsigned N = 0; N < NumVecs; N++) {
      SmallVector<int> NMask;
      // Split the existing mask into chunks of size LTNumElts, tracking which
      // source registers each chunk reads from.
      unsigned Source1, Source2;
      unsigned NumSources = 0;
      for (unsigned E = 0; E < LTNumElts; E++) {
        int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
                                                      : -1 /* undef */;
        if (MaskElt < 0) {
          NMask.push_back(-1 /* undef */);
          continue;
        }

        // Work out which source this element refers to.
        unsigned Source = MaskElt / LTNumElts;
        if (NumSources == 0) {
          Source1 = Source;
          NumSources = 1;
        } else if (NumSources == 1 && Source != Source1) {
          Source2 = Source;
          NumSources = 2;
        } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
          NumSources++;
        }

        // Add the element to the new mask, relative to its source register.
        if (Source == Source1)
          NMask.push_back(MaskElt % LTNumElts);
        else if (Source == Source2)
          NMask.push_back(MaskElt % LTNumElts + LTNumElts);
        else
          NMask.push_back(MaskElt % LTNumElts);
      }

      // A chunk that reads at most two sources is one shuffle of the legal
      // type; an identity-like chunk is cheaper; anything else is costed per
      // element.
      if (NumSources <= 2)
        Cost += getShuffleCost(NumSources <= 1 ? TTI::SK_PermuteSingleSrc
                                               : TTI::SK_PermuteTwoSrc,
                               NTp, NMask, CostKind, 0, nullptr, Args);
      else if (any_of(enumerate(NMask), [&](const auto &ME) {
                 return ME.value() % LTNumElts == ME.index();
               }))
        Cost += LTNumElts - 1;
      else
        // ...
    }
    return Cost;
  }
  // A broadcast shuffle fed directly by a load can use LD1R and is free.
  bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
  if (IsLoad && LT.second.isVector() &&
      isLegalBroadcastLoad(Tp->getElementType(),
                           LT.second.getVectorElementCount()))
    return 0;

  // Small shuffles with a fully known mask come straight from the
  // perfect-shuffle tables.
  if (/* ... four 16/32-bit elements ... */
      all_of(Mask, [](int E) { return E < 8; }))
    return getPerfectShuffleCost(Mask);

  // Table-driven costs for the remaining shuffle kinds:
  return LT.first * Entry->Cost;

  // Extracting a legal fixed-width subvector at an aligned index:
  if (/* ... */
      LT.second.getSizeInBits() <= 128 && SubTp) {
    // ...
    if (SubLT.second.isVector()) {
      int NumElts = LT.second.getVectorNumElements();
      int NumSubElts = SubLT.second.getVectorNumElements();
      if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
        // ...
    }
  }
  TailFoldingKind Required; // Defaults to 0 (the "simple loop" case).
  if (LVL->getReductionVars().size())
    Required.add(TailFoldingKind::TFReductions);
  if (LVL->getFixedOrderRecurrences().size())
    Required.add(TailFoldingKind::TFRecurrences);
  if (!Required)
    Required.add(TailFoldingKind::TFSimple);

  // Tail-fold only if every required bit is enabled by -sve-tail-folding.
InstructionCost AArch64TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                                     int64_t BaseOffset,
                                                     bool HasBaseReg, int64_t Scale,
                                                     unsigned AddrSpace) const {
  // Scaling factors are not free at all.
  // ...
}