#include "llvm/IR/IntrinsicsAArch64.h"
// ...
#define DEBUG_TYPE "aarch64tti"
// ...
        "Penalty of calling a function that requires a change to PSTATE.SM"));
// ...
    cl::desc(
        "Penalty of inlining a call that requires a change to PSTATE.SM"));
class TailFoldingOption {
// ...
  bool NeedsDefault = true;
// ...
  void setNeedsDefault(bool V) { NeedsDefault = V; }
// ...
    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
// ...
    Bits &= ~DisableBits;
// ...
    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              "  (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";
  void operator=(const std::string &Val) {
// ...
    setNeedsDefault(false);
// ...
    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);
// ...
      setInitialBits(TailFoldingOpts::Disabled);
// ...
    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
176 "Control the use of vectorisation using tail-folding for SVE where the"
177 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
178 "\ndisabled (Initial) No loop types will vectorize using "
180 "\ndefault (Initial) Uses the default tail-folding settings for "
182 "\nall (Initial) All legal loop types will vectorize using "
184 "\nsimple (Initial) Use tail-folding for simple loops (not "
185 "reductions or recurrences)"
186 "\nreductions Use tail-folding for loops containing reductions"
187 "\nnoreductions Inverse of above"
188 "\nrecurrences Use tail-folding for loops containing fixed order "
190 "\nnorecurrences Inverse of above"
191 "\nreverse Use tail-folding for loops requiring reversed "
193 "\nnoreverse Inverse of above"),
// ...
      .Case("__arm_sme_state", true)
      .Case("__arm_tpidr2_save", true)
      .Case("__arm_tpidr2_restore", true)
      .Case("__arm_za_disable", true)
// ...
    if (isa<CallInst>(I) && !I.isDebugOrPseudoInst() &&
        (cast<CallInst>(I).isInlineAsm() || isa<IntrinsicInst>(I) ||
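
// Inlining across a streaming-mode change or into code that requires a
// lazy save is only permitted in narrow cases; otherwise the callee's
// required subtarget features must be a subset of the caller's.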
  SMEAttrs CallerAttrs(*Caller), CalleeAttrs(*Callee);
// ...
  if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
      CallerAttrs.requiresSMChange(CalleeAttrs)) {
// ...
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
// ...
      TM.getSubtargetImpl(*Callee)->getFeatureBits();
// ...
  return (CallerBits & CalleeBits) == CalleeBits;
// ...
  auto FVTy = dyn_cast<FixedVectorType>(Ty);
// ...
         FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
// ...
unsigned AArch64TTIImpl::getInlineCallPenalty(const Function *F,
                                              const CallBase &Call,
                                              unsigned DefaultCallPenalty) const {
// ...
    if (F == Call.getCaller())
// ...
  return DefaultCallPenalty;
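
// Materialization cost of a wide integer immediate: sign-extend to a
// multiple of 64 bits, price each 64-bit slice separately, and floor the
// total at one instruction.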
  ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
// ...
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
// ...
  return std::max<InstructionCost>(1, Cost);
// ...
  unsigned ImmIdx = ~0U;
// ...
  case Instruction::GetElementPtr:
// ...
  case Instruction::Store:
// ...
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
// ...
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
// ...
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
// ...
    int NumConstants = (BitSize + 63) / 64;
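
// Most AArch64 intrinsics do not support folding an immediate into the
// selected instruction, so the materialization cost of the immediate is
// computed directly.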
  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
// ...
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
// ...
    int NumConstants = (BitSize + 63) / 64;
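
  // Immediates in the fixed leading operands of stackmap, patchpoint and
  // statepoint calls, or values that fit in 64 bits, need no materialising.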
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
// ...
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
// ...
  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
// ...
  if (TyWidth == 32 || TyWidth == 64)
// ...
  switch (ICA.getID()) {
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8,    MVT::v16i8,   MVT::v4i16,
                                        MVT::v8i16,   MVT::v2i32,   MVT::v4i32,
                                        MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
// ...
    if (LT.second == MVT::v2i64)
// ...
    if (any_of(ValidMinMaxTys, [&](MVT M) { return M == LT.second; }))
// ...
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    static const auto ValidSatTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
// ...
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&](MVT M) { return M == LT.second; }))
      return LT.first * Instrs;
// ...
  case Intrinsic::abs: {
    static const auto ValidAbsTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
// ...
    if (any_of(ValidAbsTys, [&](MVT M) { return M == LT.second; }))
// ...
  case Intrinsic::bswap: {
    static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
                                     MVT::v4i32, MVT::v2i64};
// ...
    if (any_of(ValidAbsTys, [&](MVT M) { return M == LT.second; }) &&
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
// ...
  case Intrinsic::experimental_stepvector: {
// ...
    Cost += AddCost * (LT.first - 1);
// ...
  case Intrinsic::vector_extract:
  case Intrinsic::vector_insert: {
// ...
    bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
// ...
  case Intrinsic::bitreverse: {
// ...
        {Intrinsic::bitreverse, MVT::i32, 1},
        {Intrinsic::bitreverse, MVT::i64, 1},
        {Intrinsic::bitreverse, MVT::v8i8, 1},
        {Intrinsic::bitreverse, MVT::v16i8, 1},
        {Intrinsic::bitreverse, MVT::v4i16, 2},
        {Intrinsic::bitreverse, MVT::v8i16, 2},
        {Intrinsic::bitreverse, MVT::v2i32, 2},
        {Intrinsic::bitreverse, MVT::v4i32, 2},
        {Intrinsic::bitreverse, MVT::v1i64, 2},
        {Intrinsic::bitreverse, MVT::v2i64, 2},
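
    // i8 and i16 results are costed as the legal type (i32), plus one to
    // match the actual lowering, which needs an extra instruction.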
      return LegalisationCost.first * Entry->Cost + 1;
// ...
    return LegalisationCost.first * Entry->Cost;
// ...
  case Intrinsic::ctpop: {
    if (!ST->hasNEON()) {
// ...
        RetTy->getScalarSizeInBits()
// ...
    return LT.first * Entry->Cost + ExtraCost;
// ...
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
// ...
        {Intrinsic::sadd_with_overflow, MVT::i8, 3},
        {Intrinsic::uadd_with_overflow, MVT::i8, 3},
        {Intrinsic::sadd_with_overflow, MVT::i16, 3},
        {Intrinsic::uadd_with_overflow, MVT::i16, 3},
        {Intrinsic::sadd_with_overflow, MVT::i32, 1},
        {Intrinsic::uadd_with_overflow, MVT::i32, 1},
        {Intrinsic::sadd_with_overflow, MVT::i64, 1},
        {Intrinsic::uadd_with_overflow, MVT::i64, 1},
        {Intrinsic::ssub_with_overflow, MVT::i8, 3},
        {Intrinsic::usub_with_overflow, MVT::i8, 3},
        {Intrinsic::ssub_with_overflow, MVT::i16, 3},
        {Intrinsic::usub_with_overflow, MVT::i16, 3},
        {Intrinsic::ssub_with_overflow, MVT::i32, 1},
        {Intrinsic::usub_with_overflow, MVT::i32, 1},
        {Intrinsic::ssub_with_overflow, MVT::i64, 1},
        {Intrinsic::usub_with_overflow, MVT::i64, 1},
        {Intrinsic::smul_with_overflow, MVT::i8, 5},
        {Intrinsic::umul_with_overflow, MVT::i8, 4},
        {Intrinsic::smul_with_overflow, MVT::i16, 5},
        {Intrinsic::umul_with_overflow, MVT::i16, 4},
        {Intrinsic::smul_with_overflow, MVT::i32, 2},
        {Intrinsic::umul_with_overflow, MVT::i32, 2},
        {Intrinsic::smul_with_overflow, MVT::i64, 3},
        {Intrinsic::umul_with_overflow, MVT::i64, 3},
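
    // Reading the table: overflow-checking add/sub on legal i32/i64 is a
    // single flag-setting instruction; i8/i16 pay for the extensions, and
    // multiplies pay for a widening multiply plus a check of the high part.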
// ...
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {
// ...
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
// ...
    if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
         LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
         LT.second == MVT::v2f64) &&
// ...
         (LT.second == MVT::f64 && MTy == MVT::i32) ||
         (LT.second == MVT::f32 && MTy == MVT::i64)))
// ...
    if (ST->hasFullFP16() &&
        ((LT.second == MVT::f16 && MTy == MVT::i32) ||
         ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
// ...
    if ((LT.second.getScalarType() == MVT::f32 ||
         LT.second.getScalarType() == MVT::f64 ||
         (ST->hasFullFP16() && LT.second.getScalarType() == MVT::f16)) &&
// ...
      if (LT.second.isVector())
// ...
          LegalTy, {LegalTy, LegalTy});
// ...
          LegalTy, {LegalTy, LegalTy});
// ...
      return LT.first * Cost;
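
  // Saturating fp-to-int conversions on the legal type pairs above map
  // onto FCVTZ{S,U}, which saturate natively; other types are costed from
  // explicit min/max clamping plus the conversion itself.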
// ...
  case Intrinsic::fshl:
  case Intrinsic::fshr: {
// ...
        {Intrinsic::fshl, MVT::v4i32, 3},
        {Intrinsic::fshl, MVT::v2i64, 3}, {Intrinsic::fshl, MVT::v16i8, 4},
        {Intrinsic::fshl, MVT::v8i16, 4}, {Intrinsic::fshl, MVT::v2i32, 3},
        {Intrinsic::fshl, MVT::v8i8, 4},  {Intrinsic::fshl, MVT::v4i16, 4}};
// ...
      return LegalisationCost.first * Entry->Cost;
// ...
    if (!RetTy->isIntegerTy())
// ...
    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
                       RetTy->getScalarSizeInBits() < 64) ||
                      (RetTy->getScalarSizeInBits() % 64 != 0);
    unsigned ExtraCost = HigherCost ? 1 : 0;
    if (RetTy->getScalarSizeInBits() == 32 ||
        RetTy->getScalarSizeInBits() == 64)
// ...
    return TyL.first + ExtraCost;
// ...
  case Intrinsic::get_active_lane_mask: {
// ...
    if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) &&
// ...
      return RetTy->getNumElements() * 2;
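
// Sink convert.from.svbool through a PHI node: when every incoming value
// is a convert.to.svbool of the required predicate type, build a new PHI
// over the original (unwidened) predicates.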
  auto RequiredType = II.getType();

  auto *PN = dyn_cast<PHINode>(II.getArgOperand(0));
  assert(PN && "Expected Phi Node!");
// ...
  if (!PN->hasOneUse())
// ...
  for (Value *IncValPhi : PN->incoming_values()) {
    auto *Reinterpret = dyn_cast<IntrinsicInst>(IncValPhi);
    if (!Reinterpret ||
        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())
// ...
  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I));
    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
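
// Narrow an svbool logical operation (and_z, orr_z, ...) back to the width
// of the original predicate when both of its operands were widened with
// convert.to.svbool.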
static std::optional<Instruction *>
// ...
  auto BinOp = dyn_cast<IntrinsicInst>(II.getOperand(0));
// ...
  auto IntrinsicID = BinOp->getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:
// ...
  auto BinOpPred = BinOp->getOperand(0);
  auto BinOpOp1 = BinOp->getOperand(1);
  auto BinOpOp2 = BinOp->getOperand(2);
// ...
  auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);
  if (!PredIntr ||
      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
// ...
  auto PredOp = PredIntr->getOperand(0);
  auto PredOpTy = cast<VectorType>(PredOp->getType());
  if (PredOpTy != II.getType())
// ...
      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
  if (BinOpOp1 == BinOpOp2)
    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
// ...
      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
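
// Strip redundant chains of convert.{to,from}.svbool: walk up through the
// casts and reuse the earliest value that is at least as wide as the
// result type, queueing the intermediate casts for removal.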
static std::optional<Instruction *>
// ...
  if (isa<PHINode>(II.getArgOperand(0)))
// ...
  if (isa<TargetExtType>(II.getArgOperand(0)->getType()) ||
      isa<TargetExtType>(II.getType()))
// ...
  Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;

  const auto *IVTy = cast<VectorType>(II.getType());
// ...
    const auto *CursorVTy = cast<VectorType>(Cursor->getType());
    if (CursorVTy->getElementCount().getKnownMinValue() <
        IVTy->getElementCount().getKnownMinValue())
// ...
      EarliestReplacement = Cursor;

    auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);
// ...
    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_to_svbool ||
                              IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_from_svbool))
// ...
    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
    Cursor = IntrinsicCursor->getOperand(0);
// ...
  if (!EarliestReplacement)
// ...
  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
                      m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
// ...
    if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
        cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
// ...
  return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                         m_ConstantInt<AArch64SVEPredPattern::all>()));
static std::optional<Instruction *>
// ...
  if (RetTy->isStructTy()) {
    auto StructT = cast<StructType>(RetTy);
    auto VecT = StructT->getElementType(0);
// ...
    for (unsigned i = 0; i < StructT->getNumElements(); i++) {
      ZerVec.push_back(VecT->isFPOrFPVectorTy() ? ConstantFP::get(VecT, 0.0)
                                                : ConstantInt::get(VecT, 0));
    }
// ...
  } else if (RetTy->isFPOrFPVectorTy())
// ...
    Node = ConstantInt::get(II.getType(), 0);
// ...
  return std::nullopt;
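
// A dup merged under a ptrue(vl1) predicate only writes lane 0, so it is
// equivalent to a plain insertelement at index 0.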
  auto *OpPredicate = II.getOperand(0);
// ...
    return std::nullopt;
// ...
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::vl1)
    return std::nullopt;
// ...
      II.getArgOperand(0), II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
  Insert->insertBefore(&II);
  Insert->takeName(&II);
// ...
  auto *RetTy = cast<ScalableVectorType>(II.getType());
// ...
      II.getArgOperand(0));
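
// Fold a cmpne of a dupq-lane'd constant vector against zero into the
// equivalent ptrue predicate pattern.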
  auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
  if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::all)
    return std::nullopt;
// ...
  if (!SplatValue || !SplatValue->isZero())
    return std::nullopt;
// ...
  auto *DupQLane = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
  if (!DupQLane ||
      DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
    return std::nullopt;
// ...
  if (!cast<ConstantInt>(DupQLane->getArgOperand(1))->isZero())
    return std::nullopt;

  auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
  if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
    return std::nullopt;
// ...
  if (!isa<UndefValue>(VecIns->getArgOperand(0)))
    return std::nullopt;

  if (!cast<ConstantInt>(VecIns->getArgOperand(2))->isZero())
    return std::nullopt;

  auto *ConstVec = dyn_cast<Constant>(VecIns->getArgOperand(1));
// ...
    return std::nullopt;

  auto *VecTy = dyn_cast<FixedVectorType>(ConstVec->getType());
  auto *OutTy = dyn_cast<ScalableVectorType>(II.getType());
  if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
    return std::nullopt;

  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;
// ...
  for (unsigned I = 0; I < NumElts; ++I) {
    auto *Arg = dyn_cast<ConstantInt>(ConstVec->getAggregateElement(I));
// ...
      return std::nullopt;
// ...
      PredicateBits |= 1 << (I * (16 / NumElts));
// ...
  if (PredicateBits == 0) {
// ...
    PFalse->takeName(&II);
// ...
  for (unsigned I = 0; I < 16; ++I)
    if ((PredicateBits & (1 << I)) != 0)
// ...
  unsigned PredSize = Mask & -Mask;
// ...
  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
      return std::nullopt;
// ...
      {PredType}, {PTruePat});
// ...
      Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
  auto *ConvertFromSVBool =
// ...
          {II.getType()}, {ConvertToSVBool});
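
// lasta/lastb: when the predicate is a constant the extracted lane index
// is known, so the intrinsic becomes a plain extractelement (scalarising a
// feeding binary operator first, where possible).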
  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(1);
  auto IntrinsicID = II.getIntrinsicID();
  bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
// ...
    auto *OldBinOp = cast<BinaryOperator>(Vec);
    auto OpC = OldBinOp->getOpcode();
// ...
        OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());
// ...
  auto *C = dyn_cast<Constant>(Pg);
  if (IsAfter && C && C->isNullValue()) {
// ...
    Extract->insertBefore(&II);
    Extract->takeName(&II);
// ...
  auto *IntrPG = dyn_cast<IntrinsicInst>(Pg);
// ...
    return std::nullopt;

  if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(IntrPG->getOperand(0))->getZExtValue();
// ...
    return std::nullopt;

  unsigned Idx = MinNumElts - 1;
// ...
  auto *PgVTy = cast<ScalableVectorType>(Pg->getType());
  if (Idx >= PgVTy->getMinNumElements())
    return std::nullopt;
// ...
  Extract->insertBefore(&II);
  Extract->takeName(&II);
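
// The SIMD&FP form of CLAST[AB] is faster than the scalar integer form on
// many cores, so the integer variant is rewritten via bitcasts to an
// equivalent floating-point vector type.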
  Value *Pg = II.getArgOperand(0);
// ...
  Value *Vec = II.getArgOperand(2);
// ...
    return std::nullopt;
// ...
    return std::nullopt;
// ...
      FPTy, cast<VectorType>(Vec->getType())->getElementCount());
// ...
      II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
// ...
      {II.getType()}, {AllPat});
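
// cntb/cnth/cntw/cntd with an "all" pattern (or a fixed-vl pattern no
// larger than the known minimum vector length) is a compile-time function
// of vscale, so it folds to a constant expression.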
static std::optional<Instruction *>
// ...
  const auto Pattern = cast<ConstantInt>(II.getArgOperand(0))->getZExtValue();

  if (Pattern == AArch64SVEPredPattern::all) {
    Constant *StepVal = ConstantInt::get(II.getType(), NumElts);
// ...
  return MinNumElts && NumElts >= MinNumElts
// ...
                   II, ConstantInt::get(II.getType(), MinNumElts)))
  Value *PgVal = II.getArgOperand(0);
  Value *OpVal = II.getArgOperand(1);
// ...
  if (PgVal == OpVal &&
      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
       II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
    Value *Ops[] = {PgVal, OpVal};
// ...
    return std::nullopt;
// ...
  if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
// ...
  if ((Pg == Op) &&
      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
      ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
       (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
       (OpIID == Intrinsic::aarch64_sve_and_z) ||
       (OpIID == Intrinsic::aarch64_sve_bic_z) ||
       (OpIID == Intrinsic::aarch64_sve_eor_z) ||
       (OpIID == Intrinsic::aarch64_sve_nand_z) ||
       (OpIID == Intrinsic::aarch64_sve_nor_z) ||
       (OpIID == Intrinsic::aarch64_sve_orn_z) ||
       (OpIID == Intrinsic::aarch64_sve_orr_z))) {
// ...
  return std::nullopt;
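
// Fuse a multiply into an accumulating add/sub (mla/mad, fmla/fmad and
// friends) when the multiply has a single use and, for floating point,
// the fast-math flags permit contraction.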
template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
// ...
                            bool MergeIntoAddendOp) {
// ...
  Value *MulOp0, *MulOp1, *AddendOp, *Mul;
  if (MergeIntoAddendOp) {
    AddendOp = II.getOperand(1);
    Mul = II.getOperand(2);
  } else {
    AddendOp = II.getOperand(2);
    Mul = II.getOperand(1);
  }
// ...
    return std::nullopt;

  if (!Mul->hasOneUse())
    return std::nullopt;
// ...
  if (II.getType()->isFPOrFPVectorTy()) {
// ...
      return std::nullopt;
// ...
      return std::nullopt;
// ...
  if (MergeIntoAddendOp)
// ...
        {P, AddendOp, MulOp0, MulOp1}, FMFSource);
// ...
        {P, MulOp0, MulOp1, AddendOp}, FMFSource);
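
// An SVE ld1/st1 whose predicate allows it can be rewritten as an ordinary
// (or masked) LLVM load/store, with the intrinsic's metadata preserved.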
static std::optional<Instruction *>
// ...
  Value *Pred = II.getOperand(0);
  Value *PtrOp = II.getOperand(1);
  Type *VecTy = II.getType();
// ...
  Load->copyMetadata(II);
// ...
static std::optional<Instruction *>
// ...
  Value *VecOp = II.getOperand(0);
  Value *Pred = II.getOperand(1);
  Value *PtrOp = II.getOperand(2);
// ...
  Store->copyMetadata(II);
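
// Map the unpredicated (_u) floating-point intrinsics onto the matching IR
// binary operators; under an all-active predicate the two are equivalent.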
  switch (Intrinsic) {
  case Intrinsic::aarch64_sve_fmul_u:
    return Instruction::BinaryOps::FMul;
  case Intrinsic::aarch64_sve_fadd_u:
    return Instruction::BinaryOps::FAdd;
  case Intrinsic::aarch64_sve_fsub_u:
    return Instruction::BinaryOps::FSub;
// ...
    return Instruction::BinaryOpsEnd;
// ...
static std::optional<Instruction *>
// ...
  if (II.isStrictFP())
    return std::nullopt;

  auto *OpPredicate = II.getOperand(0);
// ...
  if (BinOpCode == Instruction::BinaryOpsEnd ||
      !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
    return std::nullopt;
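
// With an all-active predicate a predicated intrinsic can simply be
// redeclared as its unpredicated _u counterpart.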
  auto *OpPredicate = II.getOperand(0);
  if (!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
    return std::nullopt;

  auto *Mod = II.getModule();
// ...
  II.setCalledFunction(NewDecl);
// ...
static std::optional<Instruction *>
// ...
                                     Intrinsic::aarch64_sve_mla>(
// ...
                                     Intrinsic::aarch64_sve_mad>(
// ...
  return std::nullopt;
// ...
static std::optional<Instruction *>
// ...
                                           Intrinsic::aarch64_sve_fmla>(IC, II,
// ...
                                           Intrinsic::aarch64_sve_fmad>(IC, II,
// ...
                                           Intrinsic::aarch64_sve_fmla>(IC, II,
// ...
  return std::nullopt;
// ...
static std::optional<Instruction *>
// ...
                                           Intrinsic::aarch64_sve_fmla>(IC, II,
// ...
                                           Intrinsic::aarch64_sve_fmad>(IC, II,
// ...
                                           Intrinsic::aarch64_sve_fmla_u>(
// ...
static std::optional<Instruction *>
// ...
                                           Intrinsic::aarch64_sve_fmls>(IC, II,
// ...
                                           Intrinsic::aarch64_sve_fnmsb>(
// ...
                                           Intrinsic::aarch64_sve_fmls>(IC, II,
// ...
  return std::nullopt;
// ...
static std::optional<Instruction *>
// ...
                                           Intrinsic::aarch64_sve_fmls>(IC, II,
// ...
                                           Intrinsic::aarch64_sve_fnmsb>(
// ...
                                           Intrinsic::aarch64_sve_fmls_u>(
// ...
                                     Intrinsic::aarch64_sve_mls>(
// ...
  return std::nullopt;
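
// A multiply by a splat of 1 (or by a dup of 1 under the same predicate)
// folds to the multiplicand.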
  auto *OpPredicate = II.getOperand(0);
  auto *OpMultiplicand = II.getOperand(1);
  auto *OpMultiplier = II.getOperand(2);
// ...
  auto IsUnitSplat = [](auto *I) {
// ...
  auto IsUnitDup = [](auto *I) {
    auto *IntrI = dyn_cast<IntrinsicInst>(I);
    if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup)
// ...
    auto *SplatValue = IntrI->getOperand(2);
// ...
  if (IsUnitSplat(OpMultiplier)) {
// ...
    OpMultiplicand->takeName(&II);
// ...
  } else if (IsUnitDup(OpMultiplier)) {
// ...
    auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
    auto *DupPg = DupInst->getOperand(1);
// ...
    if (OpPredicate == DupPg) {
      OpMultiplicand->takeName(&II);
// ...
  Value *UnpackArg = II.getArgOperand(0);
  auto *RetTy = cast<ScalableVectorType>(II.getType());
  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
                  II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
// ...
  return std::nullopt;
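
// tbl with a constant splat index that is in range selects a single lane,
// so it can be rewritten as a splat of that element.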
  auto *OpVal = II.getOperand(0);
  auto *OpIndices = II.getOperand(1);
// ...
  auto *SplatValue = dyn_cast_or_null<ConstantInt>(getSplatValue(OpIndices));
  if (!SplatValue ||
      SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
    return std::nullopt;
// ...
  constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
  constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
// ...
  if ((match(II.getArgOperand(0),
             m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(m_Value(A)))) &&
// ...
       m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(m_Value(B))))) ||
// ...
    auto *TyA = cast<ScalableVectorType>(A->getType());
    if (TyA == B->getType() &&
// ...
  return std::nullopt;
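
// zip1/zip2 of a matching uzp1/uzp2 pair reconstructs one of the original
// operands.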
  if (match(II.getArgOperand(0),
// ...
      match(II.getArgOperand(1), m_Intrinsic<Intrinsic::aarch64_sve_uzp2>(
// ...
        II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));
// ...
  return std::nullopt;
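
// A gather (and, below, a scatter) whose offsets come from a stride-1
// 'index' intrinsic is really a contiguous access starting at
// BasePtr + IndexBase, so it can become a masked load/store.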
static std::optional<Instruction *>
// ...
  Value *Mask = II.getOperand(0);
  Value *BasePtr = II.getOperand(1);
// ...
  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
// ...
        BasePtr->getPointerAlignment(II.getDataLayout());
// ...
        BasePtr, IndexBase);
// ...
  return std::nullopt;
// ...
static std::optional<Instruction *>
// ...
  Value *Val = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *BasePtr = II.getOperand(2);
// ...
  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
// ...
        BasePtr->getPointerAlignment(II.getDataLayout());
// ...
        BasePtr, IndexBase);
// ...
  return std::nullopt;
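
// A signed divide by a power-of-two splat becomes an ASRD by log2 of the
// divisor; for a negative power of two the result is additionally negated.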
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *DivVec = II.getOperand(2);
// ...
  ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
  if (!SplatConstantInt)
    return std::nullopt;
// ...
        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
// ...
        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
// ...
        Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
// ...
  return std::nullopt;
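
// Recognise a dupq_lane of a short repeating insertelement chain and turn
// it into a single wider-element splat: bitcast the fixed pattern to a
// wider integer type, splat it with a zero shuffle mask, then bitcast
// back to the requested type.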
  size_t VecSize = Vec.size();
// ...
  size_t HalfVecSize = VecSize / 2;
// ...
    if (*LHS != nullptr && *RHS != nullptr) {
// ...
    if (*LHS == nullptr && *RHS != nullptr)
// ...
          m_Intrinsic<Intrinsic::vector_insert>(
// ...
      !isa<FixedVectorType>(CurrentInsertElt->getType()))
    return std::nullopt;
  auto IIScalableTy = cast<ScalableVectorType>(II.getType());
// ...
  while (auto InsertElt = dyn_cast<InsertElementInst>(CurrentInsertElt)) {
    auto Idx = cast<ConstantInt>(InsertElt->getOperand(2));
    Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
    CurrentInsertElt = InsertElt->getOperand(0);
// ...
      isa<PoisonValue>(CurrentInsertElt) && isa<PoisonValue>(Default);
// ...
    return std::nullopt;
// ...
  for (size_t I = 0; I < Elts.size(); I++) {
    if (Elts[I] == nullptr)
// ...
  if (InsertEltChain == nullptr)
    return std::nullopt;
// ...
  unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
  unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
                                 IIScalableTy->getMinNumElements() /
// ...
  auto *WideShuffleMaskTy =
// ...
  auto NarrowBitcast =
// ...
  return std::nullopt;
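
// srshl of a value known to be non-negative (the result of abs/sqabs
// merged under the same predicate) with a non-negative shift amount
// behaves exactly like lsl.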
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *Shift = II.getOperand(2);
// ...
  Value *AbsPred, *MergedValue;
  if (!match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_sqabs>(
// ...
      !match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(
// ...
    return std::nullopt;
// ...
    return std::nullopt;
// ...
    return std::nullopt;
// ...
      {II.getType()}, {Pred, Vec, Shift});
std::optional<Instruction *>
// ...
  case Intrinsic::aarch64_sve_ld1_gather:
  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ld1q_gather_index:
  case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
  case Intrinsic::aarch64_sve_ld1ro:
  case Intrinsic::aarch64_sve_ld1rq:
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
  case Intrinsic::aarch64_sve_ldff1:
  case Intrinsic::aarch64_sve_ldff1_gather:
  case Intrinsic::aarch64_sve_ldff1_gather_index:
  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ldnf1:
  case Intrinsic::aarch64_sve_ldnt1:
  case Intrinsic::aarch64_sve_ldnt1_gather:
  case Intrinsic::aarch64_sve_ldnt1_gather_index:
  case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
// ...
  case Intrinsic::aarch64_neon_fmaxnm:
  case Intrinsic::aarch64_neon_fminnm:
// ...
  case Intrinsic::aarch64_sve_convert_from_svbool:
// ...
  case Intrinsic::aarch64_sve_dup:
// ...
  case Intrinsic::aarch64_sve_dup_x:
// ...
  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpne_wide:
// ...
  case Intrinsic::aarch64_sve_rdffr:
// ...
  case Intrinsic::aarch64_sve_lasta:
  case Intrinsic::aarch64_sve_lastb:
// ...
  case Intrinsic::aarch64_sve_clasta_n:
  case Intrinsic::aarch64_sve_clastb_n:
// ...
  case Intrinsic::aarch64_sve_cntd:
// ...
  case Intrinsic::aarch64_sve_cntw:
// ...
  case Intrinsic::aarch64_sve_cnth:
// ...
  case Intrinsic::aarch64_sve_cntb:
// ...
  case Intrinsic::aarch64_sve_ptest_any:
  case Intrinsic::aarch64_sve_ptest_first:
  case Intrinsic::aarch64_sve_ptest_last:
// ...
  case Intrinsic::aarch64_sve_fabd:
// ...
  case Intrinsic::aarch64_sve_fadd:
// ...
  case Intrinsic::aarch64_sve_fadd_u:
// ...
  case Intrinsic::aarch64_sve_fdiv:
// ...
  case Intrinsic::aarch64_sve_fmax:
// ...
  case Intrinsic::aarch64_sve_fmaxnm:
// ...
  case Intrinsic::aarch64_sve_fmin:
// ...
  case Intrinsic::aarch64_sve_fminnm:
// ...
  case Intrinsic::aarch64_sve_fmla:
// ...
  case Intrinsic::aarch64_sve_fmls:
// ...
  case Intrinsic::aarch64_sve_fmul:
// ...
  case Intrinsic::aarch64_sve_fmul_u:
// ...
  case Intrinsic::aarch64_sve_fmulx:
// ...
  case Intrinsic::aarch64_sve_fnmla:
// ...
  case Intrinsic::aarch64_sve_fnmls:
// ...
  case Intrinsic::aarch64_sve_fsub:
// ...
  case Intrinsic::aarch64_sve_fsub_u:
// ...
  case Intrinsic::aarch64_sve_add:
// ...
  case Intrinsic::aarch64_sve_add_u:
// ...
                                             Intrinsic::aarch64_sve_mla_u>(
// ...
  case Intrinsic::aarch64_sve_mla:
// ...
  case Intrinsic::aarch64_sve_mls:
// ...
  case Intrinsic::aarch64_sve_mul:
// ...
  case Intrinsic::aarch64_sve_mul_u:
// ...
  case Intrinsic::aarch64_sve_sabd:
// ...
  case Intrinsic::aarch64_sve_smax:
// ...
  case Intrinsic::aarch64_sve_smin:
// ...
  case Intrinsic::aarch64_sve_smulh:
// ...
  case Intrinsic::aarch64_sve_sub:
// ...
  case Intrinsic::aarch64_sve_sub_u:
// ...
                                             Intrinsic::aarch64_sve_mls_u>(
// ...
  case Intrinsic::aarch64_sve_uabd:
// ...
  case Intrinsic::aarch64_sve_umax:
// ...
  case Intrinsic::aarch64_sve_umin:
// ...
  case Intrinsic::aarch64_sve_umulh:
// ...
  case Intrinsic::aarch64_sve_asr:
// ...
  case Intrinsic::aarch64_sve_lsl:
// ...
  case Intrinsic::aarch64_sve_lsr:
// ...
  case Intrinsic::aarch64_sve_and:
// ...
  case Intrinsic::aarch64_sve_bic:
// ...
  case Intrinsic::aarch64_sve_eor:
// ...
  case Intrinsic::aarch64_sve_orr:
// ...
  case Intrinsic::aarch64_sve_sqsub:
// ...
  case Intrinsic::aarch64_sve_uqsub:
// ...
  case Intrinsic::aarch64_sve_tbl:
// ...
  case Intrinsic::aarch64_sve_uunpkhi:
  case Intrinsic::aarch64_sve_uunpklo:
  case Intrinsic::aarch64_sve_sunpkhi:
  case Intrinsic::aarch64_sve_sunpklo:
// ...
  case Intrinsic::aarch64_sve_uzp1:
// ...
  case Intrinsic::aarch64_sve_zip1:
  case Intrinsic::aarch64_sve_zip2:
// ...
  case Intrinsic::aarch64_sve_ld1_gather_index:
// ...
  case Intrinsic::aarch64_sve_st1_scatter_index:
// ...
  case Intrinsic::aarch64_sve_ld1:
// ...
  case Intrinsic::aarch64_sve_st1:
// ...
  case Intrinsic::aarch64_sve_sdiv:
// ...
  case Intrinsic::aarch64_sve_sel:
// ...
  case Intrinsic::aarch64_sve_srshl:
// ...
  case Intrinsic::aarch64_sve_dupq_lane:
// ...
  return std::nullopt;
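
// simplifyDemandedVectorEltsIntrinsic: for the narrowing NEON intrinsics
// below, demanded-elements information can simply be forwarded to the
// source operand.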
        SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
// ...
  case Intrinsic::aarch64_neon_fcvtxn:
  case Intrinsic::aarch64_neon_rshrn:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqxtn:
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);
// ...
  return std::nullopt;
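
// isWideningInstruction: add/sub/mul forms whose operands are matching
// extends can use the NEON widening instructions (e.g. {s,u}addl and
// {s,u}mull), which absorb the extend for free.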
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
// ...
                                           Type *SrcOverrideTy) {
// ...
  auto toVectorTy = [&](Type *ArgTy) {
// ...
                           cast<VectorType>(DstTy)->getElementCount());
// ...
      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
// ...
  Type *SrcTy = SrcOverrideTy;
// ...
  case Instruction::Add:
  case Instruction::Sub:
// ...
    if (isa<SExtInst>(Args[1]) || isa<ZExtInst>(Args[1])) {
// ...
          toVectorTy(cast<Instruction>(Args[1])->getOperand(0)->getType());
// ...
  case Instruction::Mul: {
// ...
    if ((isa<SExtInst>(Args[0]) && isa<SExtInst>(Args[1])) ||
        (isa<ZExtInst>(Args[0]) && isa<ZExtInst>(Args[1]))) {
// ...
          toVectorTy(cast<Instruction>(Args[0])->getOperand(0)->getType());
    } else if (isa<ZExtInst>(Args[0]) || isa<ZExtInst>(Args[1])) {
// ...
  assert(SrcTy && "Expected some SrcTy");
// ...
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
// ...
      DstTyL.first * DstTyL.second.getVectorMinNumElements();
// ...
      SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
// ...
  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
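
// An extend is considered part of an averaging expression (lowered to the
// halving-add instructions) when it feeds an add whose result is logically
// shifted right and truncated back to the source width.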
      (Src->isScalableTy() && !ST->hasSVE2()))
// ...
      dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
  if (AddUser && AddUser->getOpcode() == Instruction::Add)
// ...
  auto *Shr = dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
  if (!Shr || Shr->getOpcode() != Instruction::LShr)
// ...
  auto *Trunc = dyn_cast_or_null<Instruction>(Shr->getUniqueUndroppableUser());
  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
      Src->getScalarSizeInBits() !=
          cast<CastInst>(Trunc)->getDestTy()->getScalarSizeInBits())
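
// Extends that fold into a widening instruction via their single user are
// costed as free (or near free); everything else falls through to the
// generic conversion cost tables.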
  assert(ISD && "Invalid opcode");
// ...
  if (I && I->hasOneUser()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
// ...
    if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands, Src)) {
// ...
      if (SingleUser->getOpcode() == Instruction::Add) {
        if (I == SingleUser->getOperand(1) ||
            (isa<CastInst>(SingleUser->getOperand(1)) &&
             cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
// ...
  if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
// ...
    return Cost == 0 ? 0 : 1;
// ...
    EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;
// ...
    std::pair<InstructionCost, MVT> LT =
// ...
        LT.second.getVectorElementType().getSizeInBits();
// ...
    return AdjustCost(Entry->Cost);
// ...
  if (ST->hasFullFP16())
// ...
      return AdjustCost(Entry->Cost);
// ...
        Opcode, LegalTy, Src, CCH, CostKind, I);
// ...
    return Part1 + Part2;
// ...
  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
// ...
  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
// ...
  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
// ...
  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
// ...
  case Instruction::SExt:
// ...
  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
// ...
  return Opcode == Instruction::PHI ? 0 : 1;
// ...
  if (!LT.second.isVector())
// ...
  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
// ...
  if (I && dyn_cast<LoadInst>(I->getOperand(1)))
// ...
      Opcode == Instruction::InsertElement && Op0 && !isa<UndefValue>(Op0);
  return getVectorInstrCostHelper(nullptr, Val, Index, HasRealUse);
// ...
  return getVectorInstrCostHelper(&I, Val, Index, true /* HasRealUse */);
// ...
  if (isa<ScalableVectorType>(Ty))
// ...
  return DemandedElts.popcount() * (Insert + Extract) *
// ...
                                 Op2Info, Args, CxtI);
// ...
    return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
// ...
        Opcode, Ty, CostKind, Op1Info, Op2Info);
// ...
    if (isa<FixedVectorType>(Ty) && cast<FixedVectorType>(Ty)
                                            ->getPrimitiveSizeInBits()
                                            .getFixedValue() < 128) {
// ...
      if (nullptr != Entry)
// ...
    if (LT.second.getScalarType() == MVT::i8)
// ...
    else if (LT.second.getScalarType() == MVT::i16)
// ...
    if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
// ...
      return (4 + DivCost) * VTy->getNumElements();
// ...
    if (LT.second == MVT::v2i64 && ST->hasSVE())
// ...
    if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
// ...
      return LT.first * 14;
// ...
    return 2 * LT.first;
// ...
    return 2 * LT.first;
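
// In vectorised code, address computations are hidden by the surrounding
// vector work, so addresses that stay within MaxMergeDistance of their
// base are treated as free.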
// ...
  int MaxMergeDistance = 64;
// ...
    return NumVectorInstToHideOverhead;
// ...
  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
// ...
    const int AmortizationCost = 20;
// ...
      VecPred = CurrentPred;
// ...
      static const auto ValidMinMaxTys = {
          MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
          MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
      static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
// ...
      if (any_of(ValidMinMaxTys, [&](MVT M) { return M == LT.second; }) ||
          (ST->hasFullFP16() &&
           any_of(ValidFP16MinMaxTys, [&](MVT M) { return M == LT.second; })))
// ...
    static const TypeConversionCostTblEntry VectorSelectTbl[] = {
// ...
        {ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
        {ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
        {ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}
// ...
  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SETCC) {
// ...
    if (LT.second == MVT::v4f16 && !ST->hasFullFP16())
      return LT.first * 4;
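
// Without strict alignment requirements, memcmp can be expanded inline
// using wide, possibly overlapping loads.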
  if (ST->requiresStrictAlign()) {
// ...
  Options.AllowOverlappingLoads = true;
// ...
  Options.LoadSizes = {8, 4, 2, 1};
  Options.AllowedTailExpansions = {3, 5, 6};
// ...
  return ST->hasSVE();
// ...
  if (!LT.first.isValid())
// ...
  auto *VT = cast<VectorType>(Src);
  if (VT->getElementType()->isIntegerTy(1))
// ...
InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
// ...
  auto *VT = cast<VectorType>(DataTy);
// ...
  if (!LT.first.isValid())
// ...
  if (!LT.second.isVector() ||
// ...
      VT->getElementType()->isIntegerTy(1))
// ...
  ElementCount LegalVF = LT.second.getVectorElementCount();
// ...
      {TTI::OK_AnyValue, TTI::OP_None}, I);
// ...
  if (VT == MVT::Other)
// ...
  if (!LT.first.isValid())
// ...
  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
// ...
      (VTy->getElementType()->isIntegerTy(1) &&
       !VTy->getElementCount().isKnownMultipleOf(
// ...
  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
// ...
    const int AmortizationCost = 6;
// ...
    return LT.first * 2 * AmortizationCost;
// ...
  if (VT == MVT::v4i8)
// ...
    return cast<FixedVectorType>(Ty)->getNumElements() * 2;
// ...
  if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
// ...
      *Alignment != Align(1))
// ...
  while (!TypeWorklist.empty()) {
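
// Interleaved accesses whose factor matches the native ld2/ld3/ld4 and
// st2/st3/st4 forms (factor 2 only for scalable vectors) are costed as
// those multi-register instructions.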
    bool UseMaskForCond, bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  auto *VecVTy = cast<VectorType>(VecTy);

  if (VecTy->isScalableTy() && (!ST->hasSVE() || Factor != 2))
// ...
  if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))
// ...
  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
// ...
        VecVTy->getElementCount().divideCoefficientBy(Factor));
// ...
    if (MinElts % Factor == 0 &&
// ...
                                           UseMaskForCond, UseMaskForGaps);
// ...
  for (auto *I : Tys) {
    if (!I->isVectorTy())
// ...
    if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() ==
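
// For loops with a runtime trip count, the unroll factor is guided by the
// number of affine strided loads in the loop, capped at MaxStridedLoads.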
  enum { MaxStridedLoads = 7 };
// ...
    int StridedLoads = 0;
// ...
    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {
        LoadInst *LMemI = dyn_cast<LoadInst>(&I);
// ...
        if (L->isLoopInvariant(PtrValue))
// ...
        const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
// ...
        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;
// ...
    return StridedLoads;
// ...
  int StridedLoads = countStridedLoads(L, SE);
// ...
             << " strided loads\n");
// ...
  if (L->getLoopDepth() > 1)
// ...
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
// ...
      if (I.getType()->isVectorTy())
// ...
      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
// ...
      !ST->getSchedModel().isOutOfOrder()) {
    Type *ExpectedType) {
// ...
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
// ...
    StructType *ST = dyn_cast<StructType>(ExpectedType);
// ...
    unsigned NumElts = Inst->arg_size() - 1;
    if (ST->getNumElements() != NumElts)
// ...
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
// ...
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
// ...
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)
// ...
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;
// ...
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;
// ...
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
// ...
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
// ...
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
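
// sext instructions feeding multi-operand GEPs are worth promoting so
// that the extension can fold into the addressing mode.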
bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))
// ...
  Type *ConsideredSExtType =
// ...
  if (I.getType() != ConsideredSExtType)
// ...
  for (const User *U : I.users()) {
// ...
      Considerable = true;
// ...
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;
// ...
  return Considerable;
// ...
  if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
// ...
  return LegalizationCost + 2;
// ...
  LegalizationCost *= LT.first - 1;
// ...
  assert(ISD && "Invalid opcode");
// ...
  return LegalizationCost + 2;
// ...
    std::optional<FastMathFlags> FMF,
// ...
  if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
// ...
    return BaseCost + FixedVTy->getNumElements();
// ...
  if (Opcode != Instruction::FAdd)
// ...
  auto *VTy = cast<ScalableVectorType>(ValTy);
// ...
  if (isa<ScalableVectorType>(ValTy))
// ...
  MVT MTy = LT.second;
// ...
  assert(ISD && "Invalid opcode");
// ...
    return (LT.first - 1) + Entry->Cost;
// ...
  auto *ValVTy = cast<FixedVectorType>(ValTy);
// ...
  if (LT.first != 1) {
// ...
    ExtraCost *= LT.first - 1;
// ...
  auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
  return Cost + ExtraCost;
// ...
  EVT PromotedVT = LT.second.getScalarType() == MVT::i1
// ...
  if (LT.second.getScalarType() == MVT::i1) {
// ...
  assert(Entry && "Illegal Type for Splice");
  LegalizationCost += Entry->Cost;
  return LegalizationCost * LT.first;
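
// Shuffles wider than the legal vector type are decomposed into per-chunk
// shuffles; each chunk is costed by how many distinct source registers it
// reads (identity chunks are free, one- and two-source chunks are cheap).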
// ...
  if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
// ...
      Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
// ...
    if (Args.size() >= 1 && isa<LoadInst>(Args[0]) &&
// ...
      return std::max<InstructionCost>(1, LT.first / 4);
// ...
    unsigned TpNumElts = Mask.size();
    unsigned LTNumElts = LT.second.getVectorNumElements();
    unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
// ...
    for (unsigned N = 0; N < NumVecs; N++) {
// ...
      unsigned Source1, Source2;
      unsigned NumSources = 0;
      for (unsigned E = 0; E < LTNumElts; E++) {
        int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
// ...
        unsigned Source = MaskElt / LTNumElts;
        if (NumSources == 0) {
// ...
        } else if (NumSources == 1 && Source != Source1) {
// ...
        } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
// ...
        if (Source == Source1)
// ...
        else if (Source == Source2)
          NMask.push_back(MaskElt % LTNumElts + LTNumElts);
// ...
      if (NumSources <= 2)
// ...
                               NTp, NMask, CostKind, 0, nullptr, Args, CxtI);
// ...
        return ME.value() % LTNumElts == ME.index();
// ...
        Cost += LTNumElts - 1;
// ...
  if (IsExtractSubvector && LT.second.isFixedLengthVector())
// ...
  bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
  if (IsLoad && LT.second.isVector() &&
// ...
          LT.second.getVectorElementCount()))
// ...
      all_of(Mask, [](int E) { return E < 8; }))
// ...
  if (!Mask.empty() && LT.second.isFixedLengthVector() &&
// ...
        return M.value() < 0 || M.value() == (int)M.index();
// ...
  if (LT.second.isFixedLengthVector() &&
      LT.second.getVectorNumElements() == Mask.size() &&
// ...
      (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
       isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
// ...
       all_of(Mask, [&Mask](int M) { return M < 0 || M == Mask[0]; })))
// ...
    return LT.first * Entry->Cost;
// ...
      LT.second.getSizeInBits() <= 128 && SubTp) {
// ...
    if (SubLT.second.isVector()) {
      int NumElts = LT.second.getVectorNumElements();
      int NumSubElts = SubLT.second.getVectorNumElements();
      if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
// ...
  if (IsExtractSubvector)
// ...
  if (isa<LoadInst>(&I) || isa<StoreInst>(&I)) {
// ...
  unsigned NumInsns = 0;
// ...
    NumInsns += BB->sizeWithoutDebug();
// ...
                                              int64_t Scale,
                                              unsigned AddrSpace) const {
// ...
      isa<BranchInst>(I->getNextNode()) &&
      cast<BranchInst>(I->getNextNode())->isUnconditional())
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides the interface for the instcombine pass implementation.
This file defines the LoopVectorizationLegality class.
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
uint64_t IntrinsicInst * II
const char LLVMTargetMachineRef TM
static uint64_t getBits(uint64_t Val, int Start, int End)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getFastMathFlags(const MachineInstr &I)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
unsigned getVectorInsertExtractBaseCost() const
ARMProcFamilyEnum getProcFamily() const
Returns ARM processor family.
unsigned getMaxInterleaveFactor() const
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
TailFoldingOpts getSVETailFoldingDefaultOpts() const
bool useSVEForFixedLengthVectors() const
unsigned getMinSVEVectorSizeInBits() const
bool isSVEAvailable() const
Returns true if the target has SVE and can use the full range of SVE instructions,...
InstructionCost getSpliceCost(VectorType *Tp, int Index)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
bool shouldTreatInstructionLikeSelect(const Instruction *I)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)
bool prefersVectorizedAddressing() const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind)
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isElementTypeLegalForScalableVector(Type *Ty) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool useNeonVector(const Type *Ty) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool isLegalMaskedGatherScatter(Type *DataType) const
unsigned getMaxInterleaveFactor(ElementCount VF)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCost(int64_t Val)
Calculate the cost of materializing a 64-bit value.
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src)
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
EVT getPromotedVTForPredicate(EVT VT) const
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, bool UseScalable) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const
Returns true if VecTy is a legal interleaved access type.
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
bool isTypeLegal(Type *Ty)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute a cost of the given call instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
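These cost hooks are normally reached through the TargetTransformInfo wrapper rather than called directly. A sketch of a client-side query, assuming a TargetTransformInfo obtained from the analysis manager:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

// Sketch: cost a <4 x i32> add plus an aligned <4 x i32> load at the
// default reciprocal-throughput cost kind.
static InstructionCost costAddAndLoad(const TargetTransformInfo &TTI,
                                      LLVMContext &Ctx) {
  auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  InstructionCost Cost = TTI.getArithmeticInstrCost(
      Instruction::Add, VecTy, TargetTransformInfo::TCK_RecipThroughput);
  Cost += TTI.getMemoryOpCost(Instruction::Load, VecTy, Align(16),
                              /*AddressSpace=*/0,
                              TargetTransformInfo::TCK_RecipThroughput);
  return Cost;
}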
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst); holds everything related to calling a function.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature.
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
bool isIntPredicate() const
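The four bit columns in the FCMP predicates above read as the U(nordered), L(ess), G(reater), E(qual) condition bits, and the ordered/unordered split is what clients usually branch on. A small sketch:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: ordered FP predicates never return true when either operand
// is NaN, which often makes them cheaper to lower.
static bool isOrderedFPCmp(CmpInst::Predicate P) {
  return CmpInst::isFPPredicate(P) && CmpInst::isOrdered(P);
}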
static ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
const APInt & getValue() const
Return the constant as an APInt value reference.
static Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
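A short sketch of how these constant factories and accessors combine:

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// Sketch: Constant::getNullValue returns the right '0' for any type;
// for vector types that is a ConstantAggregateZero.
static bool isConstZero(const Value *V) {
  if (const auto *CI = dyn_cast<ConstantInt>(V))
    return CI->isZero(); // convenience wrapper over getValue().isZero()
  return isa<ConstantAggregateZero>(V);
}

static Constant *zeroVector(LLVMContext &Ctx) {
  return Constant::getNullValue(
      FixedVectorType::get(Type::getInt32Ty(Ctx), 4));
}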
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
An instruction for type-safe pointer arithmetic to access elements of arrays and structs.
bool isEquality() const
Return true if this predicate is either EQ or NE.
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
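A sketch tying several of the IRBuilder calls above together: emit a masked vector load and add a splatted scalar to it (the block and operand values are assumed to be supplied by the caller):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Sketch: append a masked <4 x i32> load and a splatted add to BB.
static Value *maskedLoadPlusSplat(BasicBlock *BB, Value *Ptr, Value *Mask,
                                  Value *Scalar) {
  IRBuilder<> Builder(BB->getContext());
  Builder.SetInsertPoint(BB); // append to the end of BB
  auto *VecTy = FixedVectorType::get(Builder.getInt32Ty(), 4);
  Value *Ld = Builder.CreateMaskedLoad(VecTy, Ptr, Align(4), Mask,
                                       /*PassThru=*/nullptr, "ld");
  Value *Splat = Builder.CreateVectorSplat(4, Scalar, "splat");
  return Builder.CreateBinOp(Instruction::Add, Ld, Splat, "sum");
}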
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
static InstructionCost getInvalid(CostType Val=0)
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
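The combiner hooks above follow a convention: hand back a replacement via replaceInstUsesWith, or signal "no change". A sketch built around a hypothetical fold:

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>
using namespace llvm;

// Sketch: a hypothetical identity intrinsic folds to its only operand.
// replaceInstUsesWith is the combiner-aware RAUW; returning nullopt
// tells the driver nothing changed.
static std::optional<Instruction *> foldIdentity(InstCombiner &IC,
                                                 IntrinsicInst &II) {
  if (II.arg_size() == 1 &&
      II.getArgOperand(0)->getType() == II.getType())
    return IC.replaceInstUsesWith(II, II.getArgOperand(0));
  return std::nullopt;
}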
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address space zero).
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of existing predicates.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
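A sketch of how the legality queries above are typically consumed when deciding what to vectorize, assuming a populated LoopVectorizationLegality:

#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
using namespace llvm;

// Sketch: accept only integer add/mul reductions.
static bool allIntAddMulReductions(LoopVectorizationLegality &LVL) {
  for (auto &Entry : LVL.getReductionVars()) {
    const RecurrenceDescriptor &Rdx = Entry.second;
    RecurKind K = Rdx.getRecurrenceKind();
    if (K != RecurKind::Add && K != RecurKind::Mul)
      return false;
  }
  return true;
}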
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool requiresSMChange(const SMEAttrs &Callee) const
void set(unsigned M, bool Enable=true)
bool hasStreamingBody() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
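A sketch using the static mask classifiers above: recognise the factor-2 de-interleave masks <0,2,4,...> and <1,3,5,...>:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: Index comes back 0 for the even lanes, 1 for the odd lanes.
static bool isEvenOddExtract(ArrayRef<int> Mask, unsigned &Index) {
  return ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, /*Factor=*/2,
                                                       Index);
}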
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
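A one-line sketch of the fixed-plus-scalable split StackOffset models (the byte counts are illustrative):

#include "llvm/Support/TypeSize.h"
using namespace llvm;

// Sketch: 16 fixed bytes plus 32 bytes per vscale, e.g. two SVE slots.
static StackOffset exampleFrameOffset() {
  return StackOffset::getFixed(16) + StackOffset::getScalable(32);
}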
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Class to represent struct types.
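The StringRef::split and StringSwitch entries above combine naturally for lightweight option parsing; a generic sketch:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

// Sketch: parse "mode=value" style strings; -1 means unrecognised.
static int parseMode(StringRef Opt) {
  auto [Key, Rest] = Opt.split('='); // split at the first '='
  (void)Rest;                        // value part unused in this sketch
  return StringSwitch<int>(Key)
      .Case("off", 0)
      .Case("fast", 1)
      .Case("exact", 2)
      .Default(-1);
}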
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lowering.
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const
Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in order to type-legalize it.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal'), or we need to promote it to a larger type (return 'Promote'), or we need to expand it into multiple registers of smaller integer type (return 'Expand').
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
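A sketch of the usual pre-costing check built from these hooks, assuming a TargetLowering reference:

#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Sketch: detect types the legalizer would split or scalarize, which
// usually multiplies the modelled cost.
static bool vectorTypeGetsBrokenUp(const TargetLowering &TLI,
                                   LLVMContext &Ctx, EVT VT) {
  if (TLI.isTypeLegal(VT))
    return false;
  TargetLowering::LegalizeKind LK = TLI.getTypeConversion(Ctx, VT);
  return LK.first == TargetLowering::TypeSplitVector ||
         LK.first == TargetLowering::TypeScalarizeVector;
}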
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isFP128Ty() const
Return true if this is 'fp128'.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
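A sketch of a legality-style predicate over these Type queries, similar in spirit to what vector cost models do (the helper name is hypothetical):

#include "llvm/IR/Type.h"
using namespace llvm;

// Sketch: treat wide-enough integers and common FP types as cheap.
static bool isCheapElementType(Type *Ty) {
  Type *EltTy = Ty->getScalarType(); // element type for vectors
  if (EltTy->isIntegerTy())
    return EltTy->getScalarSizeInBits() <= 64;
  // bfloat and fp128 often take promotion or library paths.
  return EltTy->isFloatingPointTy() && !EltTy->isBFloatTy() &&
         !EltTy->isFP128Ty();
}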
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
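ElementCount is what distinguishes <4 x i32> from <vscale x 4 x i32>; a minimal sketch:

#include "llvm/IR/DerivedTypes.h"
#include <cassert>
using namespace llvm;

// Sketch: the same minimum lane count, once fixed and once scalable.
static void makeVectors(LLVMContext &Ctx) {
  Type *I32 = Type::getInt32Ty(Ctx);
  auto *Fixed = VectorType::get(I32, ElementCount::getFixed(4));
  auto *Scalable = VectorType::get(I32, ElementCount::getScalable(4));
  assert(!Fixed->getElementCount().isScalable());
  assert(Scalable->getElementCount().isScalable() &&
         Scalable->getElementCount().getKnownMinValue() == 4);
  (void)Fixed;
  (void)Scalable;
}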
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the given register size.
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to synthesize the immediate.
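These two helpers are the usual way to price an AArch64 immediate: one ORR if it encodes as a logical immediate, otherwise the length of the MOVZ/MOVK/MOVN expansion. A sketch using the target-internal headers:

// Include paths relative to llvm/lib/Target/AArch64:
#include "AArch64ExpandImm.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Sketch: count the real move-immediate instructions for a 64-bit Imm.
static unsigned movImmInstrCount(uint64_t Imm) {
  if (AArch64_AM::isLogicalImmediate(Imm, /*regSize=*/64))
    return 1; // a single ORR with an encoded bitmask
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, /*BitSize=*/64, Insn);
  return static_cast<unsigned>(Insn.size());
}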
static constexpr unsigned SVEBitsPerBlock
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to memory with one type and loaded from the same address with the other type.
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of type iN containing the high bits of the result.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
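The matchers above compose into declarative checks; a sketch that recognises select(cmp, X, 0) with the arms in either order:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Sketch: m_Cmp() accepts any compare, m_ZeroInt() any integer zero
// (including zero splats), and m_Value(X) captures the other arm.
static bool isSelectOfZero(Value *V, Value *&X) {
  return match(V, m_Select(m_Cmp(), m_Value(X), m_ZeroInt())) ||
         match(V, m_Select(m_Cmp(), m_ZeroInt(), m_Value(X)));
}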
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
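The cost tables these lookups search are plain arrays keyed by ISD opcode and MVT. A sketch with placeholder (not real AArch64) costs:

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
using namespace llvm;

// Sketch: per-opcode, per-type cost overrides; fall back otherwise.
static const CostTblEntry ExampleCostTbl[] = {
    {ISD::ADD, MVT::v4i32, 1},
    {ISD::MUL, MVT::v4i32, 4},
};

static unsigned lookupCost(int ISD, MVT Ty, unsigned Fallback) {
  if (const auto *Entry = CostTableLookup(ExampleCostTbl, ISD, Ty))
    return Entry->Cost;
  return Fallback;
}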
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None. Reductions: Loops containing reductions. Recurrences: Loops containing fixed-order recurrences. Reverse: Reverse loops. Simple: Simple loops (not reductions or recurrences). All: All legal loop types.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence, and B, C, ... are the values from the original input ranges.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
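A sketch combining these bit utilities into a ceiling log2 (NextPowerOf2 returns a power of two strictly greater than its argument):

#include "llvm/Support/MathExtras.h"
using namespace llvm;

// Sketch: ceil(log2(N)) for N > 0.
static unsigned log2Ceil(uint64_t N) {
  if (isPowerOf2_64(N))
    return Log2_64(N);               // exact power of two
  return Log2_64(NextPowerOf2(N));   // round up to the next one
}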
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1, 3, 5, 7, 9, 11, 13, 15>.
constexpr int PoisonMaskElem
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Mod
The access may modify the value stored in memory.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> or <4, 12, 5, 13, 6, 14, 7, 15>.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FAnyOf
Any_of reduction with select(fcmp(),x,y) where one of (x,y) is loop invariant, and both x and y are integer type.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ IAnyOf
Any_of reduction with select(icmp(),x,y) where one of (x,y) is loop invariant, and both x and y are integer type.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
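A sketch of the value-returning overload, used here to prove trailing zero bits (e.g. that an index is a multiple of 8):

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Sketch: true if the low NumBits of V are provably zero.
static bool lowBitsKnownZero(const Value *V, unsigned NumBits,
                             const DataLayout &DL) {
  KnownBits Known = computeKnownBits(V, DL);
  return Known.countMinTrailingZeros() >= NumBits;
}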
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
@ Default
The result values are uniform if and only if all operands are uniform.
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
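A sketch widening a fixed-length vector EVT while staying within simple (MVT-backed) types:

#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>
using namespace llvm;

// Sketch: double the element count, falling back if the result is not
// representable as a simple MVT.
static EVT doubleElements(LLVMContext &Ctx, EVT VT) {
  assert(VT.isVector() && !VT.isScalableVector());
  EVT Wide = EVT::getVectorVT(Ctx, VT.getVectorElementType(),
                              VT.getVectorNumElements() * 2);
  return Wide.isSimple() ? Wide : VT;
}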
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
Type Conversion Cost Table.