#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64tti"
    "sve-prefer-fixed-over-scalable-if-equal", cl::Hidden);

    "Penalty of calling a function that requires a change to PSTATE.SM"));

    cl::desc(
        "Penalty of inlining a call that requires a change to PSTATE.SM"));

    cl::desc("The cost of a histcnt instruction"));

    cl::desc("The number of instructions to search for a redundant dmb"));
class TailFoldingOption {
  bool NeedsDefault = true;

  void setNeedsDefault(bool V) { NeedsDefault = V; }

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
    Bits &= ~DisableBits;
    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              "  (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";
  void operator=(const std::string &Val) {
    setNeedsDefault(false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);
      setInitialBits(TailFoldingOpts::Disabled);
    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
        "Control the use of vectorisation using tail-folding for SVE where the"
        " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
        "\ndisabled      (Initial) No loop types will vectorize using "
        "\ndefault       (Initial) Uses the default tail-folding settings for "
        "\nall           (Initial) All legal loop types will vectorize using "
        "\nsimple        (Initial) Use tail-folding for simple loops (not "
        "reductions or recurrences)"
        "\nreductions    Use tail-folding for loops containing reductions"
        "\nnoreductions  Inverse of above"
        "\nrecurrences   Use tail-folding for loops containing fixed order "
        "\nnorecurrences Inverse of above"
        "\nreverse       Use tail-folding for loops requiring reversed "
        "\nnoreverse     Inverse of above"),
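// Example usage, following the grammar described above: passing
// -sve-tail-folding=all+noreductions would request tail-folding for every
// legal loop type except loops containing reductions.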
          .Case("__arm_sme_state", true)
          .Case("__arm_tpidr2_save", true)
          .Case("__arm_tpidr2_restore", true)
          .Case("__arm_za_disable", true)
    if (isa<CallInst>(I) && !I.isDebugOrPseudoInst() &&
        (cast<CallInst>(I).isInlineAsm() || isa<IntrinsicInst>(I) ||

  SMEAttrs CallerAttrs(*Caller), CalleeAttrs(*Callee);

  if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
      CallerAttrs.requiresSMChange(CalleeAttrs) ||
      CallerAttrs.requiresPreservingZT0(CalleeAttrs) ||
      CallerAttrs.requiresPreservingAllZAState(CalleeAttrs)) {

  auto FVTy = dyn_cast<FixedVectorType>(Ty);
         FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
                                     unsigned DefaultCallPenalty) const {
  if (F == Call.getCaller())

  return DefaultCallPenalty;

  ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {

  return std::max<InstructionCost>(1, Cost);
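  // The immediate is sign-extended to a whole number of 64-bit chunks and each
  // chunk is costed independently by the loop above, so e.g. a 128-bit
  // constant is roughly priced as two 64-bit materializations, with a floor
  // of 1.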
  unsigned ImmIdx = ~0U;
  case Instruction::GetElementPtr:
  case Instruction::Store:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    int NumConstants = (BitSize + 63) / 64;
  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    int NumConstants = (BitSize + 63) / 64;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  if (TyWidth == 32 || TyWidth == 64)

  unsigned TotalHistCnts = 1;

  if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy)) {
    unsigned EC = VTy->getElementCount().getKnownMinValue();
    unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
    if (EC == 2 || (LegalEltSize == 32 && EC == 4))
    TotalHistCnts = EC / NaturalVectorWidth;
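    // Rough accounting: judging from the checks above, the natural histcnt
    // width is four 32-bit or two 64-bit elements per vector, so e.g. an
    // 8 x i32 bucket vector is costed as two histcnt operations.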
  if (auto *VTy = dyn_cast<ScalableVectorType>(RetTy))

  switch (ICA.getID()) {
  case Intrinsic::experimental_vector_histogram_add:
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                        MVT::v8i16, MVT::v2i32, MVT::v4i32,
                                        MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
    if (LT.second == MVT::v2i64)
    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    static const auto ValidSatTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
      return LT.first * Instrs;
  case Intrinsic::abs: {
    static const auto ValidAbsTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }))
  case Intrinsic::bswap: {
    static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
                                     MVT::v4i32, MVT::v2i64};
    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }) &&
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
  case Intrinsic::stepvector: {
    Cost += AddCost * (LT.first - 1);
  case Intrinsic::vector_extract:
  case Intrinsic::vector_insert: {
    bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
  case Intrinsic::bitreverse: {
        {Intrinsic::bitreverse, MVT::i32, 1},
        {Intrinsic::bitreverse, MVT::i64, 1},
        {Intrinsic::bitreverse, MVT::v8i8, 1},
        {Intrinsic::bitreverse, MVT::v16i8, 1},
        {Intrinsic::bitreverse, MVT::v4i16, 2},
        {Intrinsic::bitreverse, MVT::v8i16, 2},
        {Intrinsic::bitreverse, MVT::v2i32, 2},
        {Intrinsic::bitreverse, MVT::v4i32, 2},
        {Intrinsic::bitreverse, MVT::v1i64, 2},
        {Intrinsic::bitreverse, MVT::v2i64, 2},
      return LegalisationCost.first * Entry->Cost + 1;
    return LegalisationCost.first * Entry->Cost;
  case Intrinsic::ctpop: {
    if (!ST->hasNEON()) {
                        RetTy->getScalarSizeInBits()
    return LT.first * Entry->Cost + ExtraCost;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
        {Intrinsic::sadd_with_overflow, MVT::i8, 3},
        {Intrinsic::uadd_with_overflow, MVT::i8, 3},
        {Intrinsic::sadd_with_overflow, MVT::i16, 3},
        {Intrinsic::uadd_with_overflow, MVT::i16, 3},
        {Intrinsic::sadd_with_overflow, MVT::i32, 1},
        {Intrinsic::uadd_with_overflow, MVT::i32, 1},
        {Intrinsic::sadd_with_overflow, MVT::i64, 1},
        {Intrinsic::uadd_with_overflow, MVT::i64, 1},
        {Intrinsic::ssub_with_overflow, MVT::i8, 3},
        {Intrinsic::usub_with_overflow, MVT::i8, 3},
        {Intrinsic::ssub_with_overflow, MVT::i16, 3},
        {Intrinsic::usub_with_overflow, MVT::i16, 3},
        {Intrinsic::ssub_with_overflow, MVT::i32, 1},
        {Intrinsic::usub_with_overflow, MVT::i32, 1},
        {Intrinsic::ssub_with_overflow, MVT::i64, 1},
        {Intrinsic::usub_with_overflow, MVT::i64, 1},
        {Intrinsic::smul_with_overflow, MVT::i8, 5},
        {Intrinsic::umul_with_overflow, MVT::i8, 4},
        {Intrinsic::smul_with_overflow, MVT::i16, 5},
        {Intrinsic::umul_with_overflow, MVT::i16, 4},
        {Intrinsic::smul_with_overflow, MVT::i32, 2},
        {Intrinsic::umul_with_overflow, MVT::i32, 2},
        {Intrinsic::smul_with_overflow, MVT::i64, 3},
        {Intrinsic::umul_with_overflow, MVT::i64, 3},
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
    if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
         LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
         LT.second == MVT::v2f64)) {
         (LT.second == MVT::f64 && MTy == MVT::i32) ||
         (LT.second == MVT::f32 && MTy == MVT::i64)))
    if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
    if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
        (LT.second == MVT::f16 && MTy == MVT::i64) ||
        ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
    if ((LT.second.getScalarType() == MVT::f32 ||
         LT.second.getScalarType() == MVT::f64 ||
         LT.second.getScalarType() == MVT::f16) &&
    if (LT.second.isVector())
                                  LegalTy, {LegalTy, LegalTy});
                                  LegalTy, {LegalTy, LegalTy});
    return LT.first * Cost +
           ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
    if (LT.second.isVector()) {
    Type *CondTy = RetTy->getWithNewBitWidth(1);
    return LT.first * Cost;
  case Intrinsic::fshl:
  case Intrinsic::fshr: {
        {Intrinsic::fshl, MVT::v4i32, 3},
        {Intrinsic::fshl, MVT::v2i64, 3}, {Intrinsic::fshl, MVT::v16i8, 4},
        {Intrinsic::fshl, MVT::v8i16, 4}, {Intrinsic::fshl, MVT::v2i32, 3},
        {Intrinsic::fshl, MVT::v8i8, 4},  {Intrinsic::fshl, MVT::v4i16, 4}};
      return LegalisationCost.first * Entry->Cost;
    if (!RetTy->isIntegerTy())
    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
                       RetTy->getScalarSizeInBits() < 64) ||
                      (RetTy->getScalarSizeInBits() % 64 != 0);
    unsigned ExtraCost = HigherCost ? 1 : 0;
    if (RetTy->getScalarSizeInBits() == 32 ||
        RetTy->getScalarSizeInBits() == 64)
    return TyL.first + ExtraCost;
  case Intrinsic::get_active_lane_mask: {
    if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) &&
      return RetTy->getNumElements() * 2;
  case Intrinsic::experimental_vector_match: {
    auto *NeedleTy = cast<FixedVectorType>(ICA.getArgTypes()[1]);
    unsigned SearchSize = NeedleTy->getNumElements();
    if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
  if (isa<FixedVectorType>(RetTy))
  auto RequiredType = II.getType();

  auto *PN = dyn_cast<PHINode>(II.getArgOperand(0));
  assert(PN && "Expected Phi Node!");

  if (!PN->hasOneUse())

  for (Value *IncValPhi : PN->incoming_values()) {
    auto *Reinterpret = dyn_cast<IntrinsicInst>(IncValPhi);
        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())

  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I));
    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
static std::optional<Instruction *>
  auto BinOp = dyn_cast<IntrinsicInst>(II.getOperand(0));

  auto IntrinsicID = BinOp->getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:

  auto BinOpPred = BinOp->getOperand(0);
  auto BinOpOp1 = BinOp->getOperand(1);
  auto BinOpOp2 = BinOp->getOperand(2);

  auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);
      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
    return std::nullopt;

  auto PredOp = PredIntr->getOperand(0);
  auto PredOpTy = cast<VectorType>(PredOp->getType());
  if (PredOpTy != II.getType())
    return std::nullopt;

      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
  if (BinOpOp1 == BinOpOp2)
    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));

  auto NarrowedBinOp =
static std::optional<Instruction *>
  if (isa<PHINode>(II.getArgOperand(0)))
    return BinOpCombine;

  if (isa<TargetExtType>(II.getArgOperand(0)->getType()) ||
      isa<TargetExtType>(II.getType()))
    return std::nullopt;

  Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;

  const auto *IVTy = cast<VectorType>(II.getType());

    const auto *CursorVTy = cast<VectorType>(Cursor->getType());
    if (CursorVTy->getElementCount().getKnownMinValue() <
        IVTy->getElementCount().getKnownMinValue())

    if (Cursor->getType() == IVTy)
      EarliestReplacement = Cursor;

    auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);

    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_to_svbool ||
                              IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_from_svbool))

    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
    Cursor = IntrinsicCursor->getOperand(0);

  if (!EarliestReplacement)
    return std::nullopt;
  Value *UncastedPred;
  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
                      m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
    if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
        cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
      Pred = UncastedPred;

  return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                         m_ConstantInt<AArch64SVEPredPattern::all>()));
static std::optional<Instruction *>
                            bool hasInactiveVector) {
  int PredOperand = hasInactiveVector ? 1 : 0;
  int ReplaceOperand = hasInactiveVector ? 0 : 1;
    return std::nullopt;

static std::optional<Instruction *>
      !isa<llvm::UndefValue>(II.getOperand(0)) &&
      !isa<llvm::PoisonValue>(II.getOperand(0))) {
static std::optional<Instruction *>
    return std::nullopt;

static std::optional<Instruction *>
  if (RetTy->isStructTy()) {
    auto StructT = cast<StructType>(RetTy);
    auto VecT = StructT->getElementType(0);
    for (unsigned i = 0; i < StructT->getNumElements(); i++) {
      ZerVec.push_back(VecT->isFPOrFPVectorTy() ? ConstantFP::get(VecT, 0.0)
                                                : ConstantInt::get(VecT, 0));
                   : ConstantInt::get(II.getType(), 0);

  return std::nullopt;
  auto *OpPredicate = II.getOperand(0);
    return std::nullopt;

    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::vl1)
    return std::nullopt;

      II.getArgOperand(0), II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
  Insert->insertBefore(&II);
  Insert->takeName(&II);
  auto *RetTy = cast<ScalableVectorType>(II.getType());
                                           II.getArgOperand(0));
  auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
  if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::all)
    return std::nullopt;

  if (!SplatValue || !SplatValue->isZero())
    return std::nullopt;

  auto *DupQLane = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
      DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
    return std::nullopt;

  auto *DupQLaneIdx = dyn_cast<ConstantInt>(DupQLane->getArgOperand(1));
  if (!DupQLaneIdx || !DupQLaneIdx->isZero())
    return std::nullopt;

  auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
  if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
    return std::nullopt;

  if (!isa<UndefValue>(VecIns->getArgOperand(0)))
    return std::nullopt;

  if (!cast<ConstantInt>(VecIns->getArgOperand(2))->isZero())
    return std::nullopt;

  auto *ConstVec = dyn_cast<Constant>(VecIns->getArgOperand(1));
    return std::nullopt;

  auto *VecTy = dyn_cast<FixedVectorType>(ConstVec->getType());
  auto *OutTy = dyn_cast<ScalableVectorType>(II.getType());
  if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
    return std::nullopt;
  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;

  for (unsigned I = 0; I < NumElts; ++I) {
    auto *Arg = dyn_cast<ConstantInt>(ConstVec->getAggregateElement(I));
      return std::nullopt;
      PredicateBits |= 1 << (I * (16 / NumElts));

  if (PredicateBits == 0) {
    PFalse->takeName(&II);

  for (unsigned I = 0; I < 16; ++I)
    if ((PredicateBits & (1 << I)) != 0)

  unsigned PredSize = Mask & -Mask;
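  // Mask & -Mask isolates the lowest set bit, i.e. the spacing of the set
  // predicate lanes, which gives the candidate element size for the ptrue.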
  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
      return std::nullopt;

                                        {PredType}, {PTruePat});
      Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
  auto *ConvertFromSVBool =
                              {II.getType()}, {ConvertToSVBool});
  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(1);
  auto IntrinsicID = II.getIntrinsicID();
  bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;

    auto *OldBinOp = cast<BinaryOperator>(Vec);
    auto OpC = OldBinOp->getOpcode();
        OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());

  auto *C = dyn_cast<Constant>(Pg);
  if (IsAfter && C && C->isNullValue()) {
    Extract->insertBefore(&II);
    Extract->takeName(&II);
  auto *IntrPG = dyn_cast<IntrinsicInst>(Pg);
    return std::nullopt;

  if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(IntrPG->getOperand(0))->getZExtValue();
    return std::nullopt;

  unsigned Idx = MinNumElts - 1;

  auto *PgVTy = cast<ScalableVectorType>(Pg->getType());
  if (Idx >= PgVTy->getMinNumElements())
    return std::nullopt;

  Extract->insertBefore(&II);
  Extract->takeName(&II);
  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(2);
    return std::nullopt;
    return std::nullopt;
      FPTy, cast<VectorType>(Vec->getType())->getElementCount());
      II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
                              {II.getType()}, {AllPat});
static std::optional<Instruction *>
  const auto Pattern = cast<ConstantInt>(II.getArgOperand(0))->getZExtValue();

  if (Pattern == AArch64SVEPredPattern::all) {
    Constant *StepVal = ConstantInt::get(II.getType(), NumElts);

  return MinNumElts && NumElts >= MinNumElts
                 II, ConstantInt::get(II.getType(), MinNumElts)))
  Value *PgVal = II.getArgOperand(0);
  Value *OpVal = II.getArgOperand(1);

  if (PgVal == OpVal &&
      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
       II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
    Value *Ops[] = {PgVal, OpVal};

    return std::nullopt;

  if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&

  if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
      ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
       (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
       (OpIID == Intrinsic::aarch64_sve_and_z) ||
       (OpIID == Intrinsic::aarch64_sve_bic_z) ||
       (OpIID == Intrinsic::aarch64_sve_eor_z) ||
       (OpIID == Intrinsic::aarch64_sve_nand_z) ||
       (OpIID == Intrinsic::aarch64_sve_nor_z) ||
       (OpIID == Intrinsic::aarch64_sve_orn_z) ||
       (OpIID == Intrinsic::aarch64_sve_orr_z))) {

  return std::nullopt;
template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
                                            bool MergeIntoAddendOp) {
  Value *MulOp0, *MulOp1, *AddendOp, *Mul;
  if (MergeIntoAddendOp) {
    AddendOp = II.getOperand(1);
    Mul = II.getOperand(2);
    AddendOp = II.getOperand(2);
    Mul = II.getOperand(1);

    return std::nullopt;

  if (!Mul->hasOneUse())
    return std::nullopt;

  if (II.getType()->isFPOrFPVectorTy()) {
    if (FAddFlags != cast<CallInst>(Mul)->getFastMathFlags())
      return std::nullopt;
      return std::nullopt;
  if (MergeIntoAddendOp)

static std::optional<Instruction *>
  Value *Pred = II.getOperand(0);
  Value *PtrOp = II.getOperand(1);
  Type *VecTy = II.getType();

  Load->copyMetadata(II);

static std::optional<Instruction *>
  Value *VecOp = II.getOperand(0);
  Value *Pred = II.getOperand(1);
  Value *PtrOp = II.getOperand(2);
  Store->copyMetadata(II);
  switch (Intrinsic) {
  case Intrinsic::aarch64_sve_fmul_u:
    return Instruction::BinaryOps::FMul;
  case Intrinsic::aarch64_sve_fadd_u:
    return Instruction::BinaryOps::FAdd;
  case Intrinsic::aarch64_sve_fsub_u:
    return Instruction::BinaryOps::FSub;
    return Instruction::BinaryOpsEnd;
static std::optional<Instruction *>
  if (II.isStrictFP())
    return std::nullopt;

  auto *OpPredicate = II.getOperand(0);
  if (BinOpCode == Instruction::BinaryOpsEnd ||
      !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
    return std::nullopt;
      BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());
  auto *OpPredicate = II.getOperand(0);
  if (!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
    return std::nullopt;

  auto *Mod = II.getModule();
  II.setCalledFunction(NewDecl);
static std::optional<Instruction *>
                                                   Intrinsic::aarch64_sve_mla>(
                                                   Intrinsic::aarch64_sve_mad>(
  return std::nullopt;

static std::optional<Instruction *>
                                                Intrinsic::aarch64_sve_fmla>(IC, II,
                                                Intrinsic::aarch64_sve_fmad>(IC, II,
                                                Intrinsic::aarch64_sve_fmla>(IC, II,
  return std::nullopt;

static std::optional<Instruction *>
                                                Intrinsic::aarch64_sve_fmla>(IC, II,
                                                Intrinsic::aarch64_sve_fmad>(IC, II,
                                                   Intrinsic::aarch64_sve_fmla_u>(

static std::optional<Instruction *>
                                                Intrinsic::aarch64_sve_fmls>(IC, II,
                                                   Intrinsic::aarch64_sve_fnmsb>(
                                                Intrinsic::aarch64_sve_fmls>(IC, II,
  return std::nullopt;

static std::optional<Instruction *>
                                                Intrinsic::aarch64_sve_fmls>(IC, II,
                                                   Intrinsic::aarch64_sve_fnmsb>(
                                                   Intrinsic::aarch64_sve_fmls_u>(
                                                   Intrinsic::aarch64_sve_mls>(
  return std::nullopt;
  auto *OpPredicate = II.getOperand(0);
  auto *OpMultiplicand = II.getOperand(1);
  auto *OpMultiplier = II.getOperand(2);

  auto IsUnitSplat = [](auto *I) {

  auto IsUnitDup = [](auto *I) {
    auto *IntrI = dyn_cast<IntrinsicInst>(I);
    if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup)

    auto *SplatValue = IntrI->getOperand(2);

  if (IsUnitSplat(OpMultiplier)) {
    OpMultiplicand->takeName(&II);
  } else if (IsUnitDup(OpMultiplier)) {
    auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
    auto *DupPg = DupInst->getOperand(1);
    if (OpPredicate == DupPg) {
      OpMultiplicand->takeName(&II);
  Value *UnpackArg = II.getArgOperand(0);
  auto *RetTy = cast<ScalableVectorType>(II.getType());
  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
                  II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;

  return std::nullopt;

  auto *OpVal = II.getOperand(0);
  auto *OpIndices = II.getOperand(1);

  auto *SplatValue = dyn_cast_or_null<ConstantInt>(getSplatValue(OpIndices));
      SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
    return std::nullopt;
  constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
  constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;

  if ((match(II.getArgOperand(0),
             m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(m_Value(A)))) &&
             m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(m_Value(B))))) ||
    auto *TyA = cast<ScalableVectorType>(A->getType());
    if (TyA == B->getType() &&

  return std::nullopt;

  if (match(II.getArgOperand(0),
      match(II.getArgOperand(1), m_Intrinsic<Intrinsic::aarch64_sve_uzp2>(
        II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));
  return std::nullopt;
static std::optional<Instruction *>
  Value *Mask = II.getOperand(0);
  Value *BasePtr = II.getOperand(1);
  Value *Index = II.getOperand(2);

  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
        BasePtr->getPointerAlignment(II.getDataLayout());
                                      BasePtr, IndexBase);

  return std::nullopt;

static std::optional<Instruction *>
  Value *Val = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *BasePtr = II.getOperand(2);
  Value *Index = II.getOperand(3);

  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
        BasePtr->getPointerAlignment(II.getDataLayout());
                                      BasePtr, IndexBase);

  return std::nullopt;
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *DivVec = II.getOperand(2);
  ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
  if (!SplatConstantInt)
    return std::nullopt;

  if (DivisorValue == -1)
    return std::nullopt;
  if (DivisorValue == 1)

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
        Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});

  return std::nullopt;
  size_t VecSize = Vec.size();
  size_t HalfVecSize = VecSize / 2;
  if (*LHS != nullptr && *RHS != nullptr) {

  if (*LHS == nullptr && *RHS != nullptr)

          m_Intrinsic<Intrinsic::vector_insert>(
      !isa<FixedVectorType>(CurrentInsertElt->getType()))
    return std::nullopt;
  auto IIScalableTy = cast<ScalableVectorType>(II.getType());

  while (auto InsertElt = dyn_cast<InsertElementInst>(CurrentInsertElt)) {
    auto Idx = cast<ConstantInt>(InsertElt->getOperand(2));
    Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
    CurrentInsertElt = InsertElt->getOperand(0);

      isa<PoisonValue>(CurrentInsertElt) && isa<PoisonValue>(Default);
    return std::nullopt;

  for (size_t I = 0; I < Elts.size(); I++) {
    if (Elts[I] == nullptr)

  if (InsertEltChain == nullptr)
    return std::nullopt;

  unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
  unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
                                 IIScalableTy->getMinNumElements() /

  auto *WideShuffleMaskTy =

  auto NarrowBitcast =

  return std::nullopt;
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *Shift = II.getOperand(2);

  Value *AbsPred, *MergedValue;
  if (!match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_sqabs>(
      !match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(
    return std::nullopt;

    return std::nullopt;

    return std::nullopt;
                                   {II.getType()}, {Pred, Vec, Shift});
  Value *Vec = II.getOperand(0);

    return std::nullopt;

  auto *NI = II.getNextNonDebugInstruction();
    return !I->mayReadOrWriteMemory() && !I->mayHaveSideEffects();
  while (LookaheadThreshold-- && CanSkipOver(NI)) {
    auto *NIBB = NI->getParent();
    NI = NI->getNextNonDebugInstruction();
    if (auto *SuccBB = NIBB->getUniqueSuccessor())
      NI = SuccBB->getFirstNonPHIOrDbgOrLifetime();

  auto *NextII = dyn_cast_or_null<IntrinsicInst>(NI);
  if (NextII && II.isIdenticalTo(NextII))

  return std::nullopt;
std::optional<Instruction *>
  case Intrinsic::aarch64_dmb:
  case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
  case Intrinsic::aarch64_sve_fcvt_f16f32:
  case Intrinsic::aarch64_sve_fcvt_f16f64:
  case Intrinsic::aarch64_sve_fcvt_f32f16:
  case Intrinsic::aarch64_sve_fcvt_f32f64:
  case Intrinsic::aarch64_sve_fcvt_f64f16:
  case Intrinsic::aarch64_sve_fcvt_f64f32:
  case Intrinsic::aarch64_sve_fcvtlt_f32f16:
  case Intrinsic::aarch64_sve_fcvtlt_f64f32:
  case Intrinsic::aarch64_sve_fcvtx_f32f64:
  case Intrinsic::aarch64_sve_fcvtzs:
  case Intrinsic::aarch64_sve_fcvtzs_i32f16:
  case Intrinsic::aarch64_sve_fcvtzs_i32f64:
  case Intrinsic::aarch64_sve_fcvtzs_i64f16:
  case Intrinsic::aarch64_sve_fcvtzs_i64f32:
  case Intrinsic::aarch64_sve_fcvtzu:
  case Intrinsic::aarch64_sve_fcvtzu_i32f16:
  case Intrinsic::aarch64_sve_fcvtzu_i32f64:
  case Intrinsic::aarch64_sve_fcvtzu_i64f16:
  case Intrinsic::aarch64_sve_fcvtzu_i64f32:
  case Intrinsic::aarch64_sve_scvtf:
  case Intrinsic::aarch64_sve_scvtf_f16i32:
  case Intrinsic::aarch64_sve_scvtf_f16i64:
  case Intrinsic::aarch64_sve_scvtf_f32i64:
  case Intrinsic::aarch64_sve_scvtf_f64i32:
  case Intrinsic::aarch64_sve_ucvtf:
  case Intrinsic::aarch64_sve_ucvtf_f16i32:
  case Intrinsic::aarch64_sve_ucvtf_f16i64:
  case Intrinsic::aarch64_sve_ucvtf_f32i64:
  case Intrinsic::aarch64_sve_ucvtf_f64i32:
  case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
  case Intrinsic::aarch64_sve_fcvtnt_f16f32:
  case Intrinsic::aarch64_sve_fcvtnt_f32f64:
  case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
  case Intrinsic::aarch64_sve_st1_scatter:
  case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
  case Intrinsic::aarch64_sve_st1dq:
  case Intrinsic::aarch64_sve_st1q_scatter_index:
  case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
  case Intrinsic::aarch64_sve_st1wq:
  case Intrinsic::aarch64_sve_stnt1:
  case Intrinsic::aarch64_sve_stnt1_scatter:
  case Intrinsic::aarch64_sve_stnt1_scatter_index:
  case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
  case Intrinsic::aarch64_sve_st2:
  case Intrinsic::aarch64_sve_st2q:
  case Intrinsic::aarch64_sve_st3:
  case Intrinsic::aarch64_sve_st3q:
  case Intrinsic::aarch64_sve_st4:
  case Intrinsic::aarch64_sve_st4q:
  case Intrinsic::aarch64_sve_addqv:
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_brka_z:
  case Intrinsic::aarch64_sve_brkb_z:
  case Intrinsic::aarch64_sve_brkn_z:
  case Intrinsic::aarch64_sve_brkpa_z:
  case Intrinsic::aarch64_sve_brkpb_z:
  case Intrinsic::aarch64_sve_cntp:
  case Intrinsic::aarch64_sve_compact:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_eorv:
  case Intrinsic::aarch64_sve_eorqv:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:
  case Intrinsic::aarch64_sve_orv:
  case Intrinsic::aarch64_sve_orqv:
  case Intrinsic::aarch64_sve_pnext:
  case Intrinsic::aarch64_sve_rdffr_z:
  case Intrinsic::aarch64_sve_saddv:
  case Intrinsic::aarch64_sve_uaddv:
  case Intrinsic::aarch64_sve_umaxv:
  case Intrinsic::aarch64_sve_umaxqv:
  case Intrinsic::aarch64_sve_cmpeq:
  case Intrinsic::aarch64_sve_cmpeq_wide:
  case Intrinsic::aarch64_sve_cmpge:
  case Intrinsic::aarch64_sve_cmpge_wide:
  case Intrinsic::aarch64_sve_cmpgt:
  case Intrinsic::aarch64_sve_cmpgt_wide:
  case Intrinsic::aarch64_sve_cmphi:
  case Intrinsic::aarch64_sve_cmphi_wide:
  case Intrinsic::aarch64_sve_cmphs:
  case Intrinsic::aarch64_sve_cmphs_wide:
  case Intrinsic::aarch64_sve_cmple_wide:
  case Intrinsic::aarch64_sve_cmplo_wide:
  case Intrinsic::aarch64_sve_cmpls_wide:
  case Intrinsic::aarch64_sve_cmplt_wide:
  case Intrinsic::aarch64_sve_facge:
  case Intrinsic::aarch64_sve_facgt:
  case Intrinsic::aarch64_sve_fcmpeq:
  case Intrinsic::aarch64_sve_fcmpge:
  case Intrinsic::aarch64_sve_fcmpgt:
  case Intrinsic::aarch64_sve_fcmpne:
  case Intrinsic::aarch64_sve_fcmpuo:
  case Intrinsic::aarch64_sve_ld1_gather:
  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ld1q_gather_index:
  case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
  case Intrinsic::aarch64_sve_ld1ro:
  case Intrinsic::aarch64_sve_ld1rq:
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
  case Intrinsic::aarch64_sve_ldff1:
  case Intrinsic::aarch64_sve_ldff1_gather:
  case Intrinsic::aarch64_sve_ldff1_gather_index:
  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ldnf1:
  case Intrinsic::aarch64_sve_ldnt1:
  case Intrinsic::aarch64_sve_ldnt1_gather:
  case Intrinsic::aarch64_sve_ldnt1_gather_index:
  case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
  case Intrinsic::aarch64_sve_prf:
  case Intrinsic::aarch64_sve_prfb_gather_index:
  case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfd_gather_index:
  case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfh_gather_index:
  case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfw_gather_index:
  case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
  case Intrinsic::aarch64_neon_fmaxnm:
  case Intrinsic::aarch64_neon_fminnm:
  case Intrinsic::aarch64_sve_convert_from_svbool:
  case Intrinsic::aarch64_sve_dup:
  case Intrinsic::aarch64_sve_dup_x:
  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpne_wide:
  case Intrinsic::aarch64_sve_rdffr:
  case Intrinsic::aarch64_sve_lasta:
  case Intrinsic::aarch64_sve_lastb:
  case Intrinsic::aarch64_sve_clasta_n:
  case Intrinsic::aarch64_sve_clastb_n:
  case Intrinsic::aarch64_sve_cntd:
  case Intrinsic::aarch64_sve_cntw:
  case Intrinsic::aarch64_sve_cnth:
  case Intrinsic::aarch64_sve_cntb:
  case Intrinsic::aarch64_sve_ptest_any:
  case Intrinsic::aarch64_sve_ptest_first:
  case Intrinsic::aarch64_sve_ptest_last:
  case Intrinsic::aarch64_sve_fabd:
  case Intrinsic::aarch64_sve_fadd:
  case Intrinsic::aarch64_sve_fadd_u:
  case Intrinsic::aarch64_sve_fdiv:
  case Intrinsic::aarch64_sve_fmax:
  case Intrinsic::aarch64_sve_fmaxnm:
  case Intrinsic::aarch64_sve_fmin:
  case Intrinsic::aarch64_sve_fminnm:
  case Intrinsic::aarch64_sve_fmla:
  case Intrinsic::aarch64_sve_fmls:
  case Intrinsic::aarch64_sve_fmul:
  case Intrinsic::aarch64_sve_fmul_u:
  case Intrinsic::aarch64_sve_fmulx:
  case Intrinsic::aarch64_sve_fnmla:
  case Intrinsic::aarch64_sve_fnmls:
  case Intrinsic::aarch64_sve_fsub:
  case Intrinsic::aarch64_sve_fsub_u:
  case Intrinsic::aarch64_sve_add:
  case Intrinsic::aarch64_sve_add_u:
                                             Intrinsic::aarch64_sve_mla_u>(
  case Intrinsic::aarch64_sve_mla:
  case Intrinsic::aarch64_sve_mls:
  case Intrinsic::aarch64_sve_mul:
  case Intrinsic::aarch64_sve_mul_u:
  case Intrinsic::aarch64_sve_sabd:
  case Intrinsic::aarch64_sve_smax:
  case Intrinsic::aarch64_sve_smin:
  case Intrinsic::aarch64_sve_smulh:
  case Intrinsic::aarch64_sve_sub:
  case Intrinsic::aarch64_sve_sub_u:
                                             Intrinsic::aarch64_sve_mls_u>(
  case Intrinsic::aarch64_sve_uabd:
  case Intrinsic::aarch64_sve_umax:
  case Intrinsic::aarch64_sve_umin:
  case Intrinsic::aarch64_sve_umulh:
  case Intrinsic::aarch64_sve_asr:
  case Intrinsic::aarch64_sve_lsl:
  case Intrinsic::aarch64_sve_lsr:
  case Intrinsic::aarch64_sve_and:
  case Intrinsic::aarch64_sve_bic:
  case Intrinsic::aarch64_sve_eor:
  case Intrinsic::aarch64_sve_orr:
  case Intrinsic::aarch64_sve_sqsub:
  case Intrinsic::aarch64_sve_uqsub:
  case Intrinsic::aarch64_sve_tbl:
  case Intrinsic::aarch64_sve_uunpkhi:
  case Intrinsic::aarch64_sve_uunpklo:
  case Intrinsic::aarch64_sve_sunpkhi:
  case Intrinsic::aarch64_sve_sunpklo:
  case Intrinsic::aarch64_sve_uzp1:
  case Intrinsic::aarch64_sve_zip1:
  case Intrinsic::aarch64_sve_zip2:
  case Intrinsic::aarch64_sve_ld1_gather_index:
  case Intrinsic::aarch64_sve_st1_scatter_index:
  case Intrinsic::aarch64_sve_ld1:
  case Intrinsic::aarch64_sve_st1:
  case Intrinsic::aarch64_sve_sdiv:
  case Intrinsic::aarch64_sve_sel:
  case Intrinsic::aarch64_sve_srshl:
  case Intrinsic::aarch64_sve_dupq_lane:
  case Intrinsic::aarch64_sve_insr:
  return std::nullopt;
                            SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
  case Intrinsic::aarch64_neon_fcvtxn:
  case Intrinsic::aarch64_neon_rshrn:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqxtn:
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);

  return std::nullopt;
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
                                           Type *SrcOverrideTy) {
                             cast<VectorType>(DstTy)->getElementCount());
      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  Type *SrcTy = SrcOverrideTy;
  case Instruction::Add:
  case Instruction::Sub:
    if (isa<SExtInst>(Args[1]) || isa<ZExtInst>(Args[1])) {
  case Instruction::Mul: {
    if ((isa<SExtInst>(Args[0]) && isa<SExtInst>(Args[1])) ||
        (isa<ZExtInst>(Args[0]) && isa<ZExtInst>(Args[1]))) {
    } else if (isa<ZExtInst>(Args[0]) || isa<ZExtInst>(Args[1])) {
  assert(SrcTy && "Expected some SrcTy");
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
      DstTyL.first * DstTyL.second.getVectorMinNumElements();
      SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
      (Src->isScalableTy() && !ST->hasSVE2()))

      dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
  if (AddUser && AddUser->getOpcode() == Instruction::Add)

  auto *Shr = dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
  if (!Shr || Shr->getOpcode() != Instruction::LShr)

  auto *Trunc = dyn_cast_or_null<Instruction>(Shr->getUniqueUndroppableUser());
  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
      Src->getScalarSizeInBits() !=
          cast<CastInst>(Trunc)->getDestTy()->getScalarSizeInBits())
  assert(ISD && "Invalid opcode");
  if (I && I->hasOneUser()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands, Src)) {
      if (SingleUser->getOpcode() == Instruction::Add) {
        if (I == SingleUser->getOperand(1) ||
            (isa<CastInst>(SingleUser->getOperand(1)) &&
             cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))

  if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
    return Cost == 0 ? 0 : 1;

    return AdjustCost(Entry->Cost);
    EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;
    std::pair<InstructionCost, MVT> LT =
    unsigned NumElements =
    return AdjustCost(Entry->Cost);
    if (ST->hasFullFP16())
      return AdjustCost(Entry->Cost);
        Opcode, LegalTy, Src, CCH, CostKind, I);
    return Part1 + Part2;
  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
                             CostKind, Index, nullptr, nullptr);

  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))

  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())

  case Instruction::SExt:
  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)

  return Opcode == Instruction::PHI ? 0 : 1;
    unsigned Opcode, Type *Val, unsigned Index, bool HasRealUse,
    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
  if (!LT.second.isVector())

  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;
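    // Legalization splits wide fixed-length vectors across several registers,
    // so only the lane position within one legal register is used below.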
    if (I && dyn_cast<LoadInst>(I->getOperand(1)))
  auto ExtractCanFuseWithFmul = [&]() {
    auto IsAllowedScalarTy = [&](const Type *T) {
      return T->isFloatTy() || T->isDoubleTy() ||
             (T->isHalfTy() && ST->hasFullFP16());

    auto IsUserFMulScalarTy = [](const Value *EEUser) {
      const auto *BO = dyn_cast<BinaryOperator>(EEUser);
      return BO && BO->getOpcode() == BinaryOperator::FMul &&
             !BO->getType()->isVectorTy();

    auto IsExtractLaneEquivalentToZero = [&](unsigned Idx, unsigned EltSz) {
      return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);

    if (!isa<FixedVectorType>(Val) || !IsAllowedScalarTy(Val->getScalarType()))

      for (auto *U : Scalar->users()) {
        if (!IsUserFMulScalarTy(U))
        UserToExtractIdx[U];
      if (UserToExtractIdx.empty())
      for (auto &[S, U, L] : ScalarUserAndIdx) {
        for (auto *U : S->users()) {
          if (UserToExtractIdx.find(U) != UserToExtractIdx.end()) {
            auto *FMul = cast<BinaryOperator>(U);
            auto *Op0 = FMul->getOperand(0);
            auto *Op1 = FMul->getOperand(1);
            if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
              UserToExtractIdx[U] = L;
      for (auto &[U, L] : UserToExtractIdx) {
      const auto *EE = cast<ExtractElementInst>(I);
      const auto *IdxOp = dyn_cast<ConstantInt>(EE->getIndexOperand());
      return !EE->users().empty() && all_of(EE->users(), [&](const User *U) {
        if (!IsUserFMulScalarTy(U))
        const auto *BO = cast<BinaryOperator>(U);
        const auto *OtherEE = dyn_cast<ExtractElementInst>(
            BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
        const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
        return IsExtractLaneEquivalentToZero(
            cast<ConstantInt>(OtherEE->getIndexOperand())
            OtherEE->getType()->getScalarSizeInBits());

  if (Opcode == Instruction::ExtractElement && (I || Scalar) &&
      ExtractCanFuseWithFmul())

  return ST->getVectorInsertExtractBaseCost();
                                                   unsigned Index, Value *Op0,
      Opcode == Instruction::InsertElement && Op0 && !isa<UndefValue>(Op0);
  return getVectorInstrCostHelper(Opcode, Val, Index, HasRealUse);

    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
  return getVectorInstrCostHelper(Opcode, Val, Index, false, nullptr, Scalar,

  return getVectorInstrCostHelper(I.getOpcode(), Val, Index,

  if (isa<ScalableVectorType>(Ty))
  return DemandedElts.popcount() * (Insert + Extract) *

  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
                                        Op2Info, Args, CxtI);
    return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;

    if (!VT.isVector() && VT.getSizeInBits() > 64)
          Opcode, Ty, CostKind, Op1Info, Op2Info);

    if (isa<FixedVectorType>(Ty) && cast<FixedVectorType>(Ty)
                                            ->getPrimitiveSizeInBits()
                                            .getFixedValue() < 128) {

      if (nullptr != Entry)

    if (LT.second.getScalarType() == MVT::i8)
    else if (LT.second.getScalarType() == MVT::i16)

      if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
        return (4 + DivCost) * VTy->getNumElements();

    if (LT.second == MVT::v2i64 && ST->hasSVE())

    if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
      return LT.first * 14;

        (Ty->isHalfTy() && ST->hasFullFP16())) &&
      return 2 * LT.first;
      return 2 * LT.first;

    int MaxMergeDistance = 64;

    return NumVectorInstToHideOverhead;
I);
3694 if (isa<FixedVectorType>(ValTy) && ISD ==
ISD::SELECT) {
3696 const int AmortizationCost = 20;
3704 VecPred = CurrentPred;
3712 static const auto ValidMinMaxTys = {
3713 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
3714 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
3715 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
3718 if (
any_of(ValidMinMaxTys, [<](
MVT M) {
return M == LT.second; }) ||
3719 (ST->hasFullFP16() &&
3720 any_of(ValidFP16MinMaxTys, [<](
MVT M) {
return M == LT.second; })))
3725 VectorSelectTbl[] = {
3734 {
ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
3735 {
ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
3736 {
ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
3749 if (isa<FixedVectorType>(ValTy) && ISD ==
ISD::SETCC) {
3752 if (LT.second == MVT::v4f16 && !ST->hasFullFP16())
3753 return LT.first * 4;
3769 Op1Info, Op2Info,
I);
  if (ST->requiresStrictAlign()) {
  Options.AllowOverlappingLoads = true;
  Options.LoadSizes = {8, 4, 2, 1};
  Options.AllowedTailExpansions = {3, 5, 6};
  return ST->hasSVE();
  if (!LT.first.isValid())

  auto *VT = cast<VectorType>(Src);
  if (VT->getElementType()->isIntegerTy(1))

  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Should be called on only load or stores.");
  case Instruction::Load:
      return ST->getGatherOverhead();
  case Instruction::Store:
      return ST->getScatterOverhead();
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  auto *VT = cast<VectorType>(DataTy);
  if (!LT.first.isValid())

  if (!LT.second.isVector() ||
      VT->getElementType()->isIntegerTy(1))

  ElementCount LegalVF = LT.second.getVectorElementCount();
      {TTI::OK_AnyValue, TTI::OP_None}, I);
  if (VT == MVT::Other)

  if (!LT.first.isValid())

  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
      (VTy->getElementType()->isIntegerTy(1) &&
       !VTy->getElementCount().isKnownMultipleOf(

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
    const int AmortizationCost = 6;
    return LT.first * 2 * AmortizationCost;

  if (VT == MVT::v4i8)
    return cast<FixedVectorType>(Ty)->getNumElements() * 2;

  if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
      *Alignment != Align(1))

  while (!TypeWorklist.empty()) {
                                    bool UseMaskForCond, bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  auto *VecVTy = cast<VectorType>(VecTy);

  if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))

  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
        VecVTy->getElementCount().divideCoefficientBy(Factor));

    if (MinElts % Factor == 0 &&

                                           UseMaskForCond, UseMaskForGaps);
  for (auto *I : Tys) {
    if (!I->isVectorTy())
    if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() ==
  enum { MaxStridedLoads = 7 };
    int StridedLoads = 0;
    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {
        LoadInst *LMemI = dyn_cast<LoadInst>(&I);
        if (L->isLoopInvariant(PtrValue))
        const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;
    return StridedLoads;
  int StridedLoads = countStridedLoads(L, SE);
                    << " strided loads\n");
  if (!L->isInnermost() || !L->getExitBlock() || L->getNumBlocks() > 8)

  if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC) ||

  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (!isa<IntrinsicInst>(&I) && isa<CallBase>(&I))

  if (Header == L->getLoopLatch()) {
    for (auto *BB : L->blocks()) {
      for (auto &I : *BB) {
        if (isa<LoadInst>(&I))

    unsigned MaxInstsPerLine = 16;
    unsigned BestUC = 1;
    unsigned SizeWithBestUC = BestUC * Size;
      unsigned SizeWithUC = UC * Size;
      if (SizeWithUC > 48)
      if ((SizeWithUC % MaxInstsPerLine) == 0 ||
          (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
        SizeWithBestUC = BestUC * Size;

          return LoadedValues.contains(SI->getOperand(0));

  auto *Term = dyn_cast<BranchInst>(Header->getTerminator());
  auto *Latch = L->getLoopLatch();
  if (!Term || !Term->isConditional() || Preds.size() == 1 ||

    if (isa<PHINode>(I) || L->isLoopInvariant(I) || Depth > 8)
    if (isa<LoadInst>(I))
      auto *I = dyn_cast<Instruction>(V);
      return I && DependsOnLoopLoad(I, Depth + 1);
      DependsOnLoopLoad(I, 0)) {
  if (L->getLoopDepth() > 1)

  case AArch64Subtarget::AppleA14:
  case AArch64Subtarget::AppleA15:
  case AArch64Subtarget::AppleA16:
  case AArch64Subtarget::AppleM4:
  case AArch64Subtarget::Falkor:

  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (I.getType()->isVectorTy())
      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {

      !ST->getSchedModel().isOutOfOrder()) {
                                                         Type *ExpectedType) {
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
    StructType *ST = dyn_cast<StructType>(ExpectedType);
    unsigned NumElts = Inst->arg_size() - 1;
    if (ST->getNumElements() != NumElts)
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)

  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;

  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))
  Type *ConsideredSExtType =
  if (I.getType() != ConsideredSExtType)

  for (const User *U : I.users()) {
      Considerable = true;
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;

  return Considerable;
  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
  if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
    return LegalizationCost + 2;

  LegalizationCost *= LT.first - 1;

  assert(ISD && "Invalid opcode");
    return LegalizationCost + 2;
    std::optional<FastMathFlags> FMF,
  if (auto *VTy = dyn_cast<ScalableVectorType>(ValTy))

  if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
      return BaseCost + FixedVTy->getNumElements();
    if (Opcode != Instruction::FAdd)
    auto *VTy = cast<ScalableVectorType>(ValTy);

  if (isa<ScalableVectorType>(ValTy))

  MVT MTy = LT.second;
  assert(ISD && "Invalid opcode");

      MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
                         (EltTy->isHalfTy() && ST->hasFullFP16()))) {
    return (LT.first - 1) + Log2_32(NElts);
    return (LT.first - 1) + Entry->Cost;

  auto *ValVTy = cast<FixedVectorType>(ValTy);
  if (LT.first != 1) {
    ExtraCost *= LT.first - 1;
  auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
  return Cost + ExtraCost;
  EVT PromotedVT = LT.second.getScalarType() == MVT::i1
  if (LT.second.getScalarType() == MVT::i1) {
  assert(Entry && "Illegal Type for Splice");
  LegalizationCost += Entry->Cost;
  return LegalizationCost * LT.first;
  if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
      Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
    if (Args.size() >= 1 && isa<LoadInst>(Args[0]) &&
      return std::max<InstructionCost>(1, LT.first / 4);

    unsigned TpNumElts = Mask.size();
    unsigned LTNumElts = LT.second.getVectorNumElements();
    unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
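    // Ceil-divide the mask width by the legal vector width; e.g. a 24-element
    // shuffle over a 16-element legal type is split into two legal-width
    // sub-shuffles that are costed independently by the loop below.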
    for (unsigned N = 0; N < NumVecs; N++) {
      unsigned Source1, Source2;
      unsigned NumSources = 0;
      for (unsigned E = 0; E < LTNumElts; E++) {
        int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
          unsigned Source = MaskElt / LTNumElts;
          if (NumSources == 0) {
          } else if (NumSources == 1 && Source != Source1) {
          } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
          if (Source == Source1)
          else if (Source == Source2)
            NMask.push_back(MaskElt % LTNumElts + LTNumElts);
      if (NumSources <= 2)
                           NTp, NMask, CostKind, 0, nullptr, Args, CxtI);

  if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
    if (LT.second.is128BitVector() &&
        cast<FixedVectorType>(SubTp)->getNumElements() ==
            LT.second.getVectorNumElements() / 2) {
      if (Index == (int)LT.second.getVectorNumElements() / 2)

  bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
  if (IsLoad && LT.second.isVector() &&
          LT.second.getVectorElementCount()))

      all_of(Mask, [](int E) { return E < 8; }))

  if (!Mask.empty() && LT.second.isFixedLengthVector() &&
        return M.value() < 0 || M.value() == (int)M.index();

  if (LT.second.isFixedLengthVector() &&
      LT.second.getVectorNumElements() == Mask.size() &&
      (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
       isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
       all_of(Mask, [&Mask](int M) { return M < 0 || M == Mask[0]; })))
    return LT.first * Entry->Cost;
4929 LT.second.getSizeInBits() <= 128 && SubTp) {
4931 if (SubLT.second.isVector()) {
4932 int NumElts = LT.second.getVectorNumElements();
4933 int NumSubElts = SubLT.second.getVectorNumElements();
4934 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
4940 if (IsExtractSubvector)
4953 if (isa<LoadInst>(&I) || isa<StoreInst>(&I)) {
4969 return ST->useFixedOverScalableIfEqualCost();
5007 unsigned NumInsns = 0;
5009 NumInsns += BB->sizeWithoutDebug();
5019 int64_t Scale, unsigned AddrSpace) const {
5046 if (I->getOpcode() == Instruction::Or &&
5047 isa<BranchInst>(I->getNextNode()) &&
5048 cast<BranchInst>(I->getNextNode())->isUnconditional())
5051 if (I->getOpcode() == Instruction::Add ||
5052 I->getOpcode() == Instruction::Sub)
5075 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V))
5076 return all_equal(Shuf->getShuffleMask());
5083 bool AllowSplat = false) {
5088 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
5089 auto *FullTy = FullV->getType();
5090 auto *HalfTy = HalfV->getType();
5092 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
5095 auto extractHalf = [](Value *FullV, Value *HalfV) {
5096 auto *FullVT = cast<FixedVectorType>(FullV->getType());
5097 auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
5098 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
5102 Value *S1Op1 = nullptr, *S2Op1 = nullptr;
5116 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
5117 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
5124 int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
5131 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
5132 (M2Start != 0 && M2Start != (NumElements / 2)))
5134 if (S1Op1 && S2Op1 && M1Start != M2Start)
5144 return Ext->getType()->getScalarSizeInBits() ==
5145 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
5150 !areExtDoubled(cast<Instruction>(Ext1)) ||
5151 !areExtDoubled(cast<Instruction>(Ext2)))
5159 Value *VectorOperand = nullptr;
5164 isa<FixedVectorType>(VectorOperand->getType()) &&
5165 cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
5175 auto *GEP = dyn_cast<GetElementPtrInst>(Ptrs);
5176 if (!GEP || GEP->getNumOperands() != 2)
5180 Value *Offsets = GEP->getOperand(1);
5183 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
5187 if (isa<SExtInst>(Offsets) || isa<ZExtInst>(Offsets)) {
5188 auto *OffsetsInst = cast<Instruction>(Offsets);
5189 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
5190 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
5206 Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
5211 Value *ZExtOp = cast<Instruction>(Op)->getOperand(0);
5212 Ops.push_back(&cast<Instruction>(ZExtOp)->getOperandUse(0));
5213 Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
5225 switch (II->getIntrinsicID()) {
5226 case Intrinsic::aarch64_neon_smull:
5227 case Intrinsic::aarch64_neon_umull:
5236 case Intrinsic::fma:
5237 case Intrinsic::fmuladd:
5238 if (isa<VectorType>(I->getType()) &&
5239 cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
5243 case Intrinsic::aarch64_neon_sqdmull:
5244 case Intrinsic::aarch64_neon_sqdmulh:
5245 case Intrinsic::aarch64_neon_sqrdmulh:
5251 return !Ops.empty();
5252 case Intrinsic::aarch64_neon_fmlal:
5253 case Intrinsic::aarch64_neon_fmlal2:
5254 case Intrinsic::aarch64_neon_fmlsl:
5255 case Intrinsic::aarch64_neon_fmlsl2:
5261 return !Ops.empty();
5262 case Intrinsic::aarch64_sve_ptest_first:
5263 case Intrinsic::aarch64_sve_ptest_last:
5264 if (auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0)))
5265 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
5267 return !Ops.empty();
5268 case Intrinsic::aarch64_sme_write_horiz:
5269 case Intrinsic::aarch64_sme_write_vert:
5270 case Intrinsic::aarch64_sme_writeq_horiz:
5271 case Intrinsic::aarch64_sme_writeq_vert: {
5272 auto *Idx = dyn_cast<Instruction>(II->getOperand(1));
5273 if (!Idx || Idx->getOpcode() != Instruction::Add)
5278 case Intrinsic::aarch64_sme_read_horiz:
5279 case Intrinsic::aarch64_sme_read_vert:
5280 case Intrinsic::aarch64_sme_readq_horiz:
5281 case Intrinsic::aarch64_sme_readq_vert:
5282 case Intrinsic::aarch64_sme_ld1b_vert:
5283 case Intrinsic::aarch64_sme_ld1h_vert:
5284 case Intrinsic::aarch64_sme_ld1w_vert:
5285 case Intrinsic::aarch64_sme_ld1d_vert:
5286 case Intrinsic::aarch64_sme_ld1q_vert:
5287 case Intrinsic::aarch64_sme_st1b_vert:
5288 case Intrinsic::aarch64_sme_st1h_vert:
5289 case Intrinsic::aarch64_sme_st1w_vert:
5290 case Intrinsic::aarch64_sme_st1d_vert:
5291 case Intrinsic::aarch64_sme_st1q_vert:
5292 case Intrinsic::aarch64_sme_ld1b_horiz:
5293 case Intrinsic::aarch64_sme_ld1h_horiz:
5294 case Intrinsic::aarch64_sme_ld1w_horiz:
5295 case Intrinsic::aarch64_sme_ld1d_horiz:
5296 case Intrinsic::aarch64_sme_ld1q_horiz:
5297 case Intrinsic::aarch64_sme_st1b_horiz:
5298 case Intrinsic::aarch64_sme_st1h_horiz:
5299 case Intrinsic::aarch64_sme_st1w_horiz:
5300 case Intrinsic::aarch64_sme_st1d_horiz:
5301 case Intrinsic::aarch64_sme_st1q_horiz: {
5302 auto *Idx = dyn_cast<Instruction>(II->getOperand(3));
5303 if (!Idx || Idx->getOpcode() != Instruction::Add)
5308 case Intrinsic::aarch64_neon_pmull:
5314 case Intrinsic::aarch64_neon_pmull64:
5316 II->getArgOperand(1)))
5321 case Intrinsic::masked_gather:
5326 case Intrinsic::masked_scatter:
5336 auto ShouldSinkCondition = [](Value *Cond) -> bool {
5337 auto *II = dyn_cast<IntrinsicInst>(Cond);
5338 return II && II->getIntrinsicID() == Intrinsic::vector_reduce_or &&
5339 isa<ScalableVectorType>(II->getOperand(0)->getType());
5342 switch (I->getOpcode()) {
5343 case Instruction::GetElementPtr:
5344 case Instruction::Add:
5345 case Instruction::Sub:
5347 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
5354 case Instruction::Select: {
5355 if (!ShouldSinkCondition(I->getOperand(0)))
5361 case Instruction::Br: {
5362 if (cast<BranchInst>(I)->isUnconditional())
5365 if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition()))
5375 if (!I->getType()->isVectorTy())
5378 switch (I->getOpcode()) {
5379 case Instruction::Sub:
5380 case Instruction::Add: {
5386 auto Ext1 = cast<Instruction>(I->getOperand(0));
5387 auto Ext2 = cast<Instruction>(I->getOperand(1));
5398 case Instruction::Or: {
5401 if (ST->hasNEON()) {
5411 ? cast<Instruction>(I->getOperand(1))
5412 : cast<Instruction>(I->getOperand(0));
5415 if (I->getParent() != MainAnd->getParent() ||
5420 if (I->getParent() != IA->getParent() ||
5421 I->getParent() != IB->getParent())
5436 case Instruction::Mul: {
5437 auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
5438 auto *Ty = cast<VectorType>(V->getType());
5440 if (Ty->isScalableTy())
5444 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
5447 int NumZExts = 0, NumSExts = 0;
5448 for (auto &Op : I->operands()) {
5450 if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
5454 auto *Ext = cast<Instruction>(Op);
5455 auto *ExtOp = Ext->getOperand(0);
5456 if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
5460 if (isa<SExtInst>(Ext))
5491 Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
5496 dyn_cast<ConstantInt>(Insert->getOperand(2));
5498 if (!ElementConstant || !ElementConstant->isZero())
5501 unsigned Opcode = OperandInstr->getOpcode();
5502 if (Opcode == Instruction::SExt)
5504 else if (Opcode == Instruction::ZExt)
5509 unsigned Bitwidth = I->getType()->getScalarSizeInBits();
5520 Ops.push_back(&Insert->getOperandUse(1));
5526 if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
5530 if (!ShouldSinkSplatForIndexedVariant(I))
5539 return !Ops.empty();
5541 case Instruction::FMul: {
5543 if (I->getType()->isScalableTy())
5546 if (cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
5555 return !Ops.empty();
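The Mul and FMul cases above collect operand uses into Ops so that CodeGenPrepare can sink extends and splats next to the multiply, enabling widening (smull/umull) or lane-indexed forms. A simplified, hedged illustration of the recording step only, leaving out the full legality checks performed above:

  for (Use &U : I->operands()) {
    Instruction *OpI = dyn_cast<Instruction>(U.get());
    // Only record extends defined outside this block; sinking them lets
    // instruction selection fold the extend into a widening multiply.
    if (OpI && (isa<SExtInst>(OpI) || isa<ZExtInst>(OpI)) &&
        OpI->getParent() != I->getParent())
      Ops.push_back(&U);
  }
  return !Ops.empty();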
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
This file provides the interface for the instcombine pass implementation.
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
unsigned getVectorInsertExtractBaseCost() const
ARMProcFamilyEnum getProcFamily() const
Returns ARM processor family.
unsigned getMaxInterleaveFactor() const
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
TailFoldingOpts getSVETailFoldingDefaultOpts() const
bool useSVEForFixedLengthVectors() const
unsigned getEpilogueVectorizationMinVF() const
unsigned getMinSVEVectorSizeInBits() const
bool isSVEAvailable() const
Returns true if the target has SVE and can use the full range of SVE instructions,...
InstructionCost getSpliceCost(VectorType *Tp, int Index)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
bool shouldTreatInstructionLikeSelect(const Instruction *I)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)
bool prefersVectorizedAddressing() const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
unsigned getEpilogueVectorizationMinVF() const
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind)
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isElementTypeLegalForScalableVector(Type *Ty) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)
bool preferFixedOverScalableIfEqualCost() const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
bool enableScalableVectorization() const
bool useNeonVector(const Type *Ty) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
bool isLegalMaskedGatherScatter(Type *DataType) const
unsigned getMaxInterleaveFactor(ElementCount VF)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCost(int64_t Val)
Calculate the cost of materializing a 64-bit value.
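A hedged sketch of how the materialisation cost of a 64-bit immediate can be counted with the expandMOVImm helper declared further down this list; using Insn.size() as the cost is an assumption made for illustration:

  InstructionCost exampleIntImmCost(uint64_t Val) {
    SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
    AArch64_IMM::expandMOVImm(Val, /*BitSize=*/64, Insn); // one or more MOVZ/MOVN/MOVK
    return Insn.size();
  }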
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src)
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
EVT getPromotedVTForPredicate(EVT VT) const
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, bool UseScalable) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const
Returns true if VecTy is a legal interleaved access type.
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
bool isTypeLegal(Type *Ty)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute a cost of the given call instruction.
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
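A hedged sketch of the recurring pattern this helper supports throughout the file: legalize the IR type, look up a per-legal-type cost, and scale it by the number of legal parts (LT.first). Tbl and ISD stand in for a concrete cost table and ISD opcode:

  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  if (const auto *Entry = CostTableLookup(Tbl, ISD, LT.second))
    return LT.first * Entry->Cost; // cost per legalized vector times the split count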
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
Estimate the overhead of scalarizing an instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool isIntPredicate() const
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isEquality() const
Return true if this predicate is either EQ or NE.
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool requiresSMChange(const SMEAttrs &Callee) const
void set(unsigned M, bool Enable=true)
bool hasStreamingBody() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
This instruction constructs a fixed permutation of two input vectors.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
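A hedged example of the mask shape this predicate recognises: for factor 2 the mask picks every second element starting at Index, e.g. <0, 2, 4, 6> for Index 0 on an 8-element shuffle. The expected result is an assumption based on that pattern:

  unsigned Index;
  int Mask[] = {0, 2, 4, 6};
  bool OK = ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, /*Factor=*/2, Index);
  // Expected: OK == true with Index == 0.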
static bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
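A hedged usage example of StringSwitch and Case; the strings and the default value are placeholders, not taken from this file:

  #include "llvm/ADT/StringSwitch.h"

  bool isKnownKeyword(llvm::StringRef S) {
    return llvm::StringSwitch<bool>(S)
        .Case("alpha", true)
        .Case("beta", true)
        .Default(false);
  }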
Class to represent struct types.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const
Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in ord...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isFP128Ty() const
Return true if this is 'fp128'.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
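A hedged example: a repeating run-of-ones pattern is encodable as an AArch64 logical immediate, while an arbitrary constant usually is not. The AArch64_AM namespace is assumed here as the helper's home in the backend:

  bool Enc = AArch64_AM::isLogicalImmediate(0x00FF00FF00FF00FFULL, /*regSize=*/64);    // true
  bool NotEnc = AArch64_AM::isLogicalImmediate(0x1234567812345678ULL, /*regSize=*/64); // false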
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static constexpr unsigned SVEBitsPerBlock
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
VScaleVal_match m_VScale()
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
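A hedged usage example of the m_ combinators listed above: match an add of a value and the constant 1 in either operand order and capture the non-constant side:

  using namespace llvm::PatternMatch;
  Value *X = nullptr;
  if (match(V, m_c_Add(m_Value(X), m_One()))) {
    // X is now bound to the non-constant operand of the add.
  }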
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
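A hedged example of a static cost table and a lookup against it; the entries are illustrative values rather than costs taken from this file:

  static InstructionCost lookupExampleCost(int ISDOpcode, MVT VT) {
    static const CostTblEntry ExampleTbl[] = {
        {ISD::ADD, MVT::v4i32, 1}, // illustrative costs only
        {ISD::MUL, MVT::v2i64, 4},
    };
    if (const auto *Entry = CostTableLookup(ExampleTbl, ISDOpcode, VT))
      return Entry->Cost;
    return InstructionCost::getInvalid();
  }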
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
constexpr int PoisonMaskElem
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Mod
The access may modify the value stored in memory.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> or <4, 12,...
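A hedged example of the mask shapes described above for an 8-element shuffle; WhichResultOut distinguishes the 1 and 2 variants of each instruction:

  unsigned Which;
  int Zip1[] = {0, 8, 1, 9, 2, 10, 3, 11};  // interleaves the low halves
  int Uzp1[] = {0, 2, 4, 6, 8, 10, 12, 14}; // takes the even elements
  bool Z = isZIPMask(Zip1, /*NumElts=*/8, Which); // expected: true
  bool U = isUZPMask(Uzp1, /*NumElts=*/8, Which); // expected: true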
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FAnyOf
Any_of reduction with select(fcmp(),x,y) where one of (x,y) is loop invariant, and both x and y are i...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ IAnyOf
Any_of reduction with select(icmp(),x,y) where one of (x,y) is loop invariant, and both x and y are i...
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
DWARFExpression::Operation Op
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Type Conversion Cost Table.