23#include "llvm/IR/IntrinsicsAArch64.h"
33#define DEBUG_TYPE "aarch64tti"
54 "Penalty of calling a function that requires a change to PSTATE.SM"));
58 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
69 cl::desc(
"The cost of a histcnt instruction"));
73 cl::desc(
"The number of instructions to search for a redundant dmb"));
76class TailFoldingOption {
91 bool NeedsDefault =
true;
95 void setNeedsDefault(
bool V) { NeedsDefault =
V; }
110 assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
111 "Initial bits should only include one of "
112 "(disabled|all|simple|default)");
113 Bits = NeedsDefault ? DefaultBits : InitialBits;
115 Bits &= ~DisableBits;
121 errs() <<
"invalid argument '" << Opt
122 <<
"' to -sve-tail-folding=; the option should be of the form\n"
123 " (disabled|all|default|simple)[+(reductions|recurrences"
124 "|reverse|noreductions|norecurrences|noreverse)]\n";
130 void operator=(
const std::string &Val) {
139 setNeedsDefault(
false);
144 unsigned StartIdx = 1;
145 if (TailFoldTypes[0] ==
"disabled")
146 setInitialBits(TailFoldingOpts::Disabled);
147 else if (TailFoldTypes[0] ==
"all")
148 setInitialBits(TailFoldingOpts::All);
149 else if (TailFoldTypes[0] ==
"default")
150 setNeedsDefault(
true);
151 else if (TailFoldTypes[0] ==
"simple")
152 setInitialBits(TailFoldingOpts::Simple);
155 setInitialBits(TailFoldingOpts::Disabled);
158 for (
unsigned I = StartIdx;
I < TailFoldTypes.
size();
I++) {
159 if (TailFoldTypes[
I] ==
"reductions")
160 setEnableBit(TailFoldingOpts::Reductions);
161 else if (TailFoldTypes[
I] ==
"recurrences")
162 setEnableBit(TailFoldingOpts::Recurrences);
163 else if (TailFoldTypes[
I] ==
"reverse")
164 setEnableBit(TailFoldingOpts::Reverse);
165 else if (TailFoldTypes[
I] ==
"noreductions")
166 setDisableBit(TailFoldingOpts::Reductions);
167 else if (TailFoldTypes[
I] ==
"norecurrences")
168 setDisableBit(TailFoldingOpts::Recurrences);
169 else if (TailFoldTypes[
I] ==
"noreverse")
170 setDisableBit(TailFoldingOpts::Reverse);
187 "Control the use of vectorisation using tail-folding for SVE where the"
188 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
189 "\ndisabled (Initial) No loop types will vectorize using "
191 "\ndefault (Initial) Uses the default tail-folding settings for "
193 "\nall (Initial) All legal loop types will vectorize using "
195 "\nsimple (Initial) Use tail-folding for simple loops (not "
196 "reductions or recurrences)"
197 "\nreductions Use tail-folding for loops containing reductions"
198 "\nnoreductions Inverse of above"
199 "\nrecurrences Use tail-folding for loops containing fixed order "
201 "\nnorecurrences Inverse of above"
202 "\nreverse Use tail-folding for loops requiring reversed "
204 "\nnoreverse Inverse of above"),
222 .
Case(
"__arm_sme_state",
true)
223 .
Case(
"__arm_tpidr2_save",
true)
224 .
Case(
"__arm_tpidr2_restore",
true)
225 .
Case(
"__arm_za_disable",
true)
239 if (isa<CallInst>(
I) && !
I.isDebugOrPseudoInst() &&
240 (cast<CallInst>(
I).isInlineAsm() || isa<IntrinsicInst>(
I) ||
250 SMEAttrs CallerAttrs(*Caller), CalleeAttrs(*Callee);
262 if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
263 CallerAttrs.requiresSMChange(CalleeAttrs) ||
264 CallerAttrs.requiresPreservingZT0(CalleeAttrs)) {
287 auto FVTy = dyn_cast<FixedVectorType>(Ty);
289 FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
298 unsigned DefaultCallPenalty)
const {
321 if (
F == Call.getCaller())
327 return DefaultCallPenalty;
366 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
371 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
377 return std::max<InstructionCost>(1,
Cost);
392 unsigned ImmIdx = ~0U;
396 case Instruction::GetElementPtr:
401 case Instruction::Store:
404 case Instruction::Add:
405 case Instruction::Sub:
406 case Instruction::Mul:
407 case Instruction::UDiv:
408 case Instruction::SDiv:
409 case Instruction::URem:
410 case Instruction::SRem:
411 case Instruction::And:
412 case Instruction::Or:
413 case Instruction::Xor:
414 case Instruction::ICmp:
418 case Instruction::Shl:
419 case Instruction::LShr:
420 case Instruction::AShr:
424 case Instruction::Trunc:
425 case Instruction::ZExt:
426 case Instruction::SExt:
427 case Instruction::IntToPtr:
428 case Instruction::PtrToInt:
429 case Instruction::BitCast:
430 case Instruction::PHI:
431 case Instruction::Call:
432 case Instruction::Select:
433 case Instruction::Ret:
434 case Instruction::Load:
439 int NumConstants = (BitSize + 63) / 64;
463 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
469 case Intrinsic::sadd_with_overflow:
470 case Intrinsic::uadd_with_overflow:
471 case Intrinsic::ssub_with_overflow:
472 case Intrinsic::usub_with_overflow:
473 case Intrinsic::smul_with_overflow:
474 case Intrinsic::umul_with_overflow:
476 int NumConstants = (BitSize + 63) / 64;
483 case Intrinsic::experimental_stackmap:
484 if ((
Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
487 case Intrinsic::experimental_patchpoint_void:
488 case Intrinsic::experimental_patchpoint:
489 if ((
Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
492 case Intrinsic::experimental_gc_statepoint:
493 if ((
Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
503 if (TyWidth == 32 || TyWidth == 64)
517 unsigned TotalHistCnts = 1;
526 if (
VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy)) {
527 unsigned EC = VTy->getElementCount().getKnownMinValue();
532 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
534 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
538 TotalHistCnts = EC / NaturalVectorWidth;
552 if (
auto *VTy = dyn_cast<ScalableVectorType>(
RetTy))
556 switch (ICA.
getID()) {
557 case Intrinsic::experimental_vector_histogram_add:
561 case Intrinsic::umin:
562 case Intrinsic::umax:
563 case Intrinsic::smin:
564 case Intrinsic::smax: {
565 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
566 MVT::v8i16, MVT::v2i32, MVT::v4i32,
567 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
571 if (LT.second == MVT::v2i64)
573 if (
any_of(ValidMinMaxTys, [<](
MVT M) {
return M == LT.second; }))
577 case Intrinsic::sadd_sat:
578 case Intrinsic::ssub_sat:
579 case Intrinsic::uadd_sat:
580 case Intrinsic::usub_sat: {
581 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
582 MVT::v8i16, MVT::v2i32, MVT::v4i32,
588 LT.second.getScalarSizeInBits() ==
RetTy->getScalarSizeInBits() ? 1 : 4;
589 if (
any_of(ValidSatTys, [<](
MVT M) {
return M == LT.second; }))
590 return LT.first * Instrs;
593 case Intrinsic::abs: {
594 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
595 MVT::v8i16, MVT::v2i32, MVT::v4i32,
598 if (
any_of(ValidAbsTys, [<](
MVT M) {
return M == LT.second; }))
602 case Intrinsic::bswap: {
603 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
604 MVT::v4i32, MVT::v2i64};
606 if (
any_of(ValidAbsTys, [<](
MVT M) {
return M == LT.second; }) &&
607 LT.second.getScalarSizeInBits() ==
RetTy->getScalarSizeInBits())
611 case Intrinsic::stepvector: {
620 Cost += AddCost * (LT.first - 1);
624 case Intrinsic::vector_extract:
625 case Intrinsic::vector_insert: {
638 bool IsExtract = ICA.
getID() == Intrinsic::vector_extract;
657 case Intrinsic::bitreverse: {
659 {Intrinsic::bitreverse, MVT::i32, 1},
660 {Intrinsic::bitreverse, MVT::i64, 1},
661 {Intrinsic::bitreverse, MVT::v8i8, 1},
662 {Intrinsic::bitreverse, MVT::v16i8, 1},
663 {Intrinsic::bitreverse, MVT::v4i16, 2},
664 {Intrinsic::bitreverse, MVT::v8i16, 2},
665 {Intrinsic::bitreverse, MVT::v2i32, 2},
666 {Intrinsic::bitreverse, MVT::v4i32, 2},
667 {Intrinsic::bitreverse, MVT::v1i64, 2},
668 {Intrinsic::bitreverse, MVT::v2i64, 2},
678 return LegalisationCost.first * Entry->Cost + 1;
680 return LegalisationCost.first * Entry->Cost;
684 case Intrinsic::ctpop: {
685 if (!ST->hasNEON()) {
706 RetTy->getScalarSizeInBits()
709 return LT.first * Entry->Cost + ExtraCost;
713 case Intrinsic::sadd_with_overflow:
714 case Intrinsic::uadd_with_overflow:
715 case Intrinsic::ssub_with_overflow:
716 case Intrinsic::usub_with_overflow:
717 case Intrinsic::smul_with_overflow:
718 case Intrinsic::umul_with_overflow: {
720 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
721 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
722 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
723 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
724 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
725 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
726 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
727 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
728 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
729 {Intrinsic::usub_with_overflow, MVT::i8, 3},
730 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
731 {Intrinsic::usub_with_overflow, MVT::i16, 3},
732 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
733 {Intrinsic::usub_with_overflow, MVT::i32, 1},
734 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
735 {Intrinsic::usub_with_overflow, MVT::i64, 1},
736 {Intrinsic::smul_with_overflow, MVT::i8, 5},
737 {Intrinsic::umul_with_overflow, MVT::i8, 4},
738 {Intrinsic::smul_with_overflow, MVT::i16, 5},
739 {Intrinsic::umul_with_overflow, MVT::i16, 4},
740 {Intrinsic::smul_with_overflow, MVT::i32, 2},
741 {Intrinsic::umul_with_overflow, MVT::i32, 2},
742 {Intrinsic::smul_with_overflow, MVT::i64, 3},
743 {Intrinsic::umul_with_overflow, MVT::i64, 3},
752 case Intrinsic::fptosi_sat:
753 case Intrinsic::fptoui_sat: {
756 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
761 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
762 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
763 LT.second == MVT::v2f64)) {
765 (LT.second == MVT::f64 && MTy == MVT::i32) ||
766 (LT.second == MVT::f32 && MTy == MVT::i64)))
775 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
782 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
783 (LT.second == MVT::f16 && MTy == MVT::i64) ||
784 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
798 if ((LT.second.getScalarType() == MVT::f32 ||
799 LT.second.getScalarType() == MVT::f64 ||
800 LT.second.getScalarType() == MVT::f16) &&
804 if (LT.second.isVector())
808 LegalTy, {LegalTy, LegalTy});
811 LegalTy, {LegalTy, LegalTy});
813 return LT.first *
Cost +
814 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
821 if (LT.second.isVector()) {
833 Type *CondTy =
RetTy->getWithNewBitWidth(1);
839 return LT.first *
Cost;
841 case Intrinsic::fshl:
842 case Intrinsic::fshr: {
855 {Intrinsic::fshl, MVT::v4i32, 3},
856 {Intrinsic::fshl, MVT::v2i64, 3}, {Intrinsic::fshl, MVT::v16i8, 4},
857 {Intrinsic::fshl, MVT::v8i16, 4}, {Intrinsic::fshl, MVT::v2i32, 3},
858 {Intrinsic::fshl, MVT::v8i8, 4}, {Intrinsic::fshl, MVT::v4i16, 4}};
864 return LegalisationCost.first * Entry->Cost;
868 if (!
RetTy->isIntegerTy())
873 bool HigherCost = (
RetTy->getScalarSizeInBits() != 32 &&
874 RetTy->getScalarSizeInBits() < 64) ||
875 (
RetTy->getScalarSizeInBits() % 64 != 0);
876 unsigned ExtraCost = HigherCost ? 1 : 0;
877 if (
RetTy->getScalarSizeInBits() == 32 ||
878 RetTy->getScalarSizeInBits() == 64)
885 return TyL.first + ExtraCost;
887 case Intrinsic::get_active_lane_mask: {
892 if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) &&
903 return RetTy->getNumElements() * 2;
908 case Intrinsic::experimental_vector_match: {
909 auto *NeedleTy = cast<FixedVectorType>(ICA.
getArgTypes()[1]);
911 unsigned SearchSize = NeedleTy->getNumElements();
912 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
919 if (isa<FixedVectorType>(
RetTy))
936 auto RequiredType =
II.getType();
938 auto *PN = dyn_cast<PHINode>(
II.getArgOperand(0));
939 assert(PN &&
"Expected Phi Node!");
942 if (!PN->hasOneUse())
945 for (
Value *IncValPhi : PN->incoming_values()) {
946 auto *Reinterpret = dyn_cast<IntrinsicInst>(IncValPhi);
948 Reinterpret->getIntrinsicID() !=
949 Intrinsic::aarch64_sve_convert_to_svbool ||
950 RequiredType != Reinterpret->getArgOperand(0)->getType())
959 for (
unsigned I = 0;
I < PN->getNumIncomingValues();
I++) {
960 auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(
I));
961 NPN->
addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(
I));
978static std::optional<Instruction *>
980 auto BinOp = dyn_cast<IntrinsicInst>(
II.getOperand(0));
984 auto IntrinsicID = BinOp->getIntrinsicID();
985 switch (IntrinsicID) {
986 case Intrinsic::aarch64_sve_and_z:
987 case Intrinsic::aarch64_sve_bic_z:
988 case Intrinsic::aarch64_sve_eor_z:
989 case Intrinsic::aarch64_sve_nand_z:
990 case Intrinsic::aarch64_sve_nor_z:
991 case Intrinsic::aarch64_sve_orn_z:
992 case Intrinsic::aarch64_sve_orr_z:
998 auto BinOpPred = BinOp->getOperand(0);
999 auto BinOpOp1 = BinOp->getOperand(1);
1000 auto BinOpOp2 = BinOp->getOperand(2);
1002 auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);
1004 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
1005 return std::nullopt;
1007 auto PredOp = PredIntr->getOperand(0);
1008 auto PredOpTy = cast<VectorType>(PredOp->getType());
1009 if (PredOpTy !=
II.getType())
1010 return std::nullopt;
1014 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
1015 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1016 if (BinOpOp1 == BinOpOp2)
1017 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1020 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
1022 auto NarrowedBinOp =
1027static std::optional<Instruction *>
1030 if (isa<PHINode>(
II.getArgOperand(0)))
1034 return BinOpCombine;
1037 if (isa<TargetExtType>(
II.getArgOperand(0)->getType()) ||
1038 isa<TargetExtType>(
II.getType()))
1039 return std::nullopt;
1042 Value *Cursor =
II.getOperand(0), *EarliestReplacement =
nullptr;
1044 const auto *IVTy = cast<VectorType>(
II.getType());
1050 const auto *CursorVTy = cast<VectorType>(Cursor->
getType());
1051 if (CursorVTy->getElementCount().getKnownMinValue() <
1052 IVTy->getElementCount().getKnownMinValue())
1056 if (Cursor->
getType() == IVTy)
1057 EarliestReplacement = Cursor;
1059 auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);
1062 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
1063 Intrinsic::aarch64_sve_convert_to_svbool ||
1064 IntrinsicCursor->getIntrinsicID() ==
1065 Intrinsic::aarch64_sve_convert_from_svbool))
1068 CandidatesForRemoval.
insert(CandidatesForRemoval.
begin(), IntrinsicCursor);
1069 Cursor = IntrinsicCursor->getOperand(0);
1074 if (!EarliestReplacement)
1075 return std::nullopt;
1082 Value *UncastedPred;
1083 if (
match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
1084 m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
1088 if (cast<ScalableVectorType>(Pred->
getType())->getMinNumElements() <=
1089 cast<ScalableVectorType>(UncastedPred->
getType())->getMinNumElements())
1090 Pred = UncastedPred;
1092 return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
1093 m_ConstantInt<AArch64SVEPredPattern::all>()));
1098static std::optional<Instruction *>
1100 bool hasInactiveVector) {
1101 int PredOperand = hasInactiveVector ? 1 : 0;
1102 int ReplaceOperand = hasInactiveVector ? 0 : 1;
1107 return std::nullopt;
1112static std::optional<Instruction *>
1115 !isa<llvm::UndefValue>(
II.getOperand(0)) &&
1116 !isa<llvm::PoisonValue>(
II.getOperand(0))) {
1124static std::optional<Instruction *>
1130 return std::nullopt;
1135static std::optional<Instruction *>
1140 if (
RetTy->isStructTy()) {
1141 auto StructT = cast<StructType>(
RetTy);
1142 auto VecT = StructT->getElementType(0);
1144 for (
unsigned i = 0; i < StructT->getNumElements(); i++) {
1145 ZerVec.
push_back(VecT->isFPOrFPVectorTy() ? ConstantFP::get(VecT, 0.0)
1146 : ConstantInt::get(VecT, 0));
1151 : ConstantInt::get(
II.getType(), 0);
1156 return std::nullopt;
1162 auto *OpPredicate =
II.getOperand(0);
1175 return std::nullopt;
1178 return std::nullopt;
1180 const auto PTruePattern =
1181 cast<ConstantInt>(Pg->
getOperand(0))->getZExtValue();
1182 if (PTruePattern != AArch64SVEPredPattern::vl1)
1183 return std::nullopt;
1188 II.getArgOperand(0),
II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
1189 Insert->insertBefore(&
II);
1190 Insert->takeName(&
II);
1198 auto *
RetTy = cast<ScalableVectorType>(
II.getType());
1200 II.getArgOperand(0));
1214 auto *Pg = dyn_cast<IntrinsicInst>(
II.getArgOperand(0));
1215 if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
1216 return std::nullopt;
1218 const auto PTruePattern =
1219 cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
1220 if (PTruePattern != AArch64SVEPredPattern::all)
1221 return std::nullopt;
1226 if (!SplatValue || !SplatValue->isZero())
1227 return std::nullopt;
1230 auto *DupQLane = dyn_cast<IntrinsicInst>(
II.getArgOperand(1));
1232 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
1233 return std::nullopt;
1236 auto *DupQLaneIdx = dyn_cast<ConstantInt>(DupQLane->getArgOperand(1));
1237 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
1238 return std::nullopt;
1240 auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
1241 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
1242 return std::nullopt;
1246 if (!isa<UndefValue>(VecIns->getArgOperand(0)))
1247 return std::nullopt;
1249 if (!cast<ConstantInt>(VecIns->getArgOperand(2))->isZero())
1250 return std::nullopt;
1252 auto *ConstVec = dyn_cast<Constant>(VecIns->getArgOperand(1));
1254 return std::nullopt;
1256 auto *VecTy = dyn_cast<FixedVectorType>(ConstVec->getType());
1257 auto *OutTy = dyn_cast<ScalableVectorType>(
II.getType());
1258 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
1259 return std::nullopt;
1261 unsigned NumElts = VecTy->getNumElements();
1262 unsigned PredicateBits = 0;
1265 for (
unsigned I = 0;
I < NumElts; ++
I) {
1266 auto *Arg = dyn_cast<ConstantInt>(ConstVec->getAggregateElement(
I));
1268 return std::nullopt;
1270 PredicateBits |= 1 << (
I * (16 / NumElts));
1274 if (PredicateBits == 0) {
1276 PFalse->takeName(&
II);
1282 for (
unsigned I = 0;
I < 16; ++
I)
1283 if ((PredicateBits & (1 <<
I)) != 0)
1286 unsigned PredSize = Mask & -Mask;
1291 for (
unsigned I = 0;
I < 16;
I += PredSize)
1292 if ((PredicateBits & (1 <<
I)) == 0)
1293 return std::nullopt;
1298 {PredType}, {PTruePat});
1300 Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
1301 auto *ConvertFromSVBool =
1303 {
II.getType()}, {ConvertToSVBool});
1311 Value *Pg =
II.getArgOperand(0);
1312 Value *Vec =
II.getArgOperand(1);
1313 auto IntrinsicID =
II.getIntrinsicID();
1314 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
1325 auto *OldBinOp = cast<BinaryOperator>(Vec);
1326 auto OpC = OldBinOp->getOpcode();
1332 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(),
II.getIterator());
1337 auto *
C = dyn_cast<Constant>(Pg);
1338 if (IsAfter &&
C &&
C->isNullValue()) {
1342 Extract->insertBefore(&
II);
1343 Extract->takeName(&
II);
1347 auto *IntrPG = dyn_cast<IntrinsicInst>(Pg);
1349 return std::nullopt;
1351 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
1352 return std::nullopt;
1354 const auto PTruePattern =
1355 cast<ConstantInt>(IntrPG->getOperand(0))->getZExtValue();
1360 return std::nullopt;
1362 unsigned Idx = MinNumElts - 1;
1371 auto *PgVTy = cast<ScalableVectorType>(Pg->
getType());
1372 if (
Idx >= PgVTy->getMinNumElements())
1373 return std::nullopt;
1378 Extract->insertBefore(&
II);
1379 Extract->takeName(&
II);
1392 Value *Pg =
II.getArgOperand(0);
1394 Value *Vec =
II.getArgOperand(2);
1398 return std::nullopt;
1403 return std::nullopt;
1417 FPTy, cast<VectorType>(Vec->
getType())->getElementCount());
1420 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
1433 {
II.getType()}, {AllPat});
1440static std::optional<Instruction *>
1442 const auto Pattern = cast<ConstantInt>(
II.getArgOperand(0))->getZExtValue();
1444 if (
Pattern == AArch64SVEPredPattern::all) {
1445 Constant *StepVal = ConstantInt::get(
II.getType(), NumElts);
1453 return MinNumElts && NumElts >= MinNumElts
1455 II, ConstantInt::get(
II.getType(), MinNumElts)))
1461 Value *PgVal =
II.getArgOperand(0);
1462 Value *OpVal =
II.getArgOperand(1);
1466 if (PgVal == OpVal &&
1467 (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
1468 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
1469 Value *Ops[] = {PgVal, OpVal};
1483 return std::nullopt;
1487 if (Pg->
getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
1488 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
1502 if ((Pg ==
Op) && (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
1503 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
1504 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
1505 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
1506 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
1507 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
1508 (OpIID == Intrinsic::aarch64_sve_and_z) ||
1509 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
1510 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
1511 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
1512 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
1513 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
1514 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
1524 return std::nullopt;
1527template <Intrinsic::ID MulOpc,
typename Intrinsic::ID FuseOpc>
1528static std::optional<Instruction *>
1530 bool MergeIntoAddendOp) {
1532 Value *MulOp0, *MulOp1, *AddendOp, *
Mul;
1533 if (MergeIntoAddendOp) {
1534 AddendOp =
II.getOperand(1);
1535 Mul =
II.getOperand(2);
1537 AddendOp =
II.getOperand(2);
1538 Mul =
II.getOperand(1);
1543 return std::nullopt;
1545 if (!
Mul->hasOneUse())
1546 return std::nullopt;
1549 if (
II.getType()->isFPOrFPVectorTy()) {
1554 return std::nullopt;
1556 return std::nullopt;
1561 if (MergeIntoAddendOp)
1563 {
P, AddendOp, MulOp0, MulOp1}, FMFSource);
1566 {
P, MulOp0, MulOp1, AddendOp}, FMFSource);
1571static std::optional<Instruction *>
1573 Value *Pred =
II.getOperand(0);
1574 Value *PtrOp =
II.getOperand(1);
1575 Type *VecTy =
II.getType();
1583 Load->copyMetadata(
II);
1594static std::optional<Instruction *>
1596 Value *VecOp =
II.getOperand(0);
1597 Value *Pred =
II.getOperand(1);
1598 Value *PtrOp =
II.getOperand(2);
1602 Store->copyMetadata(
II);
1613 switch (Intrinsic) {
1614 case Intrinsic::aarch64_sve_fmul_u:
1615 return Instruction::BinaryOps::FMul;
1616 case Intrinsic::aarch64_sve_fadd_u:
1617 return Instruction::BinaryOps::FAdd;
1618 case Intrinsic::aarch64_sve_fsub_u:
1619 return Instruction::BinaryOps::FSub;
1621 return Instruction::BinaryOpsEnd;
1625static std::optional<Instruction *>
1628 if (
II.isStrictFP())
1629 return std::nullopt;
1631 auto *OpPredicate =
II.getOperand(0);
1633 if (BinOpCode == Instruction::BinaryOpsEnd ||
1634 !
match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
1635 m_ConstantInt<AArch64SVEPredPattern::all>())))
1636 return std::nullopt;
1648 auto *OpPredicate =
II.getOperand(0);
1649 if (!
match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
1650 m_ConstantInt<AArch64SVEPredPattern::all>())))
1651 return std::nullopt;
1653 auto *
Mod =
II.getModule();
1655 II.setCalledFunction(NewDecl);
1662static std::optional<Instruction *>
1679 Intrinsic::aarch64_sve_mla>(
1683 Intrinsic::aarch64_sve_mad>(
1686 return std::nullopt;
1689static std::optional<Instruction *>
1696 Intrinsic::aarch64_sve_fmla>(IC,
II,
1701 Intrinsic::aarch64_sve_fmad>(IC,
II,
1706 Intrinsic::aarch64_sve_fmla>(IC,
II,
1709 return std::nullopt;
1712static std::optional<Instruction *>
1716 Intrinsic::aarch64_sve_fmla>(IC,
II,
1721 Intrinsic::aarch64_sve_fmad>(IC,
II,
1726 Intrinsic::aarch64_sve_fmla_u>(
1732static std::optional<Instruction *>
1739 Intrinsic::aarch64_sve_fmls>(IC,
II,
1744 Intrinsic::aarch64_sve_fnmsb>(
1749 Intrinsic::aarch64_sve_fmls>(IC,
II,
1752 return std::nullopt;
1755static std::optional<Instruction *>
1759 Intrinsic::aarch64_sve_fmls>(IC,
II,
1764 Intrinsic::aarch64_sve_fnmsb>(
1769 Intrinsic::aarch64_sve_fmls_u>(
1781 Intrinsic::aarch64_sve_mls>(
1784 return std::nullopt;
1790 auto *OpPredicate =
II.getOperand(0);
1791 auto *OpMultiplicand =
II.getOperand(1);
1792 auto *OpMultiplier =
II.getOperand(2);
1795 auto IsUnitSplat = [](
auto *
I) {
1804 auto IsUnitDup = [](
auto *
I) {
1805 auto *IntrI = dyn_cast<IntrinsicInst>(
I);
1806 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup)
1809 auto *SplatValue = IntrI->getOperand(2);
1813 if (IsUnitSplat(OpMultiplier)) {
1815 OpMultiplicand->takeName(&
II);
1817 }
else if (IsUnitDup(OpMultiplier)) {
1819 auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
1820 auto *DupPg = DupInst->getOperand(1);
1823 if (OpPredicate == DupPg) {
1824 OpMultiplicand->takeName(&
II);
1834 Value *UnpackArg =
II.getArgOperand(0);
1835 auto *
RetTy = cast<ScalableVectorType>(
II.getType());
1836 bool IsSigned =
II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
1837 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
1850 return std::nullopt;
1854 auto *OpVal =
II.getOperand(0);
1855 auto *OpIndices =
II.getOperand(1);
1860 auto *SplatValue = dyn_cast_or_null<ConstantInt>(
getSplatValue(OpIndices));
1862 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
1863 return std::nullopt;
1879 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
1880 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
1884 if ((
match(
II.getArgOperand(0),
1885 m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(
m_Value(
A)))) &&
1887 m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(
m_Value(
B))))) ||
1890 auto *TyA = cast<ScalableVectorType>(
A->getType());
1891 if (TyA ==
B->getType() &&
1902 return std::nullopt;
1910 if (
match(
II.getArgOperand(0),
1912 match(
II.getArgOperand(1), m_Intrinsic<Intrinsic::aarch64_sve_uzp2>(
1915 II, (
II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ?
A :
B));
1917 return std::nullopt;
1920static std::optional<Instruction *>
1922 Value *Mask =
II.getOperand(0);
1923 Value *BasePtr =
II.getOperand(1);
1924 Value *Index =
II.getOperand(2);
1936 if (
match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
1939 BasePtr->getPointerAlignment(
II.getDataLayout());
1943 BasePtr, IndexBase);
1951 return std::nullopt;
1954static std::optional<Instruction *>
1956 Value *Val =
II.getOperand(0);
1957 Value *Mask =
II.getOperand(1);
1958 Value *BasePtr =
II.getOperand(2);
1959 Value *Index =
II.getOperand(3);
1966 if (
match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
1969 BasePtr->getPointerAlignment(
II.getDataLayout());
1972 BasePtr, IndexBase);
1981 return std::nullopt;
1987 Value *Pred =
II.getOperand(0);
1988 Value *Vec =
II.getOperand(1);
1989 Value *DivVec =
II.getOperand(2);
1992 ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
1993 if (!SplatConstantInt)
1994 return std::nullopt;
1998 if (DivisorValue == -1)
1999 return std::nullopt;
2000 if (DivisorValue == 1)
2006 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2013 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2015 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2019 return std::nullopt;
2023 size_t VecSize = Vec.
size();
2028 size_t HalfVecSize = VecSize / 2;
2032 if (*
LHS !=
nullptr && *
RHS !=
nullptr) {
2040 if (*
LHS ==
nullptr && *
RHS !=
nullptr)
2055 m_Intrinsic<Intrinsic::vector_insert>(
2057 !isa<FixedVectorType>(CurrentInsertElt->
getType()))
2058 return std::nullopt;
2059 auto IIScalableTy = cast<ScalableVectorType>(
II.getType());
2063 while (
auto InsertElt = dyn_cast<InsertElementInst>(CurrentInsertElt)) {
2064 auto Idx = cast<ConstantInt>(InsertElt->getOperand(2));
2065 Elts[
Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2066 CurrentInsertElt = InsertElt->getOperand(0);
2070 isa<PoisonValue>(CurrentInsertElt) && isa<PoisonValue>(
Default);
2072 return std::nullopt;
2076 for (
size_t I = 0;
I < Elts.
size();
I++) {
2077 if (Elts[
I] ==
nullptr)
2082 if (InsertEltChain ==
nullptr)
2083 return std::nullopt;
2089 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.
size();
2090 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2091 IIScalableTy->getMinNumElements() /
2096 auto *WideShuffleMaskTy =
2107 auto NarrowBitcast =
2120 return std::nullopt;
2125 Value *Pred =
II.getOperand(0);
2126 Value *Vec =
II.getOperand(1);
2127 Value *Shift =
II.getOperand(2);
2130 Value *AbsPred, *MergedValue;
2131 if (!
match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_sqabs>(
2133 !
match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(
2136 return std::nullopt;
2144 return std::nullopt;
2149 return std::nullopt;
2152 {
II.getType()}, {Pred, Vec, Shift});
2159 Value *Vec =
II.getOperand(0);
2164 return std::nullopt;
2170 auto *NI =
II.getNextNonDebugInstruction();
2173 return !
I->mayReadOrWriteMemory() && !
I->mayHaveSideEffects();
2175 while (LookaheadThreshold-- && CanSkipOver(NI)) {
2176 auto *NIBB = NI->getParent();
2177 NI = NI->getNextNonDebugInstruction();
2179 if (
auto *SuccBB = NIBB->getUniqueSuccessor())
2180 NI = SuccBB->getFirstNonPHIOrDbgOrLifetime();
2185 auto *NextII = dyn_cast_or_null<IntrinsicInst>(NI);
2186 if (NextII &&
II.isIdenticalTo(NextII))
2189 return std::nullopt;
2192std::optional<Instruction *>
2199 case Intrinsic::aarch64_dmb:
2201 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
2202 case Intrinsic::aarch64_sve_fcvt_f16f32:
2203 case Intrinsic::aarch64_sve_fcvt_f16f64:
2204 case Intrinsic::aarch64_sve_fcvt_f32f16:
2205 case Intrinsic::aarch64_sve_fcvt_f32f64:
2206 case Intrinsic::aarch64_sve_fcvt_f64f16:
2207 case Intrinsic::aarch64_sve_fcvt_f64f32:
2208 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
2209 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
2210 case Intrinsic::aarch64_sve_fcvtx_f32f64:
2211 case Intrinsic::aarch64_sve_fcvtzs:
2212 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
2213 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
2214 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
2215 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
2216 case Intrinsic::aarch64_sve_fcvtzu:
2217 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
2218 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
2219 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
2220 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
2221 case Intrinsic::aarch64_sve_scvtf:
2222 case Intrinsic::aarch64_sve_scvtf_f16i32:
2223 case Intrinsic::aarch64_sve_scvtf_f16i64:
2224 case Intrinsic::aarch64_sve_scvtf_f32i64:
2225 case Intrinsic::aarch64_sve_scvtf_f64i32:
2226 case Intrinsic::aarch64_sve_ucvtf:
2227 case Intrinsic::aarch64_sve_ucvtf_f16i32:
2228 case Intrinsic::aarch64_sve_ucvtf_f16i64:
2229 case Intrinsic::aarch64_sve_ucvtf_f32i64:
2230 case Intrinsic::aarch64_sve_ucvtf_f64i32:
2232 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
2233 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
2234 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
2235 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
2237 case Intrinsic::aarch64_sve_st1_scatter:
2238 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
2239 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
2240 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
2241 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
2242 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
2243 case Intrinsic::aarch64_sve_st1dq:
2244 case Intrinsic::aarch64_sve_st1q_scatter_index:
2245 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
2246 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
2247 case Intrinsic::aarch64_sve_st1wq:
2248 case Intrinsic::aarch64_sve_stnt1:
2249 case Intrinsic::aarch64_sve_stnt1_scatter:
2250 case Intrinsic::aarch64_sve_stnt1_scatter_index:
2251 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
2252 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
2254 case Intrinsic::aarch64_sve_st2:
2255 case Intrinsic::aarch64_sve_st2q:
2257 case Intrinsic::aarch64_sve_st3:
2258 case Intrinsic::aarch64_sve_st3q:
2260 case Intrinsic::aarch64_sve_st4:
2261 case Intrinsic::aarch64_sve_st4q:
2263 case Intrinsic::aarch64_sve_addqv:
2264 case Intrinsic::aarch64_sve_and_z:
2265 case Intrinsic::aarch64_sve_bic_z:
2266 case Intrinsic::aarch64_sve_brka_z:
2267 case Intrinsic::aarch64_sve_brkb_z:
2268 case Intrinsic::aarch64_sve_brkn_z:
2269 case Intrinsic::aarch64_sve_brkpa_z:
2270 case Intrinsic::aarch64_sve_brkpb_z:
2271 case Intrinsic::aarch64_sve_cntp:
2272 case Intrinsic::aarch64_sve_compact:
2273 case Intrinsic::aarch64_sve_eor_z:
2274 case Intrinsic::aarch64_sve_eorv:
2275 case Intrinsic::aarch64_sve_eorqv:
2276 case Intrinsic::aarch64_sve_nand_z:
2277 case Intrinsic::aarch64_sve_nor_z:
2278 case Intrinsic::aarch64_sve_orn_z:
2279 case Intrinsic::aarch64_sve_orr_z:
2280 case Intrinsic::aarch64_sve_orv:
2281 case Intrinsic::aarch64_sve_orqv:
2282 case Intrinsic::aarch64_sve_pnext:
2283 case Intrinsic::aarch64_sve_rdffr_z:
2284 case Intrinsic::aarch64_sve_saddv:
2285 case Intrinsic::aarch64_sve_uaddv:
2286 case Intrinsic::aarch64_sve_umaxv:
2287 case Intrinsic::aarch64_sve_umaxqv:
2288 case Intrinsic::aarch64_sve_cmpeq:
2289 case Intrinsic::aarch64_sve_cmpeq_wide:
2290 case Intrinsic::aarch64_sve_cmpge:
2291 case Intrinsic::aarch64_sve_cmpge_wide:
2292 case Intrinsic::aarch64_sve_cmpgt:
2293 case Intrinsic::aarch64_sve_cmpgt_wide:
2294 case Intrinsic::aarch64_sve_cmphi:
2295 case Intrinsic::aarch64_sve_cmphi_wide:
2296 case Intrinsic::aarch64_sve_cmphs:
2297 case Intrinsic::aarch64_sve_cmphs_wide:
2298 case Intrinsic::aarch64_sve_cmple_wide:
2299 case Intrinsic::aarch64_sve_cmplo_wide:
2300 case Intrinsic::aarch64_sve_cmpls_wide:
2301 case Intrinsic::aarch64_sve_cmplt_wide:
2302 case Intrinsic::aarch64_sve_facge:
2303 case Intrinsic::aarch64_sve_facgt:
2304 case Intrinsic::aarch64_sve_fcmpeq:
2305 case Intrinsic::aarch64_sve_fcmpge:
2306 case Intrinsic::aarch64_sve_fcmpgt:
2307 case Intrinsic::aarch64_sve_fcmpne:
2308 case Intrinsic::aarch64_sve_fcmpuo:
2309 case Intrinsic::aarch64_sve_ld1_gather:
2310 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
2311 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
2312 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
2313 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
2314 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
2315 case Intrinsic::aarch64_sve_ld1q_gather_index:
2316 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
2317 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
2318 case Intrinsic::aarch64_sve_ld1ro:
2319 case Intrinsic::aarch64_sve_ld1rq:
2320 case Intrinsic::aarch64_sve_ld1udq:
2321 case Intrinsic::aarch64_sve_ld1uwq:
2322 case Intrinsic::aarch64_sve_ld2_sret:
2323 case Intrinsic::aarch64_sve_ld2q_sret:
2324 case Intrinsic::aarch64_sve_ld3_sret:
2325 case Intrinsic::aarch64_sve_ld3q_sret:
2326 case Intrinsic::aarch64_sve_ld4_sret:
2327 case Intrinsic::aarch64_sve_ld4q_sret:
2328 case Intrinsic::aarch64_sve_ldff1:
2329 case Intrinsic::aarch64_sve_ldff1_gather:
2330 case Intrinsic::aarch64_sve_ldff1_gather_index:
2331 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
2332 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
2333 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
2334 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
2335 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
2336 case Intrinsic::aarch64_sve_ldnf1:
2337 case Intrinsic::aarch64_sve_ldnt1:
2338 case Intrinsic::aarch64_sve_ldnt1_gather:
2339 case Intrinsic::aarch64_sve_ldnt1_gather_index:
2340 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
2341 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
2343 case Intrinsic::aarch64_sve_prf:
2344 case Intrinsic::aarch64_sve_prfb_gather_index:
2345 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
2346 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
2347 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
2348 case Intrinsic::aarch64_sve_prfd_gather_index:
2349 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
2350 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
2351 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
2352 case Intrinsic::aarch64_sve_prfh_gather_index:
2353 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
2354 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
2355 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
2356 case Intrinsic::aarch64_sve_prfw_gather_index:
2357 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
2358 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
2359 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
2361 case Intrinsic::aarch64_neon_fmaxnm:
2362 case Intrinsic::aarch64_neon_fminnm:
2364 case Intrinsic::aarch64_sve_convert_from_svbool:
2366 case Intrinsic::aarch64_sve_dup:
2368 case Intrinsic::aarch64_sve_dup_x:
2370 case Intrinsic::aarch64_sve_cmpne:
2371 case Intrinsic::aarch64_sve_cmpne_wide:
2373 case Intrinsic::aarch64_sve_rdffr:
2375 case Intrinsic::aarch64_sve_lasta:
2376 case Intrinsic::aarch64_sve_lastb:
2378 case Intrinsic::aarch64_sve_clasta_n:
2379 case Intrinsic::aarch64_sve_clastb_n:
2381 case Intrinsic::aarch64_sve_cntd:
2383 case Intrinsic::aarch64_sve_cntw:
2385 case Intrinsic::aarch64_sve_cnth:
2387 case Intrinsic::aarch64_sve_cntb:
2389 case Intrinsic::aarch64_sve_ptest_any:
2390 case Intrinsic::aarch64_sve_ptest_first:
2391 case Intrinsic::aarch64_sve_ptest_last:
2393 case Intrinsic::aarch64_sve_fabd:
2395 case Intrinsic::aarch64_sve_fadd:
2397 case Intrinsic::aarch64_sve_fadd_u:
2399 case Intrinsic::aarch64_sve_fdiv:
2401 case Intrinsic::aarch64_sve_fmax:
2403 case Intrinsic::aarch64_sve_fmaxnm:
2405 case Intrinsic::aarch64_sve_fmin:
2407 case Intrinsic::aarch64_sve_fminnm:
2409 case Intrinsic::aarch64_sve_fmla:
2411 case Intrinsic::aarch64_sve_fmls:
2413 case Intrinsic::aarch64_sve_fmul:
2418 case Intrinsic::aarch64_sve_fmul_u:
2420 case Intrinsic::aarch64_sve_fmulx:
2422 case Intrinsic::aarch64_sve_fnmla:
2424 case Intrinsic::aarch64_sve_fnmls:
2426 case Intrinsic::aarch64_sve_fsub:
2428 case Intrinsic::aarch64_sve_fsub_u:
2430 case Intrinsic::aarch64_sve_add:
2432 case Intrinsic::aarch64_sve_add_u:
2434 Intrinsic::aarch64_sve_mla_u>(
2436 case Intrinsic::aarch64_sve_mla:
2438 case Intrinsic::aarch64_sve_mls:
2440 case Intrinsic::aarch64_sve_mul:
2445 case Intrinsic::aarch64_sve_mul_u:
2447 case Intrinsic::aarch64_sve_sabd:
2449 case Intrinsic::aarch64_sve_smax:
2451 case Intrinsic::aarch64_sve_smin:
2453 case Intrinsic::aarch64_sve_smulh:
2455 case Intrinsic::aarch64_sve_sub:
2457 case Intrinsic::aarch64_sve_sub_u:
2459 Intrinsic::aarch64_sve_mls_u>(
2461 case Intrinsic::aarch64_sve_uabd:
2463 case Intrinsic::aarch64_sve_umax:
2465 case Intrinsic::aarch64_sve_umin:
2467 case Intrinsic::aarch64_sve_umulh:
2469 case Intrinsic::aarch64_sve_asr:
2471 case Intrinsic::aarch64_sve_lsl:
2473 case Intrinsic::aarch64_sve_lsr:
2475 case Intrinsic::aarch64_sve_and:
2477 case Intrinsic::aarch64_sve_bic:
2479 case Intrinsic::aarch64_sve_eor:
2481 case Intrinsic::aarch64_sve_orr:
2483 case Intrinsic::aarch64_sve_sqsub:
2485 case Intrinsic::aarch64_sve_uqsub:
2487 case Intrinsic::aarch64_sve_tbl:
2489 case Intrinsic::aarch64_sve_uunpkhi:
2490 case Intrinsic::aarch64_sve_uunpklo:
2491 case Intrinsic::aarch64_sve_sunpkhi:
2492 case Intrinsic::aarch64_sve_sunpklo:
2494 case Intrinsic::aarch64_sve_uzp1:
2496 case Intrinsic::aarch64_sve_zip1:
2497 case Intrinsic::aarch64_sve_zip2:
2499 case Intrinsic::aarch64_sve_ld1_gather_index:
2501 case Intrinsic::aarch64_sve_st1_scatter_index:
2503 case Intrinsic::aarch64_sve_ld1:
2505 case Intrinsic::aarch64_sve_st1:
2507 case Intrinsic::aarch64_sve_sdiv:
2509 case Intrinsic::aarch64_sve_sel:
2511 case Intrinsic::aarch64_sve_srshl:
2513 case Intrinsic::aarch64_sve_dupq_lane:
2515 case Intrinsic::aarch64_sve_insr:
2519 return std::nullopt;
2526 SimplifyAndSetOp)
const {
2527 switch (
II.getIntrinsicID()) {
2530 case Intrinsic::aarch64_neon_fcvtxn:
2531 case Intrinsic::aarch64_neon_rshrn:
2532 case Intrinsic::aarch64_neon_sqrshrn:
2533 case Intrinsic::aarch64_neon_sqrshrun:
2534 case Intrinsic::aarch64_neon_sqshrn:
2535 case Intrinsic::aarch64_neon_sqshrun:
2536 case Intrinsic::aarch64_neon_sqxtn:
2537 case Intrinsic::aarch64_neon_sqxtun:
2538 case Intrinsic::aarch64_neon_uqrshrn:
2539 case Intrinsic::aarch64_neon_uqshrn:
2540 case Intrinsic::aarch64_neon_uqxtn:
2541 SimplifyAndSetOp(&
II, 0, OrigDemandedElts, UndefElts);
2545 return std::nullopt;
2577bool AArch64TTIImpl::isWideningInstruction(
Type *DstTy,
unsigned Opcode,
2579 Type *SrcOverrideTy) {
2582 auto toVectorTy = [&](
Type *ArgTy) {
2584 cast<VectorType>(DstTy)->getElementCount());
2594 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
2604 Type *SrcTy = SrcOverrideTy;
2606 case Instruction::Add:
2607 case Instruction::Sub:
2609 if (isa<SExtInst>(Args[1]) || isa<ZExtInst>(Args[1])) {
2612 toVectorTy(cast<Instruction>(Args[1])->getOperand(0)->
getType());
2616 case Instruction::Mul: {
2618 if ((isa<SExtInst>(Args[0]) && isa<SExtInst>(Args[1])) ||
2619 (isa<ZExtInst>(Args[0]) && isa<ZExtInst>(Args[1]))) {
2622 toVectorTy(cast<Instruction>(Args[0])->getOperand(0)->
getType());
2623 }
else if (isa<ZExtInst>(Args[0]) || isa<ZExtInst>(Args[1])) {
2652 assert(SrcTy &&
"Expected some SrcTy");
2654 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
2660 DstTyL.first * DstTyL.second.getVectorMinNumElements();
2662 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
2666 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
2679 (Src->isScalableTy() && !ST->hasSVE2()))
2688 dyn_cast_or_null<Instruction>(
Add->getUniqueUndroppableUser());
2689 if (AddUser && AddUser->getOpcode() == Instruction::Add)
2692 auto *Shr = dyn_cast_or_null<Instruction>(
Add->getUniqueUndroppableUser());
2693 if (!Shr || Shr->getOpcode() != Instruction::LShr)
2696 auto *Trunc = dyn_cast_or_null<Instruction>(Shr->getUniqueUndroppableUser());
2697 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
2698 Src->getScalarSizeInBits() !=
2699 cast<CastInst>(Trunc)->getDestTy()->getScalarSizeInBits())
2723 assert(ISD &&
"Invalid opcode");
2726 if (
I &&
I->hasOneUser()) {
2727 auto *SingleUser = cast<Instruction>(*
I->user_begin());
2729 if (isWideningInstruction(Dst, SingleUser->getOpcode(),
Operands, Src)) {
2733 if (SingleUser->getOpcode() == Instruction::Add) {
2734 if (
I == SingleUser->getOperand(1) ||
2735 (isa<CastInst>(SingleUser->getOperand(1)) &&
2736 cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
2743 if ((isa<ZExtInst>(
I) || isa<SExtInst>(
I)) &&
2751 return Cost == 0 ? 0 : 1;
3077 EVT WiderTy = SrcTy.
bitsGT(DstTy) ? SrcTy : DstTy;
3081 std::pair<InstructionCost, MVT> LT =
3083 unsigned NumElements =
3095 return AdjustCost(Entry->Cost);
3122 if (ST->hasFullFP16())
3125 return AdjustCost(Entry->Cost);
3141 Opcode, LegalTy, Src, CCH,
CostKind,
I);
3144 return Part1 + Part2;
3164 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
3172 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) &&
"Invalid type");
3178 CostKind, Index,
nullptr,
nullptr);
3188 if (!VecLT.second.isVector() || !TLI->
isTypeLegal(DstVT))
3194 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
3204 case Instruction::SExt:
3209 case Instruction::ZExt:
3210 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
3223 return Opcode == Instruction::PHI ? 0 : 1;
3230 unsigned Opcode,
Type *Val,
unsigned Index,
bool HasRealUse,
3232 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
3240 if (!LT.second.isVector())
3245 if (LT.second.isFixedLengthVector()) {
3246 unsigned Width = LT.second.getVectorNumElements();
3247 Index = Index % Width;
3263 if (
I && dyn_cast<LoadInst>(
I->getOperand(1)))
3293 auto ExtractCanFuseWithFmul = [&]() {
3300 auto IsAllowedScalarTy = [&](
const Type *
T) {
3301 return T->isFloatTy() ||
T->isDoubleTy() ||
3302 (
T->isHalfTy() && ST->hasFullFP16());
3306 auto IsUserFMulScalarTy = [](
const Value *EEUser) {
3308 const auto *BO = dyn_cast<BinaryOperator>(EEUser);
3309 return BO && BO->getOpcode() == BinaryOperator::FMul &&
3310 !BO->getType()->isVectorTy();
3315 auto IsExtractLaneEquivalentToZero = [&](
unsigned Idx,
unsigned EltSz) {
3319 return Idx == 0 || (RegWidth != 0 && (
Idx * EltSz) % RegWidth == 0);
3324 if (!isa<FixedVectorType>(Val) || !IsAllowedScalarTy(Val->
getScalarType()))
3329 for (
auto *U :
Scalar->users()) {
3330 if (!IsUserFMulScalarTy(U))
3334 UserToExtractIdx[
U];
3336 if (UserToExtractIdx.
empty())
3338 for (
auto &[S, U, L] : ScalarUserAndIdx) {
3339 for (
auto *U : S->users()) {
3340 if (UserToExtractIdx.
find(U) != UserToExtractIdx.
end()) {
3341 auto *
FMul = cast<BinaryOperator>(U);
3342 auto *Op0 =
FMul->getOperand(0);
3343 auto *Op1 =
FMul->getOperand(1);
3344 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
3345 UserToExtractIdx[
U] =
L;
3351 for (
auto &[U, L] : UserToExtractIdx) {
3357 const auto *EE = cast<ExtractElementInst>(
I);
3359 const auto *IdxOp = dyn_cast<ConstantInt>(EE->getIndexOperand());
3363 return !EE->users().empty() &&
all_of(EE->users(), [&](
const User *U) {
3364 if (!IsUserFMulScalarTy(U))
3369 const auto *BO = cast<BinaryOperator>(U);
3370 const auto *OtherEE = dyn_cast<ExtractElementInst>(
3371 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
3373 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
3376 return IsExtractLaneEquivalentToZero(
3377 cast<ConstantInt>(OtherEE->getIndexOperand())
3380 OtherEE->getType()->getScalarSizeInBits());
3388 if (Opcode == Instruction::ExtractElement && (
I || Scalar) &&
3389 ExtractCanFuseWithFmul())
3393 return ST->getVectorInsertExtractBaseCost();
3398 unsigned Index,
Value *Op0,
3401 Opcode == Instruction::InsertElement && Op0 && !isa<UndefValue>(Op0);
3402 return getVectorInstrCostHelper(Opcode, Val, Index, HasRealUse);
3408 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
3409 return getVectorInstrCostHelper(Opcode, Val, Index,
false,
nullptr, Scalar,
3417 return getVectorInstrCostHelper(
I.getOpcode(), Val, Index,
3424 if (isa<ScalableVectorType>(Ty))
3429 return DemandedElts.
popcount() * (Insert + Extract) *
3443 if (
auto *VTy = dyn_cast<ScalableVectorType>(Ty))
3450 Op2Info, Args, CxtI);
3492 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
3499 if (!VT.isVector() && VT.getSizeInBits() > 64)
3503 Opcode, Ty,
CostKind, Op1Info, Op2Info);
3508 if (isa<FixedVectorType>(Ty) && cast<FixedVectorType>(Ty)
3509 ->getPrimitiveSizeInBits()
3510 .getFixedValue() < 128) {
3521 if (
nullptr != Entry)
3526 if (LT.second.getScalarType() == MVT::i8)
3528 else if (LT.second.getScalarType() == MVT::i16)
3538 if (
auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
3541 return (4 + DivCost) * VTy->getNumElements();
3561 if (LT.second == MVT::v2i64 && ST->hasSVE())
3576 if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
3578 return LT.first * 14;
3593 (Ty->
isHalfTy() && ST->hasFullFP16())) &&
3606 return 2 * LT.first;
3615 return 2 * LT.first;
3637 int MaxMergeDistance = 64;
3641 return NumVectorInstToHideOverhead;
3655 Op1Info, Op2Info,
I);
3660 if (isa<FixedVectorType>(ValTy) && ISD ==
ISD::SELECT) {
3662 const int AmortizationCost = 20;
3670 VecPred = CurrentPred;
3678 static const auto ValidMinMaxTys = {
3679 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
3680 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
3681 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
3684 if (
any_of(ValidMinMaxTys, [<](
MVT M) {
return M == LT.second; }) ||
3685 (ST->hasFullFP16() &&
3686 any_of(ValidFP16MinMaxTys, [<](
MVT M) {
return M == LT.second; })))
3691 VectorSelectTbl[] = {
3700 {
ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
3701 {
ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
3702 {
ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
3715 if (isa<FixedVectorType>(ValTy) && ISD ==
ISD::SETCC) {
3718 if (LT.second == MVT::v4f16 && !ST->hasFullFP16())
3719 return LT.first * 4;
3735 Op1Info, Op2Info,
I);
3741 if (ST->requiresStrictAlign()) {
3746 Options.AllowOverlappingLoads =
true;
3752 Options.LoadSizes = {8, 4, 2, 1};
3753 Options.AllowedTailExpansions = {3, 5, 6};
3758 return ST->hasSVE();
3769 if (!LT.first.isValid())
3773 auto *VT = cast<VectorType>(Src);
3774 if (VT->getElementType()->isIntegerTy(1))
3791 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
3792 "Should be called on only load or stores.");
3794 case Instruction::Load:
3797 return ST->getGatherOverhead();
3799 case Instruction::Store:
3802 return ST->getScatterOverhead();
3810 unsigned Opcode,
Type *DataTy,
const Value *
Ptr,
bool VariableMask,
3815 auto *VT = cast<VectorType>(DataTy);
3817 if (!LT.first.isValid())
3821 if (!LT.second.isVector() ||
3823 VT->getElementType()->isIntegerTy(1))
3833 ElementCount LegalVF = LT.second.getVectorElementCount();
3836 {TTI::OK_AnyValue, TTI::OP_None},
I);
3854 if (VT == MVT::Other)
3859 if (!LT.first.isValid())
3867 if (
auto *VTy = dyn_cast<ScalableVectorType>(Ty))
3869 (VTy->getElementType()->isIntegerTy(1) &&
3870 !VTy->getElementCount().isKnownMultipleOf(
3881 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
3882 LT.second.is128BitVector() && (!Alignment || *Alignment <
Align(16))) {
3888 const int AmortizationCost = 6;
3890 return LT.first * 2 * AmortizationCost;
3901 if (VT == MVT::v4i8)
3904 return cast<FixedVectorType>(Ty)->getNumElements() * 2;
3908 if (!
isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
3910 *Alignment !=
Align(1))
3924 while (!TypeWorklist.
empty()) {
3946 bool UseMaskForCond,
bool UseMaskForGaps) {
3947 assert(Factor >= 2 &&
"Invalid interleave factor");
3948 auto *VecVTy = cast<VectorType>(VecTy);
3955 if (!VecTy->
isScalableTy() && (UseMaskForCond || UseMaskForGaps))
3958 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
3959 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
3962 VecVTy->getElementCount().divideCoefficientBy(Factor));
3968 if (MinElts % Factor == 0 &&
3975 UseMaskForCond, UseMaskForGaps);
3982 for (
auto *
I : Tys) {
3983 if (!
I->isVectorTy())
3985 if (
I->getScalarSizeInBits() * cast<FixedVectorType>(
I)->getNumElements() ==
4004 enum { MaxStridedLoads = 7 };
4006 int StridedLoads = 0;
4009 for (
const auto BB : L->blocks()) {
4010 for (
auto &
I : *BB) {
4011 LoadInst *LMemI = dyn_cast<LoadInst>(&
I);
4016 if (L->isLoopInvariant(PtrValue))
4020 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
4021 if (!LSCEVAddRec || !LSCEVAddRec->
isAffine())
4030 if (StridedLoads > MaxStridedLoads / 2)
4031 return StridedLoads;
4034 return StridedLoads;
4037 int StridedLoads = countStridedLoads(L, SE);
4039 <<
" strided loads\n");
4061 if (!L->isInnermost() || !L->getExitBlock() || L->getNumBlocks() > 8)
4065 if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC) ||
4073 for (
auto *BB : L->getBlocks()) {
4074 for (
auto &
I : *BB) {
4075 if (!isa<IntrinsicInst>(&
I) && isa<CallBase>(&
I))
4088 if (L->getHeader() != L->getLoopLatch() ||
Size > 8)
4093 for (
auto *BB : L->blocks()) {
4094 for (
auto &
I : *BB) {
4101 if (isa<LoadInst>(&
I))
4110 unsigned MaxInstsPerLine = 16;
4112 unsigned BestUC = 1;
4113 unsigned SizeWithBestUC = BestUC *
Size;
4115 unsigned SizeWithUC = UC *
Size;
4116 if (SizeWithUC > 48)
4118 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
4119 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
4121 SizeWithBestUC = BestUC *
Size;
4127 return LoadedValues.
contains(SI->getOperand(0));
4146 if (L->getLoopDepth() > 1)
4154 case AArch64Subtarget::AppleA14:
4155 case AArch64Subtarget::AppleA15:
4156 case AArch64Subtarget::AppleA16:
4157 case AArch64Subtarget::AppleM4:
4160 case AArch64Subtarget::Falkor:
4171 for (
auto *BB : L->getBlocks()) {
4172 for (
auto &
I : *BB) {
4174 if (
I.getType()->isVectorTy())
4177 if (isa<CallInst>(
I) || isa<InvokeInst>(
I)) {
4192 !ST->getSchedModel().isOutOfOrder()) {
4209 Type *ExpectedType) {
4213 case Intrinsic::aarch64_neon_st2:
4214 case Intrinsic::aarch64_neon_st3:
4215 case Intrinsic::aarch64_neon_st4: {
4217 StructType *ST = dyn_cast<StructType>(ExpectedType);
4220 unsigned NumElts = Inst->
arg_size() - 1;
4221 if (ST->getNumElements() != NumElts)
4223 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
4229 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
4235 case Intrinsic::aarch64_neon_ld2:
4236 case Intrinsic::aarch64_neon_ld3:
4237 case Intrinsic::aarch64_neon_ld4:
4238 if (Inst->
getType() == ExpectedType)
4249 case Intrinsic::aarch64_neon_ld2:
4250 case Intrinsic::aarch64_neon_ld3:
4251 case Intrinsic::aarch64_neon_ld4:
4252 Info.ReadMem =
true;
4253 Info.WriteMem =
false;
4256 case Intrinsic::aarch64_neon_st2:
4257 case Intrinsic::aarch64_neon_st3:
4258 case Intrinsic::aarch64_neon_st4:
4259 Info.ReadMem =
false;
4260 Info.WriteMem =
true;
4268 case Intrinsic::aarch64_neon_ld2:
4269 case Intrinsic::aarch64_neon_st2:
4270 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
4272 case Intrinsic::aarch64_neon_ld3:
4273 case Intrinsic::aarch64_neon_st3:
4274 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
4276 case Intrinsic::aarch64_neon_ld4:
4277 case Intrinsic::aarch64_neon_st4:
4278 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
4290 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader) {
4291 bool Considerable =
false;
4292 AllowPromotionWithoutCommonHeader =
false;
4293 if (!isa<SExtInst>(&
I))
4295 Type *ConsideredSExtType =
4297 if (
I.getType() != ConsideredSExtType)
4301 for (
const User *U :
I.users()) {
4303 Considerable =
true;
4307 if (GEPInst->getNumOperands() > 2) {
4308 AllowPromotionWithoutCommonHeader =
true;
4313 return Considerable;
4354 if (
auto *VTy = dyn_cast<ScalableVectorType>(Ty))
4360 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
4370 return LegalizationCost + 2;
4380 LegalizationCost *= LT.first - 1;
4384 assert(ISD &&
"Invalid opcode");
4392 return LegalizationCost + 2;
4400 std::optional<FastMathFlags> FMF,
4406 if (
auto *VTy = dyn_cast<ScalableVectorType>(ValTy))
4411 if (
auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
4416 return BaseCost + FixedVTy->getNumElements();
4419 if (Opcode != Instruction::FAdd)
4422 auto *VTy = cast<ScalableVectorType>(ValTy);
4429 if (isa<ScalableVectorType>(ValTy))
4433 MVT MTy = LT.second;
4435 assert(ISD &&
"Invalid opcode");
4481 MTy.
isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
4482 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
4494 return (LT.first - 1) +
Log2_32(NElts);
4499 return (LT.first - 1) + Entry->Cost;
4507 auto *ValVTy = cast<FixedVectorType>(ValTy);
4511 if (LT.first != 1) {
4517 ExtraCost *= LT.first - 1;
4520 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
4521 return Cost + ExtraCost;
4555 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
4570 if (LT.second.getScalarType() == MVT::i1) {
4579 assert(Entry &&
"Illegal Type for Splice");
4580 LegalizationCost += Entry->Cost;
4581 return LegalizationCost * LT.first;
4592 if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
4594 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
4600 if (Args.size() >= 1 && isa<LoadInst>(Args[0]) &&
4603 return std::max<InstructionCost>(1, LT.first / 4);
4616 unsigned TpNumElts = Mask.size();
4617 unsigned LTNumElts = LT.second.getVectorNumElements();
4618 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
4622 for (
unsigned N = 0;
N < NumVecs;
N++) {
4626 unsigned Source1, Source2;
4627 unsigned NumSources = 0;
4628 for (
unsigned E = 0; E < LTNumElts; E++) {
4629 int MaskElt = (
N * LTNumElts + E < TpNumElts) ? Mask[
N * LTNumElts + E]
4638 unsigned Source = MaskElt / LTNumElts;
4639 if (NumSources == 0) {
4642 }
else if (NumSources == 1 && Source != Source1) {
4645 }
else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
4651 if (Source == Source1)
4653 else if (Source == Source2)
4654 NMask.
push_back(MaskElt % LTNumElts + LTNumElts);
4661 if (NumSources <= 2)
4664 NTp, NMask,
CostKind, 0,
nullptr, Args, CxtI);
4674 if (IsExtractSubvector && LT.second.isFixedLengthVector())
4685 bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
4686 if (IsLoad && LT.second.isVector() &&
4688 LT.second.getVectorElementCount()))
4696 all_of(Mask, [](
int E) {
return E < 8; }))
4700 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
4703 return M.value() < 0 || M.value() == (
int)M.index();
4710 if (LT.second.isFixedLengthVector() &&
4711 LT.second.getVectorNumElements() == Mask.size() &&
4713 (
isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
4714 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
4717 [&Mask](
int M) {
return M < 0 || M == Mask[0]; })))
4840 return LT.first * Entry->Cost;
4849 LT.second.getSizeInBits() <= 128 && SubTp) {
4851 if (SubLT.second.isVector()) {
4852 int NumElts = LT.second.getVectorNumElements();
4853 int NumSubElts = SubLT.second.getVectorNumElements();
4854 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
4860 if (IsExtractSubvector)
4873 if (isa<LoadInst>(&I) || isa<StoreInst>(&I)) {
4921 unsigned NumInsns = 0;
4923 NumInsns += BB->sizeWithoutDebug();
4933 int64_t Scale, unsigned AddrSpace) const {
4960 if (I->getOpcode() == Instruction::Or &&
4961 isa<BranchInst>(I->getNextNode()) &&
4962 cast<BranchInst>(I->getNextNode())->isUnconditional())
4965 if (I->getOpcode() == Instruction::Add ||
4966 I->getOpcode() == Instruction::Sub)
4989 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V))
4990 return all_equal(Shuf->getShuffleMask());
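all_equal over the shuffle mask is exactly the splat test: every lane reads the same source element. Reduced to plain C++ (a sketch of the same idea; note the cost-model lambda at line 4717 above additionally tolerates undef lanes):

#include <algorithm>
#include <vector>

// Illustration: a shuffle mask is a splat when all mask elements are equal.
static bool isSplatMask(const std::vector<int> &Mask) {
  return !Mask.empty() &&
         std::all_of(Mask.begin(), Mask.end(),
                     [&](int M) { return M == Mask[0]; });
}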
4997 bool AllowSplat = false) {
5002 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
5003 auto *FullTy = FullV->getType();
5004 auto *HalfTy = HalfV->getType();
5006 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
5009 auto extractHalf = [](Value *FullV, Value *HalfV) {
5010 auto *FullVT = cast<FixedVectorType>(FullV->getType());
5011 auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
5012 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
5016 Value *S1Op1 = nullptr, *S2Op1 = nullptr;
5030 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
5031 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
5038 int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
5045 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
5046 (M2Start != 0 && M2Start != (NumElements / 2)))
5048 if (S1Op1 && S2Op1 && M1Start != M2Start)
5058 return Ext->getType()->getScalarSizeInBits() ==
5059 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
5064 !areExtDoubled(cast<Instruction>(Ext1)) ||
5065 !areExtDoubled(cast<Instruction>(Ext2)))
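areTypesHalfed, extractHalf and areExtDoubled above are all simple width ratios: the shuffle operand must carry exactly half the bits and half the lanes of its source, and each extend must exactly double its element width. Stated as bare arithmetic (a sketch, not the file's helpers):

// Illustration of the three ratio checks.
static bool typesAreHalved(unsigned FullBits, unsigned HalfBits) {
  return FullBits == 2 * HalfBits;          // areTypesHalfed
}
static bool extractsHalfTheLanes(unsigned FullLanes, unsigned HalfLanes) {
  return FullLanes == 2 * HalfLanes;        // extractHalf
}
static bool extendDoublesWidth(unsigned DstBits, unsigned SrcBits) {
  return DstBits == 2 * SrcBits;            // areExtDoubled
}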
5073 Value *VectorOperand = nullptr;
5078 isa<FixedVectorType>(VectorOperand->getType()) &&
5079 cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
5089 auto *GEP = dyn_cast<GetElementPtrInst>(Ptrs);
5090 if (!GEP || GEP->getNumOperands() != 2)
5094 Value *Offsets = GEP->getOperand(1);
5097 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
5101 if (isa<SExtInst>(Offsets) || isa<ZExtInst>(Offsets)) {
5102 auto *OffsetsInst = cast<Instruction>(Offsets);
5103 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
5104 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
5120 Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
5125 Value *ZExtOp = cast<Instruction>(Op)->getOperand(0);
5126 Ops.push_back(&cast<Instruction>(ZExtOp)->getOperandUse(0));
5127 Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
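For gathers and scatters the interesting operand is the offset vector: if it was sign- or zero-extended from 32 bits or narrower up to something wider, sinking the extend next to the memory operation keeps the 32-bit-offset addressing form available. A hedged sketch of just that predicate, using only the IR accessors already visible above:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustration: offsets are an extend whose result is wider than 32 bits but
// whose source element type is at most 32 bits.
static bool offsetsExtendedFromLe32(Value *Offsets) {
  if (!isa<SExtInst>(Offsets) && !isa<ZExtInst>(Offsets))
    return false;
  auto *OffsetsInst = cast<Instruction>(Offsets);
  return OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
         OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32;
}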
5139 switch (II->getIntrinsicID()) {
5140 case Intrinsic::aarch64_neon_smull:
5141 case Intrinsic::aarch64_neon_umull:
5150 case Intrinsic::fma:
5151 case Intrinsic::fmuladd:
5152 if (isa<VectorType>(I->getType()) &&
5153 cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
5157 case Intrinsic::aarch64_neon_sqdmull:
5158 case Intrinsic::aarch64_neon_sqdmulh:
5159 case Intrinsic::aarch64_neon_sqrdmulh:
5165 return !Ops.empty();
5166 case Intrinsic::aarch64_neon_fmlal:
5167 case Intrinsic::aarch64_neon_fmlal2:
5168 case Intrinsic::aarch64_neon_fmlsl:
5169 case Intrinsic::aarch64_neon_fmlsl2:
5175 return !Ops.empty();
5176 case Intrinsic::aarch64_sve_ptest_first:
5177 case Intrinsic::aarch64_sve_ptest_last:
5178 if (auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0)))
5179 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
5181 return !Ops.empty();
5182 case Intrinsic::aarch64_sme_write_horiz:
5183 case Intrinsic::aarch64_sme_write_vert:
5184 case Intrinsic::aarch64_sme_writeq_horiz:
5185 case Intrinsic::aarch64_sme_writeq_vert: {
5186 auto *Idx = dyn_cast<Instruction>(II->getOperand(1));
5187 if (!Idx || Idx->getOpcode() != Instruction::Add)
5192 case Intrinsic::aarch64_sme_read_horiz:
5193 case Intrinsic::aarch64_sme_read_vert:
5194 case Intrinsic::aarch64_sme_readq_horiz:
5195 case Intrinsic::aarch64_sme_readq_vert:
5196 case Intrinsic::aarch64_sme_ld1b_vert:
5197 case Intrinsic::aarch64_sme_ld1h_vert:
5198 case Intrinsic::aarch64_sme_ld1w_vert:
5199 case Intrinsic::aarch64_sme_ld1d_vert:
5200 case Intrinsic::aarch64_sme_ld1q_vert:
5201 case Intrinsic::aarch64_sme_st1b_vert:
5202 case Intrinsic::aarch64_sme_st1h_vert:
5203 case Intrinsic::aarch64_sme_st1w_vert:
5204 case Intrinsic::aarch64_sme_st1d_vert:
5205 case Intrinsic::aarch64_sme_st1q_vert:
5206 case Intrinsic::aarch64_sme_ld1b_horiz:
5207 case Intrinsic::aarch64_sme_ld1h_horiz:
5208 case Intrinsic::aarch64_sme_ld1w_horiz:
5209 case Intrinsic::aarch64_sme_ld1d_horiz:
5210 case Intrinsic::aarch64_sme_ld1q_horiz:
5211 case Intrinsic::aarch64_sme_st1b_horiz:
5212 case Intrinsic::aarch64_sme_st1h_horiz:
5213 case Intrinsic::aarch64_sme_st1w_horiz:
5214 case Intrinsic::aarch64_sme_st1d_horiz:
5215 case Intrinsic::aarch64_sme_st1q_horiz: {
5216 auto *Idx = dyn_cast<Instruction>(II->getOperand(3));
5217 if (!Idx || Idx->getOpcode() != Instruction::Add)
5222 case Intrinsic::aarch64_neon_pmull:
5228 case Intrinsic::aarch64_neon_pmull64:
5230 II->getArgOperand(1)))
5235 case Intrinsic::masked_gather:
5240 case Intrinsic::masked_scatter:
5251 switch (I->getOpcode()) {
5252 case Instruction::GetElementPtr:
5253 case Instruction::Add:
5254 case Instruction::Sub:
5255 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
5266 if (!I->getType()->isVectorTy())
5269 switch (I->getOpcode()) {
5270 case Instruction::Sub:
5271 case Instruction::Add: {
5277 auto Ext1 = cast<Instruction>(I->getOperand(0));
5278 auto Ext2 = cast<Instruction>(I->getOperand(1));
5289 case Instruction::Or: {
5292 if (ST->hasNEON()) {
5302 ? cast<Instruction>(I->getOperand(1))
5303 : cast<Instruction>(I->getOperand(0));
5306 if (I->getParent() != MainAnd->getParent() ||
5311 if (I->getParent() != IA->getParent() ||
5312 I->getParent() != IB->getParent())
5327 case Instruction::Mul: {
5328 auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
5329 auto *Ty = cast<VectorType>(V->getType());
5331 if (Ty->isScalableTy())
5335 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
5338 int NumZExts = 0, NumSExts = 0;
5339 for (auto &Op : I->operands()) {
5341 if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
5345 auto *Ext = cast<Instruction>(Op);
5346 auto *ExtOp = Ext->getOperand(0);
5347 if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
5351 if (isa<SExtInst>(Ext))
5382 Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
5387 dyn_cast<ConstantInt>(Insert->getOperand(2));
5389 if (!ElementConstant || !ElementConstant->isZero())
5392 unsigned Opcode = OperandInstr->getOpcode();
5393 if (Opcode == Instruction::SExt)
5395 else if (Opcode == Instruction::ZExt)
5400 unsigned Bitwidth = I->getType()->getScalarSizeInBits();
5408 Ops.push_back(&Insert->getOperandUse(1));
5414 if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
5418 if (!ShouldSinkSplatForIndexedVariant(I))
5427 return !Ops.empty();
5429 case Instruction::FMul: {
5431 if (I->getType()->isScalableTy())
5434 if (cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
5443 return !Ops.empty();
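The Instruction::Mul case above only reports the sink as profitable when both multiplicands are extends of the same signedness, which is the shape that widening multiplies (e.g. the smull/umull intrinsics handled earlier in the switch) can absorb. A reduced sketch of that bookkeeping, assuming the standard IR headers:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustration: count sext/zext multiplicands; two of the same kind is the
// pattern that maps onto a widening multiply.
static bool bothOperandsSameExtendKind(Instruction &Mul) {
  int NumSExts = 0, NumZExts = 0;
  for (Value *Op : Mul.operands()) {
    if (isa<SExtInst>(Op))
      ++NumSExts;
    else if (isa<ZExtInst>(Op))
      ++NumZExts;
  }
  return NumSExts == 2 || NumZExts == 2;
}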
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
This file provides the interface for the instcombine pass implementation.
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
uint64_t IntrinsicInst * II
static uint64_t getBits(uint64_t Val, int Start, int End)
static unsigned getFastMathFlags(const MachineInstr &I)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
unsigned getVectorInsertExtractBaseCost() const
ARMProcFamilyEnum getProcFamily() const
Returns ARM processor family.
unsigned getMaxInterleaveFactor() const
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
TailFoldingOpts getSVETailFoldingDefaultOpts() const
bool useSVEForFixedLengthVectors() const
unsigned getEpilogueVectorizationMinVF() const
unsigned getMinSVEVectorSizeInBits() const
bool isSVEAvailable() const
Returns true if the target has SVE and can use the full range of SVE instructions,...
InstructionCost getSpliceCost(VectorType *Tp, int Index)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
bool shouldTreatInstructionLikeSelect(const Instruction *I)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)
bool prefersVectorizedAddressing() const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
unsigned getEpilogueVectorizationMinVF() const
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind)
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isElementTypeLegalForScalableVector(Type *Ty) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
bool enableScalableVectorization() const
bool useNeonVector(const Type *Ty) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
bool isLegalMaskedGatherScatter(Type *DataType) const
unsigned getMaxInterleaveFactor(ElementCount VF)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCost(int64_t Val)
Calculate the cost of materializing a 64-bit value.
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src)
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
EVT getPromotedVTForPredicate(EVT VT) const
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, bool UseScalable) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const
Returns true if VecTy is a legal interleaved access type.
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
bool isTypeLegal(Type *Ty)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute a cost of the given call instruction.
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
Estimate the overhead of scalarizing an instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool isIntPredicate() const
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isEquality() const
Return true if this predicate is either EQ or NE.
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool requiresSMChange(const SMEAttrs &Callee) const
void set(unsigned M, bool Enable=true)
bool hasStreamingBody() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
This instruction constructs a fixed permutation of two input vectors.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Class to represent struct types.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const
Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in ord...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isFP128Ty() const
Return true if this is 'fp128'.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static constexpr unsigned SVEBitsPerBlock
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
VScaleVal_match m_VScale()
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
constexpr int PoisonMaskElem
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Mod
The access may modify the value stored in memory.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> or <4, 12,...
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FAnyOf
Any_of reduction with select(fcmp(),x,y) where one of (x,y) is loop invariant, and both x and y are i...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ IAnyOf
Any_of reduction with select(icmp(),x,y) where one of (x,y) is loop invariant, and both x and y are i...
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
DWARFExpression::Operation Op
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
@ Default
The result values are uniform if and only if all operands are uniform.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Type Conversion Cost Table.