#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64tti"

        "Penalty of calling a function that requires a change to PSTATE.SM"));

    cl::desc(
        "Penalty of inlining a call that requires a change to PSTATE.SM"));

    cl::desc("The cost of a histcnt instruction"));
class TailFoldingOption {

  bool NeedsDefault = true;

  void setNeedsDefault(bool V) { NeedsDefault = V; }

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;

    Bits &= ~DisableBits;
    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              "  (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";
  void operator=(const std::string &Val) {

    setNeedsDefault(false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);

      setInitialBits(TailFoldingOpts::Disabled);

    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
181 "Control the use of vectorisation using tail-folding for SVE where the"
182 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
183 "\ndisabled (Initial) No loop types will vectorize using "
185 "\ndefault (Initial) Uses the default tail-folding settings for "
187 "\nall (Initial) All legal loop types will vectorize using "
189 "\nsimple (Initial) Use tail-folding for simple loops (not "
190 "reductions or recurrences)"
191 "\nreductions Use tail-folding for loops containing reductions"
192 "\nnoreductions Inverse of above"
193 "\nrecurrences Use tail-folding for loops containing fixed order "
195 "\nnorecurrences Inverse of above"
196 "\nreverse Use tail-folding for loops requiring reversed "
198 "\nnoreverse Inverse of above"),
      .Case("__arm_sme_state", true)
      .Case("__arm_tpidr2_save", true)
      .Case("__arm_tpidr2_restore", true)
      .Case("__arm_za_disable", true)
    if (isa<CallInst>(I) && !I.isDebugOrPseudoInst() &&
        (cast<CallInst>(I).isInlineAsm() || isa<IntrinsicInst>(I) ||

  SMEAttrs CallerAttrs(*Caller), CalleeAttrs(*Callee);

  if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
      CallerAttrs.requiresSMChange(CalleeAttrs) ||
      CallerAttrs.requiresPreservingZT0(CalleeAttrs)) {

      TM.getSubtargetImpl(*Caller)->getFeatureBits();

      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  return (CallerBits & CalleeBits) == CalleeBits;
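// Reading of the subset check above: inlining is treated as compatible only
// when every target feature required by the callee is also enabled in the
// caller. For example, a callee built with just NEON can be inlined into a
// caller built with NEON and SVE, but not the reverse.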
  auto FVTy = dyn_cast<FixedVectorType>(Ty);

         FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;

                                              unsigned DefaultCallPenalty) const {

  if (F == Call.getCaller())

  return DefaultCallPenalty;

    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {

  return std::max<InstructionCost>(1, Cost);
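// Rough sketch of the immediate-cost model above: the value is sign-extended
// up to a multiple of 64 bits, each 64-bit chunk is then costed separately by
// the loop, and the final result is clamped to a minimum cost of 1.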
  unsigned ImmIdx = ~0U;

  case Instruction::GetElementPtr:

  case Instruction::Store:

  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:

  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:

  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:

    int NumConstants = (BitSize + 63) / 64;
  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)

  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:

    int NumConstants = (BitSize + 63) / 64;

  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  if (TyWidth == 32 || TyWidth == 64)
  if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy))
    if ((VTy->getElementCount().getKnownMinValue() != 2 &&
         VTy->getElementCount().getKnownMinValue() != 4) ||
        VTy->getPrimitiveSizeInBits().getKnownMinValue() > 128 ||
        !VTy->isScalableTy())

  if (auto *VTy = dyn_cast<ScalableVectorType>(RetTy))

  switch (ICA.getID()) {
  case Intrinsic::experimental_vector_histogram_add:

  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8,    MVT::v16i8,   MVT::v4i16,
                                        MVT::v8i16,   MVT::v2i32,   MVT::v4i32,
                                        MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,

    if (LT.second == MVT::v2i64)

    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    static const auto ValidSatTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,

        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
      return LT.first * Instrs;

  case Intrinsic::abs: {
    static const auto ValidAbsTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,

    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }))

  case Intrinsic::bswap: {
    static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
                                     MVT::v4i32, MVT::v2i64};

    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }) &&
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())

  case Intrinsic::experimental_stepvector: {

      Cost += AddCost * (LT.first - 1);

  case Intrinsic::vector_extract:
  case Intrinsic::vector_insert: {

    bool IsExtract = ICA.getID() == Intrinsic::vector_extract;

  case Intrinsic::bitreverse: {

        {Intrinsic::bitreverse, MVT::i32, 1},
        {Intrinsic::bitreverse, MVT::i64, 1},
        {Intrinsic::bitreverse, MVT::v8i8, 1},
        {Intrinsic::bitreverse, MVT::v16i8, 1},
        {Intrinsic::bitreverse, MVT::v4i16, 2},
        {Intrinsic::bitreverse, MVT::v8i16, 2},
        {Intrinsic::bitreverse, MVT::v2i32, 2},
        {Intrinsic::bitreverse, MVT::v4i32, 2},
        {Intrinsic::bitreverse, MVT::v1i64, 2},
        {Intrinsic::bitreverse, MVT::v2i64, 2},

      return LegalisationCost.first * Entry->Cost + 1;

    return LegalisationCost.first * Entry->Cost;
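// Reading the bitreverse table above: the legalized type's entry is scaled by
// the legalization count, so i32, i64 and the byte vectors are modelled as a
// single instruction, while the wider-element vectors are modelled as two
// (roughly a byte-wise bit reversal followed by an element reorder).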
  case Intrinsic::ctpop: {
    if (!ST->hasNEON()) {

                     RetTy->getScalarSizeInBits()

    return LT.first * Entry->Cost + ExtraCost;

  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {

        {Intrinsic::sadd_with_overflow, MVT::i8, 3},
        {Intrinsic::uadd_with_overflow, MVT::i8, 3},
        {Intrinsic::sadd_with_overflow, MVT::i16, 3},
        {Intrinsic::uadd_with_overflow, MVT::i16, 3},
        {Intrinsic::sadd_with_overflow, MVT::i32, 1},
        {Intrinsic::uadd_with_overflow, MVT::i32, 1},
        {Intrinsic::sadd_with_overflow, MVT::i64, 1},
        {Intrinsic::uadd_with_overflow, MVT::i64, 1},
        {Intrinsic::ssub_with_overflow, MVT::i8, 3},
        {Intrinsic::usub_with_overflow, MVT::i8, 3},
        {Intrinsic::ssub_with_overflow, MVT::i16, 3},
        {Intrinsic::usub_with_overflow, MVT::i16, 3},
        {Intrinsic::ssub_with_overflow, MVT::i32, 1},
        {Intrinsic::usub_with_overflow, MVT::i32, 1},
        {Intrinsic::ssub_with_overflow, MVT::i64, 1},
        {Intrinsic::usub_with_overflow, MVT::i64, 1},
        {Intrinsic::smul_with_overflow, MVT::i8, 5},
        {Intrinsic::umul_with_overflow, MVT::i8, 4},
        {Intrinsic::smul_with_overflow, MVT::i16, 5},
        {Intrinsic::umul_with_overflow, MVT::i16, 4},
        {Intrinsic::smul_with_overflow, MVT::i32, 2},
        {Intrinsic::umul_with_overflow, MVT::i32, 2},
        {Intrinsic::smul_with_overflow, MVT::i64, 3},
        {Intrinsic::umul_with_overflow, MVT::i64, 3},
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {

    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;

    if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
         LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
         LT.second == MVT::v2f64)) {

           (LT.second == MVT::f64 && MTy == MVT::i32) ||
           (LT.second == MVT::f32 && MTy == MVT::i64)))

    if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())

    if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
        (LT.second == MVT::f16 && MTy == MVT::i64) ||
        ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&

    if ((LT.second.getScalarType() == MVT::f32 ||
         LT.second.getScalarType() == MVT::f64 ||
         LT.second.getScalarType() == MVT::f16) &&

      if (LT.second.isVector())

                                        LegalTy, {LegalTy, LegalTy});

                                        LegalTy, {LegalTy, LegalTy});

      return LT.first * Cost +
             ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0

    if (LT.second.isVector()) {

    Type *CondTy = RetTy->getWithNewBitWidth(1);

    return LT.first * Cost;

  case Intrinsic::fshl:
  case Intrinsic::fshr: {

        {Intrinsic::fshl, MVT::v4i32, 3},
        {Intrinsic::fshl, MVT::v2i64, 3}, {Intrinsic::fshl, MVT::v16i8, 4},
        {Intrinsic::fshl, MVT::v8i16, 4}, {Intrinsic::fshl, MVT::v2i32, 3},
        {Intrinsic::fshl, MVT::v8i8, 4},  {Intrinsic::fshl, MVT::v4i16, 4}};

      return LegalisationCost.first * Entry->Cost;

    if (!RetTy->isIntegerTy())

    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
                       RetTy->getScalarSizeInBits() < 64) ||
                      (RetTy->getScalarSizeInBits() % 64 != 0);
    unsigned ExtraCost = HigherCost ? 1 : 0;
    if (RetTy->getScalarSizeInBits() == 32 ||
        RetTy->getScalarSizeInBits() == 64)

    return TyL.first + ExtraCost;

  case Intrinsic::get_active_lane_mask: {

    if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) &&

        return RetTy->getNumElements() * 2;
  auto RequiredType = II.getType();

  auto *PN = dyn_cast<PHINode>(II.getArgOperand(0));
  assert(PN && "Expected Phi Node!");

  if (!PN->hasOneUse())

  for (Value *IncValPhi : PN->incoming_values()) {
    auto *Reinterpret = dyn_cast<IntrinsicInst>(IncValPhi);

        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())

  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I));
    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
static std::optional<Instruction *>

  auto BinOp = dyn_cast<IntrinsicInst>(II.getOperand(0));

  auto IntrinsicID = BinOp->getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:

  auto BinOpPred = BinOp->getOperand(0);
  auto BinOpOp1 = BinOp->getOperand(1);
  auto BinOpOp2 = BinOp->getOperand(2);

  auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);

      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)

  auto PredOp = PredIntr->getOperand(0);
  auto PredOpTy = cast<VectorType>(PredOp->getType());
  if (PredOpTy != II.getType())

      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
  if (BinOpOp1 == BinOpOp2)
    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);

        Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));

  auto NarrowedBinOp =
static std::optional<Instruction *>

  if (isa<PHINode>(II.getArgOperand(0)))

    return BinOpCombine;

  if (isa<TargetExtType>(II.getArgOperand(0)->getType()) ||
      isa<TargetExtType>(II.getType()))
    return std::nullopt;

  Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;

  const auto *IVTy = cast<VectorType>(II.getType());

    const auto *CursorVTy = cast<VectorType>(Cursor->getType());
    if (CursorVTy->getElementCount().getKnownMinValue() <
        IVTy->getElementCount().getKnownMinValue())

    if (Cursor->getType() == IVTy)
      EarliestReplacement = Cursor;

    auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);

    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_to_svbool ||
                              IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_from_svbool))

    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
    Cursor = IntrinsicCursor->getOperand(0);

  if (!EarliestReplacement)
    return std::nullopt;

  Value *UncastedPred;
  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
                      m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(

    if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
        cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
      Pred = UncastedPred;

  return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                         m_ConstantInt<AArch64SVEPredPattern::all>()));
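// Sketch of what the matcher above accepts: a predicate counts as "all
// active" when, possibly after stripping a convert_from_svbool /
// convert_to_svbool round trip that does not narrow it, it is a call to
// llvm.aarch64.sve.ptrue with the AArch64SVEPredPattern::all pattern.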
static std::optional<Instruction *>

  return std::nullopt;

static std::optional<Instruction *>

  if (RetTy->isStructTy()) {
    auto StructT = cast<StructType>(RetTy);
    auto VecT = StructT->getElementType(0);

    for (unsigned i = 0; i < StructT->getNumElements(); i++) {
      ZerVec.push_back(VecT->isFPOrFPVectorTy() ? ConstantFP::get(VecT, 0.0)
                                                : ConstantInt::get(VecT, 0));

  } else if (RetTy->isFPOrFPVectorTy())

    Node = ConstantInt::get(II.getType(), 0);

  return std::nullopt;

  auto *OpPredicate = II.getOperand(0);

    return std::nullopt;

    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::vl1)
    return std::nullopt;

      II.getArgOperand(0), II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
  Insert->insertBefore(&II);
  Insert->takeName(&II);
  auto *RetTy = cast<ScalableVectorType>(II.getType());

      II.getArgOperand(0));

  auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
  if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::all)
    return std::nullopt;

  if (!SplatValue || !SplatValue->isZero())
    return std::nullopt;

  auto *DupQLane = dyn_cast<IntrinsicInst>(II.getArgOperand(1));

      DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
    return std::nullopt;

  if (!cast<ConstantInt>(DupQLane->getArgOperand(1))->isZero())
    return std::nullopt;

  auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
  if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
    return std::nullopt;

  if (!isa<UndefValue>(VecIns->getArgOperand(0)))
    return std::nullopt;

  if (!cast<ConstantInt>(VecIns->getArgOperand(2))->isZero())
    return std::nullopt;

  auto *ConstVec = dyn_cast<Constant>(VecIns->getArgOperand(1));

    return std::nullopt;

  auto *VecTy = dyn_cast<FixedVectorType>(ConstVec->getType());
  auto *OutTy = dyn_cast<ScalableVectorType>(II.getType());
  if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
    return std::nullopt;

  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;

  for (unsigned I = 0; I < NumElts; ++I) {
    auto *Arg = dyn_cast<ConstantInt>(ConstVec->getAggregateElement(I));

      return std::nullopt;

      PredicateBits |= 1 << (I * (16 / NumElts));

  if (PredicateBits == 0) {

    PFalse->takeName(&II);

  for (unsigned I = 0; I < 16; ++I)
    if ((PredicateBits & (1 << I)) != 0)

  unsigned PredSize = Mask & -Mask;

  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
      return std::nullopt;

                                        {PredType}, {PTruePat});

      Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
  auto *ConvertFromSVBool =

                              {II.getType()}, {ConvertToSVBool});
  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(1);
  auto IntrinsicID = II.getIntrinsicID();
  bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;

    auto *OldBinOp = cast<BinaryOperator>(Vec);
    auto OpC = OldBinOp->getOpcode();

        OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());

  auto *C = dyn_cast<Constant>(Pg);
  if (IsAfter && C && C->isNullValue()) {

    Extract->insertBefore(&II);
    Extract->takeName(&II);

  auto *IntrPG = dyn_cast<IntrinsicInst>(Pg);

    return std::nullopt;

  if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(IntrPG->getOperand(0))->getZExtValue();

    return std::nullopt;

  unsigned Idx = MinNumElts - 1;

  auto *PgVTy = cast<ScalableVectorType>(Pg->getType());
  if (Idx >= PgVTy->getMinNumElements())
    return std::nullopt;

  Extract->insertBefore(&II);
  Extract->takeName(&II);
  Value *Pg = II.getArgOperand(0);

  Value *Vec = II.getArgOperand(2);

    return std::nullopt;

    return std::nullopt;

      FPTy, cast<VectorType>(Vec->getType())->getElementCount());

      II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});

                                           {II.getType()}, {AllPat});

static std::optional<Instruction *>

  const auto Pattern = cast<ConstantInt>(II.getArgOperand(0))->getZExtValue();

  if (Pattern == AArch64SVEPredPattern::all) {
    Constant *StepVal = ConstantInt::get(II.getType(), NumElts);

  return MinNumElts && NumElts >= MinNumElts

                 II, ConstantInt::get(II.getType(), MinNumElts)))

  Value *PgVal = II.getArgOperand(0);
  Value *OpVal = II.getArgOperand(1);

  if (PgVal == OpVal &&
      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
       II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
    Value *Ops[] = {PgVal, OpVal};

  return std::nullopt;

  if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&

  if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
      ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
       (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
       (OpIID == Intrinsic::aarch64_sve_and_z) ||
       (OpIID == Intrinsic::aarch64_sve_bic_z) ||
       (OpIID == Intrinsic::aarch64_sve_eor_z) ||
       (OpIID == Intrinsic::aarch64_sve_nand_z) ||
       (OpIID == Intrinsic::aarch64_sve_nor_z) ||
       (OpIID == Intrinsic::aarch64_sve_orn_z) ||
       (OpIID == Intrinsic::aarch64_sve_orr_z))) {

  return std::nullopt;
template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc>
static std::optional<Instruction *>

                                  bool MergeIntoAddendOp) {

  Value *MulOp0, *MulOp1, *AddendOp, *Mul;
  if (MergeIntoAddendOp) {
    AddendOp = II.getOperand(1);
    Mul = II.getOperand(2);

    AddendOp = II.getOperand(2);
    Mul = II.getOperand(1);

    return std::nullopt;

  if (!Mul->hasOneUse())
    return std::nullopt;

  if (II.getType()->isFPOrFPVectorTy()) {

      return std::nullopt;

    return std::nullopt;

  if (MergeIntoAddendOp)

        {P, AddendOp, MulOp0, MulOp1}, FMFSource);

        {P, MulOp0, MulOp1, AddendOp}, FMFSource);
static std::optional<Instruction *>

  Value *Pred = II.getOperand(0);
  Value *PtrOp = II.getOperand(1);
  Type *VecTy = II.getType();

  Load->copyMetadata(II);

static std::optional<Instruction *>

  Value *VecOp = II.getOperand(0);
  Value *Pred = II.getOperand(1);
  Value *PtrOp = II.getOperand(2);

  Store->copyMetadata(II);

  switch (Intrinsic) {
  case Intrinsic::aarch64_sve_fmul_u:
    return Instruction::BinaryOps::FMul;
  case Intrinsic::aarch64_sve_fadd_u:
    return Instruction::BinaryOps::FAdd;
  case Intrinsic::aarch64_sve_fsub_u:
    return Instruction::BinaryOps::FSub;

    return Instruction::BinaryOpsEnd;

static std::optional<Instruction *>

  if (II.isStrictFP())
    return std::nullopt;

  auto *OpPredicate = II.getOperand(0);

  if (BinOpCode == Instruction::BinaryOpsEnd ||
      !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
    return std::nullopt;

  auto *OpPredicate = II.getOperand(0);
  if (!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
    return std::nullopt;

  auto *Mod = II.getModule();

  II.setCalledFunction(NewDecl);

static std::optional<Instruction *>

           Intrinsic::aarch64_sve_mla>(

           Intrinsic::aarch64_sve_mad>(

  return std::nullopt;

static std::optional<Instruction *>

           Intrinsic::aarch64_sve_fmla>(IC, II,

           Intrinsic::aarch64_sve_fmad>(IC, II,

           Intrinsic::aarch64_sve_fmla>(IC, II,

  return std::nullopt;
static std::optional<Instruction *>

           Intrinsic::aarch64_sve_fmla>(IC, II,

           Intrinsic::aarch64_sve_fmad>(IC, II,

           Intrinsic::aarch64_sve_fmla_u>(

static std::optional<Instruction *>

           Intrinsic::aarch64_sve_fmls>(IC, II,

           Intrinsic::aarch64_sve_fnmsb>(

           Intrinsic::aarch64_sve_fmls>(IC, II,

  return std::nullopt;

static std::optional<Instruction *>

           Intrinsic::aarch64_sve_fmls>(IC, II,

           Intrinsic::aarch64_sve_fnmsb>(

           Intrinsic::aarch64_sve_fmls_u>(

           Intrinsic::aarch64_sve_mls>(

  return std::nullopt;
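// The helpers above fuse a single-use multiply into the surrounding add/sub
// intrinsic. As a sketch: add(pg, a, mul(pg, b, c)) becomes mla(pg, a, b, c),
// sub(pg, a, mul(pg, b, c)) becomes mls(pg, a, b, c), and the floating-point
// forms pick fmla/fmls/fmad/fnmsb depending on which operand the multiply
// feeds.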
  auto *OpPredicate = II.getOperand(0);
  auto *OpMultiplicand = II.getOperand(1);
  auto *OpMultiplier = II.getOperand(2);

  auto IsUnitSplat = [](auto *I) {

  auto IsUnitDup = [](auto *I) {
    auto *IntrI = dyn_cast<IntrinsicInst>(I);
    if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup)

    auto *SplatValue = IntrI->getOperand(2);

  if (IsUnitSplat(OpMultiplier)) {

    OpMultiplicand->takeName(&II);

  } else if (IsUnitDup(OpMultiplier)) {

    auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
    auto *DupPg = DupInst->getOperand(1);

    if (OpPredicate == DupPg) {
      OpMultiplicand->takeName(&II);

  Value *UnpackArg = II.getArgOperand(0);
  auto *RetTy = cast<ScalableVectorType>(II.getType());
  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
                  II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;

  return std::nullopt;

  auto *OpVal = II.getOperand(0);
  auto *OpIndices = II.getOperand(1);

  auto *SplatValue = dyn_cast_or_null<ConstantInt>(getSplatValue(OpIndices));

      SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
    return std::nullopt;
  constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
  constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;

  if ((match(II.getArgOperand(0),
             m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(m_Value(A)))) &&

             m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(m_Value(B))))) ||

    auto *TyA = cast<ScalableVectorType>(A->getType());
    if (TyA == B->getType() &&

  return std::nullopt;

  if (match(II.getArgOperand(0),

      match(II.getArgOperand(1), m_Intrinsic<Intrinsic::aarch64_sve_uzp2>(

        II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));

  return std::nullopt;
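// Sketch of the zip/uzp fold above: when the two inputs of a zip are the
// uzp1 and uzp2 of the same pair (A, B), the interleave simply rebuilds the
// original vectors, so zip1 is replaced with A and zip2 with B.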
static std::optional<Instruction *>

  Value *Mask = II.getOperand(0);
  Value *BasePtr = II.getOperand(1);

  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(

        BasePtr->getPointerAlignment(II.getDataLayout());

                                      BasePtr, IndexBase);

  return std::nullopt;

static std::optional<Instruction *>

  Value *Val = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *BasePtr = II.getOperand(2);

  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(

        BasePtr->getPointerAlignment(II.getDataLayout());

                                      BasePtr, IndexBase);

  return std::nullopt;

  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *DivVec = II.getOperand(2);

  ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
  if (!SplatConstantInt)
    return std::nullopt;

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

        Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});

  return std::nullopt;
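// Sketch of the sdiv fold above: a predicated signed divide by a splatted
// power of two is rewritten as aarch64_sve_asrd (arithmetic shift right with
// rounding) by log2 of the divisor, and a divide by a negated power of two
// additionally negates the shifted result under the same predicate.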
  size_t VecSize = Vec.size();

  size_t HalfVecSize = VecSize / 2;

    if (*LHS != nullptr && *RHS != nullptr) {

    if (*LHS == nullptr && *RHS != nullptr)

          m_Intrinsic<Intrinsic::vector_insert>(

      !isa<FixedVectorType>(CurrentInsertElt->getType()))
    return std::nullopt;
  auto IIScalableTy = cast<ScalableVectorType>(II.getType());

  while (auto InsertElt = dyn_cast<InsertElementInst>(CurrentInsertElt)) {
    auto Idx = cast<ConstantInt>(InsertElt->getOperand(2));
    Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
    CurrentInsertElt = InsertElt->getOperand(0);

      isa<PoisonValue>(CurrentInsertElt) && isa<PoisonValue>(Default);

    return std::nullopt;

  for (size_t I = 0; I < Elts.size(); I++) {
    if (Elts[I] == nullptr)

  if (InsertEltChain == nullptr)
    return std::nullopt;

  unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
  unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
                                 IIScalableTy->getMinNumElements() /

  auto *WideShuffleMaskTy =

  auto NarrowBitcast =

  return std::nullopt;

  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *Shift = II.getOperand(2);

  Value *AbsPred, *MergedValue;
  if (!match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_sqabs>(

      !match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(

    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

                                   {II.getType()}, {Pred, Vec, Shift});
std::optional<Instruction *>

  case Intrinsic::aarch64_sve_st1_scatter:
  case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
  case Intrinsic::aarch64_sve_st1dq:
  case Intrinsic::aarch64_sve_st1q_scatter_index:
  case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
  case Intrinsic::aarch64_sve_st1wq:
  case Intrinsic::aarch64_sve_stnt1:
  case Intrinsic::aarch64_sve_stnt1_scatter:
  case Intrinsic::aarch64_sve_stnt1_scatter_index:
  case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:

  case Intrinsic::aarch64_sve_st2:
  case Intrinsic::aarch64_sve_st2q:

  case Intrinsic::aarch64_sve_st3:
  case Intrinsic::aarch64_sve_st3q:

  case Intrinsic::aarch64_sve_st4:
  case Intrinsic::aarch64_sve_st4q:

  case Intrinsic::aarch64_sve_ld1_gather:
  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ld1q_gather_index:
  case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
  case Intrinsic::aarch64_sve_ld1ro:
  case Intrinsic::aarch64_sve_ld1rq:
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
  case Intrinsic::aarch64_sve_ldff1:
  case Intrinsic::aarch64_sve_ldff1_gather:
  case Intrinsic::aarch64_sve_ldff1_gather_index:
  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ldnf1:
  case Intrinsic::aarch64_sve_ldnt1:
  case Intrinsic::aarch64_sve_ldnt1_gather:
  case Intrinsic::aarch64_sve_ldnt1_gather_index:
  case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:

  case Intrinsic::aarch64_sve_prf:
  case Intrinsic::aarch64_sve_prfb_gather_index:
  case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfd_gather_index:
  case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfh_gather_index:
  case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfw_gather_index:
  case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:

  case Intrinsic::aarch64_neon_fmaxnm:
  case Intrinsic::aarch64_neon_fminnm:

  case Intrinsic::aarch64_sve_convert_from_svbool:

  case Intrinsic::aarch64_sve_dup:

  case Intrinsic::aarch64_sve_dup_x:

  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpne_wide:

  case Intrinsic::aarch64_sve_rdffr:

  case Intrinsic::aarch64_sve_lasta:
  case Intrinsic::aarch64_sve_lastb:

  case Intrinsic::aarch64_sve_clasta_n:
  case Intrinsic::aarch64_sve_clastb_n:

  case Intrinsic::aarch64_sve_cntd:

  case Intrinsic::aarch64_sve_cntw:

  case Intrinsic::aarch64_sve_cnth:

  case Intrinsic::aarch64_sve_cntb:

  case Intrinsic::aarch64_sve_ptest_any:
  case Intrinsic::aarch64_sve_ptest_first:
  case Intrinsic::aarch64_sve_ptest_last:

  case Intrinsic::aarch64_sve_fabd:

  case Intrinsic::aarch64_sve_fadd:

  case Intrinsic::aarch64_sve_fadd_u:

  case Intrinsic::aarch64_sve_fdiv:

  case Intrinsic::aarch64_sve_fmax:

  case Intrinsic::aarch64_sve_fmaxnm:

  case Intrinsic::aarch64_sve_fmin:

  case Intrinsic::aarch64_sve_fminnm:

  case Intrinsic::aarch64_sve_fmla:

  case Intrinsic::aarch64_sve_fmls:

  case Intrinsic::aarch64_sve_fmul:

  case Intrinsic::aarch64_sve_fmul_u:

  case Intrinsic::aarch64_sve_fmulx:

  case Intrinsic::aarch64_sve_fnmla:

  case Intrinsic::aarch64_sve_fnmls:

  case Intrinsic::aarch64_sve_fsub:

  case Intrinsic::aarch64_sve_fsub_u:

  case Intrinsic::aarch64_sve_add:

  case Intrinsic::aarch64_sve_add_u:

           Intrinsic::aarch64_sve_mla_u>(

  case Intrinsic::aarch64_sve_mla:

  case Intrinsic::aarch64_sve_mls:

  case Intrinsic::aarch64_sve_mul:

  case Intrinsic::aarch64_sve_mul_u:

  case Intrinsic::aarch64_sve_sabd:

  case Intrinsic::aarch64_sve_smax:

  case Intrinsic::aarch64_sve_smin:

  case Intrinsic::aarch64_sve_smulh:

  case Intrinsic::aarch64_sve_sub:

  case Intrinsic::aarch64_sve_sub_u:

           Intrinsic::aarch64_sve_mls_u>(

  case Intrinsic::aarch64_sve_uabd:

  case Intrinsic::aarch64_sve_umax:

  case Intrinsic::aarch64_sve_umin:

  case Intrinsic::aarch64_sve_umulh:

  case Intrinsic::aarch64_sve_asr:

  case Intrinsic::aarch64_sve_lsl:

  case Intrinsic::aarch64_sve_lsr:

  case Intrinsic::aarch64_sve_and:

  case Intrinsic::aarch64_sve_bic:

  case Intrinsic::aarch64_sve_eor:

  case Intrinsic::aarch64_sve_orr:

  case Intrinsic::aarch64_sve_sqsub:

  case Intrinsic::aarch64_sve_uqsub:

  case Intrinsic::aarch64_sve_tbl:

  case Intrinsic::aarch64_sve_uunpkhi:
  case Intrinsic::aarch64_sve_uunpklo:
  case Intrinsic::aarch64_sve_sunpkhi:
  case Intrinsic::aarch64_sve_sunpklo:

  case Intrinsic::aarch64_sve_uzp1:

  case Intrinsic::aarch64_sve_zip1:
  case Intrinsic::aarch64_sve_zip2:

  case Intrinsic::aarch64_sve_ld1_gather_index:

  case Intrinsic::aarch64_sve_st1_scatter_index:

  case Intrinsic::aarch64_sve_ld1:

  case Intrinsic::aarch64_sve_st1:

  case Intrinsic::aarch64_sve_sdiv:

  case Intrinsic::aarch64_sve_sel:

  case Intrinsic::aarch64_sve_srshl:

  case Intrinsic::aarch64_sve_dupq_lane:

  return std::nullopt;

                            SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {

  case Intrinsic::aarch64_neon_fcvtxn:
  case Intrinsic::aarch64_neon_rshrn:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqxtn:
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);

  return std::nullopt;
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,

                                           Type *SrcOverrideTy) {

  auto toVectorTy = [&](Type *ArgTy) {

                           cast<VectorType>(DstTy)->getElementCount());

      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  Type *SrcTy = SrcOverrideTy;

  case Instruction::Add:
  case Instruction::Sub:

    if (isa<SExtInst>(Args[1]) || isa<ZExtInst>(Args[1])) {

          toVectorTy(cast<Instruction>(Args[1])->getOperand(0)->getType());

  case Instruction::Mul: {

    if ((isa<SExtInst>(Args[0]) && isa<SExtInst>(Args[1])) ||
        (isa<ZExtInst>(Args[0]) && isa<ZExtInst>(Args[1]))) {

          toVectorTy(cast<Instruction>(Args[0])->getOperand(0)->getType());
    } else if (isa<ZExtInst>(Args[0]) || isa<ZExtInst>(Args[1])) {

  assert(SrcTy && "Expected some SrcTy");

  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();

      DstTyL.first * DstTyL.second.getVectorMinNumElements();

      SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;

      (Src->isScalableTy() && !ST->hasSVE2()))

      dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
  if (AddUser && AddUser->getOpcode() == Instruction::Add)

  auto *Shr = dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
  if (!Shr || Shr->getOpcode() != Instruction::LShr)

  auto *Trunc = dyn_cast_or_null<Instruction>(Shr->getUniqueUndroppableUser());
  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
      Src->getScalarSizeInBits() !=
          cast<CastInst>(Trunc)->getDestTy()->getScalarSizeInBits())

  assert(ISD && "Invalid opcode");

  if (I && I->hasOneUser()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());

    if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands, Src)) {

      if (SingleUser->getOpcode() == Instruction::Add) {
        if (I == SingleUser->getOperand(1) ||
            (isa<CastInst>(SingleUser->getOperand(1)) &&
             cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))

  if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&

    return Cost == 0 ? 0 : 1;
    EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;

    std::pair<InstructionCost, MVT> LT =

        LT.second.getScalarSizeInBits();

    return AdjustCost(Entry->Cost);

  if (ST->hasFullFP16())

      return AdjustCost(Entry->Cost);

        Opcode, LegalTy, Src, CCH, CostKind, I);

    return Part1 + Part2;

  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&

  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");

  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))

  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())

  case Instruction::SExt:

  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)

  return Opcode == Instruction::PHI ? 0 : 1;

  if (!LT.second.isVector())

  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();

  if (I && dyn_cast<LoadInst>(I->getOperand(1)))

      Opcode == Instruction::InsertElement && Op0 && !isa<UndefValue>(Op0);
  return getVectorInstrCostHelper(nullptr, Val, Index, HasRealUse);

  return getVectorInstrCostHelper(&I, Val, Index, true);

  if (isa<ScalableVectorType>(Ty))

  return DemandedElts.popcount() * (Insert + Extract) *

  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))

                                      Op2Info, Args, CxtI);

    return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;

        Opcode, Ty, CostKind, Op1Info, Op2Info);

    if (isa<FixedVectorType>(Ty) && cast<FixedVectorType>(Ty)
                                            ->getPrimitiveSizeInBits()
                                            .getFixedValue() < 128) {

      if (nullptr != Entry)

    if (LT.second.getScalarType() == MVT::i8)

    else if (LT.second.getScalarType() == MVT::i16)

    if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {

        return (4 + DivCost) * VTy->getNumElements();

    if (LT.second == MVT::v2i64 && ST->hasSVE())

    if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))

    return LT.first * 14;

         (Ty->isHalfTy() && ST->hasFullFP16())) &&

    return 2 * LT.first;

      return 2 * LT.first;

    int MaxMergeDistance = 64;

      return NumVectorInstToHideOverhead;
  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {

    const int AmortizationCost = 20;

      VecPred = CurrentPred;

    static const auto ValidMinMaxTys = {
        MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
        MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
    static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};

    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
        (ST->hasFullFP16() &&
         any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))

        VectorSelectTbl[] = {

        {ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
        {ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
        {ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}

  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SETCC) {

    if (LT.second == MVT::v4f16 && !ST->hasFullFP16())
      return LT.first * 4;

  if (ST->requiresStrictAlign()) {

  Options.AllowOverlappingLoads = true;

  Options.LoadSizes = {8, 4, 2, 1};
  Options.AllowedTailExpansions = {3, 5, 6};
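// Rough reading of the expansion options above: memcmp may be expanded inline
// with 8-, 4-, 2- and 1-byte loads, overlapping loads are permitted, and
// trailing remainders of 3, 5 and 6 bytes are also allowed to be expanded
// rather than forcing a library call.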
  return ST->hasSVE();

  if (!LT.first.isValid())

  auto *VT = cast<VectorType>(Src);
  if (VT->getElementType()->isIntegerTy(1))

  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Should be called on only load or stores.");

  case Instruction::Load:

      return ST->getGatherOverhead();

  case Instruction::Store:

      return ST->getScatterOverhead();

    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,

  auto *VT = cast<VectorType>(DataTy);

  if (!LT.first.isValid())

  if (!LT.second.isVector() ||

      VT->getElementType()->isIntegerTy(1))

  ElementCount LegalVF = LT.second.getVectorElementCount();

      {TTI::OK_AnyValue, TTI::OP_None}, I);

  if (VT == MVT::Other)

  if (!LT.first.isValid())

  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))

      (VTy->getElementType()->isIntegerTy(1) &&
       !VTy->getElementCount().isKnownMultipleOf(

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {

    const int AmortizationCost = 6;

    return LT.first * 2 * AmortizationCost;

    if (VT == MVT::v4i8)

    return cast<FixedVectorType>(Ty)->getNumElements() * 2;

  if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||

      *Alignment != Align(1))

  while (!TypeWorklist.empty()) {

    bool UseMaskForCond, bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  auto *VecVTy = cast<VectorType>(VecTy);

  if (VecTy->isScalableTy() && (!ST->hasSVE() || Factor != 2))

  if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))

  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();

        VecVTy->getElementCount().divideCoefficientBy(Factor));

    if (MinElts % Factor == 0 &&

                                       UseMaskForCond, UseMaskForGaps);
  enum { MaxStridedLoads = 7 };

    int StridedLoads = 0;

    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {
        LoadInst *LMemI = dyn_cast<LoadInst>(&I);

        if (L->isLoopInvariant(PtrValue))

        const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())

        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;

    return StridedLoads;

  int StridedLoads = countStridedLoads(L, SE);

                    << " strided loads\n");

  if (L->getLoopDepth() > 1)

  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {

      if (I.getType()->isVectorTy())

      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {

      !ST->getSchedModel().isOutOfOrder()) {
                                                           Type *ExpectedType) {

  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {

    StructType *ST = dyn_cast<StructType>(ExpectedType);

    unsigned NumElts = Inst->arg_size() - 1;
    if (ST->getNumElements() != NumElts)

    for (unsigned i = 0, e = NumElts; i != e; ++i) {

    for (unsigned i = 0, e = NumElts; i != e; ++i) {

  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)

  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;

  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;

  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;

  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;

  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;

    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))

  Type *ConsideredSExtType =

  if (I.getType() != ConsideredSExtType)

  for (const User *U : I.users()) {

      Considerable = true;

      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;

  return Considerable;
  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))

  if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())

    return LegalizationCost + 2;

  LegalizationCost *= LT.first - 1;

  assert(ISD && "Invalid opcode");

  return LegalizationCost + 2;

                                           std::optional<FastMathFlags> FMF,

  if (auto *VTy = dyn_cast<ScalableVectorType>(ValTy))

  if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {

    return BaseCost + FixedVTy->getNumElements();

  if (Opcode != Instruction::FAdd)

  auto *VTy = cast<ScalableVectorType>(ValTy);

  if (isa<ScalableVectorType>(ValTy))

  MVT MTy = LT.second;

  assert(ISD && "Invalid opcode");

    return (LT.first - 1) + Entry->Cost;

  auto *ValVTy = cast<FixedVectorType>(ValTy);

    if (LT.first != 1) {

      ExtraCost *= LT.first - 1;

    auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
    return Cost + ExtraCost;

  EVT PromotedVT = LT.second.getScalarType() == MVT::i1

  if (LT.second.getScalarType() == MVT::i1) {

  assert(Entry && "Illegal Type for Splice");
  LegalizationCost += Entry->Cost;
  return LegalizationCost * LT.first;
  if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&

      Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {

    if (Args.size() >= 1 && isa<LoadInst>(Args[0]) &&

      return std::max<InstructionCost>(1, LT.first / 4);

    unsigned TpNumElts = Mask.size();
    unsigned LTNumElts = LT.second.getVectorNumElements();
    unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;

    for (unsigned N = 0; N < NumVecs; N++) {

      unsigned Source1, Source2;
      unsigned NumSources = 0;
      for (unsigned E = 0; E < LTNumElts; E++) {
        int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]

        unsigned Source = MaskElt / LTNumElts;
        if (NumSources == 0) {

        } else if (NumSources == 1 && Source != Source1) {

        } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {

        if (Source == Source1)

        else if (Source == Source2)
          NMask.push_back(MaskElt % LTNumElts + LTNumElts);

      if (NumSources <= 2)

                           NTp, NMask, CostKind, 0, nullptr, Args, CxtI);

  if (IsExtractSubvector && LT.second.isFixedLengthVector())

  bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
  if (IsLoad && LT.second.isVector() &&

          LT.second.getVectorElementCount()))

      all_of(Mask, [](int E) { return E < 8; }))

  if (!Mask.empty() && LT.second.isFixedLengthVector() &&

        return M.value() < 0 || M.value() == (int)M.index();

  if (LT.second.isFixedLengthVector() &&
      LT.second.getVectorNumElements() == Mask.size() &&

      (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
       isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||

       [&Mask](int M) { return M < 0 || M == Mask[0]; })))

    return LT.first * Entry->Cost;

      LT.second.getSizeInBits() <= 128 && SubTp) {

    if (SubLT.second.isVector()) {
      int NumElts = LT.second.getVectorNumElements();
      int NumSubElts = SubLT.second.getVectorNumElements();
      if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)

  if (IsExtractSubvector)

  if (isa<LoadInst>(&I) || isa<StoreInst>(&I)) {

  unsigned NumInsns = 0;

    NumInsns += BB->sizeWithoutDebug();

                                           int64_t Scale, unsigned AddrSpace) const {

      isa<BranchInst>(I->getNextNode()) &&
      cast<BranchInst>(I->getNextNode())->isUnconditional())
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
amdgpu AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides the interface for the instcombine pass implementation.
This file defines the LoopVectorizationLegality class.
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V)
uint64_t IntrinsicInst * II
static uint64_t getBits(uint64_t Val, int Start, int End)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getFastMathFlags(const MachineInstr &I)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
unsigned getVectorInsertExtractBaseCost() const
ARMProcFamilyEnum getProcFamily() const
Returns ARM processor family.
unsigned getMaxInterleaveFactor() const
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
TailFoldingOpts getSVETailFoldingDefaultOpts() const
bool useSVEForFixedLengthVectors() const
unsigned getMinSVEVectorSizeInBits() const
bool isSVEAvailable() const
Returns true if the target has SVE and can use the full range of SVE instructions,...
InstructionCost getSpliceCost(VectorType *Tp, int Index)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
bool shouldTreatInstructionLikeSelect(const Instruction *I)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)
bool prefersVectorizedAddressing() const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind)
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isElementTypeLegalForScalableVector(Type *Ty) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool enableScalableVectorization() const
bool useNeonVector(const Type *Ty) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool isLegalMaskedGatherScatter(Type *DataType) const
unsigned getMaxInterleaveFactor(ElementCount VF)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCost(int64_t Val)
Calculate the cost of materializing a 64-bit value.
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src)
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
EVT getPromotedVTForPredicate(EVT VT) const
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, bool UseScalable) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given type.
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const
Returns true if VecTy is a legal interleaved access type.
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
int64_t getSExtValue() const
Get sign extended value.
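The APInt entries above are small value-query helpers. As a rough standalone sketch (not code from this file) of how they behave on a 64-bit value:

#include "llvm/ADT/APInt.h"
using namespace llvm;

// Sketch only: exercises the APInt queries listed above.
static int64_t apintSketch() {
  APInt V(/*numBits=*/64, /*val=*/32);
  bool Pow2 = V.isPowerOf2();           // true: 32 is a power of two
  unsigned Log = V.logBase2();          // 5
  unsigned Ones = V.popcount();         // 1 bit set
  APInt Narrow = V.sextOrTrunc(32);     // sign-extend or truncate to 32 bits
  APInt Shifted = V.ashr(2);            // arithmetic shift right: 8
  V.negate();                           // V is now -32 in two's complement
  bool NegPow2 = V.isNegatedPowerOf2(); // true: -(-32) == 32 is a power of two
  (void)Pow2; (void)Log; (void)Ones; (void)Narrow; (void)Shifted; (void)NegPow2;
  return V.getSExtValue();              // -32
}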
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
LLVM Basic Block Representation.
bool isTypeLegal(Type *Ty)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction.
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute a cost of the given call instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
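As a hedged illustration only (not this file's actual logic): cost hooks built on getTypeLegalizationCost typically scale a per-legal-op cost by the split factor it returns. The helper below is hypothetical and assumes it sits inside an AArch64TTIImpl-like class where the call is directly available.

// Hypothetical sketch, assumed to live inside an AArch64TTIImpl member:
// charge one unit per legal-typed piece that Ty splits into.
InstructionCost legalizedUnitCostSketch(Type *Ty) const {
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  // LT.first is the legalization split factor, LT.second the legalized MVT.
  return LT.first;
}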
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to calling a function.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature.
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool isIntPredicate() const
static ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
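A brief sketch (not from this file) of the two ElementCount factories above and the basic queries on the result:

#include "llvm/Support/TypeSize.h"
using namespace llvm;

// Sketch: fixed vs. scalable element counts.
void elementCountSketch() {
  ElementCount Fixed = ElementCount::getFixed(4);    // exactly 4 elements
  ElementCount Scal  = ElementCount::getScalable(2); // 2 x vscale elements
  (void)Fixed.isScalable();        // false
  (void)Scal.getKnownMinValue();   // 2 (the known minimum; actual is 2*vscale)
}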
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isEquality() const
Return true if this predicate is either EQ or NE.
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
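A minimal, self-contained sketch (not code from this file) combining a few of the IRBuilder calls listed above; the function and value names are illustrative only:

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Sketch: emit a masked load at the end of BB, then select the loaded lanes
// where the mask is set and zero elsewhere.
Value *buildMaskedLoadSketch(BasicBlock *BB, Value *Ptr, Value *Mask,
                             FixedVectorType *VecTy) {
  IRBuilder<> Builder(BB->getContext());
  Builder.SetInsertPoint(BB);                        // append to the end of BB
  CallInst *Ld = Builder.CreateMaskedLoad(VecTy, Ptr, Align(16), Mask);
  Value *Zero = Constant::getNullValue(VecTy);       // all-zero vector
  return Builder.CreateSelect(Mask, Ld, Zero, "masked.or.zero");
}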
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
static InstructionCost getInvalid(CostType Val=0)
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address space zero).
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool requiresSMChange(const SMEAttrs &Callee) const
void set(unsigned M, bool Enable=true)
bool hasStreamingBody() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
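A small sketch (an assumption-level illustration, not from this file) of composing a StackOffset from its fixed and vscale-scaled parts:

#include "llvm/Support/TypeSize.h"
using namespace llvm;

// Sketch: 16 fixed bytes plus 32 bytes per vscale.
void stackOffsetSketch() {
  StackOffset Off = StackOffset::getFixed(16);
  Off += StackOffset::getScalable(32);
  (void)Off.getFixed();    // 16
  (void)Off.getScalable(); // 32
}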
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
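A short usage sketch (illustrative names, not from this file) of StringSwitch mapping string literals to values:

#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

// Sketch: map a name onto a numeric code, with a fall-back default.
unsigned stringSwitchSketch(StringRef Name) {
  return StringSwitch<unsigned>(Name)
      .Case("small", 1)
      .Case("medium", 2)
      .Case("large", 3)
      .Default(0); // returned when no case matches
}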
Class to represent struct types.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lowering.
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const
Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in order to type-legalize it.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
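A sketch (not from this file) of the two TypeSize factories above; the scalable form encodes a known-minimum size multiplied by vscale at run time:

#include "llvm/Support/TypeSize.h"
using namespace llvm;

// Sketch: fixed vs. scalable sizes in bits.
void typeSizeSketch() {
  TypeSize Fixed = TypeSize::getFixed(128);    // exactly 128 bits
  TypeSize Scal  = TypeSize::getScalable(128); // 128 x vscale bits
  (void)Fixed.isScalable();       // false
  (void)Scal.getKnownMinValue();  // 128, the known minimum
}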
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isFP128Ty() const
Return true if this is 'fp128'.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
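A compact sketch (illustrative, not from this file) of constructing fixed-width and scalable vector types with the factories listed above:

#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// Sketch: three ways to build vector types.
void vectorTypeSketch(LLVMContext &Ctx) {
  Type *F32 = Type::getFloatTy(Ctx);
  auto *V4F32 = FixedVectorType::get(F32, 4);        // <4 x float>
  auto *NxV4F32 = ScalableVectorType::get(F32, 4);   // <vscale x 4 x float>
  VectorType *Generic =
      VectorType::get(F32, ElementCount::getScalable(2)); // <vscale x 2 x float>
  (void)V4F32; (void)NxV4F32; (void)Generic;
}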
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the given register size.
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to synthesize the immediate.
static constexpr unsigned SVEBitsPerBlock
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to memory with one type and loaded from the same address with the other type.
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
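A short sketch (not from this file) of the PatternMatch helpers above; it recognises two simple IR shapes on an arbitrary value:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Sketch: match "select(cmp, x, 0)" with a single use, or "and(x, 1)".
bool patternSketch(Value *V) {
  Value *X = nullptr;
  if (match(V, m_OneUse(m_Select(m_Cmp(), m_Value(X), m_ZeroInt()))))
    return true; // select of a compare with a zero arm, used exactly once
  return match(V, m_And(m_Value(X), m_One())); // x & 1
}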
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
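The usual cost-table idiom looks up an (ISD opcode, MVT) pair and falls back when no entry exists. The sketch below uses made-up table contents; the include paths are assumptions that can vary across LLVM versions.

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
using namespace llvm;

// Sketch: table-driven cost lookup with a default fall-back.
static unsigned costTableSketch(int ISDOpc, MVT VT) {
  static const CostTblEntry ExampleTbl[] = {
      {ISD::ADD, MVT::v4i32, 1},
      {ISD::MUL, MVT::v4i32, 4},
  };
  if (const auto *Entry = CostTableLookup(ExampleTbl, ISDOpc, VT))
    return Entry->Cost;
  return 1; // illustrative default when the table has no entry
}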
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None. Reductions: Loops containing reductions. Recurrences: Loops containing fixed-order recurrences. Reverse: Loops requiring reversed predicates. All: All.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
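A few concrete evaluations (a standalone sketch, not from this file) of the bit-math helpers listed above:

#include "llvm/Support/MathExtras.h"
#include <cassert>

// Sketch: power-of-two tests and floor log2.
void mathExtrasSketch() {
  assert(llvm::isPowerOf2_64(64));   // 64 is a power of two
  assert(!llvm::isPowerOf2_32(48));  // 48 is not
  assert(llvm::Log2_32(32) == 5);    // floor(log2(32)) == 5
}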
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1, 3, 5, 7, 9, 11, 13, 15>.
constexpr int PoisonMaskElem
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Mod
The access may modify the value stored in memory.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> or <4, 12, 5, 13, 6, 14, 7, 15>.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FAnyOf
Any_of reduction with select(fcmp(),x,y) where one of (x,y) is loop invariant, and both x and y are i...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ IAnyOf
Any_of reduction with select(icmp(),x,y) where one of (x,y) is loop invariant, and both x and y are i...
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
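A brief sketch (not from this file) of querying known bits for an integer-typed value; V and DL are whatever value and data layout the caller has at hand:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Sketch: is the low bit of V known to be zero?
bool lowBitKnownZero(const Value *V, const DataLayout &DL) {
  KnownBits Known(V->getType()->getScalarSizeInBits());
  computeKnownBits(V, Known, DL);
  return Known.Zero[0];
}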
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
@ Default
The result values are uniform if and only if all operands are uniform.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
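A small sketch (illustrative only, not from this file) of building and querying an EVT for a fixed 4 x i32 vector with the calls listed above:

#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// Sketch: EVT construction and basic queries.
void evtSketch(LLVMContext &Ctx) {
  EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 4, /*IsScalable=*/false);
  (void)VT.isSimple();             // true: maps onto MVT::v4i32
  (void)VT.getVectorNumElements(); // 4
  (void)VT.getSizeInBits();        // 128 fixed bits
  Type *Ty = VT.getTypeForEVT(Ctx); // corresponding IR type, <4 x i32>
  (void)Ty;
}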
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
Type Conversion Cost Table.