#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64tti"

        "Penalty of calling a function that requires a change to PSTATE.SM"));
    cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"));
    cl::desc("The cost of a histcnt instruction"));
class TailFoldingOption {
  bool NeedsDefault = true;

  void setNeedsDefault(bool V) { NeedsDefault = V; }

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
    Bits &= ~DisableBits;
    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              "  (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";
  void operator=(const std::string &Val) {
    setNeedsDefault(false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);
    else {
      StartIdx = 0;
      setInitialBits(TailFoldingOpts::Disabled);
    }
    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
        "Control the use of vectorisation using tail-folding for SVE where the"
        " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
        "\ndisabled      (Initial) No loop types will vectorize using "
        "tail-folding"
        "\ndefault       (Initial) Uses the default tail-folding settings for "
        "the target CPU"
        "\nall           (Initial) All legal loop types will vectorize using "
        "tail-folding"
        "\nsimple        (Initial) Use tail-folding for simple loops (not "
        "reductions or recurrences)"
        "\nreductions    Use tail-folding for loops containing reductions"
        "\nnoreductions  Inverse of above"
        "\nrecurrences   Use tail-folding for loops containing fixed order "
        "recurrences"
        "\nnorecurrences Inverse of above"
        "\nreverse       Use tail-folding for loops requiring reversed "
        "predicates"
        "\nnoreverse     Inverse of above"),
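// For example, given the grammar above, -sve-tail-folding=all+noreductions
// tail-folds every legal loop type except loops containing reductions.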
      .Case("__arm_sme_state", true)
      .Case("__arm_tpidr2_save", true)
      .Case("__arm_tpidr2_restore", true)
      .Case("__arm_za_disable", true)
    if (isa<CallInst>(I) && !I.isDebugOrPseudoInst() &&
        (cast<CallInst>(I).isInlineAsm() || isa<IntrinsicInst>(I) ||

  SMEAttrs CallerAttrs(*Caller), CalleeAttrs(*Callee);

  if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
      CallerAttrs.requiresSMChange(CalleeAttrs)) {

      TM.getSubtargetImpl(*Caller)->getFeatureBits();
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  return (CallerBits & CalleeBits) == CalleeBits;
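// The subset test above permits inlining only when every subtarget feature
// the callee was compiled with is also available in the caller.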
  auto FVTy = dyn_cast<FixedVectorType>(Ty);
         FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;

                                              unsigned DefaultCallPenalty) const {
  if (F == Call.getCaller())
    return DefaultCallPenalty;

  ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
  return std::max<InstructionCost>(1, Cost);
  unsigned ImmIdx = ~0U;
  case Instruction::GetElementPtr:
  case Instruction::Store:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    int NumConstants = (BitSize + 63) / 64;
  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    int NumConstants = (BitSize + 63) / 64;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  if (TyWidth == 32 || TyWidth == 64)
  if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy))
    if ((VTy->getElementCount().getKnownMinValue() != 2 &&
         VTy->getElementCount().getKnownMinValue() != 4) ||
        VTy->getPrimitiveSizeInBits().getKnownMinValue() > 128 ||
        !VTy->isScalableTy())

  switch (ICA.getID()) {
  case Intrinsic::experimental_vector_histogram_add:
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                        MVT::v8i16, MVT::v2i32, MVT::v4i32,
                                        MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
    if (LT.second == MVT::v2i64)
    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    static const auto ValidSatTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
      return LT.first * Instrs;
  case Intrinsic::abs: {
    static const auto ValidAbsTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }))
  case Intrinsic::bswap: {
    static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
                                     MVT::v4i32, MVT::v2i64};
    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }) &&
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
  case Intrinsic::experimental_stepvector: {
    Cost += AddCost * (LT.first - 1);
  case Intrinsic::vector_extract:
  case Intrinsic::vector_insert: {
    bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
  case Intrinsic::bitreverse: {
        {Intrinsic::bitreverse, MVT::i32, 1},
        {Intrinsic::bitreverse, MVT::i64, 1},
        {Intrinsic::bitreverse, MVT::v8i8, 1},
        {Intrinsic::bitreverse, MVT::v16i8, 1},
        {Intrinsic::bitreverse, MVT::v4i16, 2},
        {Intrinsic::bitreverse, MVT::v8i16, 2},
        {Intrinsic::bitreverse, MVT::v2i32, 2},
        {Intrinsic::bitreverse, MVT::v4i32, 2},
        {Intrinsic::bitreverse, MVT::v1i64, 2},
        {Intrinsic::bitreverse, MVT::v2i64, 2},
      return LegalisationCost.first * Entry->Cost + 1;
    return LegalisationCost.first * Entry->Cost;
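    // The cost split in the table above likely reflects that vector RBIT
    // reverses bits within each byte: scalar and byte-vector types need one
    // instruction, while wider-element vectors also need a REV step to
    // reorder the bytes.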
  case Intrinsic::ctpop: {
    if (!ST->hasNEON()) {
        RetTy->getScalarSizeInBits()
    return LT.first * Entry->Cost + ExtraCost;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
        {Intrinsic::sadd_with_overflow, MVT::i8, 3},
        {Intrinsic::uadd_with_overflow, MVT::i8, 3},
        {Intrinsic::sadd_with_overflow, MVT::i16, 3},
        {Intrinsic::uadd_with_overflow, MVT::i16, 3},
        {Intrinsic::sadd_with_overflow, MVT::i32, 1},
        {Intrinsic::uadd_with_overflow, MVT::i32, 1},
        {Intrinsic::sadd_with_overflow, MVT::i64, 1},
        {Intrinsic::uadd_with_overflow, MVT::i64, 1},
        {Intrinsic::ssub_with_overflow, MVT::i8, 3},
        {Intrinsic::usub_with_overflow, MVT::i8, 3},
        {Intrinsic::ssub_with_overflow, MVT::i16, 3},
        {Intrinsic::usub_with_overflow, MVT::i16, 3},
        {Intrinsic::ssub_with_overflow, MVT::i32, 1},
        {Intrinsic::usub_with_overflow, MVT::i32, 1},
        {Intrinsic::ssub_with_overflow, MVT::i64, 1},
        {Intrinsic::usub_with_overflow, MVT::i64, 1},
        {Intrinsic::smul_with_overflow, MVT::i8, 5},
        {Intrinsic::umul_with_overflow, MVT::i8, 4},
        {Intrinsic::smul_with_overflow, MVT::i16, 5},
        {Intrinsic::umul_with_overflow, MVT::i16, 4},
        {Intrinsic::smul_with_overflow, MVT::i32, 2},
        {Intrinsic::umul_with_overflow, MVT::i32, 2},
        {Intrinsic::smul_with_overflow, MVT::i64, 3},
        {Intrinsic::umul_with_overflow, MVT::i64, 3},
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
    if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
         LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
         LT.second == MVT::v2f64) &&
         (LT.second == MVT::f64 && MTy == MVT::i32) ||
         (LT.second == MVT::f32 && MTy == MVT::i64)))
    if (ST->hasFullFP16() &&
        ((LT.second == MVT::f16 && MTy == MVT::i32) ||
         ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
    if ((LT.second.getScalarType() == MVT::f32 ||
         LT.second.getScalarType() == MVT::f64 ||
         (ST->hasFullFP16() && LT.second.getScalarType() == MVT::f16)) &&
    if (LT.second.isVector())
        LegalTy, {LegalTy, LegalTy});
        LegalTy, {LegalTy, LegalTy});
    return LT.first * Cost;
  case Intrinsic::fshl:
  case Intrinsic::fshr: {
        {Intrinsic::fshl, MVT::v4i32, 3},
        {Intrinsic::fshl, MVT::v2i64, 3}, {Intrinsic::fshl, MVT::v16i8, 4},
        {Intrinsic::fshl, MVT::v8i16, 4}, {Intrinsic::fshl, MVT::v2i32, 3},
        {Intrinsic::fshl, MVT::v8i8, 4},  {Intrinsic::fshl, MVT::v4i16, 4}};
      return LegalisationCost.first * Entry->Cost;
    if (!RetTy->isIntegerTy())

    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
                       RetTy->getScalarSizeInBits() < 64) ||
                      (RetTy->getScalarSizeInBits() % 64 != 0);
    unsigned ExtraCost = HigherCost ? 1 : 0;
    if (RetTy->getScalarSizeInBits() == 32 ||
        RetTy->getScalarSizeInBits() == 64)

    return TyL.first + ExtraCost;
  case Intrinsic::get_active_lane_mask: {
    if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) &&
      return RetTy->getNumElements() * 2;
  auto RequiredType = II.getType();

  auto *PN = dyn_cast<PHINode>(II.getArgOperand(0));
  assert(PN && "Expected Phi Node!");

  if (!PN->hasOneUse())

  for (Value *IncValPhi : PN->incoming_values()) {
    auto *Reinterpret = dyn_cast<IntrinsicInst>(IncValPhi);
        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())
  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I));
    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
static std::optional<Instruction *>
  auto BinOp = dyn_cast<IntrinsicInst>(II.getOperand(0));

  auto IntrinsicID = BinOp->getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:

  auto BinOpPred = BinOp->getOperand(0);
  auto BinOpOp1 = BinOp->getOperand(1);
  auto BinOpOp2 = BinOp->getOperand(2);
  auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);
      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)

  auto PredOp = PredIntr->getOperand(0);
  auto PredOpTy = cast<VectorType>(PredOp->getType());
  if (PredOpTy != II.getType())

      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
  if (BinOpOp1 == BinOpOp2)
    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
static std::optional<Instruction *>
  if (isa<PHINode>(II.getArgOperand(0)))

  if (isa<TargetExtType>(II.getArgOperand(0)->getType()) ||
      isa<TargetExtType>(II.getType()))

  Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;

  const auto *IVTy = cast<VectorType>(II.getType());

    const auto *CursorVTy = cast<VectorType>(Cursor->getType());
    if (CursorVTy->getElementCount().getKnownMinValue() <
        IVTy->getElementCount().getKnownMinValue())

      EarliestReplacement = Cursor;

    auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);

    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_to_svbool ||
                              IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_from_svbool))

    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
    Cursor = IntrinsicCursor->getOperand(0);

  if (!EarliestReplacement)
    return std::nullopt;
  Value *UncastedPred;
  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
                      m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
    if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
        cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
      Pred = UncastedPred;

  return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                         m_ConstantInt<AArch64SVEPredPattern::all>()));
static std::optional<Instruction *>
  return std::nullopt;

static std::optional<Instruction *>
  if (RetTy->isStructTy()) {
    auto StructT = cast<StructType>(RetTy);
    auto VecT = StructT->getElementType(0);
    for (unsigned i = 0; i < StructT->getNumElements(); i++) {
      ZerVec.push_back(VecT->isFPOrFPVectorTy() ? ConstantFP::get(VecT, 0.0)
                                                : ConstantInt::get(VecT, 0));
  } else if (RetTy->isFPOrFPVectorTy())
    Node = ConstantInt::get(II.getType(), 0);

  return std::nullopt;
  auto *OpPredicate = II.getOperand(0);
    return std::nullopt;
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::vl1)
    return std::nullopt;

      II.getArgOperand(0), II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
  Insert->insertBefore(&II);
  Insert->takeName(&II);
  auto *RetTy = cast<ScalableVectorType>(II.getType());
      II.getArgOperand(0));

  auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
  if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::all)
    return std::nullopt;

  if (!SplatValue || !SplatValue->isZero())
    return std::nullopt;
  auto *DupQLane = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
      DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
    return std::nullopt;

  if (!cast<ConstantInt>(DupQLane->getArgOperand(1))->isZero())
    return std::nullopt;

  auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
  if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
    return std::nullopt;

  if (!isa<UndefValue>(VecIns->getArgOperand(0)))
    return std::nullopt;

  if (!cast<ConstantInt>(VecIns->getArgOperand(2))->isZero())
    return std::nullopt;

  auto *ConstVec = dyn_cast<Constant>(VecIns->getArgOperand(1));
    return std::nullopt;

  auto *VecTy = dyn_cast<FixedVectorType>(ConstVec->getType());
  auto *OutTy = dyn_cast<ScalableVectorType>(II.getType());
  if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
    return std::nullopt;
  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;

  for (unsigned I = 0; I < NumElts; ++I) {
    auto *Arg = dyn_cast<ConstantInt>(ConstVec->getAggregateElement(I));
      return std::nullopt;
      PredicateBits |= 1 << (I * (16 / NumElts));

  if (PredicateBits == 0) {
    PFalse->takeName(&II);
  for (unsigned I = 0; I < 16; ++I)
    if ((PredicateBits & (1 << I)) != 0)

  unsigned PredSize = Mask & -Mask;
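  // Mask & -Mask isolates the lowest set bit, so PredSize is the byte stride
  // between consecutive active lanes of the predicate pattern.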
  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
      return std::nullopt;

      {PredType}, {PTruePat});
      Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
  auto *ConvertFromSVBool =
      {II.getType()}, {ConvertToSVBool});
  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(1);
  auto IntrinsicID = II.getIntrinsicID();
  bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;

    auto *OldBinOp = cast<BinaryOperator>(Vec);
    auto OpC = OldBinOp->getOpcode();
        OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());
  auto *C = dyn_cast<Constant>(Pg);
  if (IsAfter && C && C->isNullValue()) {
    Extract->insertBefore(&II);
    Extract->takeName(&II);

  auto *IntrPG = dyn_cast<IntrinsicInst>(Pg);
    return std::nullopt;

  if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(IntrPG->getOperand(0))->getZExtValue();
    return std::nullopt;

  unsigned Idx = MinNumElts - 1;
  auto *PgVTy = cast<ScalableVectorType>(Pg->getType());
  if (Idx >= PgVTy->getMinNumElements())
    return std::nullopt;

  Extract->insertBefore(&II);
  Extract->takeName(&II);
  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(2);
    return std::nullopt;
    return std::nullopt;

      FPTy, cast<VectorType>(Vec->getType())->getElementCount());
      II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
      {II.getType()}, {AllPat});
static std::optional<Instruction *>
  const auto Pattern = cast<ConstantInt>(II.getArgOperand(0))->getZExtValue();

  if (Pattern == AArch64SVEPredPattern::all) {
    Constant *StepVal = ConstantInt::get(II.getType(), NumElts);

  return MinNumElts && NumElts >= MinNumElts
                    II, ConstantInt::get(II.getType(), MinNumElts)))
  Value *PgVal = II.getArgOperand(0);
  Value *OpVal = II.getArgOperand(1);

  if (PgVal == OpVal &&
      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
       II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
    Value *Ops[] = {PgVal, OpVal};

    return std::nullopt;

  if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
  if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
      ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
       (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
       (OpIID == Intrinsic::aarch64_sve_and_z) ||
       (OpIID == Intrinsic::aarch64_sve_bic_z) ||
       (OpIID == Intrinsic::aarch64_sve_eor_z) ||
       (OpIID == Intrinsic::aarch64_sve_nand_z) ||
       (OpIID == Intrinsic::aarch64_sve_nor_z) ||
       (OpIID == Intrinsic::aarch64_sve_orn_z) ||
       (OpIID == Intrinsic::aarch64_sve_orr_z))) {

  return std::nullopt;
template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
                                  bool MergeIntoAddendOp) {
  Value *MulOp0, *MulOp1, *AddendOp, *Mul;
  if (MergeIntoAddendOp) {
    AddendOp = II.getOperand(1);
    Mul = II.getOperand(2);
  } else {
    AddendOp = II.getOperand(2);
    Mul = II.getOperand(1);
  }

    return std::nullopt;

  if (!Mul->hasOneUse())
    return std::nullopt;
  if (II.getType()->isFPOrFPVectorTy()) {
      return std::nullopt;
    return std::nullopt;

  if (MergeIntoAddendOp)
        {P, AddendOp, MulOp0, MulOp1}, FMFSource);
        {P, MulOp0, MulOp1, AddendOp}, FMFSource);
static std::optional<Instruction *>
  Value *Pred = II.getOperand(0);
  Value *PtrOp = II.getOperand(1);
  Type *VecTy = II.getType();

  Load->copyMetadata(II);

static std::optional<Instruction *>
  Value *VecOp = II.getOperand(0);
  Value *Pred = II.getOperand(1);
  Value *PtrOp = II.getOperand(2);

  Store->copyMetadata(II);
  switch (Intrinsic) {
  case Intrinsic::aarch64_sve_fmul_u:
    return Instruction::BinaryOps::FMul;
  case Intrinsic::aarch64_sve_fadd_u:
    return Instruction::BinaryOps::FAdd;
  case Intrinsic::aarch64_sve_fsub_u:
    return Instruction::BinaryOps::FSub;
    return Instruction::BinaryOpsEnd;
static std::optional<Instruction *>
  if (II.isStrictFP())
    return std::nullopt;

  auto *OpPredicate = II.getOperand(0);
  if (BinOpCode == Instruction::BinaryOpsEnd ||
      !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
    return std::nullopt;
  auto *OpPredicate = II.getOperand(0);
  if (!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
    return std::nullopt;

  auto *Mod = II.getModule();
  II.setCalledFunction(NewDecl);
static std::optional<Instruction *>
                                             Intrinsic::aarch64_sve_mla>(
                                             Intrinsic::aarch64_sve_mad>(
  return std::nullopt;

static std::optional<Instruction *>
                                                      Intrinsic::aarch64_sve_fmla>(IC, II,
                                                      Intrinsic::aarch64_sve_fmad>(IC, II,
                                                      Intrinsic::aarch64_sve_fmla>(IC, II,
  return std::nullopt;
static std::optional<Instruction *>
                                                      Intrinsic::aarch64_sve_fmla>(IC, II,
                                                      Intrinsic::aarch64_sve_fmad>(IC, II,
                                                      Intrinsic::aarch64_sve_fmla_u>(

static std::optional<Instruction *>
                                                      Intrinsic::aarch64_sve_fmls>(IC, II,
                                                      Intrinsic::aarch64_sve_fnmsb>(
                                                      Intrinsic::aarch64_sve_fmls>(IC, II,
  return std::nullopt;

static std::optional<Instruction *>
                                                      Intrinsic::aarch64_sve_fmls>(IC, II,
                                                      Intrinsic::aarch64_sve_fnmsb>(
                                                      Intrinsic::aarch64_sve_fmls_u>(
                                             Intrinsic::aarch64_sve_mls>(
  return std::nullopt;
  auto *OpPredicate = II.getOperand(0);
  auto *OpMultiplicand = II.getOperand(1);
  auto *OpMultiplier = II.getOperand(2);

  auto IsUnitSplat = [](auto *I) {

  auto IsUnitDup = [](auto *I) {
    auto *IntrI = dyn_cast<IntrinsicInst>(I);
    if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup)

    auto *SplatValue = IntrI->getOperand(2);

  if (IsUnitSplat(OpMultiplier)) {
    OpMultiplicand->takeName(&II);
  } else if (IsUnitDup(OpMultiplier)) {
    auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
    auto *DupPg = DupInst->getOperand(1);
    if (OpPredicate == DupPg) {
      OpMultiplicand->takeName(&II);
  Value *UnpackArg = II.getArgOperand(0);
  auto *RetTy = cast<ScalableVectorType>(II.getType());
  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
                  II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;

  return std::nullopt;

  auto *OpVal = II.getOperand(0);
  auto *OpIndices = II.getOperand(1);

  auto *SplatValue = dyn_cast_or_null<ConstantInt>(getSplatValue(OpIndices));
      SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
    return std::nullopt;
  constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
  constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;

  if ((match(II.getArgOperand(0),
             m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(m_Value(A)))) &&
             m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(m_Value(B))))) ||
    auto *TyA = cast<ScalableVectorType>(A->getType());
    if (TyA == B->getType() &&

  return std::nullopt;
  if (match(II.getArgOperand(0),
      match(II.getArgOperand(1), m_Intrinsic<Intrinsic::aarch64_sve_uzp2>(
        II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));

  return std::nullopt;
static std::optional<Instruction *>
  Value *Mask = II.getOperand(0);
  Value *BasePtr = II.getOperand(1);

  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
        BasePtr->getPointerAlignment(II.getDataLayout());
        BasePtr, IndexBase);

  return std::nullopt;
static std::optional<Instruction *>
  Value *Val = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *BasePtr = II.getOperand(2);

  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
        BasePtr->getPointerAlignment(II.getDataLayout());
        BasePtr, IndexBase);

  return std::nullopt;
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *DivVec = II.getOperand(2);

  ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
  if (!SplatConstantInt)
    return std::nullopt;
        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
        Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});

  return std::nullopt;
  size_t VecSize = Vec.size();
  size_t HalfVecSize = VecSize / 2;

    if (*LHS != nullptr && *RHS != nullptr) {

    if (*LHS == nullptr && *RHS != nullptr)
          m_Intrinsic<Intrinsic::vector_insert>(
      !isa<FixedVectorType>(CurrentInsertElt->getType()))
    return std::nullopt;
  auto IIScalableTy = cast<ScalableVectorType>(II.getType());

  while (auto InsertElt = dyn_cast<InsertElementInst>(CurrentInsertElt)) {
    auto Idx = cast<ConstantInt>(InsertElt->getOperand(2));
    Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
    CurrentInsertElt = InsertElt->getOperand(0);

      isa<PoisonValue>(CurrentInsertElt) && isa<PoisonValue>(Default);
    return std::nullopt;

  for (size_t I = 0; I < Elts.size(); I++) {
    if (Elts[I] == nullptr)

  if (InsertEltChain == nullptr)
    return std::nullopt;

  unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
  unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
                                 IIScalableTy->getMinNumElements() /

  auto *WideShuffleMaskTy =

  auto NarrowBitcast =

  return std::nullopt;
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *Shift = II.getOperand(2);

  Value *AbsPred, *MergedValue;
  if (!match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_sqabs>(
      !match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(
    return std::nullopt;

    return std::nullopt;
    return std::nullopt;

      {II.getType()}, {Pred, Vec, Shift});
std::optional<Instruction *>
  case Intrinsic::aarch64_sve_st1_scatter:
  case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
  case Intrinsic::aarch64_sve_st1dq:
  case Intrinsic::aarch64_sve_st1q_scatter_index:
  case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
  case Intrinsic::aarch64_sve_st1wq:
  case Intrinsic::aarch64_sve_stnt1:
  case Intrinsic::aarch64_sve_stnt1_scatter:
  case Intrinsic::aarch64_sve_stnt1_scatter_index:
  case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
  case Intrinsic::aarch64_sve_st2:
  case Intrinsic::aarch64_sve_st2q:
  case Intrinsic::aarch64_sve_st3:
  case Intrinsic::aarch64_sve_st3q:
  case Intrinsic::aarch64_sve_st4:
  case Intrinsic::aarch64_sve_st4q:
  case Intrinsic::aarch64_sve_ld1_gather:
  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ld1q_gather_index:
  case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
  case Intrinsic::aarch64_sve_ld1ro:
  case Intrinsic::aarch64_sve_ld1rq:
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
  case Intrinsic::aarch64_sve_ldff1:
  case Intrinsic::aarch64_sve_ldff1_gather:
  case Intrinsic::aarch64_sve_ldff1_gather_index:
  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ldnf1:
  case Intrinsic::aarch64_sve_ldnt1:
  case Intrinsic::aarch64_sve_ldnt1_gather:
  case Intrinsic::aarch64_sve_ldnt1_gather_index:
  case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
  case Intrinsic::aarch64_neon_fmaxnm:
  case Intrinsic::aarch64_neon_fminnm:
  case Intrinsic::aarch64_sve_convert_from_svbool:
  case Intrinsic::aarch64_sve_dup:
  case Intrinsic::aarch64_sve_dup_x:
  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpne_wide:
  case Intrinsic::aarch64_sve_rdffr:
  case Intrinsic::aarch64_sve_lasta:
  case Intrinsic::aarch64_sve_lastb:
  case Intrinsic::aarch64_sve_clasta_n:
  case Intrinsic::aarch64_sve_clastb_n:
  case Intrinsic::aarch64_sve_cntd:
  case Intrinsic::aarch64_sve_cntw:
  case Intrinsic::aarch64_sve_cnth:
  case Intrinsic::aarch64_sve_cntb:
  case Intrinsic::aarch64_sve_ptest_any:
  case Intrinsic::aarch64_sve_ptest_first:
  case Intrinsic::aarch64_sve_ptest_last:
  case Intrinsic::aarch64_sve_fabd:
  case Intrinsic::aarch64_sve_fadd:
  case Intrinsic::aarch64_sve_fadd_u:
  case Intrinsic::aarch64_sve_fdiv:
  case Intrinsic::aarch64_sve_fmax:
  case Intrinsic::aarch64_sve_fmaxnm:
  case Intrinsic::aarch64_sve_fmin:
  case Intrinsic::aarch64_sve_fminnm:
  case Intrinsic::aarch64_sve_fmla:
  case Intrinsic::aarch64_sve_fmls:
  case Intrinsic::aarch64_sve_fmul:
  case Intrinsic::aarch64_sve_fmul_u:
  case Intrinsic::aarch64_sve_fmulx:
  case Intrinsic::aarch64_sve_fnmla:
  case Intrinsic::aarch64_sve_fnmls:
  case Intrinsic::aarch64_sve_fsub:
  case Intrinsic::aarch64_sve_fsub_u:
  case Intrinsic::aarch64_sve_add:
  case Intrinsic::aarch64_sve_add_u:
                                             Intrinsic::aarch64_sve_mla_u>(
  case Intrinsic::aarch64_sve_mla:
  case Intrinsic::aarch64_sve_mls:
  case Intrinsic::aarch64_sve_mul:
  case Intrinsic::aarch64_sve_mul_u:
  case Intrinsic::aarch64_sve_sabd:
  case Intrinsic::aarch64_sve_smax:
  case Intrinsic::aarch64_sve_smin:
  case Intrinsic::aarch64_sve_smulh:
  case Intrinsic::aarch64_sve_sub:
  case Intrinsic::aarch64_sve_sub_u:
                                             Intrinsic::aarch64_sve_mls_u>(
  case Intrinsic::aarch64_sve_uabd:
  case Intrinsic::aarch64_sve_umax:
  case Intrinsic::aarch64_sve_umin:
  case Intrinsic::aarch64_sve_umulh:
  case Intrinsic::aarch64_sve_asr:
  case Intrinsic::aarch64_sve_lsl:
  case Intrinsic::aarch64_sve_lsr:
  case Intrinsic::aarch64_sve_and:
  case Intrinsic::aarch64_sve_bic:
  case Intrinsic::aarch64_sve_eor:
  case Intrinsic::aarch64_sve_orr:
  case Intrinsic::aarch64_sve_sqsub:
  case Intrinsic::aarch64_sve_uqsub:
  case Intrinsic::aarch64_sve_tbl:
  case Intrinsic::aarch64_sve_uunpkhi:
  case Intrinsic::aarch64_sve_uunpklo:
  case Intrinsic::aarch64_sve_sunpkhi:
  case Intrinsic::aarch64_sve_sunpklo:
  case Intrinsic::aarch64_sve_uzp1:
  case Intrinsic::aarch64_sve_zip1:
  case Intrinsic::aarch64_sve_zip2:
  case Intrinsic::aarch64_sve_ld1_gather_index:
  case Intrinsic::aarch64_sve_st1_scatter_index:
  case Intrinsic::aarch64_sve_ld1:
  case Intrinsic::aarch64_sve_st1:
  case Intrinsic::aarch64_sve_sdiv:
  case Intrinsic::aarch64_sve_sel:
  case Intrinsic::aarch64_sve_srshl:
  case Intrinsic::aarch64_sve_dupq_lane:

  return std::nullopt;
    SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
  case Intrinsic::aarch64_neon_fcvtxn:
  case Intrinsic::aarch64_neon_rshrn:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqxtn:
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);

  return std::nullopt;
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
                                           Type *SrcOverrideTy) {
  auto toVectorTy = [&](Type *ArgTy) {
        cast<VectorType>(DstTy)->getElementCount());

      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  Type *SrcTy = SrcOverrideTy;
  case Instruction::Add:
  case Instruction::Sub:
    if (isa<SExtInst>(Args[1]) || isa<ZExtInst>(Args[1])) {
          toVectorTy(cast<Instruction>(Args[1])->getOperand(0)->getType());

  case Instruction::Mul: {
    if ((isa<SExtInst>(Args[0]) && isa<SExtInst>(Args[1])) ||
        (isa<ZExtInst>(Args[0]) && isa<ZExtInst>(Args[1]))) {
          toVectorTy(cast<Instruction>(Args[0])->getOperand(0)->getType());
    } else if (isa<ZExtInst>(Args[0]) || isa<ZExtInst>(Args[1])) {
  assert(SrcTy && "Expected some SrcTy");
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();

      DstTyL.first * DstTyL.second.getVectorMinNumElements();
      SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
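// In other words, the operation counts as "widening" only when source and
// destination carry the same number of elements and the source element type
// is exactly half the destination width, the shape matched by widening NEON
// instructions such as SADDL or UMULL.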
      (Src->isScalableTy() && !ST->hasSVE2()))

      dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
  if (AddUser && AddUser->getOpcode() == Instruction::Add)

  auto *Shr = dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
  if (!Shr || Shr->getOpcode() != Instruction::LShr)

  auto *Trunc = dyn_cast_or_null<Instruction>(Shr->getUniqueUndroppableUser());
  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
      Src->getScalarSizeInBits() !=
          cast<CastInst>(Trunc)->getDestTy()->getScalarSizeInBits())
  assert(ISD && "Invalid opcode");

  if (I && I->hasOneUser()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands, Src)) {
      if (SingleUser->getOpcode() == Instruction::Add) {
        if (I == SingleUser->getOperand(1) ||
            (isa<CastInst>(SingleUser->getOperand(1)) &&
             cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))

  if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
    return Cost == 0 ? 0 : 1;
    EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;
    std::pair<InstructionCost, MVT> LT =
        LT.second.getScalarSizeInBits();
      return AdjustCost(Entry->Cost);

  if (ST->hasFullFP16())
      return AdjustCost(Entry->Cost);

        Opcode, LegalTy, Src, CCH, CostKind, I);
    return Part1 + Part2;
  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");

  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))

  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())

  case Instruction::SExt:
  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)

  return Opcode == Instruction::PHI ? 0 : 1;
  if (!LT.second.isVector())

  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();

  if (I && dyn_cast<LoadInst>(I->getOperand(1)))
      Opcode == Instruction::InsertElement && Op0 && !isa<UndefValue>(Op0);
  return getVectorInstrCostHelper(nullptr, Val, Index, HasRealUse);

  return getVectorInstrCostHelper(&I, Val, Index, true /* HasRealUse */);
  if (isa<ScalableVectorType>(Ty))

  return DemandedElts.popcount() * (Insert + Extract) *

                                 Op2Info, Args, CxtI);

    return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
        Opcode, Ty, CostKind, Op1Info, Op2Info);
    if (isa<FixedVectorType>(Ty) && cast<FixedVectorType>(Ty)
                                            ->getPrimitiveSizeInBits()
                                            .getFixedValue() < 128) {

    if (nullptr != Entry)

    if (LT.second.getScalarType() == MVT::i8)
    else if (LT.second.getScalarType() == MVT::i16)

    if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
      return (4 + DivCost) * VTy->getNumElements();

    if (LT.second == MVT::v2i64 && ST->hasSVE())

    if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
      return LT.first * 14;

    return 2 * LT.first;

    return 2 * LT.first;
  int MaxMergeDistance = 64;
    return NumVectorInstToHideOverhead;

  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
    const int AmortizationCost = 20;

      VecPred = CurrentPred;
    static const auto ValidMinMaxTys = {
        MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
        MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
    static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};

    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
        (ST->hasFullFP16() &&
         any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))
  VectorSelectTbl[] = {
      {ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
      {ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
      {ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}

  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SETCC) {
    if (LT.second == MVT::v4f16 && !ST->hasFullFP16())
      return LT.first * 4;
  if (ST->requiresStrictAlign()) {

  Options.AllowOverlappingLoads = true;
  Options.LoadSizes = {8, 4, 2, 1};
  Options.AllowedTailExpansions = {3, 5, 6};
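  // Presumably tails of 3, 5 and 6 bytes have no single native load width,
  // so allowing them here lets the memcmp expansion cover each with two
  // overlapping loads from LoadSizes instead of falling back to a byte loop.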
  return ST->hasSVE();

  if (!LT.first.isValid())

  auto *VT = cast<VectorType>(Src);
  if (VT->getElementType()->isIntegerTy(1))
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,

  auto *VT = cast<VectorType>(DataTy);
  if (!LT.first.isValid())

  if (!LT.second.isVector() ||
      VT->getElementType()->isIntegerTy(1))

  ElementCount LegalVF = LT.second.getVectorElementCount();
      {TTI::OK_AnyValue, TTI::OP_None}, I);
  if (VT == MVT::Other)

  if (!LT.first.isValid())

  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
      (VTy->getElementType()->isIntegerTy(1) &&
       !VTy->getElementCount().isKnownMultipleOf(

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
    const int AmortizationCost = 6;
    return LT.first * 2 * AmortizationCost;
  if (VT == MVT::v4i8)
    return cast<FixedVectorType>(Ty)->getNumElements() * 2;

  if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
      *Alignment != Align(1))

  while (!TypeWorklist.empty()) {
    bool UseMaskForCond, bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  auto *VecVTy = cast<VectorType>(VecTy);

  if (VecTy->isScalableTy() && (!ST->hasSVE() || Factor != 2))

  if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))

  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
        VecVTy->getElementCount().divideCoefficientBy(Factor));

    if (MinElts % Factor == 0 &&
      UseMaskForCond, UseMaskForGaps);
  for (auto *I : Tys) {
    if (!I->isVectorTy())
    if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() ==
  enum { MaxStridedLoads = 7 };
    int StridedLoads = 0;
    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {
        LoadInst *LMemI = dyn_cast<LoadInst>(&I);
        if (L->isLoopInvariant(PtrValue))

        const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())

        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;

    return StridedLoads;

  int StridedLoads = countStridedLoads(L, SE);
             << " strided loads\n");
  if (L->getLoopDepth() > 1)

  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (I.getType()->isVectorTy())

      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {

      !ST->getSchedModel().isOutOfOrder()) {
    Type *ExpectedType) {
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
    StructType *ST = dyn_cast<StructType>(ExpectedType);
    unsigned NumElts = Inst->arg_size() - 1;
    if (ST->getNumElements() != NumElts)
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;

  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))
  Type *ConsideredSExtType =
  if (I.getType() != ConsideredSExtType)

  for (const User *U : I.users()) {
      Considerable = true;
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;
  return Considerable;
  if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
  return LegalizationCost + 2;

  LegalizationCost *= LT.first - 1;

  assert(ISD && "Invalid opcode");
  return LegalizationCost + 2;
    std::optional<FastMathFlags> FMF,
  if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
    return BaseCost + FixedVTy->getNumElements();

  if (Opcode != Instruction::FAdd)

  auto *VTy = cast<ScalableVectorType>(ValTy);

  if (isa<ScalableVectorType>(ValTy))

  MVT MTy = LT.second;
  assert(ISD && "Invalid opcode");

    return (LT.first - 1) + Entry->Cost;

  auto *ValVTy = cast<FixedVectorType>(ValTy);
  if (LT.first != 1) {
    ExtraCost *= LT.first - 1;

  auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
  return Cost + ExtraCost;
  EVT PromotedVT = LT.second.getScalarType() == MVT::i1

  if (LT.second.getScalarType() == MVT::i1) {

  assert(Entry && "Illegal Type for Splice");
  LegalizationCost += Entry->Cost;
  return LegalizationCost * LT.first;
  if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
      Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {

    if (Args.size() >= 1 && isa<LoadInst>(Args[0]) &&
      return std::max<InstructionCost>(1, LT.first / 4);

    unsigned TpNumElts = Mask.size();
    unsigned LTNumElts = LT.second.getVectorNumElements();
    unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
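    // (TpNumElts + LTNumElts - 1) / LTNumElts is a ceiling division: the
    // oversized shuffle is decomposed into NumVecs legal-width shuffles,
    // which the loop below costs individually.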
    for (unsigned N = 0; N < NumVecs; N++) {
      unsigned Source1, Source2;
      unsigned NumSources = 0;
      for (unsigned E = 0; E < LTNumElts; E++) {
        int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]

        unsigned Source = MaskElt / LTNumElts;
        if (NumSources == 0) {
        } else if (NumSources == 1 && Source != Source1) {
        } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {

        if (Source == Source1)
        else if (Source == Source2)
          NMask.push_back(MaskElt % LTNumElts + LTNumElts);

      if (NumSources <= 2)
            NTp, NMask, CostKind, 0, nullptr, Args, CxtI);
        return ME.value() % LTNumElts == ME.index();
        Cost += LTNumElts - 1;
  if (IsExtractSubvector && LT.second.isFixedLengthVector())

  bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
  if (IsLoad && LT.second.isVector() &&
          LT.second.getVectorElementCount()))

      all_of(Mask, [](int E) { return E < 8; }))

  if (!Mask.empty() && LT.second.isFixedLengthVector() &&
        return M.value() < 0 || M.value() == (int)M.index();

  if (LT.second.isFixedLengthVector() &&
      LT.second.getVectorNumElements() == Mask.size() &&
      (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
       isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
       [&Mask](int M) { return M < 0 || M == Mask[0]; })))
      return LT.first * Entry->Cost;

      LT.second.getSizeInBits() <= 128 && SubTp) {
    if (SubLT.second.isVector()) {
      int NumElts = LT.second.getVectorNumElements();
      int NumSubElts = SubLT.second.getVectorNumElements();
      if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)

  if (IsExtractSubvector)
  if (isa<LoadInst>(&I) || isa<StoreInst>(&I)) {

  unsigned NumInsns = 0;
    NumInsns += BB->sizeWithoutDebug();

                                           int64_t Scale, unsigned AddrSpace) const {

      isa<BranchInst>(I->getNextNode()) &&
      cast<BranchInst>(I->getNextNode())->isUnconditional())
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
amdgpu AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides the interface for the instcombine pass implementation.
This file defines the LoopVectorizationLegality class.
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
uint64_t IntrinsicInst * II
const char LLVMTargetMachineRef TM
static uint64_t getBits(uint64_t Val, int Start, int End)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getFastMathFlags(const MachineInstr &I)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
unsigned getVectorInsertExtractBaseCost() const
ARMProcFamilyEnum getProcFamily() const
Returns ARM processor family.
unsigned getMaxInterleaveFactor() const
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
TailFoldingOpts getSVETailFoldingDefaultOpts() const
bool useSVEForFixedLengthVectors() const
unsigned getMinSVEVectorSizeInBits() const
bool isSVEAvailable() const
Returns true if the target has SVE and can use the full range of SVE instructions,...
InstructionCost getSpliceCost(VectorType *Tp, int Index)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
bool shouldTreatInstructionLikeSelect(const Instruction *I)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)
bool prefersVectorizedAddressing() const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind)
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isElementTypeLegalForScalableVector(Type *Ty) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool useNeonVector(const Type *Ty) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool isLegalMaskedGatherScatter(Type *DataType) const
unsigned getMaxInterleaveFactor(ElementCount VF)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCost(int64_t Val)
Calculate the cost of materializing a 64-bit value.
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src)
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
EVT getPromotedVTForPredicate(EVT VT) const
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, bool UseScalable) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const
Returns true if VecTy is a legal interleaved access type.
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
bool isTypeLegal(Type *Ty)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction.
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute a cost of the given call instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
bool isIntPredicate() const
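As a hedged illustration of the predicate enum above, a hypothetical helper that tests for the ordered FP comparisons listed:

#include "llvm/IR/Instructions.h"
using namespace llvm;

bool isOrderedFPCompare(CmpInst::Predicate P) {
  switch (P) {
  case CmpInst::FCMP_OEQ: // ordered and equal
  case CmpInst::FCMP_OGT: // ordered and greater than
  case CmpInst::FCMP_OGE: // ordered and greater than or equal
  case CmpInst::FCMP_OLT: // ordered and less than
  case CmpInst::FCMP_OLE: // ordered and less than or equal
    return true;
  default:
    return false;
  }
}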
static ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
const APInt & getValue() const
Return the constant as an APInt value reference.
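A short sketch combining the ConstantInt accessors above with the APInt queries; isPow2Constant is a hypothetical helper:

#include "llvm/IR/Constants.h"
using namespace llvm;

// True if V is a non-zero power-of-two integer constant.
bool isPow2Constant(const Value *V) {
  if (const auto *CI = dyn_cast<ConstantInt>(V))
    return !CI->isZero() && CI->getValue().isPowerOf2();
  return false;
}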
static Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string, with methods for querying it.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
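ElementCount is how fixed and scalable lane counts are kept apart; a minimal sketch:

#include "llvm/Support/TypeSize.h"
#include <cassert>
using llvm::ElementCount;

void elementCounts() {
  ElementCount Fixed = ElementCount::getFixed(4);   // exactly 4 lanes
  ElementCount Scal = ElementCount::getScalable(4); // 4 x vscale lanes
  assert(!Fixed.isScalable() && Scal.isScalable());
  assert(Scal.getKnownMinValue() == 4); // minimum; actual is a vscale multiple
}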
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isEquality() const
Return true if this predicate is either EQ or NE.
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
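A minimal sketch of the IRBuilder calls above, assuming a Function F whose first argument is a pointer and which already has an entry block; buildSplatLoad is hypothetical:

#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

Value *buildSplatLoad(Function &F) {
  IRBuilder<> B(F.getContext());
  B.SetInsertPoint(&F.getEntryBlock()); // append to the end of the block
  LoadInst *L = B.CreateLoad(B.getInt32Ty(), F.getArg(0), "val");
  // Broadcast the loaded scalar into a 4-lane vector.
  return B.CreateVectorSplat(4, L, "splat");
}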
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
static InstructionCost getInvalid(CostType Val=0)
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool requiresSMChange(const SMEAttrs &Callee) const
void set(unsigned M, bool Enable=true)
bool hasStreamingBody() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
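A sketch of the vector-type factories above, including the double-width helper:

#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

void vectorTypes(LLVMContext &Ctx) {
  Type *F32 = Type::getFloatTy(Ctx);
  auto *V4F32 = FixedVectorType::get(F32, 4);      // <4 x float>
  auto *NxV4F32 = ScalableVectorType::get(F32, 4); // <vscale x 4 x float>
  // Double the minimum element count: <vscale x 8 x float>.
  auto *NxV8F32 = ScalableVectorType::getDoubleElementsVectorType(NxV4F32);
  (void)V4F32;
  (void)NxV8F32;
}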
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a de-interleave mask of the given factor Factor like: <Index,...
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
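StackOffset arithmetic keeps the fixed and scalable parts separate; a sketch (sveFrameSlot is hypothetical, sized for two SVE registers at 16 bytes per vscale each):

#include "llvm/Support/TypeSize.h"
using llvm::StackOffset;

StackOffset sveFrameSlot() {
  // 16 fixed bytes plus 32 * vscale scalable bytes.
  return StackOffset::getFixed(16) + StackOffset::getScalable(32);
}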
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
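The same StringRef::split / StringSwitch idiom drives the option parsing in this file; a hedged sketch with a hypothetical parseOption:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

bool parseOption(StringRef Opt) {
  // Split "name=value" around the first '='.
  auto [Name, Val] = Opt.split('=');
  (void)Val;
  return StringSwitch<bool>(Name)
      .Case("enable", true)
      .Case("disable", false)
      .Default(false);
}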
Class to represent struct types.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const
Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in ord...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
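A hedged sketch tying the TargetLoweringBase queries above together (hasNativeSupport is hypothetical; TLI and DL come from the caller):

#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

bool hasNativeSupport(const TargetLoweringBase &TLI, const DataLayout &DL,
                      Type *Ty) {
  EVT VT = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
  return VT.isSimple() && TLI.isTypeLegal(VT);
}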
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isFP128Ty() const
Return true if this is 'fp128'.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static constexpr unsigned SVEBitsPerBlock
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
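The PatternMatch combinators above nest; a minimal sketch that recognises `and X, 1` and captures the variable operand:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

bool matchAndWithOne(Value *V, Value *&X) {
  // m_Value(X) binds the first operand; m_One() requires a constant 1.
  return match(V, m_And(m_Value(X), m_One()));
}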
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
constexpr int PoisonMaskElem
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Mod
The access may modify the value stored in memory.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> or <4, 12,...
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FAnyOf
Any_of reduction with select(fcmp(),x,y) where one of (x,y) is loop invariant, and both x and y are i...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ IAnyOf
Any_of reduction with select(icmp(),x,y) where one of (x,y) is loop invariant, and both x and y are i...
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
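A hedged usage sketch for computeKnownBits (lowBitKnownZero is hypothetical; DL is the enclosing module's data layout):

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

bool lowBitKnownZero(const Value *V, const DataLayout &DL) {
  KnownBits Known(V->getType()->getScalarSizeInBits());
  computeKnownBits(V, Known, DL);
  return Known.Zero[0]; // bit 0 proven zero, i.e. the value is even
}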
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
@ Default
The result values are uniform if and only if all operands are uniform.
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
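The power-of-two helpers above in one sketch:

#include "llvm/Support/MathExtras.h"
#include <cassert>

void mathHelpers() {
  assert(llvm::isPowerOf2_32(64u) && llvm::isPowerOf2_64(1ULL << 40));
  assert(llvm::Log2_32(64u) == 6);         // floor log base 2
  assert(llvm::NextPowerOf2(64u) == 128u); // strictly greater than the input
}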
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
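A sketch of the EVT vector queries above, restricted to fixed-length vectors where getVectorNumElements is well defined; doubleElementsFixed is hypothetical:

#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>
using namespace llvm;

EVT doubleElementsFixed(LLVMContext &Ctx, EVT VT) {
  assert(VT.isFixedLengthVector());
  return EVT::getVectorVT(Ctx, VT.getVectorElementType(),
                          VT.getVectorNumElements() * 2,
                          /*IsScalable=*/false);
}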
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Type Conversion Cost Table.