32using namespace PatternMatch;
34#define LV_NAME "loop-vectorize"
35#define DEBUG_TYPE LV_NAME
39 cl::desc(
"Enable if-conversion during vectorization."));
43 cl::desc(
"Enable recognition of non-constant strided "
44 "pointer induction variables."));
49 cl::desc(
"Allow enabling loop hints to reorder "
50 "FP operations during vectorization."));
57 cl::desc(
"The maximum number of SCEV checks allowed."));
61 cl::desc(
"The maximum number of SCEV checks allowed with a "
62 "vectorize(enable) pragma"));
68 cl::desc(
"Control whether the compiler can use scalable vectors to "
72 "Scalable vectorization is disabled."),
75 "Scalable vectorization is available and favored when the "
76 "cost is inconclusive."),
79 "Scalable vectorization is available and favored when the "
80 "cost is inconclusive.")));
84 cl::desc(
"Enables autovectorization of some loops containing histograms"));
91bool LoopVectorizeHints::Hint::validate(
unsigned Val) {
102 return (Val == 0 || Val == 1);
108 bool InterleaveOnlyWhenForced,
112 Interleave(
"interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
113 Force(
"vectorize.enable", FK_Undefined, HK_FORCE),
114 IsVectorized(
"isvectorized", 0, HK_ISVECTORIZED),
115 Predicate(
"vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
116 Scalable(
"vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
117 TheLoop(L), ORE(ORE) {
119 getHintsFromMetadata();
153 if (IsVectorized.Value != 1)
160 <<
"LV: Interleaving disabled by the pass manager\n");
173 {
Twine(Prefix(),
"vectorize.").
str(),
174 Twine(Prefix(),
"interleave.").
str()},
179 IsVectorized.Value = 1;
185 LLVM_DEBUG(
dbgs() <<
"LV: Not vectorizing: #pragma vectorize disable.\n");
191 LLVM_DEBUG(
dbgs() <<
"LV: Not vectorizing: No #pragma vectorize enable.\n");
197 LLVM_DEBUG(
dbgs() <<
"LV: Not vectorizing: Disabled/already vectorized.\n");
203 "AllDisabled", L->getStartLoc(),
205 <<
"loop not vectorized: vectorization and interleaving are "
206 "explicitly disabled, or the loop has already been "
223 <<
"loop not vectorized: vectorization is explicitly disabled";
227 R <<
"loop not vectorized";
229 R <<
" (Force=" << NV(
"Force", true);
230 if (Width.Value != 0)
231 R <<
", Vector Width=" << NV(
"VectorWidth", getWidth());
232 if (getInterleave() != 0)
233 R <<
", Interleave Count=" << NV(
"InterleaveCount", getInterleave());
256 EC.getKnownMinValue() > 1);
259void LoopVectorizeHints::getHintsFromMetadata() {
274 if (
const MDNode *MD = dyn_cast<MDNode>(MDO)) {
275 if (!MD || MD->getNumOperands() == 0)
277 S = dyn_cast<MDString>(MD->getOperand(0));
278 for (
unsigned Idx = 1;
Idx < MD->getNumOperands(); ++
Idx)
279 Args.push_back(MD->getOperand(
Idx));
281 S = dyn_cast<MDString>(MDO);
282 assert(Args.size() == 0 &&
"too many arguments for MDString");
290 if (
Args.size() == 1)
291 setHint(
Name, Args[0]);
296 if (!
Name.starts_with(Prefix()))
300 const ConstantInt *
C = mdconst::dyn_extract<ConstantInt>(Arg);
303 unsigned Val =
C->getZExtValue();
305 Hint *Hints[] = {&Width, &Interleave, &Force,
306 &IsVectorized, &Predicate, &Scalable};
307 for (
auto *
H : Hints) {
308 if (
Name ==
H->Name) {
309 if (
H->validate(Val))
358 auto *LatchBr = dyn_cast<BranchInst>(Latch->
getTerminator());
359 if (!LatchBr || LatchBr->isUnconditional()) {
365 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
368 dbgs() <<
"LV: Loop latch condition is not a compare instruction.\n");
372 Value *CondOp0 = LatchCmp->getOperand(0);
373 Value *CondOp1 = LatchCmp->getOperand(1);
374 Value *IVUpdate =
IV->getIncomingValueForBlock(Latch);
377 LLVM_DEBUG(
dbgs() <<
"LV: Loop latch condition is not uniform.\n");
391 for (
Loop *SubLp : *Lp)
400 return DL.getIntPtrType(Ty);
424 if (!AllowedExit.
count(Inst))
430 LLVM_DEBUG(
dbgs() <<
"LV: Found an outside user for : " << *UI <<
'\n');
445 Value *APtr =
A->getPointerOperand();
446 Value *BPtr =
B->getPointerOperand();
460 const auto &Strides =
466 CanAddPredicate,
false).value_or(0);
467 if (Stride == 1 || Stride == -1)
483class SCEVAddRecForUniformityRewriter
486 unsigned StepMultiplier;
495 bool CannotAnalyze =
false;
497 bool canAnalyze()
const {
return !CannotAnalyze; }
500 SCEVAddRecForUniformityRewriter(
ScalarEvolution &SE,
unsigned StepMultiplier,
501 unsigned Offset,
Loop *TheLoop)
507 "addrec outside of TheLoop must be invariant and should have been "
513 if (!SE.isLoopInvariant(Step, TheLoop)) {
514 CannotAnalyze =
true;
517 const SCEV *NewStep =
518 SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
519 const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
520 const SCEV *NewStart = SE.getAddExpr(Expr->
getStart(), ScaledOffset);
525 if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
531 if (SE.isLoopInvariant(S, TheLoop))
534 CannotAnalyze =
true;
540 CannotAnalyze =
true;
545 unsigned StepMultiplier,
unsigned Offset,
552 [](
const SCEV *S) {
return isa<SCEVUDivExpr>(S); }))
555 SCEVAddRecForUniformityRewriter
Rewriter(SE, StepMultiplier, Offset,
577 auto *SE = PSE.
getSE();
585 const SCEV *FirstLaneExpr =
586 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
587 if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
593 return all_of(
reverse(seq<unsigned>(1, FixedVF)), [&](
unsigned I) {
594 const SCEV *IthLaneExpr =
595 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF,
I, TheLoop);
596 return FirstLaneExpr == IthLaneExpr;
612bool LoopVectorizationLegality::canVectorizeOuterLoop() {
622 auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
625 "loop control flow is not understood by vectorizer",
626 "CFGNotUnderstood", ORE, TheLoop);
639 if (Br && Br->isConditional() &&
644 "loop control flow is not understood by vectorizer",
645 "CFGNotUnderstood", ORE, TheLoop);
658 "loop control flow is not understood by vectorizer",
659 "CFGNotUnderstood", ORE, TheLoop);
667 if (!setupOuterLoopInductions()) {
669 "UnsupportedPhi", ORE, TheLoop);
679void LoopVectorizationLegality::addInductionPhi(
682 Inductions[
Phi] =
ID;
705 ID.getConstIntStepValue() &&
ID.getConstIntStepValue()->isOne() &&
706 isa<Constant>(
ID.getStartValue()) &&
707 cast<Constant>(
ID.getStartValue())->isNullValue()) {
713 if (!PrimaryInduction || PhiTy == WidestIndTy)
714 PrimaryInduction =
Phi;
731bool LoopVectorizationLegality::setupOuterLoopInductions() {
735 auto IsSupportedPhi = [&](
PHINode &
Phi) ->
bool {
739 addInductionPhi(&Phi,
ID, AllowedExit);
745 dbgs() <<
"LV: Found unsupported PHI for outer loop vectorization.\n");
768 TLI.
getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
776 "Caller may decide to scalarize a variant using a scalable VF");
784 auto *StructTy = dyn_cast<StructType>(Ty);
788 if (StructTy && !StructTy->containsHomogeneousTypes())
793bool LoopVectorizationLegality::canVectorizeInstrs() {
800 if (
auto *Phi = dyn_cast<PHINode>(&
I)) {
801 Type *PhiTy = Phi->getType();
806 "loop control flow is not understood by vectorizer",
807 "CFGNotUnderstood", ORE, TheLoop);
825 if (
Phi->getNumIncomingValues() != 2) {
827 "loop control flow is not understood by vectorizer",
828 "CFGNotUnderstood", ORE, TheLoop, Phi);
837 Reductions[
Phi] = RedDes;
845 auto IsDisallowedStridedPointerInduction =
850 ID.getConstIntStepValue() ==
nullptr;
869 !IsDisallowedStridedPointerInduction(
ID)) {
870 addInductionPhi(Phi,
ID, AllowedExit);
877 FixedOrderRecurrences.
insert(Phi);
884 !IsDisallowedStridedPointerInduction(
ID)) {
885 addInductionPhi(Phi,
ID, AllowedExit);
890 "value that could not be identified as "
891 "reduction is used outside the loop",
892 "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
900 auto *CI = dyn_cast<CallInst>(&
I);
903 !isa<DbgInfoIntrinsic>(CI) &&
904 !(CI->getCalledFunction() && TLI &&
911 TLI && CI->getCalledFunction() &&
912 CI->getType()->isFloatingPointTy() &&
913 TLI->
getLibFunc(CI->getCalledFunction()->getName(), Func) &&
922 "Found a non-intrinsic callsite",
923 "library call cannot be vectorized. "
924 "Try compiling with -fno-math-errno, -ffast-math, "
926 "CantVectorizeLibcall", ORE, TheLoop, CI);
929 "call instruction cannot be vectorized",
930 "CantVectorizeLibcall", ORE, TheLoop, CI);
938 auto *SE = PSE.
getSE();
940 for (
unsigned Idx = 0;
Idx < CI->arg_size(); ++
Idx)
945 "intrinsic instruction cannot be vectorized",
946 "CantVectorizeIntrinsic", ORE, TheLoop, CI);
955 VecCallVariantsFound =
true;
957 auto CanWidenInstructionTy = [
this](
Instruction const &Inst) {
958 Type *InstTy = Inst.getType();
959 if (!isa<StructType>(InstTy))
966 all_of(Inst.users(), IsaPred<ExtractValueInst>)) {
969 StructVecCallFound =
true;
979 if (!CanWidenInstructionTy(
I) ||
982 isa<ExtractElementInst>(
I)) {
984 "instruction return type cannot be vectorized",
985 "CantVectorizeInstructionReturnType", ORE, TheLoop, &
I);
990 if (
auto *ST = dyn_cast<StoreInst>(&
I)) {
991 Type *
T =
ST->getValueOperand()->getType();
994 "CantVectorizeStore", ORE, TheLoop, ST);
1000 if (
ST->getMetadata(LLVMContext::MD_nontemporal)) {
1003 assert(VecTy &&
"did not find vectorized version of stored type");
1006 "nontemporal store instruction cannot be vectorized",
1007 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
1012 }
else if (
auto *LD = dyn_cast<LoadInst>(&
I)) {
1013 if (
LD->getMetadata(LLVMContext::MD_nontemporal)) {
1017 assert(VecTy &&
"did not find vectorized version of load type");
1020 "nontemporal load instruction cannot be vectorized",
1021 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
1031 }
else if (
I.getType()->isFloatingPointTy() && (CI ||
I.isBinaryOp()) &&
1034 Hints->setPotentiallyUnsafe();
1049 "ValueUsedOutsideLoop", ORE, TheLoop, &
I);
1055 if (!PrimaryInduction) {
1056 if (Inductions.
empty()) {
1058 "loop induction variable could not be identified",
1059 "NoInductionVariable", ORE, TheLoop);
1064 "integer loop induction variable could not be identified",
1065 "NoIntegerInductionVariable", ORE, TheLoop);
1068 LLVM_DEBUG(
dbgs() <<
"LV: Did not find one integer induction var.\n");
1074 if (PrimaryInduction && WidestIndTy != PrimaryInduction->
getType())
1075 PrimaryInduction =
nullptr;
1107 Value *HIncVal =
nullptr;
1122 Value *HIdx =
nullptr;
1123 for (
Value *Index :
GEP->indices()) {
1126 if (!isa<ConstantInt>(Index))
1145 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.
getSE()->
getSCEV(VPtrVal));
1146 if (!AR || AR->getLoop() != TheLoop)
1156 LLVM_DEBUG(
dbgs() <<
"LV: Found histogram for: " << *HSt <<
"\n");
1163bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
1203 LLVM_DEBUG(
dbgs() <<
"LV: Checking for a histogram on: " << *SI <<
"\n");
1207bool LoopVectorizationLegality::canVectorizeMemory() {
1208 LAI = &LAIs.
getInfo(*TheLoop);
1213 "loop not vectorized: ", *LAR);
1218 return canVectorizeIndirectUnsafeDependences();
1222 "write to a loop invariant address could not "
1224 "CantVectorizeStoreToLoopInvariantAddress", ORE,
1242 "We don't allow storing to uniform addresses",
1243 "write of conditional recurring variant value to a loop "
1244 "invariant address could not be vectorized",
1245 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1255 "Invariant address is calculated inside the loop",
1256 "write to a loop invariant address could not "
1258 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1286 I->getValueOperand()->getType() ==
1287 SI->getValueOperand()->getType();
1294 bool IsOK = UnhandledStores.
empty();
1298 "We don't allow storing to uniform addresses",
1299 "write to a loop invariant address could not "
1301 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1312 bool EnableStrictReductions) {
1321 if (!EnableStrictReductions ||
1352 return V == InvariantAddress ||
1359 PHINode *PN = dyn_cast_or_null<PHINode>(In0);
1363 return Inductions.
count(PN);
1388 const Value *V)
const {
1389 auto *Inst = dyn_cast<Instruction>(V);
1390 return (Inst && InductionCastsToIgnore.
count(Inst));
1399 return FixedOrderRecurrences.
count(Phi);
1410 "Uncountable exiting block must be a direct predecessor of latch");
1416bool LoopVectorizationLegality::blockCanBePredicated(
1422 if (
match(&
I, m_Intrinsic<Intrinsic::assume>())) {
1430 if (isa<NoAliasScopeDeclInst>(&
I))
1437 if (
CallInst *CI = dyn_cast<CallInst>(&
I))
1444 if (
auto *LI = dyn_cast<LoadInst>(&
I)) {
1455 if (
auto *SI = dyn_cast<StoreInst>(&
I)) {
1460 if (
I.mayReadFromMemory() ||
I.mayWriteToMemory() ||
I.mayThrow())
1467bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1470 "IfConversionDisabled", ORE, TheLoop);
1516 if (isa<SwitchInst>(BB->getTerminator())) {
1519 "LoopContainsUnsupportedSwitch", ORE,
1520 TheLoop, BB->getTerminator());
1523 }
else if (!isa<BranchInst>(BB->getTerminator())) {
1525 "LoopContainsUnsupportedTerminator", ORE,
1526 TheLoop, BB->getTerminator());
1532 !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
1534 "Control flow cannot be substituted for a select",
"NoCFGForSelect",
1535 ORE, TheLoop, BB->getTerminator());
1545bool LoopVectorizationLegality::canVectorizeLoopCFG(
Loop *Lp,
1546 bool UseVPlanNativePath) {
1548 "VPlan-native path is not enabled.");
1564 "loop control flow is not understood by vectorizer",
1565 "CFGNotUnderstood", ORE, TheLoop);
1566 if (DoExtraAnalysis)
1575 "loop control flow is not understood by vectorizer",
1576 "CFGNotUnderstood", ORE, TheLoop);
1577 if (DoExtraAnalysis)
1586bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1587 Loop *Lp,
bool UseVPlanNativePath) {
1592 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1593 if (DoExtraAnalysis)
1601 for (
Loop *SubLp : *Lp)
1602 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1603 if (DoExtraAnalysis)
1612bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1616 "Cannot vectorize early exit loop",
1617 "NoLatchEarlyExit", ORE, TheLoop);
1621 if (Reductions.
size() || FixedOrderRecurrences.
size()) {
1623 "Found reductions or recurrences in early-exit loop",
1624 "Cannot vectorize early exit loop with reductions or recurrences",
1625 "RecurrencesInEarlyExitLoop", ORE, TheLoop);
1637 if (isa<SCEVCouldNotCompute>(EC)) {
1638 UncountableExitingBlocks.push_back(BB);
1641 if (Succs.size() != 2) {
1643 "Early exiting block does not have exactly two successors",
1644 "Incorrect number of successors from early exiting block",
1645 "EarlyExitTooManySuccessors", ORE, TheLoop);
1651 ExitBlock = Succs[0];
1654 ExitBlock = Succs[1];
1656 UncountableExitBlocks.push_back(ExitBlock);
1658 CountableExitingBlocks.push_back(BB);
1669 "Loop has too many uncountable exits",
1670 "Cannot vectorize early exit loop with more than one early exit",
1671 "TooManyUncountableEarlyExits", ORE, TheLoop);
1680 "Cannot vectorize early exit loop",
1681 "EarlyExitNotLatchPredecessor", ORE, TheLoop);
1686 if (isa<SCEVCouldNotCompute>(
1689 "Cannot determine exact exit count for latch block",
1690 "Cannot vectorize early exit loop",
1691 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1695 "Latch block not found in list of countable exits!");
1700 switch (
I->getOpcode()) {
1701 case Instruction::Load:
1702 case Instruction::Store:
1703 case Instruction::PHI:
1704 case Instruction::Br:
1712 for (
auto *BB : TheLoop->
blocks())
1713 for (
auto &
I : *BB) {
1714 if (
I.mayWriteToMemory()) {
1717 "Writes to memory unsupported in early exit loops",
1718 "Cannot vectorize early exit loop with writes to memory",
1719 "WritesInEarlyExitLoop", ORE, TheLoop);
1721 }
else if (!IsSafeOperation(&
I)) {
1723 "cannot be speculatively executed",
1724 "UnsafeOperationsEarlyExitLoop", ORE,
1732 "Expected latch predecessor to be the early exiting block");
1740 "Cannot vectorize potentially faulting early exit loop",
1741 "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1745 [[maybe_unused]]
const SCEV *SymbolicMaxBTC =
1749 assert(!isa<SCEVCouldNotCompute>(SymbolicMaxBTC) &&
1750 "Failed to get symbolic expression for backedge taken count");
1751 LLVM_DEBUG(
dbgs() <<
"LV: Found an early exit loop with symbolic max "
1752 "backedge taken count: "
1753 << *SymbolicMaxBTC <<
'\n');
1765 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1766 if (DoExtraAnalysis) {
1781 assert(UseVPlanNativePath &&
"VPlan-native path is not enabled.");
1783 if (!canVectorizeOuterLoop()) {
1785 "UnsupportedOuterLoop", ORE, TheLoop);
1798 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1800 if (DoExtraAnalysis)
1807 if (!canVectorizeInstrs()) {
1808 LLVM_DEBUG(
dbgs() <<
"LV: Can't vectorize the instructions or CFG\n");
1809 if (DoExtraAnalysis)
1815 HasUncountableEarlyExit =
false;
1819 "UnsupportedUncountableLoop", ORE, TheLoop);
1820 if (DoExtraAnalysis)
1825 HasUncountableEarlyExit =
true;
1826 if (!isVectorizableEarlyExitLoop()) {
1827 UncountableExitingBlocks.clear();
1828 HasUncountableEarlyExit =
false;
1829 if (DoExtraAnalysis)
1838 if (!canVectorizeMemory()) {
1839 LLVM_DEBUG(
dbgs() <<
"LV: Can't vectorize due to memory conflicts\n");
1840 if (DoExtraAnalysis)
1849 ?
" (with a runtime bound check)"
1860 "due to SCEVThreshold");
1862 "Too many SCEV assumptions need to be made and checked at runtime",
1863 "TooManySCEVRunTimeChecks", ORE, TheLoop);
1864 if (DoExtraAnalysis)
1879 LLVM_DEBUG(
dbgs() <<
"LV: checking if tail can be folded by masking.\n");
1887 for (
auto *AE : AllowedExit) {
1890 if (ReductionLiveOuts.
count(AE))
1892 for (
User *U : AE->users()) {
1898 <<
"LV: Cannot fold tail by masking, loop has an outside user for "
1905 PHINode *OrigPhi = Entry.first;
1907 auto *UI = cast<Instruction>(U);
1909 LLVM_DEBUG(
dbgs() <<
"LV: Cannot fold tail by masking, loop IV has an "
1924 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
1942 [[maybe_unused]]
bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
1943 assert(R &&
"Must be able to predicate block when tail-folding.");
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
loop Loop Strength Reduction
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive.")))
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
static cl::opt< bool > AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden, cl::desc("Enable recognition of non-constant strided " "pointer induction variables."))
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
static cl::opt< bool > EnableHistogramVectorization("enable-histogram-loop-vectorization", cl::init(false), cl::Hidden, cl::desc("Enables autovectorization of some loops containing histograms"))
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
This file defines the LoopVectorizationLegality class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
Virtual Register Rewriter
static const uint32_t IV[8]
Class for arbitrary precision integers.
LLVM Basic Block Representation.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
A parsed version of the target data layout string in and methods for querying it.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
const LoopAccessInfo & getInfo(Loop &L)
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
ArrayRef< StoreInst * > getStoresToInvariantAddresses() const
Return the list of stores to invariant addresses.
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
const RuntimePointerChecking * getRuntimePointerChecking() const
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
bool isInvariant(Value *V) const
Returns true if value V is loop invariant.
bool hasLoadStoreDependenceInvolvingLoopInvariantAddress() const
Return true if the loop has memory dependence involving a load and a store to an invariant address,...
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
const DenseMap< Value *, const SCEV * > & getSymbolicStrides() const
If an access has a symbolic strides, this maps the pointer value to the stride symbol.
bool hasStoreStoreDependenceInvolvingLoopInvariantAddress() const
Return true if the loop has memory dependence involving two stores to an invariant address,...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
bool isLoopHeader(const BlockT *BB) const
const SmallVector< BasicBlock *, 4 > & getUncountableExitingBlocks() const
Returns all the exiting blocks with an uncountable exit.
bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
bool blockNeedsPredication(BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
bool isUniform(Value *V, ElementCount VF) const
Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
bool isInvariant(Value *V) const
Returns true if V is invariant across all loop iterations according to SCEV.
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
bool canFoldTailByMasking() const
Return true if we can vectorize this loop while folding its tail by masking.
void prepareToFoldTailByMasking()
Mark all respective loads/stores for masking.
bool hasUncountableEarlyExit() const
Returns true if the loop has an uncountable early exit, i.e.
bool isUniformMemOp(Instruction &I, ElementCount VF) const
A uniform memory op is a load or store which accesses the same memory location on all VF lanes,...
BasicBlock * getUncountableEarlyExitingBlock() const
Returns the uncountable early exiting block.
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
Instruction * getExactFPInst()
void addExactFPMathInst(Instruction *I)
Track the 1st floating-point instruction that can not be reassociated.
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
@ SK_Unspecified
Not selected.
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
enum ForceKind getForce() const
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
void emitRemarkWithHints() const
Dumps all the hint information.
ElementCount getWidth() const
@ FK_Enabled
Forcing enabled.
@ FK_Undefined
Not selected.
@ FK_Disabled
Forcing disabled.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
unsigned getInterleave() const
unsigned getIsVectorized() const
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
StringRef getString() const
static MDString * get(LLVMContext &Context, StringRef Str)
size_type count(const KeyT &Key) const
iterator find(const KeyT &Key)
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
const SCEVPredicate & getPredicate() const
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Instruction * getLoopExitInstr() const
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
bool Need
This flag indicates if we need to add the runtime check.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
const Loop * getLoop() const
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
This visitor recursively visits a SCEV expression and re-writes it.
const SCEV * visit(const SCEV *S)
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
const SCEV * getPredicatedExitCount(const Loop *L, const BasicBlock *ExitingBlock, SmallVectorImpl< const SCEVPredicate * > *Predicates, ExitCountKind Kind=Exact)
Same as above except this uses the predicated backedge taken info and may require predicates.
const SCEV * getCouldNotCompute()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
static constexpr size_t npos
Provides information about what library functions are available for the current target.
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Value * getOperand(unsigned i) const
static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
StringRef getName() const
Return a constant reference to the value's name.
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr bool isZero() const
const ParentTy * getParent() const
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)
Matches StoreInst.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
auto successors(const MachineBasicBlock *BB)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
static bool canWidenCallReturnType(Type *Ty)
Returns true if the call return type Ty can be widened by the loop vectorizer.
bool isDereferenceableReadOnlyLoop(Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if the loop L cannot fault on any iteration and only contains read-only memory accesses.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
bool canVectorizeTy(Type *Ty)
Returns true if Ty is a valid vector element type, void, or an unpacked literal struct where all elem...
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop, const PredicatedScalarEvolution &PSE, SmallVectorImpl< HistogramInfo > &Histograms)
Find histogram operations that match high-level code in loops:
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
Dependece between memory access instructions.
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
static VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
An object of this class is returned by queries that could not be answered.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Collection of parameters shared beetween the Loop Vectorizer and the Loop Access Analysis.
static const unsigned MaxVectorWidth
Maximum SIMD width.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.