36#define LV_NAME "loop-vectorize"
37#define DEBUG_TYPE LV_NAME
41 cl::desc(
"Enable if-conversion during vectorization."));
45 cl::desc(
"Enable recognition of non-constant strided "
46 "pointer induction variables."));
50 cl::desc(
"Allow enabling loop hints to reorder "
51 "FP operations during vectorization."));
57 cl::desc(
"The maximum number of SCEV checks allowed."));
61 cl::desc(
"The maximum number of SCEV checks allowed with a "
62 "vectorize(enable) pragma"));
68 cl::desc(
"Control whether the compiler can use scalable vectors to "
72 "Scalable vectorization is disabled."),
75 "Scalable vectorization is available and favored when the "
76 "cost is inconclusive."),
79 "Scalable vectorization is available and favored when the "
80 "cost is inconclusive.")));
84 cl::desc(
"Enables autovectorization of some loops containing histograms"));
91bool LoopVectorizeHints::Hint::validate(
unsigned Val) {
102 return (Val == 0 || Val == 1);
108 bool InterleaveOnlyWhenForced,
112 Interleave(
"interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
114 IsVectorized(
"isvectorized", 0, HK_ISVECTORIZED),
115 Predicate(
"vectorize.predicate.enable",
FK_Undefined, HK_PREDICATE),
116 Scalable(
"vectorize.scalable.enable",
SK_Unspecified, HK_SCALABLE),
117 TheLoop(L), ORE(ORE) {
119 getHintsFromMetadata();
153 if (IsVectorized.Value != 1)
160 <<
"LV: Interleaving disabled by the pass manager\n");
164 LLVMContext &Context = TheLoop->getHeader()->getContext();
170 MDNode *LoopID = TheLoop->getLoopID();
173 {
Twine(Prefix(),
"vectorize.").
str(),
174 Twine(Prefix(),
"interleave.").
str()},
176 TheLoop->setLoopID(NewLoopID);
179 IsVectorized.Value = 1;
185 LLVM_DEBUG(
dbgs() <<
"LV: Not vectorizing: #pragma vectorize disable.\n");
191 LLVM_DEBUG(
dbgs() <<
"LV: Not vectorizing: No #pragma vectorize enable.\n");
197 LLVM_DEBUG(
dbgs() <<
"LV: Not vectorizing: Disabled/already vectorized.\n");
203 "AllDisabled", L->getStartLoc(),
205 <<
"loop not vectorized: vectorization and interleaving are "
206 "explicitly disabled, or the loop has already been "
221 TheLoop->getStartLoc(),
222 TheLoop->getHeader())
223 <<
"loop not vectorized: vectorization is explicitly disabled";
226 TheLoop->getHeader());
227 R <<
"loop not vectorized";
229 R <<
" (Force=" << NV(
"Force",
true);
230 if (Width.Value != 0)
231 R <<
", Vector Width=" << NV(
"VectorWidth",
getWidth());
233 R <<
", Interleave Count=" << NV(
"InterleaveCount",
getInterleave());
256 EC.getKnownMinValue() > 1);
259void LoopVectorizeHints::getHintsFromMetadata() {
275 if (!MD || MD->getNumOperands() == 0)
278 for (
unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
279 Args.push_back(MD->getOperand(Idx));
282 assert(Args.size() == 0 &&
"too many arguments for MDString");
290 if (
Args.size() == 1)
291 setHint(Name, Args[0]);
296 if (!
Name.starts_with(Prefix()))
303 unsigned Val =
C->getZExtValue();
305 Hint *Hints[] = {&Width, &Interleave, &Force,
306 &IsVectorized, &Predicate, &Scalable};
307 for (
auto *
H : Hints) {
308 if (Name ==
H->Name) {
309 if (
H->validate(Val))
312 LLVM_DEBUG(
dbgs() <<
"LV: ignoring invalid hint '" << Name <<
"'\n");
359 if (!LatchBr || LatchBr->isUnconditional()) {
368 dbgs() <<
"LV: Loop latch condition is not a compare instruction.\n");
372 Value *CondOp0 = LatchCmp->getOperand(0);
373 Value *CondOp1 = LatchCmp->getOperand(1);
374 Value *IVUpdate =
IV->getIncomingValueForBlock(Latch);
377 LLVM_DEBUG(
dbgs() <<
"LV: Loop latch condition is not uniform.\n");
391 for (
Loop *SubLp : *Lp)
399 assert(Ty->isIntOrPtrTy() &&
"Expected integer or pointer type");
401 if (Ty->isPointerTy())
402 return DL.getIntPtrType(Ty->getContext(), Ty->getPointerAddressSpace());
406 if (Ty->getScalarSizeInBits() < 32)
425 if (!AllowedExit.
count(Inst))
431 LLVM_DEBUG(
dbgs() <<
"LV: Found an outside user for : " << *UI <<
'\n');
446 Value *APtr =
A->getPointerOperand();
447 Value *BPtr =
B->getPointerOperand();
461 const auto &Strides =
467 CanAddPredicate,
false).value_or(0);
468 if (Stride == 1 || Stride == -1)
474 return LAI->isInvariant(V);
484class SCEVAddRecForUniformityRewriter
487 unsigned StepMultiplier;
496 bool CannotAnalyze =
false;
498 bool canAnalyze()
const {
return !CannotAnalyze; }
501 SCEVAddRecForUniformityRewriter(
ScalarEvolution &SE,
unsigned StepMultiplier,
506 const SCEV *visitAddRecExpr(
const SCEVAddRecExpr *Expr) {
508 "addrec outside of TheLoop must be invariant and should have been "
514 if (!SE.isLoopInvariant(Step, TheLoop)) {
515 CannotAnalyze =
true;
518 const SCEV *NewStep =
519 SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
520 const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
521 const SCEV *NewStart = SE.getAddExpr(Expr->
getStart(), ScaledOffset);
525 const SCEV *
visit(
const SCEV *S) {
526 if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
531 const SCEV *visitUnknown(
const SCEVUnknown *S) {
532 if (SE.isLoopInvariant(S, TheLoop))
535 CannotAnalyze =
true;
539 const SCEV *visitCouldNotCompute(
const SCEVCouldNotCompute *S) {
541 CannotAnalyze =
true;
545 static const SCEV *rewrite(
const SCEV *S, ScalarEvolution &SE,
546 unsigned StepMultiplier,
unsigned Offset,
556 SCEVAddRecForUniformityRewriter
Rewriter(SE, StepMultiplier, Offset,
578 auto *SE = PSE.getSE();
586 const SCEV *FirstLaneExpr =
587 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
595 const SCEV *IthLaneExpr =
596 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF,
I, TheLoop);
597 return FirstLaneExpr == IthLaneExpr;
613bool LoopVectorizationLegality::canVectorizeOuterLoop() {
626 "loop control flow is not understood by vectorizer",
627 "CFGNotUnderstood", ORE, TheLoop);
640 if (Br && Br->isConditional() &&
645 "loop control flow is not understood by vectorizer",
646 "CFGNotUnderstood", ORE, TheLoop);
659 "loop control flow is not understood by vectorizer",
660 "CFGNotUnderstood", ORE, TheLoop);
668 if (!setupOuterLoopInductions()) {
670 "UnsupportedPhi", ORE, TheLoop);
680void LoopVectorizationLegality::addInductionPhi(
683 Inductions[
Phi] =
ID;
689 const SmallVectorImpl<Instruction *> &Casts =
ID.getCastInsts();
691 InductionCastsToIgnore.insert(*Casts.
begin());
694 const DataLayout &
DL =
Phi->getDataLayout();
697 "Expected int, ptr, or FP induction phi type");
709 ID.getConstIntStepValue() &&
ID.getConstIntStepValue()->isOne() &&
717 if (!PrimaryInduction || PhiTy == WidestIndTy)
718 PrimaryInduction =
Phi;
727 if (PSE.getPredicate().isAlwaysTrue()) {
728 AllowedExit.insert(Phi);
729 AllowedExit.insert(
Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
735bool LoopVectorizationLegality::setupOuterLoopInductions() {
739 auto IsSupportedPhi = [&](PHINode &
Phi) ->
bool {
740 InductionDescriptor
ID;
743 addInductionPhi(&Phi,
ID, AllowedExit);
749 dbgs() <<
"LV: Found unsupported PHI for outer loop vectorization.\n");
772 TLI.
getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
780 "Caller may decide to scalarize a variant using a scalable VF");
792 if (StructTy && !StructTy->containsHomogeneousTypes())
797bool LoopVectorizationLegality::canVectorizeInstrs() {
805 Result &= canVectorizeInstr(
I);
806 if (!DoExtraAnalysis && !Result)
811 if (!PrimaryInduction) {
812 if (Inductions.empty()) {
814 "Did not find one integer induction var",
815 "loop induction variable could not be identified",
816 "NoInductionVariable", ORE, TheLoop);
821 "Did not find one integer induction var",
822 "integer loop induction variable could not be identified",
823 "NoIntegerInductionVariable", ORE, TheLoop);
826 LLVM_DEBUG(
dbgs() <<
"LV: Did not find one integer induction var.\n");
832 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
833 PrimaryInduction =
nullptr;
838bool LoopVectorizationLegality::canVectorizeInstr(
Instruction &
I) {
848 "Found a non-int non-pointer PHI",
849 "loop control flow is not understood by vectorizer",
850 "CFGNotUnderstood", ORE, TheLoop);
863 AllowedExit.insert(&
I);
868 if (
Phi->getNumIncomingValues() != 2) {
870 "Found an invalid PHI",
871 "loop control flow is not understood by vectorizer",
872 "CFGNotUnderstood", ORE, TheLoop, Phi);
876 RecurrenceDescriptor RedDes;
881 Reductions[
Phi] = RedDes;
889 auto IsDisallowedStridedPointerInduction =
890 [](
const InductionDescriptor &
ID) {
894 ID.getConstIntStepValue() ==
nullptr;
911 InductionDescriptor
ID;
913 !IsDisallowedStridedPointerInduction(
ID)) {
914 addInductionPhi(Phi,
ID, AllowedExit);
915 Requirements->addExactFPMathInst(
ID.getExactFPMathInst());
920 AllowedExit.insert(Phi);
921 FixedOrderRecurrences.insert(Phi);
928 !IsDisallowedStridedPointerInduction(
ID)) {
929 addInductionPhi(Phi,
ID, AllowedExit);
934 "value that could not be identified as "
935 "reduction is used outside the loop",
936 "NonReductionValueUsedOutsideLoop", ORE, TheLoop,
947 !(CI->getCalledFunction() && TLI &&
953 TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() &&
954 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
955 TLI->hasOptimizedCodeGen(Func);
963 "Found a non-intrinsic callsite",
964 "library call cannot be vectorized. "
965 "Try compiling with -fno-math-errno, -ffast-math, "
967 "CantVectorizeLibcall", ORE, TheLoop, CI);
970 "call instruction cannot be vectorized",
971 "CantVectorizeLibcall", ORE, TheLoop, CI);
979 auto *SE = PSE.getSE();
981 for (
unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
985 "Found unvectorizable intrinsic",
986 "intrinsic instruction cannot be vectorized",
987 "CantVectorizeIntrinsic", ORE, TheLoop, CI);
996 VecCallVariantsFound =
true;
998 auto CanWidenInstructionTy = [](
Instruction const &Inst) {
999 Type *InstTy = Inst.getType();
1013 if (!CanWidenInstructionTy(
I) ||
1018 "instruction return type cannot be vectorized",
1019 "CantVectorizeInstructionReturnType", ORE,
1026 Type *
T =
ST->getValueOperand()->getType();
1029 "CantVectorizeStore", ORE, TheLoop, ST);
1035 if (
ST->getMetadata(LLVMContext::MD_nontemporal)) {
1038 assert(VecTy &&
"did not find vectorized version of stored type");
1039 if (!TTI->isLegalNTStore(VecTy,
ST->getAlign())) {
1041 "nontemporal store instruction cannot be vectorized",
1042 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
1048 if (
LD->getMetadata(LLVMContext::MD_nontemporal)) {
1052 assert(VecTy &&
"did not find vectorized version of load type");
1053 if (!TTI->isLegalNTLoad(VecTy,
LD->getAlign())) {
1055 "nontemporal load instruction cannot be vectorized",
1056 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
1066 }
else if (
I.getType()->isFloatingPointTy() && (CI ||
I.isBinaryOp()) &&
1069 Hints->setPotentiallyUnsafe();
1079 if (PSE.getPredicate().isAlwaysTrue()) {
1080 AllowedExit.insert(&
I);
1084 "ValueUsedOutsideLoop", ORE, TheLoop, &
I);
1118 Value *HIncVal =
nullptr;
1133 Value *HIdx =
nullptr;
1134 for (
Value *Index :
GEP->indices()) {
1157 if (!AR || AR->getLoop() != TheLoop)
1167 LLVM_DEBUG(
dbgs() <<
"LV: Found histogram for: " << *HSt <<
"\n");
1174bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
1214 LLVM_DEBUG(
dbgs() <<
"LV: Checking for a histogram on: " << *SI <<
"\n");
1215 return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
1218bool LoopVectorizationLegality::canVectorizeMemory() {
1219 LAI = &LAIs.getInfo(*TheLoop);
1220 const OptimizationRemarkAnalysis *LAR = LAI->getReport();
1223 return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
1224 "loop not vectorized: ", *LAR);
1228 if (!LAI->canVectorizeMemory()) {
1231 "Cannot vectorize unsafe dependencies in uncountable exit loop with "
1233 "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
1238 return canVectorizeIndirectUnsafeDependences();
1241 if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
1243 "write to a loop invariant address could not "
1245 "CantVectorizeStoreToLoopInvariantAddress", ORE,
1254 if (!LAI->getStoresToInvariantAddresses().empty()) {
1257 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1263 "We don't allow storing to uniform addresses",
1264 "write of conditional recurring variant value to a loop "
1265 "invariant address could not be vectorized",
1266 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1274 if (TheLoop->contains(
Ptr)) {
1276 "Invariant address is calculated inside the loop",
1277 "write to a loop invariant address could not "
1279 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1285 if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
1291 ScalarEvolution *SE = PSE.getSE();
1293 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1305 erase_if(UnhandledStores, [SE, SI](StoreInst *
I) {
1307 I->getValueOperand()->getType() ==
1308 SI->getValueOperand()->getType();
1315 bool IsOK = UnhandledStores.
empty();
1319 "We don't allow storing to uniform addresses",
1320 "write to a loop invariant address could not "
1322 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1328 PSE.addPredicate(LAI->getPSE().getPredicate());
1333 bool EnableStrictReductions) {
1336 if (!Requirements->getExactFPInst() || Hints->allowReordering())
1342 if (!EnableStrictReductions ||
1373 return V == InvariantAddress ||
1384 return Inductions.count(PN);
1409 const Value *V)
const {
1411 return (Inst && InductionCastsToIgnore.count(Inst));
1420 return FixedOrderRecurrences.count(Phi);
1431 "Uncountable exiting block must be a direct predecessor of latch");
1437bool LoopVectorizationLegality::blockCanBePredicated(
1466 if (!SafePtrs.
count(LI->getPointerOperand()))
1467 MaskedOp.insert(LI);
1477 MaskedOp.insert(SI);
1481 if (
I.mayReadFromMemory() ||
I.mayWriteToMemory() ||
I.mayThrow())
1488bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1491 "IfConversionDisabled", ORE, TheLoop);
1495 assert(TheLoop->getNumBlocks() > 1 &&
"Single block loops are vectorizable");
1502 SmallPtrSet<Value *, 8> SafePointers;
1505 for (BasicBlock *BB : TheLoop->blocks()) {
1507 for (Instruction &
I : *BB)
1518 ScalarEvolution &SE = *PSE.getSE();
1520 for (Instruction &
I : *BB) {
1530 auto CanSpeculatePointerOp = [
this](
Value *
Ptr) {
1532 SmallPtrSet<Value *, 4> Visited;
1533 while (!Worklist.
empty()) {
1535 if (!Visited.
insert(CurrV).second)
1539 if (!CurrI || !TheLoop->contains(CurrI)) {
1543 TheLoop->getLoopPredecessor()
1567 CanSpeculatePointerOp(LI->getPointerOperand()) &&
1570 SafePointers.
insert(LI->getPointerOperand());
1576 for (BasicBlock *BB : TheLoop->blocks()) {
1580 if (TheLoop->isLoopExiting(BB)) {
1582 "LoopContainsUnsupportedSwitch", ORE,
1583 TheLoop, BB->getTerminator());
1588 "LoopContainsUnsupportedTerminator", ORE,
1589 TheLoop, BB->getTerminator());
1595 !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
1597 "Control flow cannot be substituted for a select",
"NoCFGForSelect",
1598 ORE, TheLoop, BB->getTerminator());
1608bool LoopVectorizationLegality::canVectorizeLoopCFG(
Loop *Lp,
1609 bool UseVPlanNativePath) {
1611 "VPlan-native path is not enabled.");
1621 bool DoExtraAnalysis = ORE->allowExtraAnalysis(
DEBUG_TYPE);
1627 "loop control flow is not understood by vectorizer",
1628 "CFGNotUnderstood", ORE, TheLoop);
1629 if (DoExtraAnalysis)
1638 "loop control flow is not understood by vectorizer",
1639 "CFGNotUnderstood", ORE, TheLoop);
1640 if (DoExtraAnalysis)
1649bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1650 Loop *Lp,
bool UseVPlanNativePath) {
1654 bool DoExtraAnalysis = ORE->allowExtraAnalysis(
DEBUG_TYPE);
1655 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1656 if (DoExtraAnalysis)
1664 for (Loop *SubLp : *Lp)
1665 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1666 if (DoExtraAnalysis)
1675bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1676 BasicBlock *LatchBB = TheLoop->getLoopLatch();
1679 "Cannot vectorize early exit loop",
1680 "NoLatchEarlyExit", ORE, TheLoop);
1684 if (Reductions.size() || FixedOrderRecurrences.size()) {
1686 "Found reductions or recurrences in early-exit loop",
1687 "Cannot vectorize early exit loop with reductions or recurrences",
1688 "RecurrencesInEarlyExitLoop", ORE, TheLoop);
1692 SmallVector<BasicBlock *, 8> ExitingBlocks;
1693 TheLoop->getExitingBlocks(ExitingBlocks);
1697 BasicBlock *SingleUncountableExitingBlock =
nullptr;
1698 for (BasicBlock *BB : ExitingBlocks) {
1700 PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
1704 "Early exiting block does not have exactly two successors",
1705 "Incorrect number of successors from early exiting block",
1706 "EarlyExitTooManySuccessors", ORE, TheLoop);
1710 if (SingleUncountableExitingBlock) {
1712 "Loop has too many uncountable exits",
1713 "Cannot vectorize early exit loop with more than one early exit",
1714 "TooManyUncountableEarlyExits", ORE, TheLoop);
1718 SingleUncountableExitingBlock = BB;
1720 CountableExitingBlocks.push_back(BB);
1728 if (!SingleUncountableExitingBlock) {
1729 LLVM_DEBUG(
dbgs() <<
"LV: Cound not find any uncountable exits");
1736 if (LatchPredBB != SingleUncountableExitingBlock) {
1738 "Cannot vectorize early exit loop",
1739 "EarlyExitNotLatchPredecessor", ORE, TheLoop);
1745 PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1747 "Cannot determine exact exit count for latch block",
1748 "Cannot vectorize early exit loop",
1749 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1753 "Latch block not found in list of countable exits!");
1758 switch (
I->getOpcode()) {
1759 case Instruction::Load:
1760 case Instruction::Store:
1761 case Instruction::PHI:
1762 case Instruction::Br:
1770 bool HasSideEffects =
false;
1771 for (
auto *BB : TheLoop->blocks())
1772 for (
auto &
I : *BB) {
1773 if (
I.mayWriteToMemory()) {
1775 HasSideEffects =
true;
1781 "Complex writes to memory unsupported in early exit loops",
1782 "Cannot vectorize early exit loop with complex writes to memory",
1783 "WritesInEarlyExitLoop", ORE, TheLoop);
1787 if (!IsSafeOperation(&
I)) {
1789 "cannot be speculatively executed",
1790 "UnsafeOperationsEarlyExitLoop", ORE,
1798 "Expected latch predecessor to be the early exiting block");
1802 if (!HasSideEffects) {
1808 "Loop may fault",
"Cannot vectorize non-read-only early exit loop",
1809 "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
1812 }
else if (!canUncountableExitConditionLoadBeMoved(
1813 SingleUncountableExitingBlock))
1817 for (LoadInst *LI : NonDerefLoads) {
1822 "Loop contains potentially faulting strided load",
1823 "Cannot vectorize early exit loop with "
1824 "strided fault-only-first load",
1825 "EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
1828 PotentiallyFaultingLoads.insert(LI);
1829 LLVM_DEBUG(
dbgs() <<
"LV: Found potentially faulting load: " << *LI
1833 [[maybe_unused]]
const SCEV *SymbolicMaxBTC =
1834 PSE.getSymbolicMaxBackedgeTakenCount();
1838 "Failed to get symbolic expression for backedge taken count");
1839 LLVM_DEBUG(
dbgs() <<
"LV: Found an early exit loop with symbolic max "
1840 "backedge taken count: "
1841 << *SymbolicMaxBTC <<
'\n');
1842 UncountableExitingBB = SingleUncountableExitingBlock;
1843 UncountableExitWithSideEffects = HasSideEffects;
1847bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
1858 using namespace llvm::PatternMatch;
1862 if (!
match(Br->getCondition(),
1866 "Early exit loop with store but no supported condition load",
1867 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1872 if (!TheLoop->isLoopInvariant(R)) {
1874 "Early exit loop with store but no supported condition load",
1875 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1881 const SCEV *PtrScev = PSE.getSE()->getSCEV(
Ptr);
1884 "Uncountable exit condition depends on load with an address that is "
1885 "not an add recurrence",
1886 "EarlyExitLoadInvariantAddress", ORE, TheLoop);
1897 "Cannot vectorize potentially faulting early exit loop",
1898 "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1902 ICFLoopSafetyInfo SafetyInfo;
1910 "Unhandled control flow in uncountable exit loop with side effects");
1915 for (
auto *BB : TheLoop->blocks()) {
1916 for (
auto &
I : *BB) {
1920 if (
I.mayWriteToMemory()) {
1922 AliasResult AR = AA->alias(
Ptr,
SI->getPointerOperand());
1928 "Cannot determine whether critical uncountable exit load address "
1929 "does not alias with a memory write",
1930 "CantVectorizeAliasWithCriticalUncountableExitLoad", ORE, TheLoop);
1944 bool DoExtraAnalysis = ORE->allowExtraAnalysis(
DEBUG_TYPE);
1947 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1948 if (DoExtraAnalysis) {
1957 LLVM_DEBUG(
dbgs() <<
"LV: Found a loop: " << TheLoop->getHeader()->getName()
1962 if (!TheLoop->isInnermost()) {
1963 assert(UseVPlanNativePath &&
"VPlan-native path is not enabled.");
1965 if (!canVectorizeOuterLoop()) {
1967 "UnsupportedOuterLoop", ORE, TheLoop);
1977 assert(TheLoop->isInnermost() &&
"Inner loop expected.");
1979 unsigned NumBlocks = TheLoop->getNumBlocks();
1980 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1982 if (DoExtraAnalysis)
1989 if (!canVectorizeInstrs()) {
1990 LLVM_DEBUG(
dbgs() <<
"LV: Can't vectorize the instructions or CFG\n");
1991 if (DoExtraAnalysis)
1998 if (TheLoop->getExitingBlock()) {
2000 "UnsupportedUncountableLoop", ORE, TheLoop);
2001 if (DoExtraAnalysis)
2006 if (!isVectorizableEarlyExitLoop()) {
2009 "Must be false without vectorizable early-exit loop");
2010 if (DoExtraAnalysis)
2019 if (!canVectorizeMemory()) {
2020 LLVM_DEBUG(
dbgs() <<
"LV: Can't vectorize due to memory conflicts\n");
2021 if (DoExtraAnalysis)
2028 if (UncountableExitWithSideEffects) {
2030 "Writes to memory unsupported in early exit loops",
2031 "Cannot vectorize early exit loop with writes to memory",
2032 "WritesInEarlyExitLoop", ORE, TheLoop);
2038 << (LAI->getRuntimePointerChecking()->Need
2039 ?
" (with a runtime bound check)"
2048 if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
2050 "due to SCEVThreshold");
2052 "Too many SCEV assumptions need to be made and checked at runtime",
2053 "TooManySCEVRunTimeChecks", ORE, TheLoop);
2054 if (DoExtraAnalysis)
2072 if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
2075 <<
"LV: Cannot fold tail by masking. Requires a singe latch exit\n");
2079 LLVM_DEBUG(
dbgs() <<
"LV: checking if tail can be folded by masking.\n");
2084 ReductionLiveOuts.
insert(Reduction.second.getLoopExitInstr());
2087 for (
auto *AE : AllowedExit) {
2090 if (ReductionLiveOuts.
count(AE))
2094 if (TheLoop->contains(UI))
2098 <<
"LV: Cannot fold tail by masking, loop has an outside user for "
2105 PHINode *OrigPhi = Entry.first;
2108 if (!TheLoop->contains(UI)) {
2109 LLVM_DEBUG(
dbgs() <<
"LV: Cannot fold tail by masking, loop IV has an "
2124 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
2142 [[maybe_unused]]
bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
2143 assert(R &&
"Must be able to predicate block when tail-folding.");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive.")))
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
static cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
static cl::opt< bool > AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden, cl::desc("Enable recognition of non-constant strided " "pointer induction variables."))
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
static cl::opt< bool > EnableHistogramVectorization("enable-histogram-loop-vectorization", cl::init(false), cl::Hidden, cl::desc("Enables autovectorization of some loops containing histograms"))
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
This file defines the LoopVectorizationLegality class.
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
static bool isSimple(Instruction *I)
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
Virtual Register Rewriter
static const uint32_t IV[8]
Class for arbitrary precision integers.
@ NoAlias
The two locations do not alias at all.
LLVM Basic Block Representation.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
A parsed version of the target data layout string, and methods for querying it.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
A struct for saving information about induction variables.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
static LLVM_ABI bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
iterator_range< block_iterator > blocks() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool isLoopHeader(const BlockT *BB) const
bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
bool blockNeedsPredication(BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
bool hasUncountableExitWithSideEffects() const
Returns true if this is an early exit loop with state-changing or potentially-faulting operations and...
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
bool isUniform(Value *V, ElementCount VF) const
Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
bool isInvariant(Value *V) const
Returns true if V is invariant across all loop iterations according to SCEV.
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
bool canFoldTailByMasking() const
Return true if we can vectorize this loop while folding its tail by masking.
void prepareToFoldTailByMasking()
Mark all respective loads/stores for masking.
bool hasUncountableEarlyExit() const
Returns true if the loop has exactly one uncountable early exit, i.e.
bool isUniformMemOp(Instruction &I, ElementCount VF) const
A uniform memory op is a load or store which accesses the same memory location on all VF lanes,...
BasicBlock * getUncountableEarlyExitingBlock() const
Returns the uncountable early exiting block, if there is exactly one.
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
@ SK_Unspecified
Not selected.
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
enum ForceKind getForce() const
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
void emitRemarkWithHints() const
Dumps all the hint information.
ElementCount getWidth() const
@ FK_Enabled
Forcing enabled.
@ FK_Undefined
Not selected.
@ FK_Disabled
Forcing disabled.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
unsigned getInterleave() const
unsigned getIsVectorized() const
Represents a single loop in the control flow graph.
bool isLoopInvariant(const Value *V, bool HasCoroSuspendInst=false) const
Return true if the specified value is loop invariant.
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
LLVM_ABI StringRef getString() const
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
iterator find(const KeyT &Key)
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static LLVM_ABI bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Instruction * getLoopExitInstr() const
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
const Loop * getLoop() const
This visitor recursively visits a SCEV expression and re-writes it.
const SCEV * visit(const SCEV *S)
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getCouldNotCompute()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
static constexpr size_t npos
Provides information about what library functions are available for the current target.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Value * getOperand(unsigned i) const
static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
LLVM Value Representation.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr bool isZero() const
const ParentTy * getParent() const
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)
Matches StoreInst.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Add a small namespace to avoid name clashes with the classes used in the streaming interface.
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
static bool canWidenCallReturnType(Type *Ty)
Returns true if the call return type Ty can be widened by the loop vectorizer.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
static IntegerType * getWiderInductionTy(const DataLayout &DL, Type *Ty0, Type *Ty1)
static IntegerType * getInductionIntegerTy(const DataLayout &DL, Type *Ty)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
bool canVectorizeTy(Type *Ty)
Returns true if Ty is a valid vector element type, void, or an unpacked literal struct where all elem...
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool isReadOnlyLoop(Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, SmallVectorImpl< LoadInst * > &NonDereferenceableAndAlignedLoads, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns true if the loop contains read-only memory accesses and doesn't throw.
LLVM_ABI llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop, const PredicatedScalarEvolution &PSE, SmallVectorImpl< HistogramInfo > &Histograms)
Find histogram operations that match high-level code in loops:
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
Dependence between memory access instructions.
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
static LLVM_ABI VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Collection of parameters shared between the Loop Vectorizer and the Loop Access Analysis.
static LLVM_ABI const unsigned MaxVectorWidth
Maximum SIMD width.
static LLVM_ABI bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static LLVM_ABI unsigned VectorizationInterleave
Interleave factor as overridden by the user.