31using namespace PatternMatch;
33#define LV_NAME "loop-vectorize"
34#define DEBUG_TYPE LV_NAME
38 cl::desc(
"Enable if-conversion during vectorization."));
43 cl::desc(
"Allow enabling loop hints to reorder "
44 "FP operations during vectorization."));
51 cl::desc(
"The maximum number of SCEV checks allowed."));
55 cl::desc(
"The maximum number of SCEV checks allowed with a "
56 "vectorize(enable) pragma"));
62 cl::desc(
"Control whether the compiler can use scalable vectors to "
66 "Scalable vectorization is disabled."),
69 "Scalable vectorization is available and favored when the "
70 "cost is inconclusive."),
73 "Scalable vectorization is available and favored when the "
74 "cost is inconclusive.")));
81bool LoopVectorizeHints::Hint::validate(
unsigned Val) {
92 return (Val == 0 || Val == 1);
98 bool InterleaveOnlyWhenForced,
102 Interleave(
"interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
103 Force(
"vectorize.enable", FK_Undefined, HK_FORCE),
104 IsVectorized(
"isvectorized", 0, HK_ISVECTORIZED),
105 Predicate(
"vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
106 Scalable(
"vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
107 TheLoop(L), ORE(ORE) {
109 getHintsFromMetadata();
143 if (IsVectorized.Value != 1)
150 <<
"LV: Interleaving disabled by the pass manager\n");
163 {
Twine(Prefix(),
"vectorize.").
str(),
164 Twine(Prefix(),
"interleave.").
str()},
169 IsVectorized.Value = 1;
175 LLVM_DEBUG(
dbgs() <<
"LV: Not vectorizing: #pragma vectorize disable.\n");
181 LLVM_DEBUG(
dbgs() <<
"LV: Not vectorizing: No #pragma vectorize enable.\n");
187 LLVM_DEBUG(
dbgs() <<
"LV: Not vectorizing: Disabled/already vectorized.\n");
193 "AllDisabled", L->getStartLoc(),
195 <<
"loop not vectorized: vectorization and interleaving are "
196 "explicitly disabled, or the loop has already been "
213 <<
"loop not vectorized: vectorization is explicitly disabled";
215 OptimizationRemarkMissed R(LV_NAME,
"MissedDetails",
216 TheLoop->getStartLoc(), TheLoop->getHeader());
217 R <<
"loop not vectorized";
218 if (Force.Value == LoopVectorizeHints::FK_Enabled) {
219 R <<
" (Force=" << NV(
"Force", true);
220 if (Width.Value != 0)
221 R <<
", Vector Width=" << NV(
"VectorWidth", getWidth());
222 if (getInterleave() != 0)
223 R <<
", Interleave Count=" << NV(
"InterleaveCount", getInterleave());
247 EC.getKnownMinValue() > 1);
250void LoopVectorizeHints::getHintsFromMetadata() {
259 for (
unsigned i = 1, ie = LoopID->
getNumOperands(); i < ie; ++i) {
266 if (!MD || MD->getNumOperands() == 0)
268 S = dyn_cast<MDString>(MD->getOperand(0));
269 for (
unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
270 Args.push_back(MD->getOperand(i));
272 S = dyn_cast<MDString>(LoopID->
getOperand(i));
273 assert(Args.size() == 0 &&
"too many arguments for MDString");
281 if (
Args.size() == 1)
282 setHint(
Name, Args[0]);
287 if (!
Name.startswith(Prefix()))
294 unsigned Val =
C->getZExtValue();
296 Hint *Hints[] = {&Width, &Interleave, &Force,
297 &IsVectorized, &Predicate, &Scalable};
298 for (
auto *
H : Hints) {
299 if (
Name ==
H->Name) {
300 if (
H->validate(Val))
349 auto *LatchBr = dyn_cast<BranchInst>(Latch->
getTerminator());
350 if (!LatchBr || LatchBr->isUnconditional()) {
356 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
359 dbgs() <<
"LV: Loop latch condition is not a compare instruction.\n");
363 Value *CondOp0 = LatchCmp->getOperand(0);
364 Value *CondOp1 = LatchCmp->getOperand(1);
365 Value *IVUpdate =
IV->getIncomingValueForBlock(Latch);
368 LLVM_DEBUG(
dbgs() <<
"LV: Loop latch condition is not uniform.\n");
382 for (
Loop *SubLp : *Lp)
391 return DL.getIntPtrType(Ty);
415 if (!AllowedExit.
count(Inst))
421 LLVM_DEBUG(
dbgs() <<
"LV: Found an outside user for : " << *UI <<
'\n');
436 Value *APtr =
A->getPointerOperand();
437 Value *BPtr =
B->getPointerOperand();
454 bool OptForSize =
F->hasOptSize() ||
457 bool CanAddPredicate = !OptForSize;
459 CanAddPredicate,
false).value_or(0);
460 if (Stride == 1 || Stride == -1)
480bool LoopVectorizationLegality::canVectorizeOuterLoop() {
490 auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
493 "loop control flow is not understood by vectorizer",
494 "CFGNotUnderstood", ORE, TheLoop);
507 if (Br && Br->isConditional() &&
512 "loop control flow is not understood by vectorizer",
513 "CFGNotUnderstood", ORE, TheLoop);
526 "loop control flow is not understood by vectorizer",
527 "CFGNotUnderstood", ORE, TheLoop);
535 if (!setupOuterLoopInductions()) {
537 "Unsupported outer loop Phi(s)",
538 "UnsupportedPhi", ORE, TheLoop);
548void LoopVectorizationLegality::addInductionPhi(
551 Inductions[Phi] =
ID;
574 ID.getConstIntStepValue() &&
ID.getConstIntStepValue()->isOne() &&
575 isa<Constant>(
ID.getStartValue()) &&
576 cast<Constant>(
ID.getStartValue())->isNullValue()) {
582 if (!PrimaryInduction || PhiTy == WidestIndTy)
583 PrimaryInduction = Phi;
600bool LoopVectorizationLegality::setupOuterLoopInductions() {
604 auto isSupportedPhi = [&](
PHINode &Phi) ->
bool {
608 addInductionPhi(&Phi,
ID, AllowedExit);
615 <<
"LV: Found unsupported PHI for outer loop vectorization.\n");
642 TLI.
getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
650 "Caller may decide to scalarize a variant using a scalable VF");
655bool LoopVectorizationLegality::canVectorizeInstrs() {
662 if (
auto *Phi = dyn_cast<PHINode>(&
I)) {
668 "loop control flow is not understood by vectorizer",
669 "CFGNotUnderstood", ORE, TheLoop);
689 "loop control flow is not understood by vectorizer",
690 "CFGNotUnderstood", ORE, TheLoop, Phi);
699 Reductions[Phi] = RedDes;
719 addInductionPhi(Phi,
ID, AllowedExit);
727 FixedOrderRecurrences.
insert(Phi);
734 addInductionPhi(Phi,
ID, AllowedExit);
739 "value that could not be identified as "
740 "reduction is used outside the loop",
741 "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
749 auto *CI = dyn_cast<CallInst>(&
I);
752 !isa<DbgInfoIntrinsic>(CI) &&
753 !(CI->getCalledFunction() && TLI &&
760 TLI && CI->getCalledFunction() &&
761 CI->getType()->isFloatingPointTy() &&
762 TLI->
getLibFunc(CI->getCalledFunction()->getName(), Func) &&
771 "Found a non-intrinsic callsite",
772 "library call cannot be vectorized. "
773 "Try compiling with -fno-math-errno, -ffast-math, "
775 "CantVectorizeLibcall", ORE, TheLoop, CI);
778 "call instruction cannot be vectorized",
779 "CantVectorizeLibcall", ORE, TheLoop, CI);
787 auto *SE = PSE.
getSE();
789 for (
unsigned i = 0, e = CI->arg_size(); i != e; ++i)
791 if (!SE->isLoopInvariant(PSE.
getSCEV(CI->getOperand(i)), TheLoop)) {
793 "intrinsic instruction cannot be vectorized",
794 "CantVectorizeIntrinsic", ORE, TheLoop, CI);
803 !
I.getType()->isVoidTy()) ||
804 isa<ExtractElementInst>(
I)) {
806 "instruction return type cannot be vectorized",
807 "CantVectorizeInstructionReturnType", ORE, TheLoop, &
I);
812 if (
auto *ST = dyn_cast<StoreInst>(&
I)) {
813 Type *
T =
ST->getValueOperand()->getType();
816 "store instruction cannot be vectorized",
817 "CantVectorizeStore", ORE, TheLoop, ST);
823 if (
ST->getMetadata(LLVMContext::MD_nontemporal)) {
826 assert(VecTy &&
"did not find vectorized version of stored type");
829 "nontemporal store instruction cannot be vectorized",
830 "nontemporal store instruction cannot be vectorized",
831 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
836 }
else if (
auto *LD = dyn_cast<LoadInst>(&
I)) {
837 if (
LD->getMetadata(LLVMContext::MD_nontemporal)) {
841 assert(VecTy &&
"did not find vectorized version of load type");
844 "nontemporal load instruction cannot be vectorized",
845 "nontemporal load instruction cannot be vectorized",
846 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
856 }
else if (
I.getType()->isFloatingPointTy() && (CI ||
I.isBinaryOp()) &&
859 Hints->setPotentiallyUnsafe();
874 "value cannot be used outside the loop",
875 "ValueUsedOutsideLoop", ORE, TheLoop, &
I);
881 if (!PrimaryInduction) {
882 if (Inductions.
empty()) {
884 "loop induction variable could not be identified",
885 "NoInductionVariable", ORE, TheLoop);
887 }
else if (!WidestIndTy) {
889 "integer loop induction variable could not be identified",
890 "NoIntegerInductionVariable", ORE, TheLoop);
893 LLVM_DEBUG(
dbgs() <<
"LV: Did not find one integer induction var.\n");
902 if (
any_of(FixedOrderRecurrences, [LoopLatch,
this](
const PHINode *Phi) {
905 return SinkAfter.contains(V);
912 if (PrimaryInduction && WidestIndTy != PrimaryInduction->
getType())
913 PrimaryInduction =
nullptr;
918bool LoopVectorizationLegality::canVectorizeMemory() {
924 "loop not vectorized: ", *LAR);
944 "We don't allow storing to uniform addresses",
945 "write of conditional recurring variant value to a loop "
946 "invariant address could not be vectorized",
947 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
957 "Invariant address is calculated inside the loop",
958 "write to a loop invariant address could not "
960 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
989 I->getValueOperand()->getType() ==
990 SI->getValueOperand()->getType();
997 bool IsOK = UnhandledStores.
empty();
1001 "We don't allow storing to uniform addresses",
1002 "write to a loop invariant address could not "
1004 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1015 bool EnableStrictReductions) {
1024 if (!EnableStrictReductions ||
1055 return V == InvariantAddress ||
1062 PHINode *PN = dyn_cast_or_null<PHINode>(In0);
1066 return Inductions.
count(PN);
1091 const Value *V)
const {
1092 auto *Inst = dyn_cast<Instruction>(V);
1093 return (Inst && InductionCastsToIgnore.
count(Inst));
1102 return FixedOrderRecurrences.
count(Phi);
1109bool LoopVectorizationLegality::blockCanBePredicated(
1116 if (
match(&
I, m_Intrinsic<Intrinsic::assume>())) {
1117 ConditionalAssumes.
insert(&
I);
1124 if (isa<NoAliasScopeDeclInst>(&
I))
1131 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
1135 [](
VFInfo &Info) {
return Info.isMasked(); })) {
1142 if (
auto *LI = dyn_cast<LoadInst>(&
I)) {
1143 if (!SafePtrs.
count(LI->getPointerOperand()))
1153 if (
auto *SI = dyn_cast<StoreInst>(&
I)) {
1158 if (
I.mayReadFromMemory() ||
I.mayWriteToMemory() ||
I.mayThrow())
1165bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1168 "if-conversion is disabled",
1169 "IfConversionDisabled",
1209 if (!isa<BranchInst>(BB->getTerminator())) {
1211 "loop contains a switch statement",
1212 "LoopContainsSwitch", ORE, TheLoop,
1213 BB->getTerminator());
1219 if (!blockCanBePredicated(BB, SafePointers, MaskedOp,
1220 ConditionalAssumes)) {
1222 "Control flow cannot be substituted for a select",
1223 "control flow cannot be substituted for a select",
1224 "NoCFGForSelect", ORE, TheLoop,
1225 BB->getTerminator());
1236bool LoopVectorizationLegality::canVectorizeLoopCFG(
Loop *Lp,
1237 bool UseVPlanNativePath) {
1239 "VPlan-native path is not enabled.");
1255 "loop control flow is not understood by vectorizer",
1256 "CFGNotUnderstood", ORE, TheLoop);
1257 if (DoExtraAnalysis)
1266 "loop control flow is not understood by vectorizer",
1267 "CFGNotUnderstood", ORE, TheLoop);
1268 if (DoExtraAnalysis)
1277bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1278 Loop *Lp,
bool UseVPlanNativePath) {
1283 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1284 if (DoExtraAnalysis)
1292 for (
Loop *SubLp : *Lp)
1293 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1294 if (DoExtraAnalysis)
1311 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1312 if (DoExtraAnalysis)
1325 assert(UseVPlanNativePath &&
"VPlan-native path is not enabled.");
1327 if (!canVectorizeOuterLoop()) {
1329 "unsupported outer loop",
1330 "UnsupportedOuterLoop",
1344 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1346 if (DoExtraAnalysis)
1353 if (!canVectorizeInstrs()) {
1354 LLVM_DEBUG(
dbgs() <<
"LV: Can't vectorize the instructions or CFG\n");
1355 if (DoExtraAnalysis)
1362 if (!canVectorizeMemory()) {
1363 LLVM_DEBUG(
dbgs() <<
"LV: Can't vectorize due to memory conflicts\n");
1364 if (DoExtraAnalysis)
1372 ?
" (with a runtime bound check)"
1382 "Too many SCEV assumptions need to be made and checked at runtime",
1383 "TooManySCEVRunTimeChecks", ORE, TheLoop);
1384 if (DoExtraAnalysis)
1399 LLVM_DEBUG(
dbgs() <<
"LV: checking if tail can be folded by masking.\n");
1407 for (
auto *AE : AllowedExit) {
1410 if (ReductionLiveOuts.
count(AE))
1412 for (
User *U : AE->users()) {
1418 <<
"LV: Cannot fold tail by masking, loop has an outside user for "
1433 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp,
1434 TmpConditionalAssumes)) {
1435 LLVM_DEBUG(
dbgs() <<
"LV: Cannot fold tail by masking as requested.\n");
1443 ConditionalAssumes.
insert(TmpConditionalAssumes.
begin(),
1444 TmpConditionalAssumes.
end());
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
loop Loop Strength Reduction
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive.")))
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
This file defines the LoopVectorizationLegality class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const uint32_t IV[8]
Class for arbitrary precision integers.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
A parsed version of the target data layout string in and methods for querying it.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
A struct for saving information about induction variables.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C / sizeof(elem).
@ IK_IntInduction
Integer induction variable. Step = C.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
const LoopAccessInfo & getInfo(Loop &L)
bool hasDependenceInvolvingLoopInvariantAddress() const
If the loop has memory dependence involving an invariant address, i.e.
ArrayRef< StoreInst * > getStoresToInvariantAddresses() const
Return the list of stores to invariant addresses.
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
const RuntimePointerChecking * getRuntimePointerChecking() const
bool canVectorizeMemory() const
Return true if we can analyze the memory accesses in the loop and there are no memory dependence cycles.
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool isLoopHeader(const BlockT *BB) const
bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
bool blockNeedsPredication(BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
bool prepareToFoldTailByMasking()
Return true if we can vectorize this loop while folding its tail by masking, and mark all respective ...
bool isUniformMemOp(Instruction &I) const
A uniform memory op is a load or store which accesses the same memory location on all lanes.
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
Instruction * getExactFPInst()
void addExactFPMathInst(Instruction *I)
Track the 1st floating-point instruction that can not be reassociated.
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
@ SK_Unspecified
Not selected.
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
enum ForceKind getForce() const
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
void emitRemarkWithHints() const
Dumps all the hint information.
ElementCount getWidth() const
@ FK_Enabled
Forcing enabled.
@ FK_Undefined
Not selected.
@ FK_Disabled
Forcing disabled.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
unsigned getInterleave() const
unsigned getIsVectorized() const
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
StringRef getString() const
static MDString * get(LLVMContext &Context, StringRef Str)
size_type count(const KeyT &Key) const
iterator find(const KeyT &Key)
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
const SCEVPredicate & getPredicate() const
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Instruction * getLoopExitInstr() const
static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, MapVector< Instruction *, Instruction * > &SinkAfter, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
bool Need
This flag indicates if we need to add the runtime check.
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
static constexpr size_t npos
Provides information about what library functions are available for the current target.
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
StringRef getName() const
Return a constant reference to the value's name.
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isZero() const
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
bool match(Val *V, const Pattern &P)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
DenseMap< const Value *, Value * > ValueToValueMap
cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
Holds the VFShape for a specific scalar to vector function mapping.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Collection of parameters shared between the Loop Vectorizer and the Loop Access Analysis.
static const unsigned MaxVectorWidth
Maximum SIMD width.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.