59 cl::desc(
"Use dot format instead of plain text when dumping VPlans"));
61#define DEBUG_TYPE "loop-vectorize"
63#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
67 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() :
nullptr);
87 : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
89 Def->addDefinedValue(
this);
93 assert(Users.empty() &&
"trying to delete a VPValue with remaining users");
95 Def->removeDefinedValue(
this);
98#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
109 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() :
nullptr);
115 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(
this);
117 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() :
nullptr);
124 return cast_or_null<VPRecipeBase>(
Def);
128 return cast_or_null<VPRecipeBase>(
Def);
136 while ((Next = Next->getParent()))
142 for (
unsigned i = 0; i < WorkList.
size(); i++) {
143 T *Current = WorkList[i];
144 if (Current->getNumPredecessors() == 0)
146 auto &Predecessors = Current->getPredecessors();
147 WorkList.
insert(Predecessors.begin(), Predecessors.end());
162 return cast<VPBasicBlock>(
Block);
169 return cast<VPBasicBlock>(
Block);
173 assert(ParentPlan->
getEntry() ==
this &&
"Can only set plan on its entry.");
182 return cast<VPBasicBlock>(
Block);
189 return cast<VPBasicBlock>(
Block);
193 if (!Successors.empty() || !Parent)
196 "Block w/o successors not the exiting block of its parent.");
201 if (!Predecessors.empty() || !Parent)
204 "Block w/o predecessors not the entry of its parent.");
215 while (It !=
end() && It->isPhi())
225 :
TTI(
TTI), VF(VF),
CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
226 LVer(nullptr), TypeAnalysis(CanonicalIVTy) {}
230 return Def->getLiveInIRValue();
242 if (!VecPart->getType()->isVectorTy()) {
243 assert(
Lane.isFirstLane() &&
"cannot get lane > 0 for scalar");
259 "Trying to access a single scalar per part but has multiple scalars "
268 auto GetBroadcastInstrs = [
this, Def](
Value *V) {
269 bool SafeToHoist = Def->isDefinedOutsideLoopRegions();
277 if (LoopVectorPreHeader)
289 assert(Def->isLiveIn() &&
"expected a live-in");
290 Value *IRV = Def->getLiveInIRValue();
291 Value *
B = GetBroadcastInstrs(IRV);
300 set(Def, ScalarValue);
313 "unexpected recipe found to be invariant");
318 auto *LastInst = cast<Instruction>(
get(Def, LastLane));
324 isa<PHINode>(LastInst)
335 Value *VectorValue =
nullptr;
337 VectorValue = GetBroadcastInstrs(ScalarValue);
338 set(Def, VectorValue);
346 VectorValue =
get(Def);
353 VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
361 if (
LVer && isa<LoadInst, StoreInst>(Orig))
370 if (
Instruction *ToI = dyn_cast<Instruction>(To)) {
393 << DIL->getFilename() <<
" Line: " << DIL->getLine());
404 set(Def, VectorValue);
422 for (
VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
427 assert(PredBB &&
"Predecessor basic-block not found building successor.");
431 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
432 if (isa<UnreachableInst>(PredBBTerminator)) {
433 assert(PredVPSuccessors.size() == 1 &&
434 "Predecessor ending w/o branch must have single successor.");
435 DebugLoc DL = PredBBTerminator->getDebugLoc();
439 }
else if (TermBr && !TermBr->isConditional()) {
440 TermBr->setSuccessor(0, NewBB);
444 unsigned idx = PredVPSuccessors.front() ==
this ? 0 : 1;
446 (!TermBr->getSuccessor(idx) ||
447 (isa<VPIRBasicBlock>(
this) && TermBr->getSuccessor(idx) == NewBB)) &&
448 "Trying to reset an existing successor block.");
449 TermBr->setSuccessor(idx, NewBB);
456 assert(getHierarchicalSuccessors().
size() <= 2 &&
457 "VPIRBasicBlock can have at most two successors at the moment!");
461 executeRecipes(State, IRBB);
464 if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
467 IRBB->getTerminator()->eraseFromParent();
470 (getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) &&
471 "other blocks must be terminated by a branch");
474 connectToPredecessors(State->
CFG);
482 auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
483 return R && R->isReplicator();
487 if (
this == getPlan()->getVectorPreheader() ||
488 (Replica &&
this ==
getParent()->getEntry()) ||
489 IsReplicateRegion(getSingleHierarchicalPredecessor())) {
496 NewBB = createEmptyBasicBlock(State->
CFG);
509 connectToPredecessors(State->
CFG);
513 executeRecipes(State, NewBB);
518 for (
auto *Def : R.definedValues())
519 Def->replaceAllUsesWith(NewValue);
521 for (
unsigned I = 0, E = R.getNumOperands();
I != E;
I++)
522 R.setOperand(
I, NewValue);
528 <<
" in BB:" << BB->
getName() <<
'\n');
539 assert((SplitAt == end() || SplitAt->getParent() ==
this) &&
540 "can only split at a position in the same block");
558 if (
P &&
P->isReplicator()) {
560 assert(!cast<VPRegionBlock>(
P)->isReplicator() &&
561 "unexpected nested replicate regions");
578 "block with multiple successors doesn't have a recipe as terminator");
591 "conditional branch recipe");
598 "block with 0 or 1 successors terminated by conditional branch recipe");
618#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
620 if (getSuccessors().empty()) {
621 O << Indent <<
"No successors\n";
623 O << Indent <<
"Successor(s): ";
625 for (
auto *Succ : getSuccessors())
626 O << LS << Succ->getName();
633 O << Indent <<
getName() <<
":\n";
635 auto RecipeIndent = Indent +
" ";
641 printSuccessors(O, Indent);
655 bool InRegion = Entry->getParent();
659 Old2NewVPBlocks[BB] = NewBB;
660 if (InRegion && BB->getNumSuccessors() == 0) {
661 assert(!Exiting &&
"Multiple exiting blocks?");
665 assert((!InRegion || Exiting) &&
"regions must have a single exiting block");
672 NewPreds.
push_back(Old2NewVPBlocks[Pred]);
677 NewSuccs.
push_back(Old2NewVPBlocks[Succ]);
685 for (
const auto &[OldBB, NewBB] :
688 for (
const auto &[OldPred, NewPred] :
689 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
690 assert(NewPred == Old2NewVPBlocks[OldPred] &&
"Different predecessors");
692 for (
const auto &[OldSucc, NewSucc] :
693 zip(OldBB->successors(), NewBB->successors()))
694 assert(NewSucc == Old2NewVPBlocks[OldSucc] &&
"Different successors");
698 return std::make_pair(Old2NewVPBlocks[Entry],
699 Exiting ? Old2NewVPBlocks[Exiting] :
nullptr);
703 const auto &[NewEntry, NewExiting] =
cloneFrom(getEntry());
707 Block->setParent(NewRegion);
715 Block->dropAllReferences(NewValue);
722 if (!isReplicator()) {
739 Block->execute(State);
746 assert(!State->
Lane &&
"Replicating a Region with non-null instance.");
757 Block->execute(State);
773 if (!isReplicator()) {
782 <<
": vector loop backedge\n");
783 Cost += BackedgeCost;
798 VPBasicBlock *Then = cast<VPBasicBlock>(getEntry()->getSuccessors()[0]);
809#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
812 O << Indent << (isReplicator() ?
"<xVFxUF> " :
"<x1> ") <<
getName() <<
": {";
813 auto NewIndent = Indent +
" ";
818 O << Indent <<
"}\n";
820 printSuccessors(O, Indent);
833 Block->dropAllReferences(&DummyValue);
837 for (
VPValue *VPV : VPLiveInsToFree)
839 if (BackedgeTakenCount)
840 delete BackedgeTakenCount;
853 bool RequiresScalarEpilogueCheck,
854 bool TailFolded,
Loop *TheLoop) {
855 auto Plan = std::make_unique<VPlan>(TheLoop);
871 assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
872 "Invalid loop count");
875 InductionTy, TheLoop);
884 auto *TopRegion =
new VPRegionBlock(HeaderVPBB, LatchVPBB,
"vector loop",
893 if (!RequiresScalarEpilogueCheck) {
924 ScalarLatchTerm->getDebugLoc(),
"cmp.n");
926 ScalarLatchTerm->getDebugLoc());
934 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
937 "trip.count.minus.1");
938 BackedgeTakenCount->setUnderlyingValue(TCMO);
941 VectorTripCount.setUnderlyingValue(VectorTripCountV);
945 assert(VFxUF.getNumUsers() &&
"VFxUF expected to always have users");
946 unsigned UF = getUF();
947 if (
VF.getNumUsers()) {
949 VF.setUnderlyingValue(RuntimeVF);
950 VFxUF.setUnderlyingValue(
965 assert(!R.isPhi() &&
"Tried to move phi recipe to end of block");
966 R.moveBefore(*IRVPBB, IRVPBB->
end());
985 cast<BranchInst>(VectorPreHeader->
getTerminator())->setSuccessor(0,
nullptr);
1001 <<
", UF=" << getUF() <<
'\n');
1002 setName(
"Final VPlan");
1023 Block->execute(State);
1030 VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
1033 if (isa<VPWidenPHIRecipe>(&R))
1036 if (isa<VPWidenInductionRecipe>(&R)) {
1038 if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1039 Phi = cast<PHINode>(State->
get(R.getVPSingleValue()));
1041 auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1043 "recipe generating only scalars should have been replaced");
1044 auto *
GEP = cast<GetElementPtrInst>(State->
get(WidenPhi));
1045 Phi = cast<PHINode>(
GEP->getPointerOperand());
1048 Phi->setIncomingBlock(1, VectorLatchBB);
1052 Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1056 if (
auto *
IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1061 auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1062 bool NeedsScalar = isa<VPScalarPHIRecipe>(PhiR) ||
1063 (isa<VPReductionPHIRecipe>(PhiR) &&
1064 cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1065 Value *Phi = State->
get(PhiR, NeedsScalar);
1066 Value *Val = State->
get(PhiR->getBackedgeValue(), NeedsScalar);
1067 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1076 return getVectorLoopRegion()->cost(
VF, Ctx);
1082 if (
auto *R = dyn_cast<VPRegionBlock>(
B))
1089 if (
auto *R = dyn_cast<VPRegionBlock>(
B))
1094#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1098 if (
VF.getNumUsers() > 0) {
1104 if (VFxUF.getNumUsers() > 0) {
1110 if (VectorTripCount.getNumUsers() > 0) {
1113 O <<
" = vector-trip-count";
1116 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1118 BackedgeTakenCount->printAsOperand(O,
SlotTracker);
1119 O <<
" = backedge-taken count";
1123 if (TripCount->isLiveIn())
1126 O <<
" = original trip-count";
1134 O <<
"VPlan '" <<
getName() <<
"' {";
1151 RSO <<
Name <<
" for ";
1153 RSO <<
"VF={" << VFs[0];
1162 RSO <<
"UF={" << UFs[0];
1189 NewDeepRPOT(NewEntry);
1192 for (
const auto &[OldBB, NewBB] :
1193 zip(VPBlockUtils::blocksOnly<VPBasicBlock>(OldDeepRPOT),
1194 VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT))) {
1195 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().
size() &&
1196 "blocks must have the same number of recipes");
1197 for (
const auto &[OldR, NewR] :
zip(*OldBB, *NewBB)) {
1198 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1199 "recipes must have the same number of operands");
1200 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1201 "recipes must define the same number of operands");
1202 for (
const auto &[OldV, NewV] :
1203 zip(OldR.definedValues(), NewR.definedValues()))
1204 Old2NewVPValues[OldV] = NewV;
1210 VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT)) {
1212 for (
unsigned I = 0, E = NewR.getNumOperands();
I != E; ++
I) {
1214 NewR.setOperand(
I, NewOp);
1221 const auto &[NewEntry, __] =
cloneFrom(Entry);
1223 BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
1226 auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB);
1227 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1230 auto *NewPlan =
new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
1232 for (
VPValue *OldLiveIn : VPLiveInsToFree) {
1233 Old2NewVPValues[OldLiveIn] =
1234 NewPlan->getOrAddLiveIn(OldLiveIn->getLiveInIRValue());
1236 Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1237 Old2NewVPValues[&
VF] = &NewPlan->VF;
1238 Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1239 if (BackedgeTakenCount) {
1240 NewPlan->BackedgeTakenCount =
new VPValue();
1241 Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
1243 assert(TripCount &&
"trip count must be set");
1244 if (TripCount->isLiveIn())
1245 Old2NewVPValues[TripCount] =
1246 NewPlan->getOrAddLiveIn(TripCount->getLiveInIRValue());
1256 NewPlan->Name =
Name;
1258 "TripCount must have been added to Old2NewVPValues");
1259 NewPlan->TripCount = Old2NewVPValues[TripCount];
1263#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1266 return (isa<VPRegionBlock>(
Block) ?
"cluster_N" :
"N") +
1271 const std::string &
Name =
Block->getName();
1280 OS <<
"digraph VPlan {\n";
1281 OS <<
"graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1292 for (
auto Line : Lines)
1297 OS <<
"node [shape=rect, fontname=Courier, fontsize=30]\n";
1298 OS <<
"edge [fontname=Courier, fontsize=30]\n";
1299 OS <<
"compound=true\n";
1317 bool Hidden,
const Twine &Label) {
1322 OS << Indent << getUID(
Tail) <<
" -> " << getUID(Head);
1323 OS <<
" [ label=\"" << Label <<
'\"';
1325 OS <<
" ltail=" << getUID(
From);
1327 OS <<
" lhead=" << getUID(To);
1329 OS <<
"; splines=none";
1334 auto &Successors =
Block->getSuccessors();
1335 if (Successors.size() == 1)
1336 drawEdge(
Block, Successors.front(),
false,
"");
1337 else if (Successors.size() == 2) {
1338 drawEdge(
Block, Successors.front(),
false,
"T");
1339 drawEdge(
Block, Successors.back(),
false,
"F");
1341 unsigned SuccessorNumber = 0;
1368 EmitLine(Line,
" +\n");
1369 EmitLine(
Lines.back(),
"\n");
1372 OS << Indent <<
"]\n";
1378 OS << Indent <<
"subgraph " << getUID(
Region) <<
" {\n";
1380 OS << Indent <<
"fontname=Courier\n"
1381 << Indent <<
"label=\""
1389 OS << Indent <<
"}\n";
1394 if (
auto *Inst = dyn_cast<Instruction>(V)) {
1395 if (!Inst->getType()->isVoidTy()) {
1396 Inst->printAsOperand(O,
false);
1399 O << Inst->getOpcodeName() <<
" ";
1400 unsigned E = Inst->getNumOperands();
1402 Inst->getOperand(0)->printAsOperand(O,
false);
1403 for (
unsigned I = 1;
I < E; ++
I)
1404 Inst->getOperand(
I)->printAsOperand(O <<
", ",
false);
1407 V->printAsOperand(O,
false);
1413 return !hasDefiningRecipe() ||
1414 !getDefiningRecipe()->getParent()->getEnclosingLoopRegion();
1418 replaceUsesWithIf(New, [](
VPUser &,
unsigned) {
return true; });
1430 for (
unsigned J = 0; J < getNumUsers();) {
1432 bool RemovedUser =
false;
1448#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1466 visitBlock(
Base, Old2New, IAI);
1470void VPInterleavedAccessInfo::visitBlock(
VPBlockBase *
Block, Old2NewTy &Old2New,
1474 if (isa<VPWidenPHIRecipe>(&VPI))
1476 assert(isa<VPInstruction>(&VPI) &&
"Can only handle VPInstructions");
1477 auto *VPInst = cast<VPInstruction>(&VPI);
1479 auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
1486 auto NewIGIter = Old2New.find(IG);
1487 if (NewIGIter == Old2New.end())
1489 IG->getFactor(), IG->isReverse(), IG->getAlign());
1491 if (Inst == IG->getInsertPos())
1492 Old2New[IG]->setInsertPos(VPInst);
1494 InterleaveGroupMap[VPInst] = Old2New[IG];
1495 InterleaveGroupMap[VPInst]->insertMember(
1496 VPInst, IG->getIndex(Inst),
1497 Align(IG->isReverse() ? (-1) *
int(IG->getFactor())
1498 : IG->getFactor()));
1501 visitRegion(
Region, Old2New, IAI);
1512void VPSlotTracker::assignName(
const VPValue *V) {
1513 assert(!VPValue2Name.contains(V) &&
"VPValue already has a name!");
1514 auto *UV = V->getUnderlyingValue();
1515 auto *VPI = dyn_cast_or_null<VPInstruction>(V->getDefiningRecipe());
1516 if (!UV && !(VPI && !VPI->getName().empty())) {
1517 VPValue2Name[V] = (
Twine(
"vp<%") +
Twine(NextSlot) +
">").str();
1527 UV->printAsOperand(S,
false);
1529 Name = VPI->getName();
1531 assert(!
Name.empty() &&
"Name cannot be empty.");
1533 std::string BaseName = (
Twine(Prefix) +
Name +
Twine(
">")).str();
1536 const auto &[
A,
_] = VPValue2Name.insert({
V, BaseName});
1539 if (
V->isLiveIn() && isa<ConstantInt, ConstantFP>(UV))
1544 const auto &[
C, UseInserted] = BaseName2Version.insert({BaseName, 0});
1547 A->second = (BaseName +
Twine(
".") +
Twine(
C->second)).str();
1551void VPSlotTracker::assignNames(
const VPlan &
Plan) {
1553 assignName(&
Plan.VF);
1555 assignName(&
Plan.VFxUF);
1556 assignName(&
Plan.VectorTripCount);
1557 if (
Plan.BackedgeTakenCount)
1558 assignName(
Plan.BackedgeTakenCount);
1565 VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1569void VPSlotTracker::assignNames(
const VPBasicBlock *VPBB) {
1571 for (
VPValue *Def : Recipe.definedValues())
1576 std::string
Name = VPValue2Name.lookup(V);
1590 "VPValue defined by a recipe in a VPlan?");
1593 if (
auto *UV = V->getUnderlyingValue()) {
1596 UV->printAsOperand(S,
false);
1597 return (
Twine(
"ir<") +
Name +
">").str();
1605 assert(!
Range.isEmpty() &&
"Trying to test an empty VF range.");
1606 bool PredicateAtRangeStart = Predicate(
Range.Start);
1609 if (Predicate(TmpVF) != PredicateAtRangeStart) {
1614 return PredicateAtRangeStart;
1624 auto MaxVFTimes2 = MaxVF * 2;
1627 auto Plan = buildVPlan(SubRange);
1629 VPlans.push_back(std::move(
Plan));
1638 "Multiple VPlans for VF.");
1647#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1649 if (VPlans.empty()) {
1650 O <<
"LV: No VPlans built.\n";
1653 for (
const auto &
Plan : VPlans)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
dxil pretty DXIL Metadata Pretty Printer
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static void dumpEdges(CFGMST< Edge, BBInfo > &MST, GCOVFunction &GF)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
This file provides a LoopVectorizationPlanner class.
cl::opt< unsigned > ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing."))
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
static T * getPlanEntry(T *Start)
static T * getEnclosingLoopRegionForRegion(T *P)
Return the enclosing loop region for region P.
static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB)
Replace VPBB with a VPIRBasicBlock wrapping IRBB.
cl::opt< unsigned > ForceTargetInstructionCost
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
static cl::opt< bool > PrintVPlansInDotFormat("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
static const uint32_t IV[8]
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the basic block to an output stream with an optional AssemblyAnnotationWriter.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
static ConstantInt * getTrue(LLVMContext &Context)
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
constexpr bool isScalar() const
Exactly one element.
bool shouldEmitDebugInfoForProfiling() const
Returns true if we should emit debug info for profiling.
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
UnreachableInst * CreateUnreachable()
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
BasicBlock * GetInsertBlock() const
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
InsertPoint saveIP() const
Returns the current insert point.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
static InstructionCost getInvalid(CostType Val=0)
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
The group of interleaved loads/stores sharing the same stride and close to each other.
Drive the analysis of interleaved memory accesses in the loop.
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getUniqueLatchExitBlock() const
Return the unique exit block for the latch, or null if there are multiple different exit blocks or th...
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
void printPlans(raw_ostream &O)
void annotateInstWithNoAlias(Instruction *VersionedInst, const Instruction *OrigInst)
Add the noalias annotations to VersionedInst.
Represents a single loop in the control flow graph.
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This class provides computation of slot numbers for LLVM Assembly writing.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getInt1Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
RecipeListTy::iterator iterator
Instruction iterators...
void connectToPredecessors(VPTransformState::CFGState &CFG)
Connect the VPBB's predecessors in the VPlan CFG to the IR basic block generated for this VPBB.
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
iterator begin()
Recipe iterator methods.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replace all uses of VPValues def...
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBasicBlock to O, prefixing all lines with Indent.
bool isExiting() const
Returns true if the block is exiting its parent region.
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
iterator_range< VPBlockBase ** > successors()
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
VPBlockBase * getEnclosingBlockWithPredecessors()
const VPBlocksTy & getPredecessors() const
static void deleteCFG(VPBlockBase *Entry)
Delete all blocks reachable from a given VPBlockBase, inclusive.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
const VPBlocksTy & getHierarchicalSuccessors()
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
const VPBasicBlock * getEntryBasicBlock() const
Helper for GraphTraits specialization that traverses through VPRegionBlocks.
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void reassociateBlocks(VPBlockBase *Old, VPBlockBase *New)
Reassociate all the blocks connected to Old so that they now point to New.
VPlan-based builder utility analogous to IRBuilder.
This class augments a recipe with a set of VPValues defined by the recipe.
void dump() const
Dump the VPDef to stderr (for debugging).
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPDef prints itself.
Recipe to expand a SCEV expression.
A special type of VPBasicBlock that wraps an existing IR basic block.
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
static VPIRBasicBlock * fromBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
A recipe to wrap an original IR instruction not to be modified during execution, except for PHIs.
This is a concrete Recipe that models a single VPlan-level instruction.
VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI)
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
static VPLane getFirstLane()
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replace all uses of VPValues def...
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
This class can be used to assign names to VPValues.
std::string getOrCreateName(const VPValue *V) const
Returns the name assigned to V, if there is one, otherwise try to construct one from the underlying v...
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
void dump() const
Dump the value to stderr (for debugging).
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
VPDef * Def
Pointer to the VPDef that defines this VPValue.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPlanPrinter prints a given VPlan to a given output stream.
LLVM_DUMP_METHOD void dump()
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
void prepareToExecute(Value *TripCount, Value *VectorTripCount, VPTransformState &State)
Prepare the plan for execution, setting up the required live-in values.
VPBasicBlock * getEntry()
VPValue & getVectorTripCount()
The vector trip count.
VPValue * getTripCount() const
The trip count of the original loop.
static VPlanPtr createInitialVPlan(Type *InductionTy, PredicatedScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop)
Create initial VPlan, having an "entry" VPBasicBlock (wrapping original scalar pre-header) which cont...
bool hasVF(ElementCount VF)
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
void print(raw_ostream &O) const
Print this VPlan to O.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region.
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
StringRef getName() const
Return a constant reference to the value's name.
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ C
The default llvm calling convention, compatible with C.
std::string EscapeString(const std::string &Label)
bool match(Val *V, const Pattern &P)
BinaryVPInstruction_match< Op0_t, Op1_t, VPInstruction::BranchOnCount > m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1)
UnaryVPInstruction_match< Op0_t, VPInstruction::BranchOnCond > m_BranchOnCond(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
bool isUniformAfterVectorization(const VPValue *VPV)
Returns true if VPV is uniform after vectorization.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE)
Get or create a VPValue that corresponds to the expansion of Expr.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
auto successors(const MachineBasicBlock *BB)
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
cl::opt< bool > EnableFSDiscriminator
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
std::unique_ptr< VPlan > VPlanPtr
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
unsigned getReciprocalPredBlockProb()
A helper function that returns the reciprocal of the block probability of predicated blocks.
This struct is a compact representation of a valid (non-zero power of two) alignment.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
const TargetTransformInfo & TTI
void print(raw_ostream &O) const