55#define DEBUG_TYPE "structurizecfg"
63 "structurizecfg-skip-uniform-regions",
65 cl::desc(
"Force whether the StructurizeCFG pass skips uniform regions"),
69 RelaxedUniformRegions(
"structurizecfg-relaxed-uniform-regions",
cl::Hidden,
70 cl::desc(
"Allow relaxed uniform region checks"),
75using BBValuePair = std::pair<BasicBlock *, Value *>;
97struct SubGraphTraits {
98 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
103 class WrappedSuccIterator
105 WrappedSuccIterator, BaseSuccIterator,
106 typename std::iterator_traits<BaseSuccIterator>::iterator_category,
107 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
114 NodeRef
operator*()
const {
return {*
I, Nodes}; }
117 static bool filterAll(
const NodeRef &
N) {
return true; }
118 static bool filterSet(
const NodeRef &
N) {
return N.second->count(
N.first); }
120 using ChildIteratorType =
123 static NodeRef getEntryNode(
Region *R) {
127 static NodeRef getEntryNode(NodeRef
N) {
return N; }
130 auto *filter =
N.second ? &filterSet : &filterAll;
132 make_range<WrappedSuccIterator>(
138 static ChildIteratorType child_begin(
const NodeRef &
N) {
142 static ChildIteratorType child_end(
const NodeRef &
N) {
152class NearestCommonDominator {
155 bool ResultIsRemembered =
false;
158 void addBlock(
BasicBlock *BB,
bool Remember) {
161 ResultIsRemembered = Remember;
166 if (NewResult != Result)
167 ResultIsRemembered =
false;
169 ResultIsRemembered |= Remember;
174 explicit NearestCommonDominator(
DominatorTree *DomTree) : DT(DomTree) {}
190 bool resultIsRememberedBlock() {
return ResultIsRemembered; }
239class StructurizeCFG {
256 BBPhiMap DeletedPhis;
257 BB2BBVecMap AddedPhis;
260 BranchVector Conditions;
264 BranchVector LoopConds;
266 BranchDebugLocMap TermDL;
280 void insertConditions(
bool Loops);
282 void simplifyConditions();
293 void simplifyAffectedPhis();
298 bool IncludeDominator);
312 void wireFlow(
bool ExitUseAllowed,
BasicBlock *LoopEnd);
314 void handleLoops(
bool ExitUseAllowed,
BasicBlock *LoopEnd);
326class StructurizeCFGLegacyPass :
public RegionPass {
327 bool SkipUniformRegions;
332 explicit StructurizeCFGLegacyPass(
bool SkipUniformRegions_ =
false)
333 :
RegionPass(
ID), SkipUniformRegions(SkipUniformRegions_) {
335 SkipUniformRegions = ForceSkipUniformRegions.
getValue();
342 if (SkipUniformRegions) {
344 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
345 if (SCFG.makeUniformRegion(R, UA))
348 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
349 return SCFG.run(R, DT);
355 if (SkipUniformRegions)
366char StructurizeCFGLegacyPass::ID = 0;
369 "Structurize the CFG",
false,
false)
379void StructurizeCFG::orderNodes() {
386 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
390 unsigned I = 0, E = Order.size();
402 unsigned Size = SCC.size();
407 for (
const auto &
N : SCC) {
408 assert(
I < E &&
"SCC size mismatch!");
409 Order[
I++] =
N.first;
412 assert(
I == E &&
"SCC size mismatch!");
415 if (WorkList.
empty())
424 Nodes.
insert(Order.begin() +
I, Order.begin() + E - 1);
427 EntryNode.first = Order[E - 1];
428 EntryNode.second = &Nodes;
434 if (
N->isSubRegion()) {
437 if (Visited.count(Exit))
446 if (Visited.count(Succ))
454 Value *
Cond = Invert ? BoolFalse : BoolTrue;
455 if (
Term->isConditional()) {
458 if (
Idx != (
unsigned)Invert)
465void StructurizeCFG::gatherPredicates(
RegionNode *
N) {
466 RegionInfo *RI = ParentRegion->getRegionInfo();
473 if (!ParentRegion->contains(
P))
477 if (R == ParentRegion) {
480 for (
unsigned i = 0, e =
Term->getNumSuccessors(); i != e; ++i) {
485 if (Visited.count(
P)) {
487 if (
Term->isConditional()) {
493 Pred[
Other] = BoolFalse;
498 Pred[
P] = buildCondition(Term, i,
false);
501 LPred[
P] = buildCondition(Term, i,
true);
506 while (
R->getParent() != ParentRegion)
514 if (Visited.count(Entry))
515 Pred[
Entry] = BoolTrue;
517 LPred[
Entry] = BoolFalse;
523void StructurizeCFG::collectInfos() {
536 << (
RN->isSubRegion() ?
"SubRegion with entry: " :
"")
537 <<
RN->getEntry()->getName() <<
"\n");
540 gatherPredicates(RN);
543 Visited.insert(
RN->getEntry());
559void StructurizeCFG::insertConditions(
bool Loops) {
560 BranchVector &Conds =
Loops ? LoopConds : Conditions;
577 NearestCommonDominator Dominator(DT);
578 Dominator.addBlock(Parent);
580 Value *ParentValue =
nullptr;
581 for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
583 Value *Pred = BBAndPred.second;
590 Dominator.addAndRememberBlock(BB);
594 Term->setCondition(ParentValue);
596 if (!Dominator.resultIsRememberedBlock())
605void StructurizeCFG::simplifyConditions() {
607 for (
auto &
I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
608 auto &Preds =
I.second;
609 for (
auto &J : Preds) {
610 auto &
Cond = J.second;
613 !
Cond->use_empty()) {
614 if (
auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
615 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
616 Cond->replaceAllUsesWith(InvertedCmp);
622 for (
auto *
I : InstToErase)
623 I->eraseFromParent();
629 PhiMap &
Map = DeletedPhis[To];
631 bool Recorded =
false;
632 while (
Phi.getBasicBlockIndex(
From) != -1) {
636 AffectedPhis.push_back(&Phi);
649 AddedPhis[To].push_back(
From);
656void StructurizeCFG::findUndefBlocks(
685 if (PHIBlock == ParentRegion->getExit()) {
687 if (ParentRegion->contains(
P))
698 while (!
Stack.empty()) {
703 VisitedBlock.
insert(Current);
704 if (FlowSet.contains(Current)) {
707 }
else if (!Incomings.
contains(Current)) {
714void StructurizeCFG::setPhiValues() {
717 for (
const auto &AddedPhi : AddedPhis) {
719 const BBVector &
From = AddedPhi.second;
721 if (!DeletedPhis.count(To))
725 bool CachedUndefs =
false;
726 PhiMap &
Map = DeletedPhis[To];
727 for (
const auto &PI : Map) {
730 Updater.Initialize(
Phi->getType(),
"");
731 Updater.AddAvailableValue(&
Func->getEntryBlock(), Undef);
732 Updater.AddAvailableValue(To, Undef);
736 for (
const auto &VI : PI.second) {
738 Updater.AddAvailableValue(
VI.first,
VI.second);
739 if (isa<Constant>(
VI.second))
744 findUndefBlocks(To, Incomings, UndefBlks);
748 for (
auto UB : UndefBlks) {
754 [&](
BasicBlock *CP) {
return DT->dominates(CP, UB); }))
756 Updater.AddAvailableValue(UB, Undef);
760 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
761 AffectedPhis.push_back(Phi);
764 DeletedPhis.erase(To);
766 assert(DeletedPhis.empty());
768 AffectedPhis.append(InsertedPhis.
begin(), InsertedPhis.
end());
771void StructurizeCFG::simplifyAffectedPhis() {
779 Q.CanUseUndef =
false;
780 for (
WeakVH VH : AffectedPhis) {
781 if (
auto Phi = dyn_cast_or_null<PHINode>(VH)) {
783 Phi->replaceAllUsesWith(NewValue);
784 Phi->eraseFromParent();
793void StructurizeCFG::killTerminator(
BasicBlock *BB) {
799 delPhiValues(BB, Succ);
801 Term->eraseFromParent();
806 bool IncludeDominator) {
807 if (
Node->isSubRegion()) {
819 delPhiValues(BB, OldExit);
821 addPhiValues(BB, NewExit);
824 if (IncludeDominator) {
828 Dominator = DT->findNearestCommonDominator(Dominator, BB);
834 DT->changeImmediateDominator(NewExit, Dominator);
843 addPhiValues(BB, NewExit);
844 if (IncludeDominator)
845 DT->changeImmediateDominator(NewExit, BB);
853 Order.back()->getEntry();
856 FlowSet.insert(
Flow);
861 TermDL[
Flow] = std::move(
DL);
863 DT->addNewBlock(
Flow, Dominator);
864 ParentRegion->getRegionInfo()->setRegionFor(
Flow, ParentRegion);
869BasicBlock *StructurizeCFG::needPrefix(
bool NeedEmpty) {
872 if (!PrevNode->isSubRegion()) {
873 killTerminator(Entry);
874 if (!NeedEmpty ||
Entry->getFirstInsertionPt() ==
Entry->end())
882 changeExit(PrevNode,
Flow,
true);
883 PrevNode = ParentRegion->getBBNode(
Flow);
889 bool ExitUseAllowed) {
890 if (!Order.empty() || !ExitUseAllowed)
891 return getNextFlow(
Flow);
894 DT->changeImmediateDominator(Exit,
Flow);
895 addPhiValues(
Flow, Exit);
900void StructurizeCFG::setPrevNode(
BasicBlock *BB) {
901 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
908 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
909 return DT->dominates(BB, Pred.first);
916 bool Dominated =
false;
922 for (std::pair<BasicBlock*, Value*> Pred : Preds) {
929 if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
938void StructurizeCFG::wireFlow(
bool ExitUseAllowed,
941 Visited.insert(
Node->getEntry());
943 if (isPredictableTrue(
Node)) {
946 changeExit(PrevNode,
Node->getEntry(),
true);
960 Conditions.push_back(Br);
961 addPhiValues(
Flow, Entry);
962 DT->changeImmediateDominator(Entry,
Flow);
965 while (!Order.empty() && !Visited.count(LoopEnd) &&
966 dominatesPredicates(Entry, Order.back())) {
967 handleLoops(
false, LoopEnd);
970 changeExit(PrevNode, Next,
false);
975void StructurizeCFG::handleLoops(
bool ExitUseAllowed,
980 if (!
Loops.count(LoopStart)) {
981 wireFlow(ExitUseAllowed, LoopEnd);
985 if (!isPredictableTrue(
Node))
986 LoopStart = needPrefix(
true);
989 wireFlow(
false, LoopEnd);
990 while (!Visited.count(LoopEnd)) {
991 handleLoops(
false, LoopEnd);
997 LoopEnd = needPrefix(
false);
998 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
1001 LoopConds.push_back(Br);
1002 addPhiValues(LoopEnd, LoopStart);
1008void StructurizeCFG::createFlow() {
1010 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
1012 AffectedPhis.clear();
1013 DeletedPhis.clear();
1021 while (!Order.empty()) {
1022 handleLoops(EntryDominatesExit,
nullptr);
1026 changeExit(PrevNode, Exit, EntryDominatesExit);
1028 assert(EntryDominatesExit);
1033void StructurizeCFG::rebuildSSA() {
1035 for (
BasicBlock *BB : ParentRegion->blocks())
1037 bool Initialized =
false;
1042 if (
User->getParent() == BB) {
1044 }
else if (
PHINode *UserPN = dyn_cast<PHINode>(
User)) {
1045 if (UserPN->getIncomingBlock(U) == BB)
1049 if (DT->dominates(&
I,
User))
1067 bool SubRegionsAreUniform =
true;
1069 unsigned ConditionalDirectChildren = 0;
1071 for (
auto *E : R->elements()) {
1072 if (!E->isSubRegion()) {
1073 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
1081 ConditionalDirectChildren++;
1084 <<
" has uniform terminator\n");
1096 auto Br = dyn_cast<BranchInst>(BB->getTerminator());
1102 if (!RelaxedUniformRegions)
1105 SubRegionsAreUniform =
false;
1118 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1121void StructurizeCFG::init(
Region *R) {
1133 if (
R->isTopLevelRegion())
1142 unsigned UniformMDKindID =
1143 R->getEntry()->getContext().getMDKindID(
"structurizecfg.uniform");
1146 LLVM_DEBUG(
dbgs() <<
"Skipping region with uniform control flow: " << *R
1155 if (E->isSubRegion())
1158 if (
Instruction *Term = E->getEntry()->getTerminator())
1159 Term->setMetadata(UniformMDKindID, MD);
1169 if (
R->isTopLevelRegion())
1174 Func =
R->getEntry()->getParent();
1182 insertConditions(
false);
1183 insertConditions(
true);
1185 simplifyConditions();
1186 simplifyAffectedPhis();
1192 DeletedPhis.clear();
1206 return new StructurizeCFGLegacyPass(SkipUniformRegions);
1210 Regions.push_back(&R);
1211 for (
const auto &E : R)
1216 : SkipUniformRegions(SkipUniformRegions_) {
1217 if (ForceSkipUniformRegions.getNumOccurrences())
1218 SkipUniformRegions = ForceSkipUniformRegions.getValue();
1224 OS, MapClassName2PassName);
1225 if (SkipUniformRegions)
1226 OS <<
"<skip-uniform-regions>";
1232 bool Changed =
false;
1237 if (SkipUniformRegions)
1240 std::vector<Region *> Regions;
1242 while (!Regions.empty()) {
1243 Region *R = Regions.back();
1246 StructurizeCFG SCFG;
1249 if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) {
1254 Changed |= SCFG.run(R, DT);
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
BlockVerifier::State From
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This defines the Use class.
This file implements a map that provides insertion order iteration.
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
static void addRegionIntoQueue(Region &R, std::deque< Region * > &RQ)
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static void addRegionIntoQueue(Region &R, std::vector< Region * > &Regions)
const char FlowBlockName[]
static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, const UniformityInfo &UA)
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional or Unconditional Branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getFalse(LLVMContext &Context)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
const BasicBlock & getEntryBlock() const
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
This is an important class for using LLVM in a threaded context.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
This class implements a map that also provides access to all stored values in a deterministic order.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserve()
Mark an analysis as preserved.
The pass manager to schedule RegionPasses.
void replaceExit(BlockT *BB)
Replace the exit basic block of the region with the new basic block.
block_range blocks()
Returns a range view of the basic blocks in the region.
BlockT * getExit() const
Get the exit BasicBlock of the Region.
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
Analysis pass that exposes the RegionInfo for a function.
RegionT * getRegionFor(BlockT *BB) const
Get the smallest region that contains a BasicBlock.
A pass that runs on each Region in a function.
virtual bool runOnRegion(Region *R, RGPassManager &RGM)=0
Run the pass on a specific Region.
Helper class for SSA formation on a set of values defined in multiple blocks.
void RewriteUseAfterInsertions(Use &U)
Rewrite a use like RewriteUse but handling in-block definitions.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Implements a dense probed hash-table based set with some number of buckets stored inline.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getInt1Ty(LLVMContext &C)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
LLVM Value Representation.
A nullable Value handle that is nullable.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
Specialization of filter_iterator_base for forward iteration only.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
CRTP base class for adapting an iterator to a different type.
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
OneUse_match< T > m_OneUse(const T &SubPattern)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
APInt operator*(APInt a, uint64_t RHS)
bool hasOnlySimpleTerminator(const Function &F)
auto successors(const MachineBasicBlock *BB)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structizer will not structurize regions that only contain uniform...
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void initializeStructurizeCFGLegacyPassPass(PassRegistry &)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
auto predecessors(const MachineBasicBlock *BB)
iterator_range< typename GraphTraits< GraphType >::ChildIteratorType > children(const typename GraphTraits< GraphType >::NodeRef &G)
Value * invertCondition(Value *Condition)
Invert the given true/false value, possibly reusing an existing copy.
A CRTP mix-in to automatically provide informational APIs needed for passes.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
StructurizeCFGPass(bool SkipUniformRegions=false)