54#define DEBUG_TYPE "structurizecfg"
62 "structurizecfg-skip-uniform-regions",
64 cl::desc(
"Force whether the StructurizeCFG pass skips uniform regions"),
68 RelaxedUniformRegions(
"structurizecfg-relaxed-uniform-regions",
cl::Hidden,
69 cl::desc(
"Allow relaxed uniform region checks"),
74using BBValuePair = std::pair<BasicBlock *, Value *>;
96struct SubGraphTraits {
97 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
102 class WrappedSuccIterator
104 WrappedSuccIterator, BaseSuccIterator,
105 typename std::iterator_traits<BaseSuccIterator>::iterator_category,
106 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
113 NodeRef
operator*()
const {
return {*
I, Nodes}; }
116 static bool filterAll(
const NodeRef &
N) {
return true; }
117 static bool filterSet(
const NodeRef &
N) {
return N.second->count(
N.first); }
119 using ChildIteratorType =
122 static NodeRef getEntryNode(
Region *R) {
126 static NodeRef getEntryNode(NodeRef
N) {
return N; }
129 auto *filter =
N.second ? &filterSet : &filterAll;
131 make_range<WrappedSuccIterator>(
137 static ChildIteratorType child_begin(
const NodeRef &
N) {
141 static ChildIteratorType child_end(
const NodeRef &
N) {
151class NearestCommonDominator {
154 bool ResultIsRemembered =
false;
157 void addBlock(
BasicBlock *BB,
bool Remember) {
160 ResultIsRemembered = Remember;
165 if (NewResult != Result)
166 ResultIsRemembered =
false;
168 ResultIsRemembered |= Remember;
173 explicit NearestCommonDominator(
DominatorTree *DomTree) : DT(DomTree) {}
189 bool resultIsRememberedBlock() {
return ResultIsRemembered; }
238class StructurizeCFG {
255 BBPhiMap DeletedPhis;
256 BB2BBVecMap AddedPhis;
259 BranchVector Conditions;
263 BranchVector LoopConds;
265 BranchDebugLocMap TermDL;
279 void insertConditions(
bool Loops);
281 void simplifyConditions();
292 void simplifyAffectedPhis();
297 bool IncludeDominator);
311 void wireFlow(
bool ExitUseAllowed,
BasicBlock *LoopEnd);
313 void handleLoops(
bool ExitUseAllowed,
BasicBlock *LoopEnd);
325class StructurizeCFGLegacyPass :
public RegionPass {
326 bool SkipUniformRegions;
331 explicit StructurizeCFGLegacyPass(
bool SkipUniformRegions_ =
false)
332 :
RegionPass(
ID), SkipUniformRegions(SkipUniformRegions_) {
334 SkipUniformRegions = ForceSkipUniformRegions.
getValue();
341 if (SkipUniformRegions) {
343 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
344 if (SCFG.makeUniformRegion(R, UA))
347 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
348 return SCFG.run(R, DT);
354 if (SkipUniformRegions)
366char StructurizeCFGLegacyPass::ID = 0;
369 "Structurize the CFG",
false,
false)
380void StructurizeCFG::orderNodes() {
387 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
391 unsigned I = 0,
E = Order.size();
403 unsigned Size = SCC.size();
408 for (
const auto &
N : SCC) {
409 assert(
I <
E &&
"SCC size mismatch!");
410 Order[
I++] =
N.first;
413 assert(
I ==
E &&
"SCC size mismatch!");
416 if (WorkList.
empty())
425 Nodes.
insert(Order.begin() +
I, Order.begin() +
E - 1);
428 EntryNode.first = Order[
E - 1];
429 EntryNode.second = &Nodes;
435 if (
N->isSubRegion()) {
438 if (Visited.count(Exit))
439 Loops[Exit] =
N->getEntry();
447 if (Visited.count(Succ))
455 Value *
Cond = Invert ? BoolFalse : BoolTrue;
456 if (
Term->isConditional()) {
459 if (
Idx != (
unsigned)Invert)
466void StructurizeCFG::gatherPredicates(
RegionNode *
N) {
467 RegionInfo *RI = ParentRegion->getRegionInfo();
474 if (!ParentRegion->contains(
P))
478 if (R == ParentRegion) {
481 for (
unsigned i = 0, e =
Term->getNumSuccessors(); i != e; ++i) {
486 if (Visited.count(
P)) {
488 if (
Term->isConditional()) {
494 Pred[
Other] = BoolFalse;
499 Pred[
P] = buildCondition(Term, i,
false);
502 LPred[
P] = buildCondition(Term, i,
true);
507 while (
R->getParent() != ParentRegion)
515 if (Visited.count(Entry))
516 Pred[Entry] = BoolTrue;
518 LPred[Entry] = BoolFalse;
524void StructurizeCFG::collectInfos() {
537 << (
RN->isSubRegion() ?
"SubRegion with entry: " :
"")
538 <<
RN->getEntry()->getName() <<
"\n");
541 gatherPredicates(RN);
544 Visited.insert(
RN->getEntry());
560void StructurizeCFG::insertConditions(
bool Loops) {
561 BranchVector &Conds =
Loops ? LoopConds : Conditions;
578 NearestCommonDominator Dominator(DT);
579 Dominator.addBlock(Parent);
581 Value *ParentValue =
nullptr;
582 for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
584 Value *Pred = BBAndPred.second;
591 Dominator.addAndRememberBlock(BB);
595 Term->setCondition(ParentValue);
597 if (!Dominator.resultIsRememberedBlock())
606void StructurizeCFG::simplifyConditions() {
608 for (
auto &
I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
609 auto &Preds =
I.second;
610 for (
auto &J : Preds) {
611 auto &
Cond = J.second;
614 !
Cond->use_empty()) {
615 if (
auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
616 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
617 Cond->replaceAllUsesWith(InvertedCmp);
623 for (
auto *
I : InstToErase)
624 I->eraseFromParent();
630 PhiMap &
Map = DeletedPhis[To];
632 bool Recorded =
false;
633 while (
Phi.getBasicBlockIndex(
From) != -1) {
637 AffectedPhis.push_back(&Phi);
650 AddedPhis[To].push_back(
From);
657void StructurizeCFG::findUndefBlocks(
686 if (PHIBlock == ParentRegion->getExit()) {
688 if (ParentRegion->contains(
P))
699 while (!
Stack.empty()) {
704 VisitedBlock.
insert(Current);
705 if (FlowSet.contains(Current)) {
708 }
else if (!Incomings.
contains(Current)) {
715void StructurizeCFG::setPhiValues() {
718 for (
const auto &AddedPhi : AddedPhis) {
720 const BBVector &
From = AddedPhi.second;
722 if (!DeletedPhis.count(To))
726 bool CachedUndefs =
false;
727 PhiMap &
Map = DeletedPhis[To];
728 for (
const auto &PI : Map) {
731 Updater.Initialize(
Phi->getType(),
"");
732 Updater.AddAvailableValue(&
Func->getEntryBlock(), Undef);
733 Updater.AddAvailableValue(To, Undef);
737 for (
const auto &VI : PI.second) {
739 Updater.AddAvailableValue(
VI.first,
VI.second);
740 if (isa<Constant>(
VI.second))
745 findUndefBlocks(To, Incomings, UndefBlks);
749 for (
auto UB : UndefBlks) {
755 [&](
BasicBlock *CP) {
return DT->dominates(CP, UB); }))
757 Updater.AddAvailableValue(UB, Undef);
761 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
762 AffectedPhis.push_back(Phi);
765 DeletedPhis.erase(To);
767 assert(DeletedPhis.empty());
769 AffectedPhis.append(InsertedPhis.
begin(), InsertedPhis.
end());
772void StructurizeCFG::simplifyAffectedPhis() {
780 Q.CanUseUndef =
false;
781 for (
WeakVH VH : AffectedPhis) {
782 if (
auto Phi = dyn_cast_or_null<PHINode>(VH)) {
784 Phi->replaceAllUsesWith(NewValue);
785 Phi->eraseFromParent();
794void StructurizeCFG::killTerminator(
BasicBlock *BB) {
800 delPhiValues(BB, Succ);
802 Term->eraseFromParent();
807 bool IncludeDominator) {
808 if (
Node->isSubRegion()) {
820 delPhiValues(BB, OldExit);
822 addPhiValues(BB, NewExit);
825 if (IncludeDominator) {
829 Dominator = DT->findNearestCommonDominator(Dominator, BB);
835 DT->changeImmediateDominator(NewExit, Dominator);
844 addPhiValues(BB, NewExit);
845 if (IncludeDominator)
846 DT->changeImmediateDominator(NewExit, BB);
854 Order.back()->getEntry();
857 FlowSet.insert(
Flow);
862 TermDL[
Flow] = std::move(
DL);
864 DT->addNewBlock(
Flow, Dominator);
865 ParentRegion->getRegionInfo()->setRegionFor(
Flow, ParentRegion);
870BasicBlock *StructurizeCFG::needPrefix(
bool NeedEmpty) {
873 if (!PrevNode->isSubRegion()) {
874 killTerminator(Entry);
875 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
883 changeExit(PrevNode,
Flow,
true);
884 PrevNode = ParentRegion->getBBNode(
Flow);
890 bool ExitUseAllowed) {
891 if (!Order.empty() || !ExitUseAllowed)
892 return getNextFlow(
Flow);
895 DT->changeImmediateDominator(Exit,
Flow);
896 addPhiValues(
Flow, Exit);
901void StructurizeCFG::setPrevNode(
BasicBlock *BB) {
902 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
909 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
910 return DT->dominates(BB, Pred.first);
917 bool Dominated =
false;
923 for (std::pair<BasicBlock*, Value*> Pred : Preds) {
930 if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
939void StructurizeCFG::wireFlow(
bool ExitUseAllowed,
942 Visited.insert(
Node->getEntry());
944 if (isPredictableTrue(
Node)) {
947 changeExit(PrevNode,
Node->getEntry(),
true);
961 Conditions.push_back(Br);
962 addPhiValues(
Flow, Entry);
963 DT->changeImmediateDominator(Entry,
Flow);
966 while (!Order.empty() && !Visited.count(LoopEnd) &&
967 dominatesPredicates(Entry, Order.back())) {
968 handleLoops(
false, LoopEnd);
971 changeExit(PrevNode, Next,
false);
976void StructurizeCFG::handleLoops(
bool ExitUseAllowed,
981 if (!
Loops.count(LoopStart)) {
982 wireFlow(ExitUseAllowed, LoopEnd);
986 if (!isPredictableTrue(
Node))
987 LoopStart = needPrefix(
true);
990 wireFlow(
false, LoopEnd);
991 while (!Visited.count(LoopEnd)) {
992 handleLoops(
false, LoopEnd);
998 LoopEnd = needPrefix(
false);
999 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
1002 LoopConds.push_back(Br);
1003 addPhiValues(LoopEnd, LoopStart);
1009void StructurizeCFG::createFlow() {
1011 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
1013 AffectedPhis.clear();
1014 DeletedPhis.clear();
1022 while (!Order.empty()) {
1023 handleLoops(EntryDominatesExit,
nullptr);
1027 changeExit(PrevNode, Exit, EntryDominatesExit);
1029 assert(EntryDominatesExit);
1034void StructurizeCFG::rebuildSSA() {
1036 for (
BasicBlock *BB : ParentRegion->blocks())
1038 bool Initialized =
false;
1043 if (
User->getParent() == BB) {
1045 }
else if (
PHINode *UserPN = dyn_cast<PHINode>(
User)) {
1046 if (UserPN->getIncomingBlock(U) == BB)
1050 if (DT->dominates(&
I,
User))
1068 bool SubRegionsAreUniform =
true;
1070 unsigned ConditionalDirectChildren = 0;
1072 for (
auto *
E : R->elements()) {
1073 if (!
E->isSubRegion()) {
1074 auto Br = dyn_cast<BranchInst>(
E->getEntry()->getTerminator());
1082 ConditionalDirectChildren++;
1085 <<
" has uniform terminator\n");
1097 auto Br = dyn_cast<BranchInst>(BB->getTerminator());
1103 if (!RelaxedUniformRegions)
1106 SubRegionsAreUniform =
false;
1119 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1122void StructurizeCFG::init(
Region *R) {
1134 if (
R->isTopLevelRegion())
1143 unsigned UniformMDKindID =
1144 R->getEntry()->getContext().getMDKindID(
"structurizecfg.uniform");
1147 LLVM_DEBUG(
dbgs() <<
"Skipping region with uniform control flow: " << *R
1156 if (
E->isSubRegion())
1159 if (
Instruction *Term =
E->getEntry()->getTerminator())
1160 Term->setMetadata(UniformMDKindID, MD);
1170 if (
R->isTopLevelRegion())
1175 Func =
R->getEntry()->getParent();
1181 insertConditions(
false);
1182 insertConditions(
true);
1184 simplifyConditions();
1185 simplifyAffectedPhis();
1191 DeletedPhis.clear();
1205 return new StructurizeCFGLegacyPass(SkipUniformRegions);
1209 Regions.push_back(&R);
1210 for (
const auto &
E : R)
1217 bool Changed =
false;
1220 std::vector<Region *> Regions;
1222 while (!Regions.empty()) {
1223 Region *R = Regions.back();
1224 StructurizeCFG SCFG;
1226 Changed |= SCFG.run(R, DT);
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
BlockVerifier::State From
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file implements a map that provides insertion order iteration.
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
static void addRegionIntoQueue(Region &R, std::deque< Region * > &RQ)
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static void addRegionIntoQueue(Region &R, std::vector< Region * > &Regions)
const char FlowBlockName[]
static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, const UniformityInfo &UA)
This defines the Use class.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional or Unconditional Branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getFalse(LLVMContext &Context)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
const BasicBlock & getEntryBlock() const
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const BasicBlock * getParent() const
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
This is an important class for using LLVM in a threaded context.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
This class implements a map that also provides access to all stored values in a deterministic order.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserve()
Mark an analysis as preserved.
The pass manager to schedule RegionPasses.
void replaceExit(BlockT *BB)
Replace the exit basic block of the region with the new basic block.
block_range blocks()
Returns a range view of the basic blocks in the region.
BlockT * getExit() const
Get the exit BasicBlock of the Region.
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
Analysis pass that exposes the RegionInfo for a function.
RegionT * getRegionFor(BlockT *BB) const
Get the smallest region that contains a BasicBlock.
A pass that runs on each Region in a function.
virtual bool runOnRegion(Region *R, RGPassManager &RGM)=0
Run the pass on a specific Region.
Helper class for SSA formation on a set of values defined in multiple blocks.
void RewriteUseAfterInsertions(Use &U)
Rewrite a use like RewriteUse but handling in-block definitions.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Implements a dense probed hash-table based set with some number of buckets stored inline.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getInt1Ty(LLVMContext &C)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
A nullable Value handle that is nullable.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
Specialization of filter_iterator_base for forward iteration only.
CRTP base class for adapting an iterator to a different type.
A range adaptor for a pair of iterators.
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
OneUse_match< T > m_OneUse(const T &SubPattern)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
APInt operator*(APInt a, uint64_t RHS)
auto successors(const MachineBasicBlock *BB)
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structizer will not structurize regions that only contain uniform...
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void initializeStructurizeCFGLegacyPassPass(PassRegistry &)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
auto predecessors(const MachineBasicBlock *BB)
iterator_range< typename GraphTraits< GraphType >::ChildIteratorType > children(const typename GraphTraits< GraphType >::NodeRef &G)
Value * invertCondition(Value *Condition)
Invert the given true/false value, possibly reusing an existing copy.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)