LLVM 23.0.0git
StructurizeCFG.cpp
Go to the documentation of this file.
1//===- StructurizeCFG.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/DenseMap.h"
12#include "llvm/ADT/MapVector.h"
14#include "llvm/ADT/STLExtras.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/Dominators.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
31#include "llvm/IR/Metadata.h"
32#include "llvm/IR/PassManager.h"
35#include "llvm/IR/Type.h"
36#include "llvm/IR/Use.h"
37#include "llvm/IR/Value.h"
38#include "llvm/IR/ValueHandle.h"
40#include "llvm/Pass.h"
43#include "llvm/Support/Debug.h"
51#include <cassert>
52#include <utility>
53
54using namespace llvm;
55using namespace llvm::PatternMatch;
56
57#define DEBUG_TYPE "structurizecfg"
58
59// The name for newly created blocks.
60const char FlowBlockName[] = "Flow";
61
62namespace {
63
64static cl::opt<bool> ForceSkipUniformRegions(
65 "structurizecfg-skip-uniform-regions",
67 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
68 cl::init(false));
69
70static cl::opt<bool>
71 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
72 cl::desc("Allow relaxed uniform region checks"),
73 cl::init(true));
74
75// Definition of the complex types used in this pass.
76
77using BBValuePair = std::pair<BasicBlock *, Value *>;
78
79using RNVector = SmallVector<RegionNode *, 8>;
80using BBVector = SmallVector<BasicBlock *, 8>;
81using BranchVector = SmallVector<CondBrInst *, 8>;
82using BBValueVector = SmallVector<BBValuePair, 2>;
83
85
87using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
88
89using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
90
91using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;
92
93class CondBranchWeights {
94 uint32_t TrueWeight;
95 uint32_t FalseWeight;
96
97 CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {}
98
99public:
100 static MaybeCondBranchWeights tryParse(const CondBrInst &Br) {
101 uint64_t T, F;
102 if (!extractBranchWeights(Br, T, F))
103 return std::nullopt;
104
105 return CondBranchWeights(T, F);
106 }
107
108 static void setMetadata(CondBrInst &Br,
109 const MaybeCondBranchWeights &Weights) {
110 if (!Weights)
111 return;
112 uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
113 setBranchWeights(Br, Arr, false);
114 }
115
116 CondBranchWeights invert() const {
117 return CondBranchWeights{FalseWeight, TrueWeight};
118 }
119};
120
121struct PredInfo {
122 Value *Pred;
123 MaybeCondBranchWeights Weights;
124};
125
129using Val2BBMap = DenseMap<Value *, BasicBlock *>;
130
131// A traits type that is intended to be used in graph algorithms. The graph
132// traits starts at an entry node, and traverses the RegionNodes that are in
133// the Nodes set.
134struct SubGraphTraits {
135 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
136 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
137
138 // This wraps a set of Nodes into the iterator, so we know which edges to
139 // filter out.
140 class WrappedSuccIterator
141 : public iterator_adaptor_base<
142 WrappedSuccIterator, BaseSuccIterator,
143 std::iterator_traits<BaseSuccIterator>::iterator_category, NodeRef,
144 std::ptrdiff_t, NodeRef *, NodeRef> {
146
147 public:
148 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
149 : iterator_adaptor_base(It), Nodes(Nodes) {}
150
151 NodeRef operator*() const { return {*I, Nodes}; }
152 };
153
154 static bool filterAll(const NodeRef &N) { return true; }
155 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
156
157 using ChildIteratorType =
158 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
159
160 static NodeRef getEntryNode(Region *R) {
161 return {GraphTraits<Region *>::getEntryNode(R), nullptr};
162 }
163
164 static NodeRef getEntryNode(NodeRef N) { return N; }
165
166 static iterator_range<ChildIteratorType> children(const NodeRef &N) {
167 auto *filter = N.second ? &filterSet : &filterAll;
168 return make_filter_range(
171 {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
172 filter);
173 }
174
175 static ChildIteratorType child_begin(const NodeRef &N) {
176 return children(N).begin();
177 }
178
179 static ChildIteratorType child_end(const NodeRef &N) {
180 return children(N).end();
181 }
182};
183
184/// Finds the nearest common dominator of a set of BasicBlocks.
185///
186/// For every BB you add to the set, you can specify whether we "remember" the
187/// block. When you get the common dominator, you can also ask whether it's one
188/// of the blocks we remembered.
189class NearestCommonDominator {
190 DominatorTree *DT;
191 BasicBlock *Result = nullptr;
192 bool ResultIsRemembered = false;
193
194 /// Add BB to the resulting dominator.
195 void addBlock(BasicBlock *BB, bool Remember) {
196 if (!Result) {
197 Result = BB;
198 ResultIsRemembered = Remember;
199 return;
200 }
201
202 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
203 if (NewResult != Result)
204 ResultIsRemembered = false;
205 if (NewResult == BB)
206 ResultIsRemembered |= Remember;
207 Result = NewResult;
208 }
209
210public:
211 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
212
213 void addBlock(BasicBlock *BB) {
214 addBlock(BB, /* Remember = */ false);
215 }
216
217 void addAndRememberBlock(BasicBlock *BB) {
218 addBlock(BB, /* Remember = */ true);
219 }
220
221 /// Get the nearest common dominator of all the BBs added via addBlock() and
222 /// addAndRememberBlock().
223 BasicBlock *result() { return Result; }
224
225 /// Is the BB returned by getResult() one of the blocks we added to the set
226 /// with addAndRememberBlock()?
227 bool resultIsRememberedBlock() { return ResultIsRemembered; }
228};
229
230/// Transforms the control flow graph on one single entry/exit region
231/// at a time.
232///
233/// After the transform all "If"/"Then"/"Else" style control flow looks like
234/// this:
235///
236/// \verbatim
237/// 1
238/// ||
239/// | |
240/// 2 |
241/// | /
242/// |/
243/// 3
244/// || Where:
245/// | | 1 = "If" block, calculates the condition
246/// 4 | 2 = "Then" subregion, runs if the condition is true
247/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
248/// |/ 4 = "Else" optional subregion, runs if the condition is false
249/// 5 5 = "End" block, also rejoins the control flow
250/// \endverbatim
251///
252/// Control flow is expressed as a branch where the true exit goes into the
253/// "Then"/"Else" region, while the false exit skips the region
254/// The condition for the optional "Else" region is expressed as a PHI node.
255/// The incoming values of the PHI node are true for the "If" edge and false
256/// for the "Then" edge.
257///
258/// Additionally to that even complicated loops look like this:
259///
260/// \verbatim
261/// 1
262/// ||
263/// | |
264/// 2 ^ Where:
265/// | / 1 = "Entry" block
266/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
267/// 3 3 = "Flow" block, with back edge to entry block
268/// |
269/// \endverbatim
270///
271/// The back edge of the "Flow" block is always on the false side of the branch
272/// while the true side continues the general flow. So the loop condition
273/// consist of a network of PHI nodes where the true incoming values expresses
274/// breaks and the false values expresses continue states.
275
276class StructurizeCFG {
277 Type *Boolean;
278 ConstantInt *BoolTrue;
279 ConstantInt *BoolFalse;
280 Value *BoolPoison;
282 Function *Func;
283 Region *ParentRegion;
284
285 UniformityInfo *UA = nullptr;
286 DominatorTree *DT;
287
289 BBSet Visited;
290 BBSet FlowSet;
291
292 SmallVector<WeakVH, 8> AffectedPhis;
293 BBPhiMap DeletedPhis;
294 BB2BBVecMap AddedPhis;
295
296 PredMap Predicates;
297 BranchVector Conditions;
298
299 BB2BBMap Loops;
300 PredMap LoopPreds;
301 BranchVector LoopConds;
302
303 Val2BBMap HoistedValues;
304
305 RegionNode *PrevNode;
306
307 void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
308
309 bool isHoistableInstruction(Instruction *I, BasicBlock *BB,
310 BasicBlock *HoistTo);
311
312 void orderNodes();
313
314 void analyzeLoops(RegionNode *N);
315
316 PredInfo buildCondition(CondBrInst *Term, unsigned Idx, bool Invert);
317
318 void gatherPredicates(RegionNode *N);
319
320 void collectInfos();
321
322 void insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter);
323
324 void simplifyConditions();
325
326 void delPhiValues(BasicBlock *From, BasicBlock *To);
327
328 void addPhiValues(BasicBlock *From, BasicBlock *To);
329
330 void findUndefBlocks(BasicBlock *PHIBlock,
331 const SmallPtrSet<BasicBlock *, 8> &Incomings,
332 SmallVector<BasicBlock *> &UndefBlks) const;
333
334 void mergeIfCompatible(EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A,
335 PHINode *B);
336
337 void setPhiValues();
338
339 void simplifyAffectedPhis();
340
341 void simplifyHoistedPhis();
342
343 DebugLoc killTerminator(BasicBlock *BB);
344
345 void changeExit(RegionNode *Node, BasicBlock *NewExit,
346 bool IncludeDominator);
347
348 BasicBlock *getNextFlow(BasicBlock *Dominator);
349
350 std::pair<BasicBlock *, DebugLoc> needPrefix(bool NeedEmpty);
351
352 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
353
354 void setPrevNode(BasicBlock *BB);
355
356 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
357
358 bool isPredictableTrue(RegionNode *Node);
359
360 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
361
362 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
363
364 void createFlow();
365
366 void rebuildSSA();
367
368public:
369 void init(Region *R);
370 bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI);
371 bool makeUniformRegion(Region *R, UniformityInfo &UA);
372};
373
374class StructurizeCFGLegacyPass : public RegionPass {
375 bool SkipUniformRegions;
376
377public:
378 static char ID;
379
380 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
381 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
382 if (ForceSkipUniformRegions.getNumOccurrences())
383 SkipUniformRegions = ForceSkipUniformRegions.getValue();
385 }
386
387 bool runOnRegion(Region *R, RGPassManager &RGM) override {
388 StructurizeCFG SCFG;
389 SCFG.init(R);
390 if (SkipUniformRegions) {
391 UniformityInfo &UA =
392 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
393 if (SCFG.makeUniformRegion(R, UA))
394 return false;
395 }
396 Function *F = R->getEntry()->getParent();
397 const TargetTransformInfo *TTI =
398 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
399 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
400 return SCFG.run(R, DT, TTI);
401 }
402
403 StringRef getPassName() const override { return "Structurize control flow"; }
404
405 void getAnalysisUsage(AnalysisUsage &AU) const override {
406 if (SkipUniformRegions)
411
414 }
415};
416
417} // end anonymous namespace
418
419char StructurizeCFGLegacyPass::ID = 0;
420
421INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
422 "Structurize the CFG", false, false)
426INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
427 "Structurize the CFG", false, false)
428
429/// Checks whether an instruction is zero cost instruction and checks if the
430/// operands are from different BB. If so, this instruction can be coalesced
431/// if its hoisted to predecessor block. So, this returns true.
432bool StructurizeCFG::isHoistableInstruction(Instruction *I, BasicBlock *BB,
433 BasicBlock *HoistTo) {
434 if (I->getParent() != BB || isa<PHINode>(I))
435 return false;
436
437 // If the instruction is not a zero cost instruction, return false.
438 auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
440 Cost.isValid()
441 ? Cost.getValue()
443 if (CostVal != 0)
444 return false;
445
446 // Check if all operands are available at the hoisting destination.
447 for (auto &Op : I->operands()) {
448 if (auto *OpI = dyn_cast<Instruction>(Op)) {
449 // Operand must dominate the hoisting destination.
450 if (!DT->dominates(OpI->getParent(), HoistTo))
451 return false;
452 }
453 }
454
455 return true;
456}
457
458/// Structurization can introduce unnecessary VGPR copies due to register
459/// coalescing interference. For example, if the Else block has a zero-cost
460/// instruction and the Then block modifies the VGPR value, only one value is
461/// live at a time in merge block before structurization. After structurization,
462/// the coalescer may incorrectly treat the Then value as live in the Else block
463/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
464///
465/// This function examines phi nodes whose incoming values are zero-cost
466/// instructions in the Else block. It identifies such values that can be safely
467/// hoisted and moves them to the nearest common dominator of Then and Else
468/// blocks. A follow-up function after setting PhiNodes assigns the hoisted
469/// value to poison phi nodes along the if→flow edge, aiding register coalescing
470/// and minimizing unnecessary live ranges.
471void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
472 BasicBlock *ThenBB) {
473
474 BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
475 BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB);
476
477 if (!ElseSucc || !CommonDominator)
478 return;
479 // Only hoist in a simple if-else: ThenBB must branch directly to ElseSucc
480 // and ElseSucc must have exactly 2 predecessors (ThenBB and ElseBB).
481 // simplifyHoistedPhis assumes this exact shape; with additional predecessors
482 // the hoisted value leaks into unrelated control-flow paths.
483 if (ThenBB->getSingleSuccessor() != ElseSucc ||
484 !ElseSucc->hasNPredecessors(2))
485 return;
486 Instruction *Term = CommonDominator->getTerminator();
487 for (PHINode &Phi : ElseSucc->phis()) {
488 Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
489 auto *Inst = dyn_cast<Instruction>(ElseVal);
490 if (!Inst || !isHoistableInstruction(Inst, ElseBB, CommonDominator))
491 continue;
492 Inst->removeFromParent();
493 Inst->insertInto(CommonDominator, Term->getIterator());
494 HoistedValues[Inst] = CommonDominator;
495 }
496}
497
498/// Build up the general order of nodes, by performing a topological sort of the
499/// parent region's nodes, while ensuring that there is no outer cycle node
500/// between any two inner cycle nodes.
501void StructurizeCFG::orderNodes() {
502 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
503 GraphTraits<Region *>::nodes_end(ParentRegion)));
504 if (Order.empty())
505 return;
506
507 SmallDenseSet<RegionNode *> Nodes;
508 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
509
510 // A list of range indices of SCCs in Order, to be processed.
512 unsigned I = 0, E = Order.size();
513 while (true) {
514 // Run through all the SCCs in the subgraph starting with Entry.
515 for (auto SCCI =
517 EntryNode);
518 !SCCI.isAtEnd(); ++SCCI) {
519 auto &SCC = *SCCI;
520
521 // An SCC up to the size of 2, can be reduced to an entry (the last node),
522 // and a possible additional node. Therefore, it is already in order, and
523 // there is no need to add it to the work-list.
524 unsigned Size = SCC.size();
525 if (Size > 2)
526 WorkList.emplace_back(I, I + Size);
527
528 // Add the SCC nodes to the Order array.
529 for (const auto &N : SCC) {
530 assert(I < E && "SCC size mismatch!");
531 Order[I++] = N.first;
532 }
533 }
534 assert(I == E && "SCC size mismatch!");
535
536 // If there are no more SCCs to order, then we are done.
537 if (WorkList.empty())
538 break;
539
540 std::tie(I, E) = WorkList.pop_back_val();
541
542 // Collect the set of nodes in the SCC's subgraph. These are only the
543 // possible child nodes; we do not add the entry (last node) otherwise we
544 // will have the same exact SCC all over again.
545 Nodes.clear();
546 Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
547
548 // Update the entry node.
549 EntryNode.first = Order[E - 1];
550 EntryNode.second = &Nodes;
551 }
552}
553
554/// Determine the end of the loops
555void StructurizeCFG::analyzeLoops(RegionNode *N) {
556 if (N->isSubRegion()) {
557 // Test for exit as back edge
558 BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
559 if (Visited.count(Exit))
560 Loops[Exit] = N->getEntry();
561
562 } else {
563 // Test for successors as back edge
564 // TODO: support other terminators other than branches.
565 BasicBlock *BB = N->getNodeAs<BasicBlock>();
567 for (BasicBlock *Succ : successors(BB))
568 if (Visited.count(Succ))
569 Loops[Succ] = BB;
570 }
571}
572
573/// Build the condition for one edge
574PredInfo StructurizeCFG::buildCondition(CondBrInst *Term, unsigned Idx,
575 bool Invert) {
576 Value *Cond = Term->getCondition();
577 auto Weights = CondBranchWeights::tryParse(*Term);
578 if (Idx != (unsigned)Invert) {
580 if (Weights)
581 Weights = Weights->invert();
582 }
583 return {Cond, Weights};
584}
585
586/// Analyze the predecessors of each block and build up predicates
587void StructurizeCFG::gatherPredicates(RegionNode *N) {
588 RegionInfo *RI = ParentRegion->getRegionInfo();
589 BasicBlock *BB = N->getEntry();
590 BBPredicates &Pred = Predicates[BB];
591 BBPredicates &LPred = LoopPreds[BB];
592
593 for (BasicBlock *P : predecessors(BB)) {
594 // Ignore it if it's a branch from outside into our region entry
595 if (!ParentRegion->contains(P))
596 continue;
597
598 Region *R = RI->getRegionFor(P);
599 if (R == ParentRegion) {
600 if (isa<UncondBrInst>(P->getTerminator())) {
601 if (Visited.count(P))
602 Pred[P] = {BoolTrue, std::nullopt};
603 else
604 LPred[P] = {BoolFalse, std::nullopt};
605 } else if (auto *CondBr = dyn_cast<CondBrInst>(P->getTerminator())) {
606 bool Idx = CondBr->getSuccessor(0) == BB ? 0 : 1;
607 if (Visited.count(P)) {
608 // Normal forward edge
609 // Try to treat Other like an ELSE block
610 BasicBlock *Other = CondBr->getSuccessor(!Idx);
611 if (Visited.count(Other) && !Loops.count(Other) &&
612 !Pred.count(Other) && !Pred.count(P)) {
613 hoistZeroCostElseBlockPhiValues(BB, Other);
614 Pred[Other] = {BoolFalse, std::nullopt};
615 Pred[P] = {BoolTrue, std::nullopt};
616 } else
617 Pred[P] = buildCondition(CondBr, Idx, false);
618 } else {
619 // Back edge
620 LPred[P] = buildCondition(CondBr, Idx, true);
621 }
622 }
623 } else {
624 // It's an exit from a sub region
625 while (R->getParent() != ParentRegion)
626 R = R->getParent();
627
628 // Edge from inside a subregion to its entry, ignore it
629 if (*R == *N)
630 continue;
631
632 BasicBlock *Entry = R->getEntry();
633 if (Visited.count(Entry))
634 Pred[Entry] = {BoolTrue, std::nullopt};
635 else
636 LPred[Entry] = {BoolFalse, std::nullopt};
637 }
638 }
639}
640
641/// Collect various loop and predicate infos
642void StructurizeCFG::collectInfos() {
643 // Reset predicate
644 Predicates.clear();
645
646 // and loop infos
647 Loops.clear();
648 LoopPreds.clear();
649
650 // Reset the visited nodes
651 Visited.clear();
652
653 for (RegionNode *RN : reverse(Order)) {
654 LLVM_DEBUG(dbgs() << "Visiting: "
655 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
656 << RN->getEntry()->getName() << "\n");
657
658 // Analyze all the conditions leading to a node
659 gatherPredicates(RN);
660
661 // Remember that we've seen this node
662 Visited.insert(RN->getEntry());
663
664 // Find the last back edges
665 analyzeLoops(RN);
666 }
667}
668
669/// Insert the missing branch conditions
670void StructurizeCFG::insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter) {
671 BranchVector &Conds = Loops ? LoopConds : Conditions;
672 Value *Default = Loops ? BoolTrue : BoolFalse;
673
674 for (CondBrInst *Term : Conds) {
675 BasicBlock *Parent = Term->getParent();
676 BasicBlock *SuccTrue = Term->getSuccessor(0);
677 BasicBlock *SuccFalse = Term->getSuccessor(1);
678
679 unsigned Variable = PhiInserter.AddVariable("", Boolean);
680 PhiInserter.AddAvailableValue(Variable, Loops ? SuccFalse : Parent,
681 Default);
682
683 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
684
685 NearestCommonDominator Dominator(DT);
686 Dominator.addBlock(Parent);
687
688 PredInfo ParentInfo{nullptr, std::nullopt};
689 for (auto [BB, PI] : Preds) {
690 if (BB == Parent) {
691 ParentInfo = PI;
692 break;
693 }
694 PhiInserter.AddAvailableValue(Variable, BB, PI.Pred);
695 Dominator.addAndRememberBlock(BB);
696 }
697
698 if (ParentInfo.Pred) {
699 Term->setCondition(ParentInfo.Pred);
700 CondBranchWeights::setMetadata(*Term, ParentInfo.Weights);
701 } else {
702 if (!Dominator.resultIsRememberedBlock())
703 PhiInserter.AddAvailableValue(Variable, Dominator.result(), Default);
704
705 PhiInserter.AddUse(Variable, &Term->getOperandUse(0));
706 }
707 }
708}
709
710/// Simplify any inverted conditions that were built by buildConditions.
711void StructurizeCFG::simplifyConditions() {
712 SmallVector<Instruction *> InstToErase;
713 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
714 auto &Preds = I.second;
715 for (auto [BB, PI] : Preds) {
716 Instruction *Inverted;
717 if (match(PI.Pred, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
718 !PI.Pred->use_empty()) {
719 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
720 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
721 PI.Pred->replaceAllUsesWith(InvertedCmp);
722 InstToErase.push_back(cast<Instruction>(PI.Pred));
723 }
724 }
725 }
726 }
727 for (auto *I : InstToErase)
728 I->eraseFromParent();
729}
730
731/// Remove all PHI values coming from "From" into "To" and remember
732/// them in DeletedPhis
733void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
734 PhiMap &Map = DeletedPhis[To];
735 for (PHINode &Phi : To->phis()) {
736 bool Recorded = false;
737 while (Phi.getBasicBlockIndex(From) != -1) {
738 Value *Deleted = Phi.removeIncomingValue(From, false);
739 Map[&Phi].push_back(std::make_pair(From, Deleted));
740 if (!Recorded) {
741 AffectedPhis.push_back(&Phi);
742 Recorded = true;
743 }
744 }
745 }
746}
747
748/// Add a dummy PHI value as soon as we knew the new predecessor
749void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
750 for (PHINode &Phi : To->phis()) {
751 Value *Poison = PoisonValue::get(Phi.getType());
752 Phi.addIncoming(Poison, From);
753 }
754 AddedPhis[To].push_back(From);
755}
756
757/// When we are reconstructing a PHI inside \p PHIBlock with incoming values
758/// from predecessors \p Incomings, we have a chance to mark the available value
759/// from some blocks as undefined. The function will find out all such blocks
760/// and return in \p UndefBlks.
761void StructurizeCFG::findUndefBlocks(
762 BasicBlock *PHIBlock, const SmallPtrSet<BasicBlock *, 8> &Incomings,
763 SmallVector<BasicBlock *> &UndefBlks) const {
764 // We may get a post-structured CFG like below:
765 //
766 // | P1
767 // |/
768 // F1
769 // |\
770 // | N
771 // |/
772 // F2
773 // |\
774 // | P2
775 // |/
776 // F3
777 // |\
778 // B
779 //
780 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
781 // of B before structurization. F1/F2/F3 are flow blocks inserted during
782 // structurization process. Block N is not a predecessor of B before
783 // structurization, but are placed between the predecessors(P1/P2) of B after
784 // structurization. This usually means that threads went to N never take the
785 // path N->F2->F3->B. For example, the threads take the branch F1->N may
786 // always take the branch F2->P2. So, when we are reconstructing a PHI
787 // originally in B, we can safely say the incoming value from N is undefined.
788 SmallPtrSet<BasicBlock *, 8> VisitedBlock;
789 SmallVector<BasicBlock *, 8> Stack;
790 if (PHIBlock == ParentRegion->getExit()) {
791 for (auto P : predecessors(PHIBlock)) {
792 if (ParentRegion->contains(P))
793 Stack.push_back(P);
794 }
795 } else {
796 append_range(Stack, predecessors(PHIBlock));
797 }
798
799 // Do a backward traversal over the CFG, and stop further searching if
800 // the block is not a Flow. If a block is neither flow block nor the
801 // incoming predecessor, then the incoming value from the block is
802 // undefined value for the PHI being reconstructed.
803 while (!Stack.empty()) {
804 BasicBlock *Current = Stack.pop_back_val();
805 if (!VisitedBlock.insert(Current).second)
806 continue;
807
808 if (FlowSet.contains(Current))
809 llvm::append_range(Stack, predecessors(Current));
810 else if (!Incomings.contains(Current))
811 UndefBlks.push_back(Current);
812 }
813}
814
815// If two phi nodes have compatible incoming values (for each
816// incoming block, either they have the same incoming value or only one phi
817// node has an incoming value), let them share the merged incoming values. The
818// merge process is guided by the equivalence information from \p PhiClasses.
819// The function will possibly update the incoming values of leader phi in
820// DeletedPhis.
821void StructurizeCFG::mergeIfCompatible(
822 EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, PHINode *B) {
823 auto ItA = PhiClasses.findLeader(PhiClasses.insert(A));
824 auto ItB = PhiClasses.findLeader(PhiClasses.insert(B));
825 // They are already in the same class, no work needed.
826 if (ItA == ItB)
827 return;
828
829 PHINode *LeaderA = *ItA;
830 PHINode *LeaderB = *ItB;
831 BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA];
832 BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB];
833
834 DenseMap<BasicBlock *, Value *> Mergeable(IncomingA.begin(), IncomingA.end());
835 for (auto [BB, V] : IncomingB) {
836 auto BBIt = Mergeable.find(BB);
837 if (BBIt != Mergeable.end() && BBIt->second != V)
838 return;
839 // Either IncomingA does not have this value or IncomingA has the same
840 // value.
841 Mergeable.insert({BB, V});
842 }
843
844 // Update the incoming value of leaderA.
845 IncomingA.assign(Mergeable.begin(), Mergeable.end());
846 PhiClasses.unionSets(ItA, ItB);
847}
848
849/// Add the real PHI value as soon as everything is set up
850void StructurizeCFG::setPhiValues() {
851 SmallVector<PHINode *, 8> InsertedPhis;
852 SSAUpdater Updater(&InsertedPhis);
853 DenseMap<BasicBlock *, SmallVector<BasicBlock *>> UndefBlksMap;
854
855 // Find phi nodes that have compatible incoming values (either they have
856 // the same value for the same block or only one phi node has an incoming
857 // value, see example below). We only search again the phi's that are
858 // referenced by another phi, which is the case we care about.
859 //
860 // For example (-- means no incoming value):
861 // phi1 : BB1:phi2 BB2:v BB3:--
862 // phi2: BB1:-- BB2:v BB3:w
863 //
864 // Then we can merge these incoming values and let phi1, phi2 use the
865 // same set of incoming values:
866 //
867 // phi1&phi2: BB1:phi2 BB2:v BB3:w
868 //
869 // By doing this, phi1 and phi2 would share more intermediate phi nodes.
870 // This would help reduce the number of phi nodes during SSA reconstruction
871 // and ultimately result in fewer COPY instructions.
872 //
873 // This should be correct, because if a phi node does not have incoming
874 // value from certain block, this means the block is not the predecessor
875 // of the parent block, so we actually don't care about its incoming value.
876 EquivalenceClasses<PHINode *> PhiClasses;
877 for (const auto &[To, From] : AddedPhis) {
878 auto OldPhiIt = DeletedPhis.find(To);
879 if (OldPhiIt == DeletedPhis.end())
880 continue;
881
882 PhiMap &BlkPhis = OldPhiIt->second;
883 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
884 SmallPtrSet<BasicBlock *, 8> Incomings;
885
886 // Get the undefined blocks shared by all the phi nodes.
887 if (!BlkPhis.empty()) {
888 Incomings.insert_range(llvm::make_first_range(BlkPhis.front().second));
889 findUndefBlocks(To, Incomings, UndefBlks);
890 }
891
892 for (const auto &[Phi, Incomings] : OldPhiIt->second) {
893 SmallVector<PHINode *> IncomingPHIs;
894 for (const auto &[BB, V] : Incomings) {
895 // First, for each phi, check whether it has incoming value which is
896 // another phi.
897 if (PHINode *P = dyn_cast<PHINode>(V))
898 IncomingPHIs.push_back(P);
899 }
900
901 for (auto *OtherPhi : IncomingPHIs) {
902 // Skip phis that are unrelated to the phi reconstruction for now.
903 if (!DeletedPhis.contains(OtherPhi->getParent()))
904 continue;
905 mergeIfCompatible(PhiClasses, Phi, OtherPhi);
906 }
907 }
908 }
909
910 for (const auto &AddedPhi : AddedPhis) {
911 BasicBlock *To = AddedPhi.first;
912 const BBVector &From = AddedPhi.second;
913
914 auto It = DeletedPhis.find(To);
915 if (It == DeletedPhis.end())
916 continue;
917
918 PhiMap &Map = It->second;
919 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
920 for (const auto &[Phi, Incoming] : Map) {
921 Value *Poison = PoisonValue::get(Phi->getType());
922 Updater.Initialize(Phi->getType(), "");
923 Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
924 Updater.AddAvailableValue(To, Poison);
925
926 // Use leader phi's incoming if there is.
927 auto LeaderIt = PhiClasses.findLeader(Phi);
928 bool UseIncomingOfLeader =
929 LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi;
930 const auto &IncomingMap =
931 UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt]
932 : Incoming;
933
934 SmallVector<BasicBlock *> ConstantPreds;
935 for (const auto &[BB, V] : IncomingMap) {
936 Updater.AddAvailableValue(BB, V);
937 if (isa<Constant>(V))
938 ConstantPreds.push_back(BB);
939 }
940
941 for (auto UB : UndefBlks) {
942 // If this undef block is dominated by any predecessor(before
943 // structurization) of reconstructed PHI with constant incoming value,
944 // don't mark the available value as undefined. Setting undef to such
945 // block will stop us from getting optimal phi insertion.
946 if (any_of(ConstantPreds,
947 [&](BasicBlock *CP) { return DT->dominates(CP, UB); }))
948 continue;
949 // Maybe already get a value through sharing with other phi nodes.
950 if (Updater.HasValueForBlock(UB))
951 continue;
952
953 Updater.AddAvailableValue(UB, Poison);
954 }
955
956 for (BasicBlock *FI : From)
957 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
958 AffectedPhis.push_back(Phi);
959 }
960 }
961
962 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
963}
964
965/// Updates PHI nodes after hoisted zero cost instructions by replacing poison
966/// entries on Flow nodes with the appropriate hoisted values
967void StructurizeCFG::simplifyHoistedPhis() {
968 for (WeakVH VH : AffectedPhis) {
969 PHINode *Phi = dyn_cast_or_null<PHINode>(VH);
970 if (!Phi || Phi->getNumIncomingValues() != 2)
971 continue;
972
973 for (int i = 0; i < 2; i++) {
974 Value *V = Phi->getIncomingValue(i);
975 auto BBIt = HoistedValues.find(V);
976
977 if (BBIt == HoistedValues.end())
978 continue;
979
980 Value *OtherV = Phi->getIncomingValue(!i);
981 PHINode *OtherPhi = dyn_cast<PHINode>(OtherV);
982 if (!OtherPhi)
983 continue;
984
985 int PoisonValBBIdx = -1;
986 for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
987 if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i)))
988 continue;
989 PoisonValBBIdx = i;
990 break;
991 }
992 if (PoisonValBBIdx == -1 ||
993 !DT->dominates(BBIt->second,
994 OtherPhi->getIncomingBlock(PoisonValBBIdx)))
995 continue;
996
997 OtherPhi->setIncomingValue(PoisonValBBIdx, V);
998 if (DT->dominates(OtherV, Phi))
999 Phi->setIncomingValue(i, OtherV);
1000 }
1001 }
1002}
1003
1004void StructurizeCFG::simplifyAffectedPhis() {
1005 bool Changed;
1006 do {
1007 Changed = false;
1008 SimplifyQuery Q(Func->getDataLayout());
1009 Q.DT = DT;
1010 // Setting CanUseUndef to true might extend value liveness, set it to false
1011 // to achieve better register pressure.
1012 Q.CanUseUndef = false;
1013 for (WeakVH VH : AffectedPhis) {
1014 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
1015 if (auto NewValue = simplifyInstruction(Phi, Q)) {
1016 Phi->replaceAllUsesWith(NewValue);
1017 Phi->eraseFromParent();
1018 Changed = true;
1019 }
1020 }
1021 }
1022 } while (Changed);
1023}
1024
1025/// Remove phi values from all successors and then remove the terminator.
1026DebugLoc StructurizeCFG::killTerminator(BasicBlock *BB) {
1028 if (!Term)
1029 return DebugLoc();
1030
1031 for (BasicBlock *Succ : successors(BB))
1032 delPhiValues(BB, Succ);
1033
1034 DebugLoc DL = Term->getDebugLoc();
1035 Term->eraseFromParent();
1036 return DL;
1037}
1038
1039/// Let node exit(s) point to NewExit
1040void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
1041 bool IncludeDominator) {
1042 if (Node->isSubRegion()) {
1043 Region *SubRegion = Node->getNodeAs<Region>();
1044 BasicBlock *OldExit = SubRegion->getExit();
1045 BasicBlock *Dominator = nullptr;
1046
1047 // Find all the edges from the sub region to the exit.
1048 // We use make_early_inc_range here because we modify BB's terminator.
1049 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) {
1050 if (!SubRegion->contains(BB))
1051 continue;
1052
1053 // Modify the edges to point to the new exit
1054 delPhiValues(BB, OldExit);
1055 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
1056 addPhiValues(BB, NewExit);
1057
1058 // Find the new dominator (if requested)
1059 if (IncludeDominator) {
1060 if (!Dominator)
1061 Dominator = BB;
1062 else
1063 Dominator = DT->findNearestCommonDominator(Dominator, BB);
1064 }
1065 }
1066
1067 // Change the dominator (if requested)
1068 if (Dominator)
1069 DT->changeImmediateDominator(NewExit, Dominator);
1070
1071 // Update the region info
1072 SubRegion->replaceExit(NewExit);
1073 } else {
1074 BasicBlock *BB = Node->getNodeAs<BasicBlock>();
1075 DebugLoc DL = killTerminator(BB);
1076 UncondBrInst *Br = UncondBrInst::Create(NewExit, BB);
1077 Br->setDebugLoc(DL);
1078 addPhiValues(BB, NewExit);
1079 if (IncludeDominator)
1080 DT->changeImmediateDominator(NewExit, BB);
1081 }
1082}
1083
1084/// Create a new flow node and update dominator tree and region info
1085BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
1086 LLVMContext &Context = Func->getContext();
1087 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
1088 Order.back()->getEntry();
1090 Func, Insert);
1091 FlowSet.insert(Flow);
1092 DT->addNewBlock(Flow, Dominator);
1093 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
1094 return Flow;
1095}
1096
1097/// Create a new or reuse the previous node as flow node. Returns a block and a
1098/// debug location to be used for new instructions in that block.
1099std::pair<BasicBlock *, DebugLoc> StructurizeCFG::needPrefix(bool NeedEmpty) {
1100 BasicBlock *Entry = PrevNode->getEntry();
1101
1102 if (!PrevNode->isSubRegion()) {
1103 DebugLoc DL = killTerminator(Entry);
1104 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
1105 return {Entry, DL};
1106 }
1107
1108 // create a new flow node
1109 BasicBlock *Flow = getNextFlow(Entry);
1110
1111 // and wire it up
1112 changeExit(PrevNode, Flow, true);
1113 PrevNode = ParentRegion->getBBNode(Flow);
1114 return {Flow, DebugLoc()};
1115}
1116
1117/// Returns the region exit if possible, otherwise just a new flow node
1118BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
1119 bool ExitUseAllowed) {
1120 if (!Order.empty() || !ExitUseAllowed)
1121 return getNextFlow(Flow);
1122
1123 BasicBlock *Exit = ParentRegion->getExit();
1124 DT->changeImmediateDominator(Exit, Flow);
1125 addPhiValues(Flow, Exit);
1126 return Exit;
1127}
1128
1129/// Set the previous node
1130void StructurizeCFG::setPrevNode(BasicBlock *BB) {
1131 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
1132 : nullptr;
1133}
1134
1135/// Does BB dominate all the predicates of Node?
1136bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
1137 BBPredicates &Preds = Predicates[Node->getEntry()];
1138 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, PredInfo> Pred) {
1139 return DT->dominates(BB, Pred.first);
1140 });
1141}
1142
1143/// Can we predict that this node will always be called?
1144bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
1145 BBPredicates &Preds = Predicates[Node->getEntry()];
1146 bool Dominated = false;
1147
1148 // Regionentry is always true
1149 if (!PrevNode)
1150 return true;
1151
1152 for (auto [BB, PI] : Preds) {
1153 if (PI.Pred != BoolTrue)
1154 return false;
1155
1156 if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
1157 Dominated = true;
1158 }
1159
1160 // TODO: The dominator check is too strict
1161 return Dominated;
1162}
1163
1164/// Take one node from the order vector and wire it up
1165void StructurizeCFG::wireFlow(bool ExitUseAllowed,
1166 BasicBlock *LoopEnd) {
1167 RegionNode *Node = Order.pop_back_val();
1168 Visited.insert(Node->getEntry());
1169
1170 if (isPredictableTrue(Node)) {
1171 // Just a linear flow
1172 if (PrevNode) {
1173 changeExit(PrevNode, Node->getEntry(), true);
1174 }
1175 PrevNode = Node;
1176 } else {
1177 // Insert extra prefix node (or reuse last one)
1178 auto [Flow, DL] = needPrefix(false);
1179
1180 // Insert extra postfix node (or use exit instead)
1181 BasicBlock *Entry = Node->getEntry();
1182 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
1183
1184 // let it point to entry and next block
1185 CondBrInst *Br = CondBrInst::Create(BoolPoison, Entry, Next, Flow);
1186 Br->setDebugLoc(DL);
1187 Conditions.push_back(Br);
1188 addPhiValues(Flow, Entry);
1189 DT->changeImmediateDominator(Entry, Flow);
1190
1191 PrevNode = Node;
1192 while (!Order.empty() && !Visited.count(LoopEnd) &&
1193 dominatesPredicates(Entry, Order.back())) {
1194 handleLoops(false, LoopEnd);
1195 }
1196
1197 changeExit(PrevNode, Next, false);
1198 setPrevNode(Next);
1199 }
1200}
1201
1202void StructurizeCFG::handleLoops(bool ExitUseAllowed,
1203 BasicBlock *LoopEnd) {
1204 RegionNode *Node = Order.back();
1205 BasicBlock *LoopStart = Node->getEntry();
1206
1207 if (!Loops.count(LoopStart)) {
1208 wireFlow(ExitUseAllowed, LoopEnd);
1209 return;
1210 }
1211
1212 if (!isPredictableTrue(Node))
1213 LoopStart = needPrefix(true).first;
1214
1215 LoopEnd = Loops[Node->getEntry()];
1216 wireFlow(false, LoopEnd);
1217 while (!Visited.count(LoopEnd)) {
1218 handleLoops(false, LoopEnd);
1219 }
1220
1221 assert(LoopStart != &LoopStart->getParent()->getEntryBlock());
1222
1223 // Create an extra loop end node
1224 DebugLoc DL;
1225 std::tie(LoopEnd, DL) = needPrefix(false);
1226 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
1227 CondBrInst *Br = CondBrInst::Create(BoolPoison, Next, LoopStart, LoopEnd);
1228 Br->setDebugLoc(DL);
1229 LoopConds.push_back(Br);
1230 addPhiValues(LoopEnd, LoopStart);
1231 setPrevNode(Next);
1232}
1233
1234/// After this function control flow looks like it should be, but
1235/// branches and PHI nodes only have undefined conditions.
1236void StructurizeCFG::createFlow() {
1237 BasicBlock *Exit = ParentRegion->getExit();
1238 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
1239
1240 AffectedPhis.clear();
1241 DeletedPhis.clear();
1242 AddedPhis.clear();
1243 Conditions.clear();
1244 LoopConds.clear();
1245
1246 PrevNode = nullptr;
1247 Visited.clear();
1248
1249 while (!Order.empty()) {
1250 handleLoops(EntryDominatesExit, nullptr);
1251 }
1252
1253 if (PrevNode)
1254 changeExit(PrevNode, Exit, EntryDominatesExit);
1255 else
1256 assert(EntryDominatesExit);
1257}
1258
1259/// Handle a rare case where the disintegrated nodes instructions
1260/// no longer dominate all their uses. Not sure if this is really necessary
1261void StructurizeCFG::rebuildSSA() {
1262 SSAUpdater Updater;
1263 for (BasicBlock *BB : ParentRegion->blocks())
1264 for (Instruction &I : *BB) {
1265 bool Initialized = false;
1266 // We may modify the use list as we iterate over it, so we use
1267 // make_early_inc_range.
1268 for (Use &U : llvm::make_early_inc_range(I.uses())) {
1269 Instruction *User = cast<Instruction>(U.getUser());
1270 if (User->getParent() == BB) {
1271 continue;
1272 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1273 if (UserPN->getIncomingBlock(U) == BB)
1274 continue;
1275 }
1276
1277 if (DT->dominates(&I, User))
1278 continue;
1279
1280 if (!Initialized) {
1281 Value *Poison = PoisonValue::get(I.getType());
1282 Updater.Initialize(I.getType(), "");
1283 Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
1284 Updater.AddAvailableValue(BB, &I);
1285 Initialized = true;
1286 }
1287 Updater.RewriteUseAfterInsertions(U);
1288 }
1289 }
1290}
1291
1292static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
1293 const UniformityInfo &UA) {
1294 // Bool for if all sub-regions are uniform.
1295 bool SubRegionsAreUniform = true;
1296 // Count of how many direct children are conditional.
1297 unsigned ConditionalDirectChildren = 0;
1298
1299 for (auto *E : R->elements()) {
1300 if (!E->isSubRegion()) {
1301 auto Br = dyn_cast<CondBrInst>(E->getEntry()->getTerminator());
1302 if (!Br)
1303 continue;
1304
1305 if (!UA.isUniform(Br))
1306 return false;
1307
1308 // One of our direct children is conditional.
1309 ConditionalDirectChildren++;
1310
1311 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
1312 << " has uniform terminator\n");
1313 } else {
1314 // Explicitly refuse to treat regions as uniform if they have non-uniform
1315 // subregions. We cannot rely on UniformityAnalysis for branches in
1316 // subregions because those branches may have been removed and re-created,
1317 // so we look for our metadata instead.
1318 //
1319 // Warning: It would be nice to treat regions as uniform based only on
1320 // their direct child basic blocks' terminators, regardless of whether
1321 // subregions are uniform or not. However, this requires a very careful
1322 // look at SIAnnotateControlFlow to make sure nothing breaks there.
1323 for (auto *BB : E->getNodeAs<Region>()->blocks()) {
1324 auto Br = dyn_cast<CondBrInst>(BB->getTerminator());
1325 if (!Br)
1326 continue;
1327
1328 if (!Br->getMetadata(UniformMDKindID)) {
1329 // Early exit if we cannot have relaxed uniform regions.
1330 if (!RelaxedUniformRegions)
1331 return false;
1332
1333 SubRegionsAreUniform = false;
1334 break;
1335 }
1336 }
1337 }
1338 }
1339
1340 // Our region is uniform if:
1341 // 1. All conditional branches that are direct children are uniform (checked
1342 // above).
1343 // 2. And either:
1344 // a. All sub-regions are uniform.
1345 // b. There is one or less conditional branches among the direct children.
1346 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1347}
1348
1349void StructurizeCFG::init(Region *R) {
1350 LLVMContext &Context = R->getEntry()->getContext();
1351
1352 Boolean = Type::getInt1Ty(Context);
1353 BoolTrue = ConstantInt::getTrue(Context);
1354 BoolFalse = ConstantInt::getFalse(Context);
1355 BoolPoison = PoisonValue::get(Boolean);
1356
1357 this->UA = nullptr;
1358}
1359
1360bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
1361 if (R->isTopLevelRegion())
1362 return false;
1363
1364 this->UA = &UA;
1365
1366 // TODO: We could probably be smarter here with how we handle sub-regions.
1367 // We currently rely on the fact that metadata is set by earlier invocations
1368 // of the pass on sub-regions, and that this metadata doesn't get lost --
1369 // but we shouldn't rely on metadata for correctness!
1370 unsigned UniformMDKindID =
1371 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1372
1373 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) {
1374 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1375 << '\n');
1376
1377 // Mark all direct child block terminators as having been treated as
1378 // uniform. To account for a possible future in which non-uniform
1379 // sub-regions are treated more cleverly, indirect children are not
1380 // marked as uniform.
1381 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
1382 for (RegionNode *E : R->elements()) {
1383 if (E->isSubRegion())
1384 continue;
1385
1386 if (Instruction *Term = E->getEntry()->getTerminator())
1387 Term->setMetadata(UniformMDKindID, MD);
1388 }
1389
1390 return true;
1391 }
1392 return false;
1393}
1394
1395/// Run the transformation for each region found
1396bool StructurizeCFG::run(Region *R, DominatorTree *DT,
1397 const TargetTransformInfo *TTI) {
1398 // CallBr and its corresponding direct target blocks are for now ignored by
1399 // this pass. This is not a limitation for the currently intended uses cases
1400 // of callbr in the AMDGPU backend.
1401 // Parent and child regions are not affected by this (current) restriction.
1402 // See `llvm/test/Transforms/StructurizeCFG/callbr.ll` for details.
1403 if (R->isTopLevelRegion() || isa<CallBrInst>(R->getEntry()->getTerminator()))
1404 return false;
1405
1406 this->DT = DT;
1407 this->TTI = TTI;
1408 Func = R->getEntry()->getParent();
1409
1410 ParentRegion = R;
1411
1412 orderNodes();
1413 collectInfos();
1414 createFlow();
1415
1416 SSAUpdaterBulk PhiInserter;
1417 insertConditions(false, PhiInserter);
1418 insertConditions(true, PhiInserter);
1419 PhiInserter.RewriteAndOptimizeAllUses(*DT);
1420
1421 setPhiValues();
1422 simplifyHoistedPhis();
1423 simplifyConditions();
1424 simplifyAffectedPhis();
1425 rebuildSSA();
1426
1427 // Cleanup
1428 Order.clear();
1429 Visited.clear();
1430 DeletedPhis.clear();
1431 AddedPhis.clear();
1432 Predicates.clear();
1433 Conditions.clear();
1434 Loops.clear();
1435 LoopPreds.clear();
1436 LoopConds.clear();
1437 FlowSet.clear();
1438
1439 return true;
1440}
1441
1442Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1443 return new StructurizeCFGLegacyPass(SkipUniformRegions);
1444}
1445
1446static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1447 Regions.push_back(&R);
1448 for (const auto &E : R)
1449 addRegionIntoQueue(*E, Regions);
1450}
1451
1453 : SkipUniformRegions(SkipUniformRegions_) {
1454 if (ForceSkipUniformRegions.getNumOccurrences())
1455 SkipUniformRegions = ForceSkipUniformRegions.getValue();
1456}
1457
1459 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1461 OS, MapClassName2PassName);
1462 if (SkipUniformRegions)
1463 OS << "<skip-uniform-regions>";
1464}
1465
1468
1469 bool Changed = false;
1471 auto &RI = AM.getResult<RegionInfoAnalysis>(F);
1473 UniformityInfo *UI = nullptr;
1474 if (SkipUniformRegions)
1476
1477 std::vector<Region *> Regions;
1478 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
1479 while (!Regions.empty()) {
1480 Region *R = Regions.back();
1481 Regions.pop_back();
1482
1483 StructurizeCFG SCFG;
1484 SCFG.init(R);
1485
1486 if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) {
1487 Changed = true; // May have added metadata.
1488 continue;
1489 }
1490
1491 Changed |= SCFG.run(R, DT, TTI);
1492 }
1493 if (!Changed)
1494 return PreservedAnalyses::all();
1497 return PA;
1498}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DenseMap< BasicBlock *, Instruction * > BBPredicates
This file defines the DenseMap class.
@ Default
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
Hexagon Hardware Loops
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
#define T
#define P(N)
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static void addRegionIntoQueue(Region &R, std::deque< Region * > &RQ)
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
Annotate SI Control Flow
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
SmallDenseMap< const PHINode *, PhiInfo, 16 > PhiMap
const char FlowBlockName[]
static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, const UniformityInfo &UA)
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
LLVM IR instance of the generic uniformity analysis.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:518
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Conditional Branch instruction.
static CondBrInst * Create(Value *Cond, BasicBlock *IfTrue, BasicBlock *IfFalse, InsertPosition InsertBefore=nullptr)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
A debug info location.
Definition DebugLoc.h:123
bool empty() const
Definition DenseMap.h:109
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:316
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
const ECValue & insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists...
member_iterator member_end() const
member_iterator findLeader(const ElemTy &V) const
findLeader - Given a value in the set, return a member iterator for the equivalence class it is in.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
const BasicBlock & getEntryBlock() const
Definition Function.h:809
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:362
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool isUniform(ConstValueRefT V) const
Whether V is uniform/non-divergent.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition Pass.cpp:112
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
The pass manager to schedule RegionPasses.
Definition RegionPass.h:88
void replaceExit(BlockT *BB)
Replace the exit basic block of the region with the new basic block.
block_range blocks()
Returns a range view of the basic blocks in the region.
Definition RegionInfo.h:620
BlockT * getExit() const
Get the exit BasicBlock of the Region.
Definition RegionInfo.h:357
RegionNodeT * getBBNode(BlockT *BB) const
Get the BasicBlock RegionNode for a BasicBlock.
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionInfoT * getRegionInfo() const
Return the RegionInfo object, that belongs to this Region.
Definition RegionInfo.h:424
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
Analysis pass that exposes the RegionInfo for a function.
Definition RegionInfo.h:965
RegionT * getRegionFor(BlockT *BB) const
Get the smallest region that contains a BasicBlock.
bool isSubRegion() const
Is this RegionNode a subregion?
Definition RegionInfo.h:186
BlockT * getEntry() const
Get the entry BasicBlock of this RegionNode.
Definition RegionInfo.h:172
A pass that runs on each Region in a function.
Definition RegionPass.h:33
Helper class for SSA formation on a set of values defined in multiple blocks.
LLVM_ABI unsigned AddVariable(StringRef Name, Type *Ty)
Add a new variable to the SSA rewriter.
LLVM_ABI void AddAvailableValue(unsigned Var, BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
LLVM_ABI void AddUse(unsigned Var, Use *U)
Record a use of the symbolic value.
LLVM_ABI_FOR_TEST void RewriteAndOptimizeAllUses(DominatorTree &DT)
Rewrite all uses and simplify the inserted PHI nodes.
void RewriteUseAfterInsertions(Use &U)
Rewrite a use like RewriteUse but handling in-block definitions.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition DenseSet.h:291
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_Latency
The latency of instruction.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static UncondBrInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Analysis pass which computes UniformityInfo.
Legacy analysis pass which computes a CycleInfo.
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
LLVM Value Representation.
Definition Value.h:75
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
CRTP base class for adapting an iterator to a different type.
Definition iterator.h:240
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
static scc_iterator begin(const GraphT &G)
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Changed
@ Entry
Definition COFF.h:862
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
GenericUniformityInfo< SSAContext > UniformityInfo
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
unsigned char Boolean
Definition ConvertUTF.h:132
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
APInt operator*(APInt a, uint64_t RHS)
Definition APInt.h:2250
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
LLVM_ABI Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structizer will not structurize regions that only contain uniform...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1152
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:552
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
iterator_range< typename GraphTraits< GraphType >::ChildIteratorType > children(const typename GraphTraits< GraphType >::NodeRef &G)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Value * invertCondition(Value *Condition)
Invert the given true/false value, possibly reusing an existing copy.
Definition Local.cpp:3989
filter_iterator_impl< WrappedIteratorT, PredicateT, detail::fwd_or_bidi_tag< WrappedIteratorT > > filter_iterator
Defines filter_iterator to a suitable specialization of filter_iterator_impl, based on the underlying...
Definition STLExtras.h:539
LLVM_ABI void initializeStructurizeCFGLegacyPassPass(PassRegistry &)
#define N
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
StructurizeCFGPass(bool SkipUniformRegions=false)