LLVM 22.0.0git
StructurizeCFG.cpp
Go to the documentation of this file.
1//===- StructurizeCFG.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/DenseMap.h"
12#include "llvm/ADT/MapVector.h"
14#include "llvm/ADT/STLExtras.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/Dominators.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
31#include "llvm/IR/Metadata.h"
32#include "llvm/IR/PassManager.h"
35#include "llvm/IR/Type.h"
36#include "llvm/IR/Use.h"
37#include "llvm/IR/Value.h"
38#include "llvm/IR/ValueHandle.h"
40#include "llvm/Pass.h"
43#include "llvm/Support/Debug.h"
50#include <cassert>
51#include <utility>
52
53using namespace llvm;
54using namespace llvm::PatternMatch;
55
56#define DEBUG_TYPE "structurizecfg"
57
58// The name for newly created blocks.
59const char FlowBlockName[] = "Flow";
60
61namespace {
62
63static cl::opt<bool> ForceSkipUniformRegions(
64 "structurizecfg-skip-uniform-regions",
66 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
67 cl::init(false));
68
69static cl::opt<bool>
70 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
71 cl::desc("Allow relaxed uniform region checks"),
72 cl::init(true));
73
74// Definition of the complex types used in this pass.
75
76using BBValuePair = std::pair<BasicBlock *, Value *>;
77
78using RNVector = SmallVector<RegionNode *, 8>;
79using BBVector = SmallVector<BasicBlock *, 8>;
80using BranchVector = SmallVector<BranchInst *, 8>;
81using BBValueVector = SmallVector<BBValuePair, 2>;
82
84
86using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
87
88using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
89
90using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;
91
92class CondBranchWeights {
93 uint32_t TrueWeight;
94 uint32_t FalseWeight;
95
96 CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {}
97
98public:
99 static MaybeCondBranchWeights tryParse(const BranchInst &Br) {
100 assert(Br.isConditional());
101
102 uint64_t T, F;
103 if (!extractBranchWeights(Br, T, F))
104 return std::nullopt;
105
106 return CondBranchWeights(T, F);
107 }
108
109 static void setMetadata(BranchInst &Br,
110 const MaybeCondBranchWeights &Weights) {
111 assert(Br.isConditional());
112 if (!Weights)
113 return;
114 uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
115 setBranchWeights(Br, Arr, false);
116 }
117
118 CondBranchWeights invert() const {
119 return CondBranchWeights{FalseWeight, TrueWeight};
120 }
121};
122
123struct PredInfo {
124 Value *Pred;
125 MaybeCondBranchWeights Weights;
126};
127
131using Val2BBMap = DenseMap<Value *, BasicBlock *>;
132
133// A traits type that is intended to be used in graph algorithms. The graph
134// traits starts at an entry node, and traverses the RegionNodes that are in
135// the Nodes set.
136struct SubGraphTraits {
137 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
138 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
139
140 // This wraps a set of Nodes into the iterator, so we know which edges to
141 // filter out.
142 class WrappedSuccIterator
143 : public iterator_adaptor_base<
144 WrappedSuccIterator, BaseSuccIterator,
145 typename std::iterator_traits<BaseSuccIterator>::iterator_category,
146 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
148
149 public:
150 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
151 : iterator_adaptor_base(It), Nodes(Nodes) {}
152
153 NodeRef operator*() const { return {*I, Nodes}; }
154 };
155
156 static bool filterAll(const NodeRef &N) { return true; }
157 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
158
159 using ChildIteratorType =
160 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
161
162 static NodeRef getEntryNode(Region *R) {
163 return {GraphTraits<Region *>::getEntryNode(R), nullptr};
164 }
165
166 static NodeRef getEntryNode(NodeRef N) { return N; }
167
168 static iterator_range<ChildIteratorType> children(const NodeRef &N) {
169 auto *filter = N.second ? &filterSet : &filterAll;
170 return make_filter_range(
173 {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
174 filter);
175 }
176
177 static ChildIteratorType child_begin(const NodeRef &N) {
178 return children(N).begin();
179 }
180
181 static ChildIteratorType child_end(const NodeRef &N) {
182 return children(N).end();
183 }
184};
185
186/// Finds the nearest common dominator of a set of BasicBlocks.
187///
188/// For every BB you add to the set, you can specify whether we "remember" the
189/// block. When you get the common dominator, you can also ask whether it's one
190/// of the blocks we remembered.
191class NearestCommonDominator {
192 DominatorTree *DT;
193 BasicBlock *Result = nullptr;
194 bool ResultIsRemembered = false;
195
196 /// Add BB to the resulting dominator.
197 void addBlock(BasicBlock *BB, bool Remember) {
198 if (!Result) {
199 Result = BB;
200 ResultIsRemembered = Remember;
201 return;
202 }
203
204 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
205 if (NewResult != Result)
206 ResultIsRemembered = false;
207 if (NewResult == BB)
208 ResultIsRemembered |= Remember;
209 Result = NewResult;
210 }
211
212public:
213 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
214
215 void addBlock(BasicBlock *BB) {
216 addBlock(BB, /* Remember = */ false);
217 }
218
219 void addAndRememberBlock(BasicBlock *BB) {
220 addBlock(BB, /* Remember = */ true);
221 }
222
223 /// Get the nearest common dominator of all the BBs added via addBlock() and
224 /// addAndRememberBlock().
225 BasicBlock *result() { return Result; }
226
227 /// Is the BB returned by getResult() one of the blocks we added to the set
228 /// with addAndRememberBlock()?
229 bool resultIsRememberedBlock() { return ResultIsRemembered; }
230};
231
232/// Transforms the control flow graph on one single entry/exit region
233/// at a time.
234///
235/// After the transform all "If"/"Then"/"Else" style control flow looks like
236/// this:
237///
238/// \verbatim
239/// 1
240/// ||
241/// | |
242/// 2 |
243/// | /
244/// |/
245/// 3
246/// || Where:
247/// | | 1 = "If" block, calculates the condition
248/// 4 | 2 = "Then" subregion, runs if the condition is true
249/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
250/// |/ 4 = "Else" optional subregion, runs if the condition is false
251/// 5 5 = "End" block, also rejoins the control flow
252/// \endverbatim
253///
254/// Control flow is expressed as a branch where the true exit goes into the
255/// "Then"/"Else" region, while the false exit skips the region
256/// The condition for the optional "Else" region is expressed as a PHI node.
257/// The incoming values of the PHI node are true for the "If" edge and false
258/// for the "Then" edge.
259///
260/// Additionally to that even complicated loops look like this:
261///
262/// \verbatim
263/// 1
264/// ||
265/// | |
266/// 2 ^ Where:
267/// | / 1 = "Entry" block
268/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
269/// 3 3 = "Flow" block, with back edge to entry block
270/// |
271/// \endverbatim
272///
273/// The back edge of the "Flow" block is always on the false side of the branch
274/// while the true side continues the general flow. So the loop condition
275/// consist of a network of PHI nodes where the true incoming values expresses
276/// breaks and the false values expresses continue states.
277
278class StructurizeCFG {
279 Type *Boolean;
280 ConstantInt *BoolTrue;
281 ConstantInt *BoolFalse;
282 Value *BoolPoison;
284 Function *Func;
285 Region *ParentRegion;
286
287 UniformityInfo *UA = nullptr;
288 DominatorTree *DT;
289
291 BBSet Visited;
292 BBSet FlowSet;
293
294 SmallVector<WeakVH, 8> AffectedPhis;
295 BBPhiMap DeletedPhis;
296 BB2BBVecMap AddedPhis;
297
298 PredMap Predicates;
299 BranchVector Conditions;
300
301 BB2BBMap Loops;
302 PredMap LoopPreds;
303 BranchVector LoopConds;
304
305 Val2BBMap HoistedValues;
306
307 RegionNode *PrevNode;
308
309 void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
310
311 void orderNodes();
312
313 void analyzeLoops(RegionNode *N);
314
315 PredInfo buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
316
317 void gatherPredicates(RegionNode *N);
318
319 void collectInfos();
320
321 void insertConditions(bool Loops);
322
323 void simplifyConditions();
324
325 void delPhiValues(BasicBlock *From, BasicBlock *To);
326
327 void addPhiValues(BasicBlock *From, BasicBlock *To);
328
329 void findUndefBlocks(BasicBlock *PHIBlock,
330 const SmallPtrSet<BasicBlock *, 8> &Incomings,
331 SmallVector<BasicBlock *> &UndefBlks) const;
332
333 void mergeIfCompatible(EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A,
334 PHINode *B);
335
336 void setPhiValues();
337
338 void simplifyAffectedPhis();
339
340 void simplifyHoistedPhis();
341
342 DebugLoc killTerminator(BasicBlock *BB);
343
344 void changeExit(RegionNode *Node, BasicBlock *NewExit,
345 bool IncludeDominator);
346
347 BasicBlock *getNextFlow(BasicBlock *Dominator);
348
349 std::pair<BasicBlock *, DebugLoc> needPrefix(bool NeedEmpty);
350
351 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
352
353 void setPrevNode(BasicBlock *BB);
354
355 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
356
357 bool isPredictableTrue(RegionNode *Node);
358
359 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
360
361 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
362
363 void createFlow();
364
365 void rebuildSSA();
366
367public:
368 void init(Region *R);
369 bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI);
370 bool makeUniformRegion(Region *R, UniformityInfo &UA);
371};
372
373class StructurizeCFGLegacyPass : public RegionPass {
374 bool SkipUniformRegions;
375
376public:
377 static char ID;
378
379 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
380 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
381 if (ForceSkipUniformRegions.getNumOccurrences())
382 SkipUniformRegions = ForceSkipUniformRegions.getValue();
384 }
385
386 bool runOnRegion(Region *R, RGPassManager &RGM) override {
387 StructurizeCFG SCFG;
388 SCFG.init(R);
389 if (SkipUniformRegions) {
390 UniformityInfo &UA =
391 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
392 if (SCFG.makeUniformRegion(R, UA))
393 return false;
394 }
395 Function *F = R->getEntry()->getParent();
396 const TargetTransformInfo *TTI =
397 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
398 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
399 return SCFG.run(R, DT, TTI);
400 }
401
402 StringRef getPassName() const override { return "Structurize control flow"; }
403
404 void getAnalysisUsage(AnalysisUsage &AU) const override {
405 if (SkipUniformRegions)
410
413 }
414};
415
416} // end anonymous namespace
417
418/// Checks whether an instruction is zero cost instruction and checks if the
419/// operands are from different BB. If so, this instruction can be coalesced
420/// if its hoisted to predecessor block. So, this returns true.
422 const TargetTransformInfo *TTI) {
423 if (I->getParent() != BB || isa<PHINode>(I))
424 return false;
425
426 // If the instruction is not a zero cost instruction, return false.
427 auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
429 Cost.isValid()
430 ? Cost.getValue()
432 if (CostVal != 0)
433 return false;
434
435 // Check if any operands are instructions defined in the same block.
436 for (auto &Op : I->operands()) {
437 if (auto *OpI = dyn_cast<Instruction>(Op)) {
438 if (OpI->getParent() == BB)
439 return false;
440 }
441 }
442
443 return true;
444}
445
446char StructurizeCFGLegacyPass::ID = 0;
447
448INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
449 "Structurize the CFG", false, false)
453INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
454 "Structurize the CFG", false, false)
455
456/// Structurization can introduce unnecessary VGPR copies due to register
457/// coalescing interference. For example, if the Else block has a zero-cost
458/// instruction and the Then block modifies the VGPR value, only one value is
459/// live at a time in merge block before structurization. After structurization,
460/// the coalescer may incorrectly treat the Then value as live in the Else block
461/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
462///
463/// This function examines phi nodes whose incoming values are zero-cost
464/// instructions in the Else block. It identifies such values that can be safely
465/// hoisted and moves them to the nearest common dominator of Then and Else
466/// blocks. A follow-up function after setting PhiNodes assigns the hoisted
467/// value to poison phi nodes along the if→flow edge, aiding register coalescing
468/// and minimizing unnecessary live ranges.
469void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
470 BasicBlock *ThenBB) {
471
472 BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
473 BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB);
474
475 if (!ElseSucc || !CommonDominator)
476 return;
477 Instruction *Term = CommonDominator->getTerminator();
478 for (PHINode &Phi : ElseSucc->phis()) {
479 Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
480 auto *Inst = dyn_cast<Instruction>(ElseVal);
481 if (!Inst || !isHoistableInstruction(Inst, ElseBB, TTI))
482 continue;
483 Inst->removeFromParent();
484 Inst->insertInto(CommonDominator, Term->getIterator());
485 HoistedValues[Inst] = CommonDominator;
486 }
487}
488
489/// Build up the general order of nodes, by performing a topological sort of the
490/// parent region's nodes, while ensuring that there is no outer cycle node
491/// between any two inner cycle nodes.
492void StructurizeCFG::orderNodes() {
493 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
494 GraphTraits<Region *>::nodes_end(ParentRegion)));
495 if (Order.empty())
496 return;
497
498 SmallDenseSet<RegionNode *> Nodes;
499 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
500
501 // A list of range indices of SCCs in Order, to be processed.
503 unsigned I = 0, E = Order.size();
504 while (true) {
505 // Run through all the SCCs in the subgraph starting with Entry.
506 for (auto SCCI =
508 EntryNode);
509 !SCCI.isAtEnd(); ++SCCI) {
510 auto &SCC = *SCCI;
511
512 // An SCC up to the size of 2, can be reduced to an entry (the last node),
513 // and a possible additional node. Therefore, it is already in order, and
514 // there is no need to add it to the work-list.
515 unsigned Size = SCC.size();
516 if (Size > 2)
517 WorkList.emplace_back(I, I + Size);
518
519 // Add the SCC nodes to the Order array.
520 for (const auto &N : SCC) {
521 assert(I < E && "SCC size mismatch!");
522 Order[I++] = N.first;
523 }
524 }
525 assert(I == E && "SCC size mismatch!");
526
527 // If there are no more SCCs to order, then we are done.
528 if (WorkList.empty())
529 break;
530
531 std::tie(I, E) = WorkList.pop_back_val();
532
533 // Collect the set of nodes in the SCC's subgraph. These are only the
534 // possible child nodes; we do not add the entry (last node) otherwise we
535 // will have the same exact SCC all over again.
536 Nodes.clear();
537 Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
538
539 // Update the entry node.
540 EntryNode.first = Order[E - 1];
541 EntryNode.second = &Nodes;
542 }
543}
544
545/// Determine the end of the loops
546void StructurizeCFG::analyzeLoops(RegionNode *N) {
547 if (N->isSubRegion()) {
548 // Test for exit as back edge
549 BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
550 if (Visited.count(Exit))
551 Loops[Exit] = N->getEntry();
552
553 } else {
554 // Test for successors as back edge
555 BasicBlock *BB = N->getNodeAs<BasicBlock>();
556 BranchInst *Term = cast<BranchInst>(BB->getTerminator());
557
558 for (BasicBlock *Succ : Term->successors())
559 if (Visited.count(Succ))
560 Loops[Succ] = BB;
561 }
562}
563
564/// Build the condition for one edge
565PredInfo StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
566 bool Invert) {
567 Value *Cond = Invert ? BoolFalse : BoolTrue;
568 MaybeCondBranchWeights Weights;
569
570 if (Term->isConditional()) {
571 Cond = Term->getCondition();
572 Weights = CondBranchWeights::tryParse(*Term);
573
574 if (Idx != (unsigned)Invert) {
576 if (Weights)
577 Weights = Weights->invert();
578 }
579 }
580 return {Cond, Weights};
581}
582
583/// Analyze the predecessors of each block and build up predicates
584void StructurizeCFG::gatherPredicates(RegionNode *N) {
585 RegionInfo *RI = ParentRegion->getRegionInfo();
586 BasicBlock *BB = N->getEntry();
587 BBPredicates &Pred = Predicates[BB];
588 BBPredicates &LPred = LoopPreds[BB];
589
590 for (BasicBlock *P : predecessors(BB)) {
591 // Ignore it if it's a branch from outside into our region entry
592 if (!ParentRegion->contains(P))
593 continue;
594
595 Region *R = RI->getRegionFor(P);
596 if (R == ParentRegion) {
597 // It's a top level block in our region
598 BranchInst *Term = cast<BranchInst>(P->getTerminator());
599 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
600 BasicBlock *Succ = Term->getSuccessor(i);
601 if (Succ != BB)
602 continue;
603
604 if (Visited.count(P)) {
605 // Normal forward edge
606 if (Term->isConditional()) {
607 // Try to treat it like an ELSE block
608 BasicBlock *Other = Term->getSuccessor(!i);
609 if (Visited.count(Other) && !Loops.count(Other) &&
610 !Pred.count(Other) && !Pred.count(P)) {
611 hoistZeroCostElseBlockPhiValues(Succ, Other);
612 Pred[Other] = {BoolFalse, std::nullopt};
613 Pred[P] = {BoolTrue, std::nullopt};
614 continue;
615 }
616 }
617 Pred[P] = buildCondition(Term, i, false);
618 } else {
619 // Back edge
620 LPred[P] = buildCondition(Term, i, true);
621 }
622 }
623 } else {
624 // It's an exit from a sub region
625 while (R->getParent() != ParentRegion)
626 R = R->getParent();
627
628 // Edge from inside a subregion to its entry, ignore it
629 if (*R == *N)
630 continue;
631
632 BasicBlock *Entry = R->getEntry();
633 if (Visited.count(Entry))
634 Pred[Entry] = {BoolTrue, std::nullopt};
635 else
636 LPred[Entry] = {BoolFalse, std::nullopt};
637 }
638 }
639}
640
641/// Collect various loop and predicate infos
642void StructurizeCFG::collectInfos() {
643 // Reset predicate
644 Predicates.clear();
645
646 // and loop infos
647 Loops.clear();
648 LoopPreds.clear();
649
650 // Reset the visited nodes
651 Visited.clear();
652
653 for (RegionNode *RN : reverse(Order)) {
654 LLVM_DEBUG(dbgs() << "Visiting: "
655 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
656 << RN->getEntry()->getName() << "\n");
657
658 // Analyze all the conditions leading to a node
659 gatherPredicates(RN);
660
661 // Remember that we've seen this node
662 Visited.insert(RN->getEntry());
663
664 // Find the last back edges
665 analyzeLoops(RN);
666 }
667}
668
669/// Insert the missing branch conditions
670void StructurizeCFG::insertConditions(bool Loops) {
671 BranchVector &Conds = Loops ? LoopConds : Conditions;
672 Value *Default = Loops ? BoolTrue : BoolFalse;
673 SSAUpdater PhiInserter;
674
675 for (BranchInst *Term : Conds) {
676 assert(Term->isConditional());
677
678 BasicBlock *Parent = Term->getParent();
679 BasicBlock *SuccTrue = Term->getSuccessor(0);
680 BasicBlock *SuccFalse = Term->getSuccessor(1);
681
682 PhiInserter.Initialize(Boolean, "");
683 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
684
685 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
686
687 NearestCommonDominator Dominator(DT);
688 Dominator.addBlock(Parent);
689
690 PredInfo ParentInfo{nullptr, std::nullopt};
691 for (auto [BB, PI] : Preds) {
692 if (BB == Parent) {
693 ParentInfo = PI;
694 break;
695 }
696 PhiInserter.AddAvailableValue(BB, PI.Pred);
697 Dominator.addAndRememberBlock(BB);
698 }
699
700 if (ParentInfo.Pred) {
701 Term->setCondition(ParentInfo.Pred);
702 CondBranchWeights::setMetadata(*Term, ParentInfo.Weights);
703 } else {
704 if (!Dominator.resultIsRememberedBlock())
705 PhiInserter.AddAvailableValue(Dominator.result(), Default);
706
707 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
708 }
709 }
710}
711
712/// Simplify any inverted conditions that were built by buildConditions.
713void StructurizeCFG::simplifyConditions() {
714 SmallVector<Instruction *> InstToErase;
715 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
716 auto &Preds = I.second;
717 for (auto [BB, PI] : Preds) {
718 Instruction *Inverted;
719 if (match(PI.Pred, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
720 !PI.Pred->use_empty()) {
721 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
722 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
723 PI.Pred->replaceAllUsesWith(InvertedCmp);
724 InstToErase.push_back(cast<Instruction>(PI.Pred));
725 }
726 }
727 }
728 }
729 for (auto *I : InstToErase)
730 I->eraseFromParent();
731}
732
733/// Remove all PHI values coming from "From" into "To" and remember
734/// them in DeletedPhis
735void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
736 PhiMap &Map = DeletedPhis[To];
737 for (PHINode &Phi : To->phis()) {
738 bool Recorded = false;
739 while (Phi.getBasicBlockIndex(From) != -1) {
740 Value *Deleted = Phi.removeIncomingValue(From, false);
741 Map[&Phi].push_back(std::make_pair(From, Deleted));
742 if (!Recorded) {
743 AffectedPhis.push_back(&Phi);
744 Recorded = true;
745 }
746 }
747 }
748}
749
750/// Add a dummy PHI value as soon as we knew the new predecessor
751void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
752 for (PHINode &Phi : To->phis()) {
753 Value *Poison = PoisonValue::get(Phi.getType());
754 Phi.addIncoming(Poison, From);
755 }
756 AddedPhis[To].push_back(From);
757}
758
759/// When we are reconstructing a PHI inside \p PHIBlock with incoming values
760/// from predecessors \p Incomings, we have a chance to mark the available value
761/// from some blocks as undefined. The function will find out all such blocks
762/// and return in \p UndefBlks.
763void StructurizeCFG::findUndefBlocks(
764 BasicBlock *PHIBlock, const SmallPtrSet<BasicBlock *, 8> &Incomings,
765 SmallVector<BasicBlock *> &UndefBlks) const {
766 // We may get a post-structured CFG like below:
767 //
768 // | P1
769 // |/
770 // F1
771 // |\
772 // | N
773 // |/
774 // F2
775 // |\
776 // | P2
777 // |/
778 // F3
779 // |\
780 // B
781 //
782 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
783 // of B before structurization. F1/F2/F3 are flow blocks inserted during
784 // structurization process. Block N is not a predecessor of B before
785 // structurization, but are placed between the predecessors(P1/P2) of B after
786 // structurization. This usually means that threads went to N never take the
787 // path N->F2->F3->B. For example, the threads take the branch F1->N may
788 // always take the branch F2->P2. So, when we are reconstructing a PHI
789 // originally in B, we can safely say the incoming value from N is undefined.
790 SmallPtrSet<BasicBlock *, 8> VisitedBlock;
791 SmallVector<BasicBlock *, 8> Stack;
792 if (PHIBlock == ParentRegion->getExit()) {
793 for (auto P : predecessors(PHIBlock)) {
794 if (ParentRegion->contains(P))
795 Stack.push_back(P);
796 }
797 } else {
798 append_range(Stack, predecessors(PHIBlock));
799 }
800
801 // Do a backward traversal over the CFG, and stop further searching if
802 // the block is not a Flow. If a block is neither flow block nor the
803 // incoming predecessor, then the incoming value from the block is
804 // undefined value for the PHI being reconstructed.
805 while (!Stack.empty()) {
806 BasicBlock *Current = Stack.pop_back_val();
807 if (!VisitedBlock.insert(Current).second)
808 continue;
809
810 if (FlowSet.contains(Current))
811 llvm::append_range(Stack, predecessors(Current));
812 else if (!Incomings.contains(Current))
813 UndefBlks.push_back(Current);
814 }
815}
816
817// If two phi nodes have compatible incoming values (for each
818// incoming block, either they have the same incoming value or only one phi
819// node has an incoming value), let them share the merged incoming values. The
820// merge process is guided by the equivalence information from \p PhiClasses.
821// The function will possibly update the incoming values of leader phi in
822// DeletedPhis.
823void StructurizeCFG::mergeIfCompatible(
824 EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, PHINode *B) {
825 auto ItA = PhiClasses.findLeader(PhiClasses.insert(A));
826 auto ItB = PhiClasses.findLeader(PhiClasses.insert(B));
827 // They are already in the same class, no work needed.
828 if (ItA == ItB)
829 return;
830
831 PHINode *LeaderA = *ItA;
832 PHINode *LeaderB = *ItB;
833 BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA];
834 BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB];
835
836 DenseMap<BasicBlock *, Value *> Mergeable(IncomingA.begin(), IncomingA.end());
837 for (auto [BB, V] : IncomingB) {
838 auto BBIt = Mergeable.find(BB);
839 if (BBIt != Mergeable.end() && BBIt->second != V)
840 return;
841 // Either IncomingA does not have this value or IncomingA has the same
842 // value.
843 Mergeable.insert({BB, V});
844 }
845
846 // Update the incoming value of leaderA.
847 IncomingA.assign(Mergeable.begin(), Mergeable.end());
848 PhiClasses.unionSets(ItA, ItB);
849}
850
851/// Add the real PHI value as soon as everything is set up
852void StructurizeCFG::setPhiValues() {
853 SmallVector<PHINode *, 8> InsertedPhis;
854 SSAUpdater Updater(&InsertedPhis);
855 DenseMap<BasicBlock *, SmallVector<BasicBlock *>> UndefBlksMap;
856
857 // Find phi nodes that have compatible incoming values (either they have
858 // the same value for the same block or only one phi node has an incoming
859 // value, see example below). We only search again the phi's that are
860 // referenced by another phi, which is the case we care about.
861 //
862 // For example (-- means no incoming value):
863 // phi1 : BB1:phi2 BB2:v BB3:--
864 // phi2: BB1:-- BB2:v BB3:w
865 //
866 // Then we can merge these incoming values and let phi1, phi2 use the
867 // same set of incoming values:
868 //
869 // phi1&phi2: BB1:phi2 BB2:v BB3:w
870 //
871 // By doing this, phi1 and phi2 would share more intermediate phi nodes.
872 // This would help reduce the number of phi nodes during SSA reconstruction
873 // and ultimately result in fewer COPY instructions.
874 //
875 // This should be correct, because if a phi node does not have incoming
876 // value from certain block, this means the block is not the predecessor
877 // of the parent block, so we actually don't care about its incoming value.
878 EquivalenceClasses<PHINode *> PhiClasses;
879 for (const auto &[To, From] : AddedPhis) {
880 auto OldPhiIt = DeletedPhis.find(To);
881 if (OldPhiIt == DeletedPhis.end())
882 continue;
883
884 PhiMap &BlkPhis = OldPhiIt->second;
885 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
886 SmallPtrSet<BasicBlock *, 8> Incomings;
887
888 // Get the undefined blocks shared by all the phi nodes.
889 if (!BlkPhis.empty()) {
890 Incomings.insert_range(llvm::make_first_range(BlkPhis.front().second));
891 findUndefBlocks(To, Incomings, UndefBlks);
892 }
893
894 for (const auto &[Phi, Incomings] : OldPhiIt->second) {
895 SmallVector<PHINode *> IncomingPHIs;
896 for (const auto &[BB, V] : Incomings) {
897 // First, for each phi, check whether it has incoming value which is
898 // another phi.
899 if (PHINode *P = dyn_cast<PHINode>(V))
900 IncomingPHIs.push_back(P);
901 }
902
903 for (auto *OtherPhi : IncomingPHIs) {
904 // Skip phis that are unrelated to the phi reconstruction for now.
905 if (!DeletedPhis.contains(OtherPhi->getParent()))
906 continue;
907 mergeIfCompatible(PhiClasses, Phi, OtherPhi);
908 }
909 }
910 }
911
912 for (const auto &AddedPhi : AddedPhis) {
913 BasicBlock *To = AddedPhi.first;
914 const BBVector &From = AddedPhi.second;
915
916 auto It = DeletedPhis.find(To);
917 if (It == DeletedPhis.end())
918 continue;
919
920 PhiMap &Map = It->second;
921 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
922 for (const auto &[Phi, Incoming] : Map) {
923 Value *Poison = PoisonValue::get(Phi->getType());
924 Updater.Initialize(Phi->getType(), "");
925 Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
926 Updater.AddAvailableValue(To, Poison);
927
928 // Use leader phi's incoming if there is.
929 auto LeaderIt = PhiClasses.findLeader(Phi);
930 bool UseIncomingOfLeader =
931 LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi;
932 const auto &IncomingMap =
933 UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt]
934 : Incoming;
935
936 SmallVector<BasicBlock *> ConstantPreds;
937 for (const auto &[BB, V] : IncomingMap) {
938 Updater.AddAvailableValue(BB, V);
939 if (isa<Constant>(V))
940 ConstantPreds.push_back(BB);
941 }
942
943 for (auto UB : UndefBlks) {
944 // If this undef block is dominated by any predecessor(before
945 // structurization) of reconstructed PHI with constant incoming value,
946 // don't mark the available value as undefined. Setting undef to such
947 // block will stop us from getting optimal phi insertion.
948 if (any_of(ConstantPreds,
949 [&](BasicBlock *CP) { return DT->dominates(CP, UB); }))
950 continue;
951 // Maybe already get a value through sharing with other phi nodes.
952 if (Updater.HasValueForBlock(UB))
953 continue;
954
955 Updater.AddAvailableValue(UB, Poison);
956 }
957
958 for (BasicBlock *FI : From)
959 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
960 AffectedPhis.push_back(Phi);
961 }
962 }
963
964 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
965}
966
967/// Updates PHI nodes after hoisted zero cost instructions by replacing poison
968/// entries on Flow nodes with the appropriate hoisted values
969void StructurizeCFG::simplifyHoistedPhis() {
970 for (WeakVH VH : AffectedPhis) {
971 PHINode *Phi = dyn_cast_or_null<PHINode>(VH);
972 if (!Phi || Phi->getNumIncomingValues() != 2)
973 continue;
974
975 for (int i = 0; i < 2; i++) {
976 Value *V = Phi->getIncomingValue(i);
977 auto BBIt = HoistedValues.find(V);
978
979 if (BBIt == HoistedValues.end())
980 continue;
981
982 Value *OtherV = Phi->getIncomingValue(!i);
983 PHINode *OtherPhi = dyn_cast<PHINode>(OtherV);
984 if (!OtherPhi)
985 continue;
986
987 int PoisonValBBIdx = -1;
988 for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
989 if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i)))
990 continue;
991 PoisonValBBIdx = i;
992 break;
993 }
994 if (PoisonValBBIdx == -1 ||
995 !DT->dominates(BBIt->second,
996 OtherPhi->getIncomingBlock(PoisonValBBIdx)))
997 continue;
998
999 OtherPhi->setIncomingValue(PoisonValBBIdx, V);
1000 if (DT->dominates(OtherV, Phi))
1001 Phi->setIncomingValue(i, OtherV);
1002 }
1003 }
1004}
1005
1006void StructurizeCFG::simplifyAffectedPhis() {
1007 bool Changed;
1008 do {
1009 Changed = false;
1010 SimplifyQuery Q(Func->getDataLayout());
1011 Q.DT = DT;
1012 // Setting CanUseUndef to true might extend value liveness, set it to false
1013 // to achieve better register pressure.
1014 Q.CanUseUndef = false;
1015 for (WeakVH VH : AffectedPhis) {
1016 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
1017 if (auto NewValue = simplifyInstruction(Phi, Q)) {
1018 Phi->replaceAllUsesWith(NewValue);
1019 Phi->eraseFromParent();
1020 Changed = true;
1021 }
1022 }
1023 }
1024 } while (Changed);
1025}
1026
1027/// Remove phi values from all successors and then remove the terminator.
1028DebugLoc StructurizeCFG::killTerminator(BasicBlock *BB) {
1030 if (!Term)
1031 return DebugLoc();
1032
1033 for (BasicBlock *Succ : successors(BB))
1034 delPhiValues(BB, Succ);
1035
1036 DebugLoc DL = Term->getDebugLoc();
1037 Term->eraseFromParent();
1038 return DL;
1039}
1040
1041/// Let node exit(s) point to NewExit
1042void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
1043 bool IncludeDominator) {
1044 if (Node->isSubRegion()) {
1045 Region *SubRegion = Node->getNodeAs<Region>();
1046 BasicBlock *OldExit = SubRegion->getExit();
1047 BasicBlock *Dominator = nullptr;
1048
1049 // Find all the edges from the sub region to the exit.
1050 // We use make_early_inc_range here because we modify BB's terminator.
1051 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) {
1052 if (!SubRegion->contains(BB))
1053 continue;
1054
1055 // Modify the edges to point to the new exit
1056 delPhiValues(BB, OldExit);
1057 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
1058 addPhiValues(BB, NewExit);
1059
1060 // Find the new dominator (if requested)
1061 if (IncludeDominator) {
1062 if (!Dominator)
1063 Dominator = BB;
1064 else
1065 Dominator = DT->findNearestCommonDominator(Dominator, BB);
1066 }
1067 }
1068
1069 // Change the dominator (if requested)
1070 if (Dominator)
1071 DT->changeImmediateDominator(NewExit, Dominator);
1072
1073 // Update the region info
1074 SubRegion->replaceExit(NewExit);
1075 } else {
1076 BasicBlock *BB = Node->getNodeAs<BasicBlock>();
1077 DebugLoc DL = killTerminator(BB);
1078 BranchInst *Br = BranchInst::Create(NewExit, BB);
1079 Br->setDebugLoc(DL);
1080 addPhiValues(BB, NewExit);
1081 if (IncludeDominator)
1082 DT->changeImmediateDominator(NewExit, BB);
1083 }
1084}
1085
1086/// Create a new flow node and update dominator tree and region info
1087BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
1088 LLVMContext &Context = Func->getContext();
1089 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
1090 Order.back()->getEntry();
1092 Func, Insert);
1093 FlowSet.insert(Flow);
1094 DT->addNewBlock(Flow, Dominator);
1095 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
1096 return Flow;
1097}
1098
1099/// Create a new or reuse the previous node as flow node. Returns a block and a
1100/// debug location to be used for new instructions in that block.
1101std::pair<BasicBlock *, DebugLoc> StructurizeCFG::needPrefix(bool NeedEmpty) {
1102 BasicBlock *Entry = PrevNode->getEntry();
1103
1104 if (!PrevNode->isSubRegion()) {
1105 DebugLoc DL = killTerminator(Entry);
1106 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
1107 return {Entry, DL};
1108 }
1109
1110 // create a new flow node
1111 BasicBlock *Flow = getNextFlow(Entry);
1112
1113 // and wire it up
1114 changeExit(PrevNode, Flow, true);
1115 PrevNode = ParentRegion->getBBNode(Flow);
1116 return {Flow, DebugLoc()};
1117}
1118
1119/// Returns the region exit if possible, otherwise just a new flow node
1120BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
1121 bool ExitUseAllowed) {
1122 if (!Order.empty() || !ExitUseAllowed)
1123 return getNextFlow(Flow);
1124
1125 BasicBlock *Exit = ParentRegion->getExit();
1126 DT->changeImmediateDominator(Exit, Flow);
1127 addPhiValues(Flow, Exit);
1128 return Exit;
1129}
1130
1131/// Set the previous node
1132void StructurizeCFG::setPrevNode(BasicBlock *BB) {
1133 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
1134 : nullptr;
1135}
1136
1137/// Does BB dominate all the predicates of Node?
1138bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
1139 BBPredicates &Preds = Predicates[Node->getEntry()];
1140 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, PredInfo> Pred) {
1141 return DT->dominates(BB, Pred.first);
1142 });
1143}
1144
1145/// Can we predict that this node will always be called?
1146bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
1147 BBPredicates &Preds = Predicates[Node->getEntry()];
1148 bool Dominated = false;
1149
1150 // Regionentry is always true
1151 if (!PrevNode)
1152 return true;
1153
1154 for (auto [BB, PI] : Preds) {
1155 if (PI.Pred != BoolTrue)
1156 return false;
1157
1158 if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
1159 Dominated = true;
1160 }
1161
1162 // TODO: The dominator check is too strict
1163 return Dominated;
1164}
1165
1166/// Take one node from the order vector and wire it up
1167void StructurizeCFG::wireFlow(bool ExitUseAllowed,
1168 BasicBlock *LoopEnd) {
1169 RegionNode *Node = Order.pop_back_val();
1170 Visited.insert(Node->getEntry());
1171
1172 if (isPredictableTrue(Node)) {
1173 // Just a linear flow
1174 if (PrevNode) {
1175 changeExit(PrevNode, Node->getEntry(), true);
1176 }
1177 PrevNode = Node;
1178 } else {
1179 // Insert extra prefix node (or reuse last one)
1180 auto [Flow, DL] = needPrefix(false);
1181
1182 // Insert extra postfix node (or use exit instead)
1183 BasicBlock *Entry = Node->getEntry();
1184 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
1185
1186 // let it point to entry and next block
1187 BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow);
1188 Br->setDebugLoc(DL);
1189 Conditions.push_back(Br);
1190 addPhiValues(Flow, Entry);
1191 DT->changeImmediateDominator(Entry, Flow);
1192
1193 PrevNode = Node;
1194 while (!Order.empty() && !Visited.count(LoopEnd) &&
1195 dominatesPredicates(Entry, Order.back())) {
1196 handleLoops(false, LoopEnd);
1197 }
1198
1199 changeExit(PrevNode, Next, false);
1200 setPrevNode(Next);
1201 }
1202}
1203
1204void StructurizeCFG::handleLoops(bool ExitUseAllowed,
1205 BasicBlock *LoopEnd) {
1206 RegionNode *Node = Order.back();
1207 BasicBlock *LoopStart = Node->getEntry();
1208
1209 if (!Loops.count(LoopStart)) {
1210 wireFlow(ExitUseAllowed, LoopEnd);
1211 return;
1212 }
1213
1214 if (!isPredictableTrue(Node))
1215 LoopStart = needPrefix(true).first;
1216
1217 LoopEnd = Loops[Node->getEntry()];
1218 wireFlow(false, LoopEnd);
1219 while (!Visited.count(LoopEnd)) {
1220 handleLoops(false, LoopEnd);
1221 }
1222
1223 assert(LoopStart != &LoopStart->getParent()->getEntryBlock());
1224
1225 // Create an extra loop end node
1226 DebugLoc DL;
1227 std::tie(LoopEnd, DL) = needPrefix(false);
1228 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
1229 BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd);
1230 Br->setDebugLoc(DL);
1231 LoopConds.push_back(Br);
1232 addPhiValues(LoopEnd, LoopStart);
1233 setPrevNode(Next);
1234}
1235
1236/// After this function control flow looks like it should be, but
1237/// branches and PHI nodes only have undefined conditions.
1238void StructurizeCFG::createFlow() {
1239 BasicBlock *Exit = ParentRegion->getExit();
1240 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
1241
1242 AffectedPhis.clear();
1243 DeletedPhis.clear();
1244 AddedPhis.clear();
1245 Conditions.clear();
1246 LoopConds.clear();
1247
1248 PrevNode = nullptr;
1249 Visited.clear();
1250
1251 while (!Order.empty()) {
1252 handleLoops(EntryDominatesExit, nullptr);
1253 }
1254
1255 if (PrevNode)
1256 changeExit(PrevNode, Exit, EntryDominatesExit);
1257 else
1258 assert(EntryDominatesExit);
1259}
1260
1261/// Handle a rare case where the disintegrated nodes instructions
1262/// no longer dominate all their uses. Not sure if this is really necessary
1263void StructurizeCFG::rebuildSSA() {
1264 SSAUpdater Updater;
1265 for (BasicBlock *BB : ParentRegion->blocks())
1266 for (Instruction &I : *BB) {
1267 bool Initialized = false;
1268 // We may modify the use list as we iterate over it, so we use
1269 // make_early_inc_range.
1270 for (Use &U : llvm::make_early_inc_range(I.uses())) {
1271 Instruction *User = cast<Instruction>(U.getUser());
1272 if (User->getParent() == BB) {
1273 continue;
1274 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1275 if (UserPN->getIncomingBlock(U) == BB)
1276 continue;
1277 }
1278
1279 if (DT->dominates(&I, User))
1280 continue;
1281
1282 if (!Initialized) {
1283 Value *Poison = PoisonValue::get(I.getType());
1284 Updater.Initialize(I.getType(), "");
1285 Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
1286 Updater.AddAvailableValue(BB, &I);
1287 Initialized = true;
1288 }
1289 Updater.RewriteUseAfterInsertions(U);
1290 }
1291 }
1292}
1293
1294static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
1295 const UniformityInfo &UA) {
1296 // Bool for if all sub-regions are uniform.
1297 bool SubRegionsAreUniform = true;
1298 // Count of how many direct children are conditional.
1299 unsigned ConditionalDirectChildren = 0;
1300
1301 for (auto *E : R->elements()) {
1302 if (!E->isSubRegion()) {
1303 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
1304 if (!Br || !Br->isConditional())
1305 continue;
1306
1307 if (!UA.isUniform(Br))
1308 return false;
1309
1310 // One of our direct children is conditional.
1311 ConditionalDirectChildren++;
1312
1313 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
1314 << " has uniform terminator\n");
1315 } else {
1316 // Explicitly refuse to treat regions as uniform if they have non-uniform
1317 // subregions. We cannot rely on UniformityAnalysis for branches in
1318 // subregions because those branches may have been removed and re-created,
1319 // so we look for our metadata instead.
1320 //
1321 // Warning: It would be nice to treat regions as uniform based only on
1322 // their direct child basic blocks' terminators, regardless of whether
1323 // subregions are uniform or not. However, this requires a very careful
1324 // look at SIAnnotateControlFlow to make sure nothing breaks there.
1325 for (auto *BB : E->getNodeAs<Region>()->blocks()) {
1326 auto Br = dyn_cast<BranchInst>(BB->getTerminator());
1327 if (!Br || !Br->isConditional())
1328 continue;
1329
1330 if (!Br->getMetadata(UniformMDKindID)) {
1331 // Early exit if we cannot have relaxed uniform regions.
1332 if (!RelaxedUniformRegions)
1333 return false;
1334
1335 SubRegionsAreUniform = false;
1336 break;
1337 }
1338 }
1339 }
1340 }
1341
1342 // Our region is uniform if:
1343 // 1. All conditional branches that are direct children are uniform (checked
1344 // above).
1345 // 2. And either:
1346 // a. All sub-regions are uniform.
1347 // b. There is one or less conditional branches among the direct children.
1348 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1349}
1350
1351void StructurizeCFG::init(Region *R) {
1352 LLVMContext &Context = R->getEntry()->getContext();
1353
1354 Boolean = Type::getInt1Ty(Context);
1355 BoolTrue = ConstantInt::getTrue(Context);
1356 BoolFalse = ConstantInt::getFalse(Context);
1357 BoolPoison = PoisonValue::get(Boolean);
1358
1359 this->UA = nullptr;
1360}
1361
1362bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
1363 if (R->isTopLevelRegion())
1364 return false;
1365
1366 this->UA = &UA;
1367
1368 // TODO: We could probably be smarter here with how we handle sub-regions.
1369 // We currently rely on the fact that metadata is set by earlier invocations
1370 // of the pass on sub-regions, and that this metadata doesn't get lost --
1371 // but we shouldn't rely on metadata for correctness!
1372 unsigned UniformMDKindID =
1373 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1374
1375 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) {
1376 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1377 << '\n');
1378
1379 // Mark all direct child block terminators as having been treated as
1380 // uniform. To account for a possible future in which non-uniform
1381 // sub-regions are treated more cleverly, indirect children are not
1382 // marked as uniform.
1383 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
1384 for (RegionNode *E : R->elements()) {
1385 if (E->isSubRegion())
1386 continue;
1387
1388 if (Instruction *Term = E->getEntry()->getTerminator())
1389 Term->setMetadata(UniformMDKindID, MD);
1390 }
1391
1392 return true;
1393 }
1394 return false;
1395}
1396
1397/// Run the transformation for each region found
1398bool StructurizeCFG::run(Region *R, DominatorTree *DT,
1399 const TargetTransformInfo *TTI) {
1400 if (R->isTopLevelRegion())
1401 return false;
1402
1403 this->DT = DT;
1404 this->TTI = TTI;
1405 Func = R->getEntry()->getParent();
1406 assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
1407
1408 ParentRegion = R;
1409
1410 orderNodes();
1411 collectInfos();
1412 createFlow();
1413 insertConditions(false);
1414 insertConditions(true);
1415 setPhiValues();
1416 simplifyHoistedPhis();
1417 simplifyConditions();
1418 simplifyAffectedPhis();
1419 rebuildSSA();
1420
1421 // Cleanup
1422 Order.clear();
1423 Visited.clear();
1424 DeletedPhis.clear();
1425 AddedPhis.clear();
1426 Predicates.clear();
1427 Conditions.clear();
1428 Loops.clear();
1429 LoopPreds.clear();
1430 LoopConds.clear();
1431 FlowSet.clear();
1432
1433 return true;
1434}
1435
1436Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1437 return new StructurizeCFGLegacyPass(SkipUniformRegions);
1438}
1439
1440static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1441 Regions.push_back(&R);
1442 for (const auto &E : R)
1443 addRegionIntoQueue(*E, Regions);
1444}
1445
1447 : SkipUniformRegions(SkipUniformRegions_) {
1448 if (ForceSkipUniformRegions.getNumOccurrences())
1449 SkipUniformRegions = ForceSkipUniformRegions.getValue();
1450}
1451
1453 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1455 OS, MapClassName2PassName);
1456 if (SkipUniformRegions)
1457 OS << "<skip-uniform-regions>";
1458}
1459
1462
1463 bool Changed = false;
1465 auto &RI = AM.getResult<RegionInfoAnalysis>(F);
1467 UniformityInfo *UI = nullptr;
1468 if (SkipUniformRegions)
1470
1471 std::vector<Region *> Regions;
1472 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
1473 while (!Regions.empty()) {
1474 Region *R = Regions.back();
1475 Regions.pop_back();
1476
1477 StructurizeCFG SCFG;
1478 SCFG.init(R);
1479
1480 if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) {
1481 Changed = true; // May have added metadata.
1482 continue;
1483 }
1484
1485 Changed |= SCFG.run(R, DT, TTI);
1486 }
1487 if (!Changed)
1488 return PreservedAnalyses::all();
1491 return PA;
1492}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DenseMap< BasicBlock *, Instruction * > BBPredicates
This file defines the DenseMap class.
@ Default
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
Hexagon Hardware Loops
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
#define T
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static void addRegionIntoQueue(Region &R, std::deque< Region * > &RQ)
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
Annotate SI Control Flow
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
SmallDenseMap< const PHINode *, PhiInfo, 16 > PhiMap
static bool isHoistableInstruction(Instruction *I, BasicBlock *BB, const TargetTransformInfo *TTI)
Checks whether an instruction is zero cost instruction and checks if the operands are from different ...
const char FlowBlockName[]
static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, const UniformityInfo &UA)
#define LLVM_DEBUG(...)
Definition Debug.h:119
This pass exposes codegen information to IR-level passes.
LLVM IR instance of the generic uniformity analysis.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Conditional or Unconditional Branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
A debug info location.
Definition DebugLoc.h:124
bool empty() const
Definition DenseMap.h:107
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:161
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:322
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
const ECValue & insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists...
member_iterator member_end() const
member_iterator findLeader(const ElemTy &V) const
findLeader - Given a value in the set, return a member iterator for the equivalence class it is in.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
const BasicBlock & getEntryBlock() const
Definition Function.h:807
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:363
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
bool isUniform(ConstValueRefT V) const
Whether V is uniform/non-divergent.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1565
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition Pass.cpp:112
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
The pass manager to schedule RegionPasses.
Definition RegionPass.h:88
void replaceExit(BlockT *BB)
Replace the exit basic block of the region with the new basic block.
block_range blocks()
Returns a range view of the basic blocks in the region.
Definition RegionInfo.h:620
BlockT * getExit() const
Get the exit BasicBlock of the Region.
Definition RegionInfo.h:357
RegionNodeT * getBBNode(BlockT *BB) const
Get the BasicBlock RegionNode for a BasicBlock.
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionInfoT * getRegionInfo() const
Return the RegionInfo object, that belongs to this Region.
Definition RegionInfo.h:424
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
Analysis pass that exposes the RegionInfo for a function.
Definition RegionInfo.h:965
RegionT * getRegionFor(BlockT *BB) const
Get the smallest region that contains a BasicBlock.
bool isSubRegion() const
Is this RegionNode a subregion?
Definition RegionInfo.h:186
BlockT * getEntry() const
Get the entry BasicBlock of this RegionNode.
Definition RegionInfo.h:172
A pass that runs on each Region in a function.
Definition RegionPass.h:33
void RewriteUseAfterInsertions(Use &U)
Rewrite a use like RewriteUse but handling in-block definitions.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition DenseSet.h:281
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_Latency
The latency of instruction.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
Analysis pass which computes UniformityInfo.
Legacy analysis pass which computes a CycleInfo.
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:21
LLVM Value Representation.
Definition Value.h:75
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
CRTP base class for adapting an iterator to a different type.
Definition iterator.h:237
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
static scc_iterator begin(const GraphT &G)
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Changed
@ Entry
Definition COFF.h:862
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< SSAContext > UniformityInfo
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
APInt operator*(APInt a, uint64_t RHS)
Definition APInt.h:2235
LLVM_ABI bool hasOnlySimpleTerminator(const Function &F)
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2138
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:646
LLVM_ABI Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structizer will not structurize regions that only contain uniform...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1180
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
auto reverse(ContainerTy &&C)
Definition STLExtras.h:420
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1427
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:564
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
unsigned char Boolean
Definition ConvertUTF.h:132
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
auto predecessors(const MachineBasicBlock *BB)
iterator_range< typename GraphTraits< GraphType >::ChildIteratorType > children(const typename GraphTraits< GraphType >::NodeRef &G)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Value * invertCondition(Value *Condition)
Invert the given true/false value, possibly reusing an existing copy.
Definition Local.cpp:3923
filter_iterator_impl< WrappedIteratorT, PredicateT, detail::fwd_or_bidi_tag< WrappedIteratorT > > filter_iterator
Defines filter_iterator to a suitable specialization of filter_iterator_impl, based on the underlying...
Definition STLExtras.h:551
LLVM_ABI void initializeStructurizeCFGLegacyPassPass(PassRegistry &)
#define N
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
StructurizeCFGPass(bool SkipUniformRegions=false)