docs/doxygen/UnifyLoopExits_8cpp_source.html

//===- UnifyLoopExits.cpp - Redirect exiting edges to one block -*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// For each natural loop with multiple exit blocks, this pass creates a new

// block N such that all exiting blocks now branch to N, and then control flow

// is redistributed to all the original exit blocks.

//

// Limitation: This assumes that all terminators in the CFG are direct branches
//             (the "br" instruction) or callbr. Any other terminator in an
//             exiting block, such as indirectbr or switch, is reported via
//             llvm_unreachable.

//             The callbr terminator is supported by creating intermediate

//             target blocks that unconditionally branch to the original target

//             blocks. These intermediate target blocks can then be redirected

//             through the ControlFlowHub as usual.

//

//===----------------------------------------------------------------------===//


#include "llvm/Transforms/Utils/UnifyLoopExits.h"

#include "llvm/ADT/MapVector.h"

#include "llvm/Analysis/DomTreeUpdater.h"

#include "llvm/Analysis/LoopInfo.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/Dominators.h"

#include "llvm/InitializePasses.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Transforms/Utils.h"

#include "llvm/Transforms/Utils/BasicBlockUtils.h"

#include "llvm/Transforms/Utils/ControlFlowUtils.h"


#define DEBUG_TYPE "unify-loop-exits"


using namespace llvm;


// Hidden command-line knob (default 32) whose value is handed to
// ControlFlowHub::finalize when the loop exits are unified.
static cl::opt<unsigned> MaxBooleansInControlFlowHub(
    "max-booleans-in-control-flow-hub", cl::Hidden, cl::init(32),
    cl::desc("Set the maximum number of outgoing blocks for using a "
             "boolean value to record the exiting block in the "
             "ControlFlowHub."));


namespace {

struct UnifyLoopExitsLegacyPass : public FunctionPass {

  static char ID;

  UnifyLoopExitsLegacyPass() : FunctionPass(ID) {

    initializeUnifyLoopExitsLegacyPassPass(*PassRegistry::getPassRegistry());

  }


  void getAnalysisUsage(AnalysisUsage &AU) const override {

    AU.addRequired<LoopInfoWrapperPass>();

    AU.addRequired<DominatorTreeWrapperPass>();

    AU.addPreserved<LoopInfoWrapperPass>();

    AU.addPreserved<DominatorTreeWrapperPass>();

  }


  bool runOnFunction(Function &F) override;

};

} // namespace


// Out-of-class definition of the static pass identifier that the constructor
// passes to the FunctionPass base class.
char UnifyLoopExitsLegacyPass::ID = 0;


/// Factory for the legacy pass manager: returns a freshly allocated
/// UnifyLoopExitsLegacyPass as a FunctionPass pointer.
FunctionPass *llvm::createUnifyLoopExitsPass() {
  FunctionPass *NewPass = new UnifyLoopExitsLegacyPass();
  return NewPass;
}


// Legacy pass registration for UnifyLoopExitsLegacyPass under the name
// "unify-loop-exits". The two dependency lines name the same wrapper passes
// that getAnalysisUsage() marks as required. The BEGIN and END invocations
// carry identical arguments.
INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits",
                      "Fixup each natural loop to have a single exit block",
                      false /* Only looks at CFG */, false /* Analysis Pass */)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits",
                    "Fixup each natural loop to have a single exit block",
                    false /* Only looks at CFG */, false /* Analysis Pass */)


// The current transform introduces new control flow paths which may break the

// SSA requirement that every def must dominate all its uses. For example,

// consider a value D defined inside the loop that is used by some instruction

// U outside the loop. It follows that D dominates U, since the original

// program has valid SSA form. After merging the exits, all paths from D to U

// now flow through the unified exit block. In addition, there may be other

// paths that do not pass through D, but now reach the unified exit

// block. Thus, D no longer dominates U.

//

// Restore the dominance by creating a phi for each such D at the new unified

// loop exit. But when doing this, ignore any uses U that are in the new unified

// loop exit, since those were introduced specially when the block was created.

//

// The use of SSAUpdater seems like overkill for this operation. The location

// for creating the new PHI is well-known, and also the set of incoming blocks

// to the new PHI.


static void restoreSSA(const DominatorTree &DT, const Loop *L,

                       SmallVectorImpl<BasicBlock *> &Incoming,

                       BasicBlock *LoopExitBlock) {

  using InstVector = SmallVector<Instruction *, 8>;

  using IIMap = MapVector<Instruction *, InstVector>;

  IIMap ExternalUsers;

  for (auto *BB : L->blocks()) {

    for (auto &I : *BB) {

      for (auto &U : I.uses()) {

        auto UserInst = cast<Instruction>(U.getUser());

        auto UserBlock = UserInst->getParent();

        if (UserBlock == LoopExitBlock)

          continue;

        if (L->contains(UserBlock))

          continue;

        LLVM_DEBUG(dbgs() << "added ext use for " << I.getName() << "("

                          << BB->getName() << ")"

                          << ": " << UserInst->getName() << "("

                          << UserBlock->getName() << ")"

                          << "\n");

        ExternalUsers[&I].push_back(UserInst);

      }

    }

  }


  for (const auto &II : ExternalUsers) {

    // For each Def used outside the loop, create NewPhi in

    // LoopExitBlock. NewPhi receives Def only along exiting blocks that

    // dominate it, while the remaining values are undefined since those paths

    // didn't exist in the original CFG.

    auto Def = II.first;

    LLVM_DEBUG(dbgs() << "externally used: " << Def->getName() << "\n");

    auto NewPhi =

        PHINode::Create(Def->getType(), Incoming.size(),

                        Def->getName() + ".moved", LoopExitBlock->begin());

    for (auto *In : Incoming) {

      LLVM_DEBUG(dbgs() << "predecessor " << In->getName() << ": ");

      if (Def->getParent() == In || DT.dominates(Def, In)) {

        LLVM_DEBUG(dbgs() << "dominated\n");

        NewPhi->addIncoming(Def, In);

      } else {

        LLVM_DEBUG(dbgs() << "not dominated\n");

        NewPhi->addIncoming(PoisonValue::get(Def->getType()), In);

      }

    }


    LLVM_DEBUG(dbgs() << "external users:");

    for (auto *U : II.second) {

      LLVM_DEBUG(dbgs() << " " << U->getName());

      U->replaceUsesOfWith(Def, NewPhi);

    }

    LLVM_DEBUG(dbgs() << "\n");

  }

}


/// Redirect every exiting edge of \p L through a single new exit block.
///
/// The out-of-loop successors of each exiting block are registered with a
/// ControlFlowHub. Finalizing the hub yields the unified exit block together
/// with the guard blocks it created. For callbr terminators every exiting edge
/// is first split with SplitCallBrEdge and the new intermediate block is
/// registered with the hub instead of the callbr block. SSA form is then
/// repaired with restoreSSA, and the new blocks are added to the parent loop
/// if there is one.
///
/// \param DT Dominator tree, updated eagerly through a DomTreeUpdater.
/// \param LI Loop info; receives the guard blocks and split callbr targets.
/// \param L  The loop whose exits are unified.
/// \returns true if the CFG was changed, false otherwise.
static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
  // To unify the loop exits, we need a list of the exiting blocks as
  // well as exit blocks. The functions for locating these lists both
  // traverse the entire loop body. It is more efficient to first
  // locate the exiting blocks and then examine their successors to
  // locate the exit blocks.
  SmallVector<BasicBlock *, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // No exiting blocks, so nothing to do. Just return.
  if (ExitingBlocks.empty())
    return false;

  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
  SmallVector<BasicBlock *, 8> CallBrTargetBlocksToFix;

  // Every block registered with the hub as a branch source. restoreSSA needs
  // exactly this list, so that each new phi gets one incoming entry per
  // registered block. For br terminators that is the exiting block itself;
  // for callbr it is one intermediate block per exiting edge.
  SmallVector<BasicBlock *, 8> HubPredecessors;

  // Redirect exiting edges through a control flow hub.
  ControlFlowHub CHub;
  bool Changed = false;

  for (BasicBlock *BB : ExitingBlocks) {
    Instruction *Term = BB->getTerminator();

    if (auto *Branch = dyn_cast<BranchInst>(Term)) {
      // Only successors outside the loop are routed through the hub; in-loop
      // successors are passed as nullptr.
      BasicBlock *Succ0 = Branch->getSuccessor(0);
      Succ0 = L->contains(Succ0) ? nullptr : Succ0;

      BasicBlock *Succ1 =
          Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
      Succ1 = L->contains(Succ1) ? nullptr : Succ1;
      CHub.addBranch(BB, Succ0, Succ1);
      HubPredecessors.push_back(BB);

      LLVM_DEBUG(dbgs() << "Added exiting branch: " << printBasicBlock(BB)
                        << " -> " << printBasicBlock(Succ0)
                        << (Succ0 && Succ1 ? " " : "") << printBasicBlock(Succ1)
                        << '\n');
      continue;
    }

    if (auto *CallBr = dyn_cast<CallBrInst>(Term)) {
      for (unsigned J = 0; J < CallBr->getNumSuccessors(); ++J) {
        BasicBlock *Succ = CallBr->getSuccessor(J);
        if (L->contains(Succ))
          continue;
        bool UpdatedLI = false;
        BasicBlock *NewSucc =
            SplitCallBrEdge(BB, Succ, J, &DTU, nullptr, &LI, &UpdatedLI);
        // SplitCallBrEdge modifies the CFG because it creates an intermediate
        // block. So we need to set the changed flag no matter what the
        // ControlFlowHub is going to do later.
        Changed = true;
        // Even if CallBr and Succ do not have a common parent loop, we need to
        // add the new target block to the parent loop of the current loop.
        if (!UpdatedLI)
          CallBrTargetBlocksToFix.push_back(NewSucc);
        // The phi nodes created by restoreSSA must list the blocks that were
        // registered with the hub, which for callbr are the new intermediate
        // target blocks rather than the callbr block. A callbr may have
        // several exiting edges, so record every intermediate block, not only
        // the last one.
        HubPredecessors.push_back(NewSucc);
        CHub.addBranch(NewSucc, Succ);
        LLVM_DEBUG(dbgs() << "Added exiting branch: "
                          << printBasicBlock(NewSucc) << " -> "
                          << printBasicBlock(Succ) << '\n');
      }
      continue;
    }

    llvm_unreachable("unsupported block terminator");
  }

  SmallVector<BasicBlock *, 8> GuardBlocks;
  BasicBlock *LoopExitBlock;
  bool ChangedCFG;
  std::tie(LoopExitBlock, ChangedCFG) = CHub.finalize(
      &DTU, GuardBlocks, "loop.exit", MaxBooleansInControlFlowHub.getValue());
  // Splitting callbr edges counts as a change even if the hub did nothing.
  ChangedCFG |= Changed;
  if (!ChangedCFG)
    return false;

  restoreSSA(DT, L, HubPredecessors, LoopExitBlock);

#if defined(EXPENSIVE_CHECKS)
  assert(DT.verify(DominatorTree::VerificationLevel::Full));
#else
  assert(DT.verify(DominatorTree::VerificationLevel::Fast));
#endif // EXPENSIVE_CHECKS
  L->verifyLoop();

  // The guard blocks were created outside the loop, so they need to become
  // members of the parent loop.
  // Same goes for the callbr target blocks.  Although we try to add them to the
  // smallest common parent loop of the callbr block and the corresponding
  // original target block, there might not have been such a loop, in which case
  // the newly created callbr target blocks are not part of any loop. For nested
  // loops, this might result in them leading to a loop with multiple entry
  // points.
  if (auto *ParentLoop = L->getParentLoop()) {
    for (auto *G : GuardBlocks) {
      ParentLoop->addBasicBlockToLoop(G, LI);
    }
    for (auto *C : CallBrTargetBlocksToFix) {
      ParentLoop->addBasicBlockToLoop(C, LI);
    }
    ParentLoop->verifyLoop();
  }

#if defined(EXPENSIVE_CHECKS)
  LI.verify(DT);
#endif // EXPENSIVE_CHECKS

  return true;
}


/// Run unifyLoopExits on every loop known to \p LI, in preorder.
/// \returns true if at least one loop was changed.
static bool runImpl(LoopInfo &LI, DominatorTree &DT) {
  // Take the list of loops up front, before any loop is transformed.
  const auto LoopsToVisit = LI.getLoopsInPreorder();

  bool AnyLoopChanged = false;
  for (Loop *CurLoop : LoopsToVisit) {
    LLVM_DEBUG(dbgs() << "Processing loop:\n"; CurLoop->print(dbgs()));
    if (unifyLoopExits(DT, LI, CurLoop))
      AnyLoopChanged = true;
  }
  return AnyLoopChanged;
}


/// Legacy pass manager entry point: fetch LoopInfo and the dominator tree from
/// their wrapper passes and hand them to runImpl.
bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
  LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName()
                    << "\n");

  LoopInfo &Loops = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  DominatorTree &DomTree =
      getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  return runImpl(Loops, DomTree);
}


namespace llvm {

/// New pass manager entry point. Everything is reported as preserved when
/// runImpl made no change; otherwise only LoopAnalysis and
/// DominatorTreeAnalysis are.
PreservedAnalyses UnifyLoopExitsPass::run(Function &F,
                                          FunctionAnalysisManager &AM) {
  LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName()
                    << "\n");

  LoopInfo &Loops = AM.getResult<LoopAnalysis>(F);
  DominatorTree &DomTree = AM.getResult<DominatorTreeAnalysis>(F);

  const bool MadeChange = runImpl(Loops, DomTree);
  if (MadeChange) {
    PreservedAnalyses Preserved;
    Preserved.preserve<LoopAnalysis>();
    Preserved.preserve<DominatorTreeAnalysis>();
    return Preserved;
  }
  return PreservedAnalyses::all();
}

} // namespace llvm

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const
aarch64 promote const
Definition AArch64PromoteConstant.cpp:228

BasicBlockUtils.h

CommandLine.h

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

ControlFlowUtils.h

DomTreeUpdater.h

Dominators.h

runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition EntryExitInstrumenter.cpp:103

runImpl
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
Definition ExpandIRInsts.cpp:1030

Loops
Hexagon Hardware Loops
Definition HexagonHardwareLoops.cpp:367

InitializePasses.h

LoopInfo.h

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

G
#define G(x, y, z)
Definition MD5.cpp:55

MapVector.h
This file implements a map that provides insertion order iteration.

II
uint64_t IntrinsicInst * II
Definition NVVMIntrRange.cpp:46

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:114

Utils.h

unifyLoopExits
static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L)
Definition UnifyLoopExits.cpp:148

restoreSSA
unify loop Fixup each natural loop to have a single exit static false void restoreSSA(const DominatorTree &DT, const Loop *L, SmallVectorImpl< BasicBlock * > &Incoming, BasicBlock *LoopExitBlock)
Definition UnifyLoopExits.cpp:93

MaxBooleansInControlFlowHub
static cl::opt< unsigned > MaxBooleansInControlFlowHub("max-booleans-in-control-flow-hub", cl::init(32), cl::Hidden, cl::desc("Set the maximum number of outgoing blocks for using a boolean " "value to record the exiting block in the ControlFlowHub."))

runImpl
static bool runImpl(LoopInfo &LI, DominatorTree &DT)
Definition UnifyLoopExits.cpp:258

UnifyLoopExits.h

llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition PassManager.h:411

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition PassAnalysisSupport.h:76

llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition PassAnalysisSupport.h:99

llvm::BasicBlock
LLVM Basic Block Representation.
Definition BasicBlock.h:62

llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233

llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition Instructions.h:3058

llvm::CallBrInst
CallBr instruction, tracking function calls that may not return control but instead transfer it to a ...
Definition Instructions.h:3886

llvm::DomTreeUpdater
Definition DomTreeUpdater.h:34

llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition Dominators.h:283

llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition GenericDomTree.h:920

llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:321

llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314

llvm::Function
Definition Function.h:64

llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569

llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition GenericLoopInfoImpl.h:749

llvm::LoopInfoBase::getLoopsInPreorder
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
Definition GenericLoopInfoImpl.h:607

llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596

llvm::LoopInfo
Definition LoopInfo.h:408

llvm::Loop
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40

llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36

llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition Instructions.h:2675

llvm::PassRegistry::getPassRegistry
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition PassRegistry.cpp:23

llvm::PoisonValue::get
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition Constants.cpp:1905

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118

llvm::PreservedAnalyses::preserve
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition SmallVector.h:576

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:419

llvm::SmallVectorTemplateCommon::size
size_t size() const
Definition SmallVector.h:80

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:83

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1205

llvm::UnifyLoopExitsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition UnifyLoopExits.cpp:280

llvm::cl::opt
Definition CommandLine.h:1454

Changed
Changed
Definition ObjCARCOpts.cpp:2369

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

false
Definition MachinePipeliner.cpp:245

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:138

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:444

llvm::rdf::Def
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384

llvm::tgtok::In
@ In
Definition TGLexer.h:83

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::initializeUnifyLoopExitsLegacyPassPass
LLVM_ABI void initializeUnifyLoopExitsLegacyPassPass(PassRegistry &)

llvm::SplitCallBrEdge
LLVM_ABI BasicBlock * SplitCallBrEdge(BasicBlock *CallBrBlock, BasicBlock *Succ, unsigned SuccIdx, DomTreeUpdater *DTU=nullptr, CycleInfo *CI=nullptr, LoopInfo *LI=nullptr, bool *UpdatedLI=nullptr)
Create a new intermediate target block for a callbr edge.
Definition BasicBlockUtils.cpp:725

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::createUnifyLoopExitsPass
LLVM_ABI FunctionPass * createUnifyLoopExitsPass()
Definition UnifyLoopExits.cpp:64

llvm::printBasicBlock
LLVM_ABI Printable printBasicBlock(const BasicBlock *BB)
Print BasicBlock BB as an operand or print "<nullptr>" if BB is a nullptr.
Definition AsmWriter.cpp:4281

llvm::FunctionAnalysisManager
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
Definition PassManager.h:563

llvm::ControlFlowHub
Given a set of branch descriptors [BB, Succ0, Succ1], create a "hub" such that the control flow from ...
Definition ControlFlowUtils.h:100

llvm::ControlFlowHub::addBranch
void addBranch(BasicBlock *BB, BasicBlock *Succ0, BasicBlock *Succ1=nullptr)
Definition ControlFlowUtils.h:110

llvm::ControlFlowHub::finalize
std::pair< BasicBlock *, bool > finalize(DomTreeUpdater *DTU, SmallVectorImpl< BasicBlock * > &GuardBlocks, const StringRef Prefix, std::optional< unsigned > MaxControlFlowBooleans=std::nullopt)
Return the unified loop exit block and a flag indicating if the CFG was changed at all.
Definition ControlFlowUtils.cpp:282

llvm::Incoming
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
Definition SILowerI1Copies.h:26

llvm::cl::desc
Definition CommandLine.h:410