LLVM 20.0.0git
BlockExtractor.cpp
Go to the documentation of this file.
1//===- BlockExtractor.cpp - Extracts blocks into their own functions ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass extracts the specified basic blocks from the module into their
10// own functions.
11//
12//===----------------------------------------------------------------------===//
13
#include "llvm/Transforms/IPO/BlockExtractor.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include <utility>
26
27using namespace llvm;
28
29#define DEBUG_TYPE "block-extractor"
30
31STATISTIC(NumExtracted, "Number of basic blocks extracted");
32
34 "extract-blocks-file", cl::value_desc("filename"),
35 cl::desc("A file containing list of basic blocks to extract"), cl::Hidden);
36
37static cl::opt<bool>
38 BlockExtractorEraseFuncs("extract-blocks-erase-funcs",
39 cl::desc("Erase the existing functions"),
41namespace {
42class BlockExtractor {
43public:
44 BlockExtractor(bool EraseFunctions) : EraseFunctions(EraseFunctions) {}
45 bool runOnModule(Module &M);
46 void
47 init(const std::vector<std::vector<BasicBlock *>> &GroupsOfBlocksToExtract) {
48 GroupsOfBlocks = GroupsOfBlocksToExtract;
49 if (!BlockExtractorFile.empty())
50 loadFile();
51 }
52
53private:
54 std::vector<std::vector<BasicBlock *>> GroupsOfBlocks;
55 bool EraseFunctions;
56 /// Map a function name to groups of blocks.
58 BlocksByName;
59
60 void loadFile();
61 void splitLandingPadPreds(Function &F);
62};
63
64} // end anonymous namespace
65
66/// Gets all of the blocks specified in the input file.
67void BlockExtractor::loadFile() {
69 if (ErrOrBuf.getError())
70 report_fatal_error("BlockExtractor couldn't load the file.");
71 // Read the file.
72 auto &Buf = *ErrOrBuf;
74 Buf->getBuffer().split(Lines, '\n', /*MaxSplit=*/-1,
75 /*KeepEmpty=*/false);
76 for (const auto &Line : Lines) {
78 Line.split(LineSplit, ' ', /*MaxSplit=*/-1,
79 /*KeepEmpty=*/false);
80 if (LineSplit.empty())
81 continue;
82 if (LineSplit.size()!=2)
83 report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'",
84 /*GenCrashDiag=*/false);
86 LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1,
87 /*KeepEmpty=*/false);
88 if (BBNames.empty())
89 report_fatal_error("Missing bbs name");
90 BlocksByName.push_back(
91 {std::string(LineSplit[0]), {BBNames.begin(), BBNames.end()}});
92 }
93}
94
95/// Extracts the landing pads to make sure all of them have only one
96/// predecessor.
97void BlockExtractor::splitLandingPadPreds(Function &F) {
98 for (BasicBlock &BB : F) {
99 for (Instruction &I : BB) {
100 if (!isa<InvokeInst>(&I))
101 continue;
102 InvokeInst *II = cast<InvokeInst>(&I);
103 BasicBlock *Parent = II->getParent();
104 BasicBlock *LPad = II->getUnwindDest();
105
106 // Look through the landing pad's predecessors. If one of them ends in an
107 // 'invoke', then we want to split the landing pad.
108 bool Split = false;
109 for (auto *PredBB : predecessors(LPad)) {
110 if (PredBB->isLandingPad() && PredBB != Parent &&
111 isa<InvokeInst>(Parent->getTerminator())) {
112 Split = true;
113 break;
114 }
115 }
116
117 if (!Split)
118 continue;
119
121 SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs);
122 }
123 }
124}
125
126bool BlockExtractor::runOnModule(Module &M) {
127 bool Changed = false;
128
129 // Get all the functions.
131 for (Function &F : M) {
132 splitLandingPadPreds(F);
133 Functions.push_back(&F);
134 }
135
136 // Get all the blocks specified in the input file.
137 unsigned NextGroupIdx = GroupsOfBlocks.size();
138 GroupsOfBlocks.resize(NextGroupIdx + BlocksByName.size());
139 for (const auto &BInfo : BlocksByName) {
140 Function *F = M.getFunction(BInfo.first);
141 if (!F)
142 report_fatal_error("Invalid function name specified in the input file",
143 /*GenCrashDiag=*/false);
144 for (const auto &BBInfo : BInfo.second) {
145 auto Res = llvm::find_if(
146 *F, [&](const BasicBlock &BB) { return BB.getName() == BBInfo; });
147 if (Res == F->end())
148 report_fatal_error("Invalid block name specified in the input file",
149 /*GenCrashDiag=*/false);
150 GroupsOfBlocks[NextGroupIdx].push_back(&*Res);
151 }
152 ++NextGroupIdx;
153 }
154
155 // Extract each group of basic blocks.
156 for (auto &BBs : GroupsOfBlocks) {
157 SmallVector<BasicBlock *, 32> BlocksToExtractVec;
158 for (BasicBlock *BB : BBs) {
159 // Check if the module contains BB.
160 if (BB->getParent()->getParent() != &M)
161 report_fatal_error("Invalid basic block", /*GenCrashDiag=*/false);
162 LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting "
163 << BB->getParent()->getName() << ":" << BB->getName()
164 << "\n");
165 BlocksToExtractVec.push_back(BB);
166 if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
167 BlocksToExtractVec.push_back(II->getUnwindDest());
168 ++NumExtracted;
169 Changed = true;
170 }
171 CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent());
172 Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC);
173 if (F)
174 LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName()
175 << "' in: " << F->getName() << '\n');
176 else
177 LLVM_DEBUG(dbgs() << "Failed to extract for group '"
178 << (*BBs.begin())->getName() << "'\n");
179 }
180
181 // Erase the functions.
182 if (EraseFunctions || BlockExtractorEraseFuncs) {
183 for (Function *F : Functions) {
184 LLVM_DEBUG(dbgs() << "BlockExtractor: Trying to delete " << F->getName()
185 << "\n");
186 F->deleteBody();
187 }
188 // Set linkage as ExternalLinkage to avoid erasing unreachable functions.
189 for (Function &F : M)
191 Changed = true;
192 }
193
194 return Changed;
195}
196
198 std::vector<std::vector<BasicBlock *>> &&GroupsOfBlocks,
199 bool EraseFunctions)
200 : GroupsOfBlocks(GroupsOfBlocks), EraseFunctions(EraseFunctions) {}
201
204 BlockExtractor BE(EraseFunctions);
205 BE.init(GroupsOfBlocks);
206 return BE.runOnModule(M) ? PreservedAnalyses::none()
208}
static const Function * getParent(const Value *V)
static cl::opt< std::string > BlockExtractorFile("extract-blocks-file", cl::value_desc("filename"), cl::desc("A file containing list of basic blocks to extract"), cl::Hidden)
static cl::opt< bool > BlockExtractorEraseFuncs("extract-blocks-erase-funcs", cl::desc("Erase the existing functions"), cl::Hidden)
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static std::unique_ptr< Module > loadFile(const std::string &FileName, LLVMContext &Context)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
This header defines various interfaces for pass management in LLVM.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:45
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:84
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
Invoke instruction.
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
auto predecessors(const MachineBasicBlock *BB)
BlockExtractorPass(std::vector< std::vector< BasicBlock * > > &&GroupsOfBlocks, bool EraseFunctions)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)