LLVM  10.0.0svn
WebAssemblyFixIrreducibleControlFlow.cpp
Go to the documentation of this file.
1 //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a pass that removes irreducible control flow.
11 /// Irreducible control flow means multiple-entry loops, which this pass
12 /// transforms to have a single entry.
13 ///
14 /// Note that LLVM has a generic pass that lowers irreducible control flow, but
15 /// it linearizes control flow, turning diamonds into two triangles, which is
16 /// both unnecessary and undesirable for WebAssembly.
17 ///
18 /// The big picture: We recursively process each "region", defined as a group
19 /// of blocks with a single entry and no branches back to that entry. A region
20 /// may be the entire function body, or the inner part of a loop, i.e., the
21 /// loop's body without branches back to the loop entry. In each region we fix
22 /// up multi-entry loops by adding a new block that can dispatch to each of the
23 /// loop entries, based on the value of a label "helper" variable, and we
24 /// replace direct branches to the entries with assignments to the label
25 /// variable and a branch to the dispatch block. Then the dispatch block is the
26 /// single entry in the loop containing the previous multiple entries. After
27 /// ensuring all the loops in a region are reducible, we recurse into them. The
28 /// total time complexity of this pass is:
29 ///
30 /// O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
31 /// NumLoops * NumLoops)
32 ///
33 /// This pass is similar to what the Relooper [1] does. Both identify looping
34 /// code that requires multiple entries, and resolve it in a similar way (in
35 /// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
36 /// also that like the Relooper, we implement a "minimal" intervention: we only
37 /// use the "label" helper for the blocks we absolutely must and no others. We
38 /// also prioritize code size and do not duplicate code in order to resolve
39 /// irreducibility. The graph algorithms for finding loops and entries and so
40 /// forth are also similar to the Relooper. The main differences between this
41 /// pass and the Relooper are:
42 ///
43 /// * We just care about irreducibility, so we just look at loops.
44 /// * The Relooper emits structured control flow (with ifs etc.), while we
45 /// emit a CFG.
46 ///
47 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
48 /// Proceedings of the ACM international conference companion on Object oriented
49 /// programming systems languages and applications companion (SPLASH '11). ACM,
50 /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
51 /// http://doi.acm.org/10.1145/2048147.2048224
52 ///
53 //===----------------------------------------------------------------------===//
54 
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "WebAssemblySubtarget.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
60 using namespace llvm;
61 
62 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
63 
64 namespace {
65 
66 using BlockVector = SmallVector<MachineBasicBlock *, 4>;
68 
69 // Calculates reachability in a region. Ignores branches to blocks outside of
70 // the region, and ignores branches to the region entry (for the case where
71 // the region is the inner part of a loop).
72 class ReachabilityGraph {
73 public:
74  ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
75  : Entry(Entry), Blocks(Blocks) {
76 #ifndef NDEBUG
77  // The region must have a single entry.
78  for (auto *MBB : Blocks) {
79  if (MBB != Entry) {
80  for (auto *Pred : MBB->predecessors()) {
81  assert(inRegion(Pred));
82  }
83  }
84  }
85 #endif
86  calculate();
87  }
88 
89  bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {
90  assert(inRegion(From) && inRegion(To));
91  auto I = Reachable.find(From);
92  if (I == Reachable.end())
93  return false;
94  return I->second.count(To);
95  }
96 
97  // "Loopers" are blocks that are in a loop. We detect these by finding blocks
98  // that can reach themselves.
99  const BlockSet &getLoopers() const { return Loopers; }
100 
101  // Get all blocks that are loop entries.
102  const BlockSet &getLoopEntries() const { return LoopEntries; }
103 
104  // Get all blocks that enter a particular loop from outside.
105  const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {
106  assert(inRegion(LoopEntry));
107  auto I = LoopEnterers.find(LoopEntry);
108  assert(I != LoopEnterers.end());
109  return I->second;
110  }
111 
112 private:
113  MachineBasicBlock *Entry;
114  const BlockSet &Blocks;
115 
116  BlockSet Loopers, LoopEntries;
118 
119  bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); }
120 
121  // Maps a block to all the other blocks it can reach.
123 
124  void calculate() {
125  // Reachability computation work list. Contains pairs of recent additions
126  // (A, B) where we just added a link A => B.
127  using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
128  SmallVector<BlockPair, 4> WorkList;
129 
130  // Add all relevant direct branches.
131  for (auto *MBB : Blocks) {
132  for (auto *Succ : MBB->successors()) {
133  if (Succ != Entry && inRegion(Succ)) {
134  Reachable[MBB].insert(Succ);
135  WorkList.emplace_back(MBB, Succ);
136  }
137  }
138  }
139 
140  while (!WorkList.empty()) {
141  MachineBasicBlock *MBB, *Succ;
142  std::tie(MBB, Succ) = WorkList.pop_back_val();
143  assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
144  if (MBB != Entry) {
145  // We recently added MBB => Succ, and that means we may have enabled
146  // Pred => MBB => Succ.
147  for (auto *Pred : MBB->predecessors()) {
148  if (Reachable[Pred].insert(Succ).second) {
149  WorkList.emplace_back(Pred, Succ);
150  }
151  }
152  }
153  }
154 
155  // Blocks that can return to themselves are in a loop.
156  for (auto *MBB : Blocks) {
157  if (canReach(MBB, MBB)) {
158  Loopers.insert(MBB);
159  }
160  }
161  assert(!Loopers.count(Entry));
162 
163  // Find the loop entries - loopers reachable from blocks not in that loop -
164  // and those outside blocks that reach them, the "loop enterers".
165  for (auto *Looper : Loopers) {
166  for (auto *Pred : Looper->predecessors()) {
167  // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
168  // otherwise, it is a block that enters into the loop.
169  if (!canReach(Looper, Pred)) {
170  LoopEntries.insert(Looper);
171  LoopEnterers[Looper].insert(Pred);
172  }
173  }
174  }
175  }
176 };
177 
178 // Finds the blocks in a single-entry loop, given the loop entry and the
179 // list of blocks that enter the loop.
180 class LoopBlocks {
181 public:
182  LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
183  : Entry(Entry), Enterers(Enterers) {
184  calculate();
185  }
186 
187  BlockSet &getBlocks() { return Blocks; }
188 
189 private:
190  MachineBasicBlock *Entry;
191  const BlockSet &Enterers;
192 
193  BlockSet Blocks;
194 
195  void calculate() {
196  // Going backwards from the loop entry, if we ignore the blocks entering
197  // from outside, we will traverse all the blocks in the loop.
198  BlockVector WorkList;
199  BlockSet AddedToWorkList;
200  Blocks.insert(Entry);
201  for (auto *Pred : Entry->predecessors()) {
202  if (!Enterers.count(Pred)) {
203  WorkList.push_back(Pred);
204  AddedToWorkList.insert(Pred);
205  }
206  }
207 
208  while (!WorkList.empty()) {
209  auto *MBB = WorkList.pop_back_val();
210  assert(!Enterers.count(MBB));
211  if (Blocks.insert(MBB).second) {
212  for (auto *Pred : MBB->predecessors()) {
213  if (!AddedToWorkList.count(Pred)) {
214  WorkList.push_back(Pred);
215  AddedToWorkList.insert(Pred);
216  }
217  }
218  }
219  }
220  }
221 };
222 
223 class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
224  StringRef getPassName() const override {
225  return "WebAssembly Fix Irreducible Control Flow";
226  }
227 
228  bool runOnMachineFunction(MachineFunction &MF) override;
229 
230  bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
231  MachineFunction &MF);
232 
233  void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
234  MachineFunction &MF, const ReachabilityGraph &Graph);
235 
236 public:
237  static char ID; // Pass identification, replacement for typeid
238  WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
239 };
240 
241 bool WebAssemblyFixIrreducibleControlFlow::processRegion(
242  MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
243  bool Changed = false;
244 
245  // Remove irreducibility before processing child loops, which may take
246  // multiple iterations.
247  while (true) {
248  ReachabilityGraph Graph(Entry, Blocks);
249 
250  bool FoundIrreducibility = false;
251 
252  for (auto *LoopEntry : Graph.getLoopEntries()) {
253  // Find mutual entries - all entries which can reach this one, and
254  // are reached by it (that always includes LoopEntry itself). All mutual
255  // entries must be in the same loop, so if we have more than one, then we
256  // have irreducible control flow.
257  //
258  // Note that irreducibility may involve inner loops, e.g. imagine A
259  // starts one loop, and it has B inside it which starts an inner loop.
260  // If we add a branch from all the way on the outside to B, then in a
261  // sense B is no longer an "inner" loop, semantically speaking. We will
262  // fix that irreducibility by adding a block that dispatches to either
263  // either A or B, so B will no longer be an inner loop in our output.
264  // (A fancier approach might try to keep it as such.)
265  //
266  // Note that we still need to recurse into inner loops later, to handle
267  // the case where the irreducibility is entirely nested - we would not
268  // be able to identify that at this point, since the enclosing loop is
269  // a group of blocks all of whom can reach each other. (We'll see the
270  // irreducibility after removing branches to the top of that enclosing
271  // loop.)
272  BlockSet MutualLoopEntries;
273  MutualLoopEntries.insert(LoopEntry);
274  for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
275  if (OtherLoopEntry != LoopEntry &&
276  Graph.canReach(LoopEntry, OtherLoopEntry) &&
277  Graph.canReach(OtherLoopEntry, LoopEntry)) {
278  MutualLoopEntries.insert(OtherLoopEntry);
279  }
280  }
281 
282  if (MutualLoopEntries.size() > 1) {
283  makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
284  FoundIrreducibility = true;
285  Changed = true;
286  break;
287  }
288  }
289  // Only go on to actually process the inner loops when we are done
290  // removing irreducible control flow and changing the graph. Modifying
291  // the graph as we go is possible, and that might let us avoid looking at
292  // the already-fixed loops again if we are careful, but all that is
293  // complex and bug-prone. Since irreducible loops are rare, just starting
294  // another iteration is best.
295  if (FoundIrreducibility) {
296  continue;
297  }
298 
299  for (auto *LoopEntry : Graph.getLoopEntries()) {
300  LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
301  // Each of these calls to processRegion may change the graph, but are
302  // guaranteed not to interfere with each other. The only changes we make
303  // to the graph are to add blocks on the way to a loop entry. As the
304  // loops are disjoint, that means we may only alter branches that exit
305  // another loop, which are ignored when recursing into that other loop
306  // anyhow.
307  if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {
308  Changed = true;
309  }
310  }
311 
312  return Changed;
313  }
314 }
315 
316 // Given a set of entries to a single loop, create a single entry for that
317 // loop by creating a dispatch block for them, routing control flow using
318 // a helper variable. Also updates Blocks with any new blocks created, so
319 // that we properly track all the blocks in the region. But this does not update
320 // ReachabilityGraph; this will be updated in the caller of this function as
321 // needed.
322 void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
323  BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
324  const ReachabilityGraph &Graph) {
325  assert(Entries.size() >= 2);
326 
327  // Sort the entries to ensure a deterministic build.
328  BlockVector SortedEntries(Entries.begin(), Entries.end());
329  llvm::sort(SortedEntries,
330  [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
331  auto ANum = A->getNumber();
332  auto BNum = B->getNumber();
333  return ANum < BNum;
334  });
335 
336 #ifndef NDEBUG
337  for (auto Block : SortedEntries)
338  assert(Block->getNumber() != -1);
339  if (SortedEntries.size() > 1) {
340  for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
341  ++I) {
342  auto ANum = (*I)->getNumber();
343  auto BNum = (*(std::next(I)))->getNumber();
344  assert(ANum != BNum);
345  }
346  }
347 #endif
348 
349  // Create a dispatch block which will contain a jump table to the entries.
351  MF.insert(MF.end(), Dispatch);
352  Blocks.insert(Dispatch);
353 
354  // Add the jump table.
355  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
356  MachineInstrBuilder MIB =
357  BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));
358 
359  // Add the register which will be used to tell the jump table which block to
360  // jump to.
362  Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
363  MIB.addReg(Reg);
364 
365  // Compute the indices in the superheader, one for each bad block, and
366  // add them as successors.
368  for (auto *Entry : SortedEntries) {
369  auto Pair = Indices.insert(std::make_pair(Entry, 0));
370  assert(Pair.second);
371 
372  unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
373  Pair.first->second = Index;
374 
375  MIB.addMBB(Entry);
376  Dispatch->addSuccessor(Entry);
377  }
378 
379  // Rewrite the problematic successors for every block that wants to reach
380  // the bad blocks. For simplicity, we just introduce a new block for every
381  // edge we need to rewrite. (Fancier things are possible.)
382 
383  BlockVector AllPreds;
384  for (auto *Entry : SortedEntries) {
385  for (auto *Pred : Entry->predecessors()) {
386  if (Pred != Dispatch) {
387  AllPreds.push_back(Pred);
388  }
389  }
390  }
391 
392  // This set stores predecessors within this loop.
394  for (auto *Pred : AllPreds) {
395  for (auto *Entry : Pred->successors()) {
396  if (!Entries.count(Entry))
397  continue;
398  if (Graph.canReach(Entry, Pred)) {
399  InLoop.insert(Pred);
400  break;
401  }
402  }
403  }
404 
405  // Record if each entry has a layout predecessor. This map stores
406  // <<Predecessor is within the loop?, loop entry>, layout predecessor>
407  std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *>
408  EntryToLayoutPred;
409  for (auto *Pred : AllPreds)
410  for (auto *Entry : Pred->successors())
411  if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
412  EntryToLayoutPred[std::make_pair(InLoop.count(Pred), Entry)] = Pred;
413 
414  // We need to create at most two routing blocks per entry: one for
415  // predecessors outside the loop and one for predecessors inside the loop.
416  // This map stores
417  // <<Predecessor is within the loop?, loop entry>, routing block>
418  std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *> Map;
419  for (auto *Pred : AllPreds) {
420  bool PredInLoop = InLoop.count(Pred);
421  for (auto *Entry : Pred->successors()) {
422  if (!Entries.count(Entry) ||
423  Map.count(std::make_pair(InLoop.count(Pred), Entry)))
424  continue;
425  // If there exists a layout predecessor of this entry and this predecessor
426  // is not that, we rather create a routing block after that layout
427  // predecessor to save a branch.
428  if (EntryToLayoutPred.count(std::make_pair(PredInLoop, Entry)) &&
429  EntryToLayoutPred[std::make_pair(PredInLoop, Entry)] != Pred)
430  continue;
431 
432  // This is a successor we need to rewrite.
434  MF.insert(Pred->isLayoutSuccessor(Entry)
436  : MF.end(),
437  Routing);
438  Blocks.insert(Routing);
439 
440  // Set the jump table's register of the index of the block we wish to
441  // jump to, and jump to the jump table.
442  BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
443  .addImm(Indices[Entry]);
444  BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
445  Routing->addSuccessor(Dispatch);
446  Map[std::make_pair(PredInLoop, Entry)] = Routing;
447  }
448  }
449 
450  for (auto *Pred : AllPreds) {
451  bool PredInLoop = InLoop.count(Pred);
452  // Remap the terminator operands and the successor list.
453  for (MachineInstr &Term : Pred->terminators())
454  for (auto &Op : Term.explicit_uses())
455  if (Op.isMBB() && Indices.count(Op.getMBB()))
456  Op.setMBB(Map[std::make_pair(PredInLoop, Op.getMBB())]);
457 
458  for (auto *Succ : Pred->successors()) {
459  if (!Entries.count(Succ))
460  continue;
461  auto *Routing = Map[std::make_pair(PredInLoop, Succ)];
462  Pred->replaceSuccessor(Succ, Routing);
463  }
464  }
465 
466  // Create a fake default label, because br_table requires one.
467  MIB.addMBB(MIB.getInstr()
469  .getMBB());
470 }
471 
472 } // end anonymous namespace
473 
475 INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
476  "Removes irreducible control flow", false, false)
477 
479  return new WebAssemblyFixIrreducibleControlFlow();
480 }
481 
482 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
483  MachineFunction &MF) {
484  LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
485  "********** Function: "
486  << MF.getName() << '\n');
487 
488  // Start the recursive process on the entire function body.
489  BlockSet AllBlocks;
490  for (auto &MBB : MF) {
491  AllBlocks.insert(&MBB);
492  }
493 
494  if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
495  // We rewrote part of the function; recompute relevant things.
496  MF.getRegInfo().invalidateLiveness();
497  MF.RenumberBlocks();
498  return true;
499  }
500 
501  return false;
502 }
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
This class represents lattice values for constants.
Definition: AllocatorList.h:23
#define LLVM_UNLIKELY(EXPR)
Definition: Compiler.h:212
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
unsigned Reg
A debug info location.
Definition: DebugLoc.h:33
This file contains the entry points for global functions defined in the LLVM WebAssembly back-end...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
iterator_range< succ_iterator > successors()
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:195
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
BasicBlockListType::iterator iterator
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:665
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file provides WebAssembly-specific target descriptions.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
iterator_range< pred_iterator > predecessors()
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
This file declares the WebAssembly-specific subclass of TargetSubtarget.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
BlockVerifier::State From
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly. ...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
FunctionPass * createWebAssemblyFixIrreducibleControlFlow()
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:63
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE, "Removes irreducible control flow", false, false) FunctionPass *llvm
#define I(x, y, z)
Definition: MD5.cpp:58
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:91
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:415
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19