LLVM  6.0.0svn
SimpleLoopUnswitch.cpp
Go to the documentation of this file.
1 //===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
11 #include "llvm/ADT/DenseMap.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/Sequence.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/ADT/SmallPtrSet.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/Twine.h"
22 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/Analysis/LoopPass.h"
24 #include "llvm/IR/BasicBlock.h"
25 #include "llvm/IR/Constant.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/InstrTypes.h"
30 #include "llvm/IR/Instruction.h"
31 #include "llvm/IR/Instructions.h"
32 #include "llvm/IR/IntrinsicInst.h"
33 #include "llvm/IR/Use.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/Debug.h"
46 #include <algorithm>
47 #include <cassert>
48 #include <iterator>
49 #include <numeric>
50 #include <utility>
51 
52 #define DEBUG_TYPE "simple-loop-unswitch"
53 
54 using namespace llvm;
55 
56 STATISTIC(NumBranches, "Number of branches unswitched");
57 STATISTIC(NumSwitches, "Number of switches unswitched");
58 STATISTIC(NumTrivial, "Number of unswitches that are trivial");
59 
61  "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
62  cl::desc("Forcibly enables non-trivial loop unswitching rather than "
63  "following the configuration passed into the pass."));
64 
65 static cl::opt<int>
66  UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
67  cl::desc("The cost threshold for unswitching a loop."));
68 
69 static void replaceLoopUsesWithConstant(Loop &L, Value &LIC,
70  Constant &Replacement) {
71  assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
72 
73  // Replace uses of LIC in the loop with the given constant.
74  for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) {
75  // Grab the use and walk past it so we can clobber it in the use list.
76  Use *U = &*UI++;
77  Instruction *UserI = dyn_cast<Instruction>(U->getUser());
78  if (!UserI || !L.contains(UserI))
79  continue;
80 
81  // Replace this use within the loop body.
82  *U = &Replacement;
83  }
84 }
85 
86 /// Update the IDom for a basic block whose predecessor set has changed.
87 ///
88 /// This routine is designed to work when the domtree update is relatively
89 /// localized by leveraging a known common dominator, often a loop header.
90 ///
91 /// FIXME: Should consider hand-rolling a slightly more efficient non-DFS
92 /// approach here as we can do that easily by persisting the candidate IDom's
93 /// dominating set between each predecessor.
94 ///
95 /// FIXME: Longer term, many uses of this can be replaced by an incremental
96 /// domtree update strategy that starts from a known dominating block and
97 /// rebuilds that subtree.
99  BasicBlock *KnownDominatingBB,
100  DominatorTree &DT) {
101  assert(pred_begin(BB) != pred_end(BB) &&
102  "This routine does not handle unreachable blocks!");
103 
104  BasicBlock *OrigIDom = DT[BB]->getIDom()->getBlock();
105 
106  BasicBlock *IDom = *pred_begin(BB);
107  assert(DT.dominates(KnownDominatingBB, IDom) &&
108  "Bad known dominating block!");
109 
110  // Walk all of the other predecessors finding the nearest common dominator
111  // until all predecessors are covered or we reach the loop header. The loop
112  // header necessarily dominates all loop exit blocks in loop simplified form
113  // so we can early-exit the moment we hit that block.
114  for (auto PI = std::next(pred_begin(BB)), PE = pred_end(BB);
115  PI != PE && IDom != KnownDominatingBB; ++PI) {
116  assert(DT.dominates(KnownDominatingBB, *PI) &&
117  "Bad known dominating block!");
118  IDom = DT.findNearestCommonDominator(IDom, *PI);
119  }
120 
121  if (IDom == OrigIDom)
122  return false;
123 
124  DT.changeImmediateDominator(BB, IDom);
125  return true;
126 }
127 
128 // Note that we don't currently use the IDFCalculator here for two reasons:
129 // 1) It computes dominator tree levels for the entire function on each run
130 // of 'compute'. While this isn't terrible, given that we expect to update
131 // relatively small subtrees of the domtree, it isn't necessarily the right
132 // tradeoff.
133 // 2) The interface doesn't fit this usage well. It doesn't operate in
134 // append-only, and builds several sets that we don't need.
135 //
136 // FIXME: Neither of these issues are a big deal and could be addressed with
137 // some amount of refactoring of IDFCalculator. That would allow us to share
138 // the core logic here (which is solving the same core problem).
143  assert(DomNodes.empty() && "Must start with no dominator nodes.");
144  assert(DomSet.empty() && "Must start with an empty dominator set.");
145 
146  // First flatten this subtree into sequence of nodes by doing a pre-order
147  // walk.
148  DomNodes.push_back(Node);
149  // We intentionally re-evaluate the size as each node can add new children.
150  // Because this is a tree walk, this cannot add any duplicates.
151  for (int i = 0; i < (int)DomNodes.size(); ++i)
152  DomNodes.insert(DomNodes.end(), DomNodes[i]->begin(), DomNodes[i]->end());
153 
154  // Now create a set of the basic blocks so we can quickly test for
155  // dominated successors. We could in theory use the DFS numbers of the
156  // dominator tree for this, but we want this to remain predictably fast
157  // even while we mutate the dominator tree in ways that would invalidate
158  // the DFS numbering.
159  for (DomTreeNode *InnerN : DomNodes)
160  DomSet.insert(InnerN->getBlock());
161 
162  // Now re-walk the nodes, appending every successor of every node that isn't
163  // in the set. Note that we don't append the node itself, even though if it
164  // is a successor it does not strictly dominate itself and thus it would be
165  // part of the dominance frontier. The reason we don't append it is that
166  // the node passed in came *from* the worklist and so it has already been
167  // processed.
168  for (DomTreeNode *InnerN : DomNodes)
169  for (BasicBlock *SuccBB : successors(InnerN->getBlock()))
170  if (!DomSet.count(SuccBB))
171  Worklist.insert(SuccBB);
172 
173  DomNodes.clear();
174  DomSet.clear();
175 }
176 
177 /// Update the dominator tree after unswitching a particular former exit block.
178 ///
179 /// This handles the full update of the dominator tree after hoisting a block
180 /// that previously was an exit block (or split off of an exit block) up to be
181 /// reached from the new immediate dominator of the preheader.
182 ///
183 /// The common case is simple -- we just move the unswitched block to have an
184 /// immediate dominator of the old preheader. But in complex cases, there may
185 /// be other blocks reachable from the unswitched block that are immediately
186 /// dominated by some node between the unswitched one and the old preheader.
187 /// All of these also need to be hoisted in the dominator tree. We also want to
188 /// minimize queries to the dominator tree because each step of this
189 /// invalidates any DFS numbers that would make queries fast.
190 static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH,
191  DominatorTree &DT) {
192  DomTreeNode *OldPHNode = DT[OldPH];
193  DomTreeNode *UnswitchedNode = DT[UnswitchedBB];
194  // If the dominator tree has already been updated for this unswitched node,
195  // we're done. This makes it easier to use this routine if there are multiple
196  // paths to the same unswitched destination.
197  if (UnswitchedNode->getIDom() == OldPHNode)
198  return;
199 
200  // First collect the domtree nodes that we are hoisting over. These are the
201  // set of nodes which may have children that need to be hoisted as well.
203  for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode;
204  IDom = IDom->getIDom())
205  DomChain.insert(IDom);
206 
207  // The unswitched block ends up immediately dominated by the old preheader --
208  // regardless of whether it is the loop exit block or split off of the loop
209  // exit block.
210  DT.changeImmediateDominator(UnswitchedNode, OldPHNode);
211 
212  // For everything that moves up the dominator tree, we need to examine the
213  // dominator frontier to see if it additionally should move up the dominator
214  // tree. This lambda appends the dominator frontier for a node on the
215  // worklist.
217 
218  // Scratch data structures reused by domfrontier finding.
221 
222  // Append the initial dom frontier nodes.
223  appendDomFrontier(UnswitchedNode, Worklist, DomNodes, DomSet);
224 
225  // Walk the worklist. We grow the list in the loop and so must recompute size.
226  for (int i = 0; i < (int)Worklist.size(); ++i) {
227  auto *BB = Worklist[i];
228 
229  DomTreeNode *Node = DT[BB];
230  assert(!DomChain.count(Node) &&
231  "Cannot be dominated by a block you can reach!");
232 
233  // If this block had an immediate dominator somewhere in the chain
234  // we hoisted over, then its position in the domtree needs to move as it is
235  // reachable from a node hoisted over this chain.
236  if (!DomChain.count(Node->getIDom()))
237  continue;
238 
239  DT.changeImmediateDominator(Node, OldPHNode);
240 
241  // Now add this node's dominator frontier to the worklist as well.
242  appendDomFrontier(Node, Worklist, DomNodes, DomSet);
243  }
244 }
245 
246 /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
247 /// incoming values along this edge.
248 static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
249  BasicBlock &ExitBB) {
250  for (Instruction &I : ExitBB) {
251  auto *PN = dyn_cast<PHINode>(&I);
252  if (!PN)
253  // No more PHIs to check.
254  return true;
255 
256  // If the incoming value for this edge isn't loop invariant the unswitch
257  // won't be trivial.
258  if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
259  return false;
260  }
261  llvm_unreachable("Basic blocks should never be empty!");
262 }
263 
264 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
265 ///
266 /// Requires that the loop exit and unswitched basic block are the same, and
267 /// that the exiting block was a unique predecessor of that block. Rewrites the
268 /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
269 /// PHI nodes from the old preheader that now contains the unswitched
270 /// terminator.
272  BasicBlock &OldExitingBB,
273  BasicBlock &OldPH) {
274  for (Instruction &I : UnswitchedBB) {
275  auto *PN = dyn_cast<PHINode>(&I);
276  if (!PN)
277  // No more PHIs to check.
278  break;
279 
280  // When the loop exit is directly unswitched we just need to update the
281  // incoming basic block. We loop to handle weird cases with repeated
282  // incoming blocks, but expect to typically only have one operand here.
283  for (auto i : seq<int>(0, PN->getNumOperands())) {
284  assert(PN->getIncomingBlock(i) == &OldExitingBB &&
285  "Found incoming block different from unique predecessor!");
286  PN->setIncomingBlock(i, &OldPH);
287  }
288  }
289 }
290 
291 /// Rewrite the PHI nodes in the loop exit basic block and the split off
292 /// unswitched block.
293 ///
294 /// Because the exit block remains an exit from the loop, this rewrites the
295 /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
296 /// nodes into the unswitched basic block to select between the value in the
297 /// old preheader and the loop exit.
299  BasicBlock &UnswitchedBB,
300  BasicBlock &OldExitingBB,
301  BasicBlock &OldPH) {
302  assert(&ExitBB != &UnswitchedBB &&
303  "Must have different loop exit and unswitched blocks!");
304  Instruction *InsertPt = &*UnswitchedBB.begin();
305  for (Instruction &I : ExitBB) {
306  auto *PN = dyn_cast<PHINode>(&I);
307  if (!PN)
308  // No more PHIs to check.
309  break;
310 
311  auto *NewPN = PHINode::Create(PN->getType(), /*NumReservedValues*/ 2,
312  PN->getName() + ".split", InsertPt);
313 
314  // Walk backwards over the old PHI node's inputs to minimize the cost of
315  // removing each one. We have to do this weird loop manually so that we
316  // create the same number of new incoming edges in the new PHI as we expect
317  // each case-based edge to be included in the unswitched switch in some
318  // cases.
319  // FIXME: This is really, really gross. It would be much cleaner if LLVM
320  // allowed us to create a single entry for a predecessor block without
321  // having separate entries for each "edge" even though these edges are
322  // required to produce identical results.
323  for (int i = PN->getNumIncomingValues() - 1; i >= 0; --i) {
324  if (PN->getIncomingBlock(i) != &OldExitingBB)
325  continue;
326 
327  Value *Incoming = PN->removeIncomingValue(i);
328  NewPN->addIncoming(Incoming, &OldPH);
329  }
330 
331  // Now replace the old PHI with the new one and wire the old one in as an
332  // input to the new one.
333  PN->replaceAllUsesWith(NewPN);
334  NewPN->addIncoming(PN, &ExitBB);
335  }
336 }
337 
338 /// Unswitch a trivial branch if the condition is loop invariant.
339 ///
340 /// This routine should only be called when loop code leading to the branch has
341 /// been validated as trivial (no side effects). This routine checks if the
342 /// condition is invariant and one of the successors is a loop exit. This
343 /// allows us to unswitch without duplicating the loop, making it trivial.
344 ///
345 /// If this routine fails to unswitch the branch it returns false.
346 ///
347 /// If the branch can be unswitched, this routine splits the preheader and
348 /// hoists the branch above that split. Preserves loop simplified form
349 /// (splitting the exit block as necessary). It simplifies the branch within
350 /// the loop to an unconditional branch but doesn't remove it entirely. Further
351 /// cleanup can be done with some simplify-cfg like pass.
353  LoopInfo &LI) {
354  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
355  DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
356 
357  Value *LoopCond = BI.getCondition();
358 
359  // Need a trivial loop condition to unswitch.
360  if (!L.isLoopInvariant(LoopCond))
361  return false;
362 
363  // FIXME: We should compute this once at the start and update it!
365  L.getExitBlocks(ExitBlocks);
366  SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(),
367  ExitBlocks.end());
368 
369  // Check to see if a successor of the branch is guaranteed to
370  // exit through a unique exit block without having any
371  // side-effects. If so, determine the value of Cond that causes
372  // it to do this.
374  ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext());
375  int LoopExitSuccIdx = 0;
376  auto *LoopExitBB = BI.getSuccessor(0);
377  if (!ExitBlockSet.count(LoopExitBB)) {
378  std::swap(CondVal, Replacement);
379  LoopExitSuccIdx = 1;
380  LoopExitBB = BI.getSuccessor(1);
381  if (!ExitBlockSet.count(LoopExitBB))
382  return false;
383  }
384  auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
385  assert(L.contains(ContinueBB) &&
386  "Cannot have both successors exit and still be in the loop!");
387 
388  auto *ParentBB = BI.getParent();
389  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB))
390  return false;
391 
392  DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal
393  << " == " << LoopCond << "\n");
394 
395  // Split the preheader, so that we know that there is a safe place to insert
396  // the conditional branch. We will change the preheader to have a conditional
397  // branch on LoopCond.
398  BasicBlock *OldPH = L.getLoopPreheader();
399  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
400 
401  // Now that we have a place to insert the conditional branch, create a place
402  // to branch to: this is the exit block out of the loop that we are
403  // unswitching. We need to split this if there are other loop predecessors.
404  // Because the loop is in simplified form, *any* other predecessor is enough.
405  BasicBlock *UnswitchedBB;
406  if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) {
407  (void)PredBB;
408  assert(PredBB == BI.getParent() &&
409  "A branch's parent isn't a predecessor!");
410  UnswitchedBB = LoopExitBB;
411  } else {
412  UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI);
413  }
414 
415  // Now splice the branch to gate reaching the new preheader and re-point its
416  // successors.
417  OldPH->getInstList().splice(std::prev(OldPH->end()),
418  BI.getParent()->getInstList(), BI);
419  OldPH->getTerminator()->eraseFromParent();
420  BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
421  BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
422 
423  // Create a new unconditional branch that will continue the loop as a new
424  // terminator.
425  BranchInst::Create(ContinueBB, ParentBB);
426 
427  // Rewrite the relevant PHI nodes.
428  if (UnswitchedBB == LoopExitBB)
429  rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
430  else
431  rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
432  *ParentBB, *OldPH);
433 
434  // Now we need to update the dominator tree.
435  updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
436  // But if we split something off of the loop exit block then we also removed
437  // one of the predecessors for the loop exit block and may need to update its
438  // idom.
439  if (UnswitchedBB != LoopExitBB)
440  updateIDomWithKnownCommonDominator(LoopExitBB, L.getHeader(), DT);
441 
442  // Since this is an i1 condition we can also trivially replace uses of it
443  // within the loop with a constant.
444  replaceLoopUsesWithConstant(L, *LoopCond, *Replacement);
445 
446  ++NumTrivial;
447  ++NumBranches;
448  return true;
449 }
450 
451 /// Unswitch a trivial switch if the condition is loop invariant.
452 ///
453 /// This routine should only be called when loop code leading to the switch has
454 /// been validated as trivial (no side effects). This routine checks if the
455 /// condition is invariant and that at least one of the successors is a loop
456 /// exit. This allows us to unswitch without duplicating the loop, making it
457 /// trivial.
458 ///
459 /// If this routine fails to unswitch the switch it returns false.
460 ///
461 /// If the switch can be unswitched, this routine splits the preheader and
462 /// copies the switch above that split. If the default case is one of the
463 /// exiting cases, it copies the non-exiting cases and points them at the new
464 /// preheader. If the default case is not exiting, it copies the exiting cases
465 /// and points the default at the preheader. It preserves loop simplified form
466 /// (splitting the exit blocks as necessary). It simplifies the switch within
467 /// the loop by removing now-dead cases. If the default case is one of those
468 /// unswitched, it replaces its destination with a new basic block containing
469 /// only unreachable. Such basic blocks, while technically loop exits, are not
470 /// considered for unswitching so this is a stable transform and the same
471 /// switch will not be revisited. If after unswitching there is only a single
472 /// in-loop successor, the switch is further simplified to an unconditional
473 /// branch. Still more cleanup can be done with some simplify-cfg like pass.
475  LoopInfo &LI) {
476  DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
477  Value *LoopCond = SI.getCondition();
478 
479  // If this isn't switching on an invariant condition, we can't unswitch it.
480  if (!L.isLoopInvariant(LoopCond))
481  return false;
482 
483  auto *ParentBB = SI.getParent();
484 
485  // FIXME: We should compute this once at the start and update it!
487  L.getExitBlocks(ExitBlocks);
488  SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(),
489  ExitBlocks.end());
490 
491  SmallVector<int, 4> ExitCaseIndices;
492  for (auto Case : SI.cases()) {
493  auto *SuccBB = Case.getCaseSuccessor();
494  if (ExitBlockSet.count(SuccBB) &&
495  areLoopExitPHIsLoopInvariant(L, *ParentBB, *SuccBB))
496  ExitCaseIndices.push_back(Case.getCaseIndex());
497  }
498  BasicBlock *DefaultExitBB = nullptr;
499  if (ExitBlockSet.count(SI.getDefaultDest()) &&
500  areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) &&
501  !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
502  DefaultExitBB = SI.getDefaultDest();
503  else if (ExitCaseIndices.empty())
504  return false;
505 
506  DEBUG(dbgs() << " unswitching trivial cases...\n");
507 
509  ExitCases.reserve(ExitCaseIndices.size());
510  // We walk the case indices backwards so that we remove the last case first
511  // and don't disrupt the earlier indices.
512  for (unsigned Index : reverse(ExitCaseIndices)) {
513  auto CaseI = SI.case_begin() + Index;
514  // Save the value of this case.
515  ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
516  // Delete the unswitched cases.
517  SI.removeCase(CaseI);
518  }
519 
520  // Check if after this all of the remaining cases point at the same
521  // successor.
522  BasicBlock *CommonSuccBB = nullptr;
523  if (SI.getNumCases() > 0 &&
524  std::all_of(std::next(SI.case_begin()), SI.case_end(),
525  [&SI](const SwitchInst::CaseHandle &Case) {
526  return Case.getCaseSuccessor() ==
527  SI.case_begin()->getCaseSuccessor();
528  }))
529  CommonSuccBB = SI.case_begin()->getCaseSuccessor();
530 
531  if (DefaultExitBB) {
532  // We can't remove the default edge so replace it with an edge to either
533  // the single common remaining successor (if we have one) or an unreachable
534  // block.
535  if (CommonSuccBB) {
536  SI.setDefaultDest(CommonSuccBB);
537  } else {
538  BasicBlock *UnreachableBB = BasicBlock::Create(
539  ParentBB->getContext(),
540  Twine(ParentBB->getName()) + ".unreachable_default",
541  ParentBB->getParent());
542  new UnreachableInst(ParentBB->getContext(), UnreachableBB);
543  SI.setDefaultDest(UnreachableBB);
544  DT.addNewBlock(UnreachableBB, ParentBB);
545  }
546  } else {
547  // If we're not unswitching the default, we need it to match any cases to
548  // have a common successor or if we have no cases it is the common
549  // successor.
550  if (SI.getNumCases() == 0)
551  CommonSuccBB = SI.getDefaultDest();
552  else if (SI.getDefaultDest() != CommonSuccBB)
553  CommonSuccBB = nullptr;
554  }
555 
556  // Split the preheader, so that we know that there is a safe place to insert
557  // the switch.
558  BasicBlock *OldPH = L.getLoopPreheader();
559  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
560  OldPH->getTerminator()->eraseFromParent();
561 
562  // Now add the unswitched switch.
563  auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
564 
565  // Rewrite the IR for the unswitched basic blocks. This requires two steps.
566  // First, we split any exit blocks with remaining in-loop predecessors. Then
567  // we update the PHIs in one of two ways depending on if there was a split.
568  // We walk in reverse so that we split in the same order as the cases
569  // appeared. This is purely for convenience of reading the resulting IR, but
570  // it doesn't cost anything really.
571  SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
573  // Handle the default exit if necessary.
574  // FIXME: It'd be great if we could merge this with the loop below but LLVM's
575  // ranges aren't quite powerful enough yet.
576  if (DefaultExitBB) {
577  if (pred_empty(DefaultExitBB)) {
578  UnswitchedExitBBs.insert(DefaultExitBB);
579  rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
580  } else {
581  auto *SplitBB =
582  SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
583  rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
584  *ParentBB, *OldPH);
585  updateIDomWithKnownCommonDominator(DefaultExitBB, L.getHeader(), DT);
586  DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
587  }
588  }
589  // Note that we must use a reference in the for loop so that we update the
590  // container.
591  for (auto &CasePair : reverse(ExitCases)) {
592  // Grab a reference to the exit block in the pair so that we can update it.
593  BasicBlock *ExitBB = CasePair.second;
594 
595  // If this case is the last edge into the exit block, we can simply reuse it
596  // as it will no longer be a loop exit. No mapping necessary.
597  if (pred_empty(ExitBB)) {
598  // Only rewrite once.
599  if (UnswitchedExitBBs.insert(ExitBB).second)
600  rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
601  continue;
602  }
603 
604  // Otherwise we need to split the exit block so that we retain an exit
605  // block from the loop and a target for the unswitched condition.
606  BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
607  if (!SplitExitBB) {
608  // If this is the first time we see this, do the split and remember it.
609  SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
610  rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
611  *ParentBB, *OldPH);
613  }
614  // Update the case pair to point to the split block.
615  CasePair.second = SplitExitBB;
616  }
617 
618  // Now add the unswitched cases. We do this in reverse order as we built them
619  // in reverse order.
620  for (auto CasePair : reverse(ExitCases)) {
621  ConstantInt *CaseVal = CasePair.first;
622  BasicBlock *UnswitchedBB = CasePair.second;
623 
624  NewSI->addCase(CaseVal, UnswitchedBB);
625  updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
626  }
627 
628  // If the default was unswitched, re-point it and add explicit cases for
629  // entering the loop.
630  if (DefaultExitBB) {
631  NewSI->setDefaultDest(DefaultExitBB);
632  updateDTAfterUnswitch(DefaultExitBB, OldPH, DT);
633 
634  // We removed all the exit cases, so we just copy the cases to the
635  // unswitched switch.
636  for (auto Case : SI.cases())
637  NewSI->addCase(Case.getCaseValue(), NewPH);
638  }
639 
640  // If we ended up with a common successor for every path through the switch
641  // after unswitching, rewrite it to an unconditional branch to make it easy
642  // to recognize. Otherwise we potentially have to recognize the default case
643  // pointing at unreachable and other complexity.
644  if (CommonSuccBB) {
645  BasicBlock *BB = SI.getParent();
646  SI.eraseFromParent();
647  BranchInst::Create(CommonSuccBB, BB);
648  }
649 
650  DT.verifyDomTree();
651  ++NumTrivial;
652  ++NumSwitches;
653  return true;
654 }
655 
656 /// This routine scans the loop to find a branch or switch which occurs before
657 /// any side effects occur. These can potentially be unswitched without
658 /// duplicating the loop. If a branch or switch is successfully unswitched the
659 /// scanning continues to see if subsequent branches or switches have become
660 /// trivial. Once all trivial candidates have been unswitched, this routine
661 /// returns.
662 ///
663 /// The return value indicates whether anything was unswitched (and therefore
664 /// changed).
666  LoopInfo &LI) {
667  bool Changed = false;
668 
669  // If loop header has only one reachable successor we should keep looking for
670  // trivial condition candidates in the successor as well. An alternative is
671  // to constant fold conditions and merge successors into loop header (then we
672  // only need to check header's terminator). The reason for not doing this in
673  // LoopUnswitch pass is that it could potentially break LoopPassManager's
674  // invariants. Folding dead branches could either eliminate the current loop
675  // or make other loops unreachable. LCSSA form might also not be preserved
676  // after deleting branches. The following code keeps traversing loop header's
677  // successors until it finds the trivial condition candidate (condition that
678  // is not a constant). Since unswitching generates branches with constant
679  // conditions, this scenario could be very common in practice.
680  BasicBlock *CurrentBB = L.getHeader();
682  Visited.insert(CurrentBB);
683  do {
684  // Check if there are any side-effecting instructions (e.g. stores, calls,
685  // volatile loads) in the part of the loop that the code *would* execute
686  // without unswitching.
687  if (llvm::any_of(*CurrentBB,
688  [](Instruction &I) { return I.mayHaveSideEffects(); }))
689  return Changed;
690 
691  TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
692 
693  if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
694  // Don't bother trying to unswitch past a switch with a constant
695  // condition. This should be removed prior to running this pass by
696  // simplify-cfg.
697  if (isa<Constant>(SI->getCondition()))
698  return Changed;
699 
700  if (!unswitchTrivialSwitch(L, *SI, DT, LI))
701  // Coludn't unswitch this one so we're done.
702  return Changed;
703 
704  // Mark that we managed to unswitch something.
705  Changed = true;
706 
707  // If unswitching turned the terminator into an unconditional branch then
708  // we can continue. The unswitching logic specifically works to fold any
709  // cases it can into an unconditional branch to make it easier to
710  // recognize here.
711  auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
712  if (!BI || BI->isConditional())
713  return Changed;
714 
715  CurrentBB = BI->getSuccessor(0);
716  continue;
717  }
718 
719  auto *BI = dyn_cast<BranchInst>(CurrentTerm);
720  if (!BI)
721  // We do not understand other terminator instructions.
722  return Changed;
723 
724  // Don't bother trying to unswitch past an unconditional branch or a branch
725  // with a constant value. These should be removed by simplify-cfg prior to
726  // running this pass.
727  if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
728  return Changed;
729 
730  // Found a trivial condition candidate: non-foldable conditional branch. If
731  // we fail to unswitch this, we can't do anything else that is trivial.
732  if (!unswitchTrivialBranch(L, *BI, DT, LI))
733  return Changed;
734 
735  // Mark that we managed to unswitch something.
736  Changed = true;
737 
738  // We unswitched the branch. This should always leave us with an
739  // unconditional branch that we can follow now.
740  BI = cast<BranchInst>(CurrentBB->getTerminator());
741  assert(!BI->isConditional() &&
742  "Cannot form a conditional branch by unswitching1");
743  CurrentBB = BI->getSuccessor(0);
744 
745  // When continuing, if we exit the loop or reach a previous visited block,
746  // then we can not reach any trivial condition candidates (unfoldable
747  // branch instructions or switch instructions) and no unswitch can happen.
748  } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
749 
750  return Changed;
751 }
752 
753 /// Build the cloned blocks for an unswitched copy of the given loop.
754 ///
755 /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
756 /// after the split block (`SplitBB`) that will be used to select between the
757 /// cloned and original loop.
758 ///
759 /// This routine handles cloning all of the necessary loop blocks and exit
760 /// blocks including rewriting their instructions and the relevant PHI nodes.
761 /// It skips loop and exit blocks that are not necessary based on the provided
762 /// set. It also correctly creates the unconditional branch in the cloned
763 /// unswitched parent block to only point at the unswitched successor.
764 ///
765 /// This does not handle most of the necessary updates to `LoopInfo`. Only exit
766 /// block splitting is correctly reflected in `LoopInfo`, essentially all of
767 /// the cloned blocks (and their loops) are left without full `LoopInfo`
768 /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
769 /// blocks to them but doesn't create the cloned `DominatorTree` structure and
770 /// instead the caller must recompute an accurate DT. It *does* correctly
771 /// update the `AssumptionCache` provided in `AC`.
773  Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
774  ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
775  BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
776  const SmallPtrSetImpl<BasicBlock *> &SkippedLoopAndExitBlocks,
778  LoopInfo &LI) {
780  NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
781 
782  // We will need to clone a bunch of blocks, wrap up the clone operation in
783  // a helper.
784  auto CloneBlock = [&](BasicBlock *OldBB) {
785  // Clone the basic block and insert it before the new preheader.
786  BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
787  NewBB->moveBefore(LoopPH);
788 
789  // Record this block and the mapping.
790  NewBlocks.push_back(NewBB);
791  VMap[OldBB] = NewBB;
792 
793  // Add the block to the domtree. We'll move it to the correct position
794  // below.
795  DT.addNewBlock(NewBB, SplitBB);
796 
797  return NewBB;
798  };
799 
800  // First, clone the preheader.
801  auto *ClonedPH = CloneBlock(LoopPH);
802 
803  // Then clone all the loop blocks, skipping the ones that aren't necessary.
804  for (auto *LoopBB : L.blocks())
805  if (!SkippedLoopAndExitBlocks.count(LoopBB))
806  CloneBlock(LoopBB);
807 
808  // Split all the loop exit edges so that when we clone the exit blocks, if
809  // any of the exit blocks are *also* a preheader for some other loop, we
810  // don't create multiple predecessors entering the loop header.
811  for (auto *ExitBB : ExitBlocks) {
812  if (SkippedLoopAndExitBlocks.count(ExitBB))
813  continue;
814 
815  // When we are going to clone an exit, we don't need to clone all the
816  // instructions in the exit block and we want to ensure we have an easy
817  // place to merge the CFG, so split the exit first. This is always safe to
818  // do because there cannot be any non-loop predecessors of a loop exit in
819  // loop simplified form.
820  auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
821 
822  // Rearrange the names to make it easier to write test cases by having the
823  // exit block carry the suffix rather than the merge block carrying the
824  // suffix.
825  MergeBB->takeName(ExitBB);
826  ExitBB->setName(Twine(MergeBB->getName()) + ".split");
827 
828  // Now clone the original exit block.
829  auto *ClonedExitBB = CloneBlock(ExitBB);
830  assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
831  "Exit block should have been split to have one successor!");
832  assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
833  "Cloned exit block has the wrong successor!");
834 
835  // Move the merge block's idom to be the split point as one exit is
836  // dominated by one header, and the other by another, so we know the split
837  // point dominates both. While the dominator tree isn't fully accurate, we
838  // want sub-trees within the original loop to be correctly reflect
839  // dominance within that original loop (at least) and that requires moving
840  // the merge block out of that subtree.
841  // FIXME: This is very brittle as we essentially have a partial contract on
842  // the dominator tree. We really need to instead update it and keep it
843  // valid or stop relying on it.
844  DT.changeImmediateDominator(MergeBB, SplitBB);
845 
846  // Remap any cloned instructions and create a merge phi node for them.
847  for (auto ZippedInsts : llvm::zip_first(
848  llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
849  llvm::make_range(ClonedExitBB->begin(),
850  std::prev(ClonedExitBB->end())))) {
851  Instruction &I = std::get<0>(ZippedInsts);
852  Instruction &ClonedI = std::get<1>(ZippedInsts);
853 
854  // The only instructions in the exit block should be PHI nodes and
855  // potentially a landing pad.
856  assert(
857  (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
858  "Bad instruction in exit block!");
859  // We should have a value map between the instruction and its clone.
860  assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
861 
862  auto *MergePN =
863  PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
864  &*MergeBB->getFirstInsertionPt());
865  I.replaceAllUsesWith(MergePN);
866  MergePN->addIncoming(&I, ExitBB);
867  MergePN->addIncoming(&ClonedI, ClonedExitBB);
868  }
869  }
870 
871  // Rewrite the instructions in the cloned blocks to refer to the instructions
872  // in the cloned blocks. We have to do this as a second pass so that we have
873  // everything available. Also, we have inserted new instructions which may
874  // include assume intrinsics, so we update the assumption cache while
875  // processing this.
876  for (auto *ClonedBB : NewBlocks)
877  for (Instruction &I : *ClonedBB) {
878  RemapInstruction(&I, VMap,
880  if (auto *II = dyn_cast<IntrinsicInst>(&I))
881  if (II->getIntrinsicID() == Intrinsic::assume)
882  AC.registerAssumption(II);
883  }
884 
885  // Remove the cloned parent as a predecessor of the cloned continue successor
886  // if we did in fact clone it.
887  auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
888  if (auto *ClonedContinueSuccBB =
889  cast_or_null<BasicBlock>(VMap.lookup(ContinueSuccBB)))
890  ClonedContinueSuccBB->removePredecessor(ClonedParentBB,
891  /*DontDeleteUselessPHIs*/ true);
892  // Replace the cloned branch with an unconditional branch to the cloneed
893  // unswitched successor.
894  auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
895  ClonedParentBB->getTerminator()->eraseFromParent();
896  BranchInst::Create(ClonedSuccBB, ClonedParentBB);
897 
898  // Update any PHI nodes in the cloned successors of the skipped blocks to not
899  // have spurious incoming values.
900  for (auto *LoopBB : L.blocks())
901  if (SkippedLoopAndExitBlocks.count(LoopBB))
902  for (auto *SuccBB : successors(LoopBB))
903  if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
904  for (PHINode &PN : ClonedSuccBB->phis())
905  PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
906 
907  return ClonedPH;
908 }
909 
910 /// Recursively clone the specified loop and all of its children.
911 ///
912 /// The target parent loop for the clone should be provided, or can be null if
913 /// the clone is a top-level loop. While cloning, all the blocks are mapped
914 /// with the provided value map. The entire original loop must be present in
915 /// the value map. The cloned loop is returned.
916 static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
917  const ValueToValueMapTy &VMap, LoopInfo &LI) {
918  auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
919  assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
920  ClonedL.reserveBlocks(OrigL.getNumBlocks());
921  for (auto *BB : OrigL.blocks()) {
922  auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
923  ClonedL.addBlockEntry(ClonedBB);
924  if (LI.getLoopFor(BB) == &OrigL) {
925  assert(!LI.getLoopFor(ClonedBB) &&
926  "Should not have an existing loop for this block!");
927  LI.changeLoopFor(ClonedBB, &ClonedL);
928  }
929  }
930  };
931 
932  // We specially handle the first loop because it may get cloned into
933  // a different parent and because we most commonly are cloning leaf loops.
934  Loop *ClonedRootL = LI.AllocateLoop();
935  if (RootParentL)
936  RootParentL->addChildLoop(ClonedRootL);
937  else
938  LI.addTopLevelLoop(ClonedRootL);
939  AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
940 
941  if (OrigRootL.empty())
942  return ClonedRootL;
943 
944  // If we have a nest, we can quickly clone the entire loop nest using an
945  // iterative approach because it is a tree. We keep the cloned parent in the
946  // data structure to avoid repeatedly querying through a map to find it.
947  SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
948  // Build up the loops to clone in reverse order as we'll clone them from the
949  // back.
950  for (Loop *ChildL : llvm::reverse(OrigRootL))
951  LoopsToClone.push_back({ClonedRootL, ChildL});
952  do {
953  Loop *ClonedParentL, *L;
954  std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
955  Loop *ClonedL = LI.AllocateLoop();
956  ClonedParentL->addChildLoop(ClonedL);
957  AddClonedBlocksToLoop(*L, *ClonedL);
958  for (Loop *ChildL : llvm::reverse(*L))
959  LoopsToClone.push_back({ClonedL, ChildL});
960  } while (!LoopsToClone.empty());
961 
962  return ClonedRootL;
963 }
964 
965 /// Build the cloned loops of an original loop from unswitching.
966 ///
967 /// Because unswitching simplifies the CFG of the loop, this isn't a trivial
968 /// operation. We need to re-verify that there even is a loop (as the backedge
969 /// may not have been cloned), and even if there are remaining backedges the
970 /// backedge set may be different. However, we know that each child loop is
971 /// undisturbed, we only need to find where to place each child loop within
972 /// either any parent loop or within a cloned version of the original loop.
973 ///
974 /// Because child loops may end up cloned outside of any cloned version of the
975 /// original loop, multiple cloned sibling loops may be created. All of them
976 /// are returned so that the newly introduced loop nest roots can be
977 /// identified.
979  const ValueToValueMapTy &VMap, LoopInfo &LI,
980  SmallVectorImpl<Loop *> &NonChildClonedLoops) {
981  Loop *ClonedL = nullptr;
982 
983  auto *OrigPH = OrigL.getLoopPreheader();
984  auto *OrigHeader = OrigL.getHeader();
985 
986  auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
987  auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
988 
989  // We need to know the loops of the cloned exit blocks to even compute the
990  // accurate parent loop. If we only clone exits to some parent of the
991  // original parent, we want to clone into that outer loop. We also keep track
992  // of the loops that our cloned exit blocks participate in.
993  Loop *ParentL = nullptr;
994  SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
996  ClonedExitsInLoops.reserve(ExitBlocks.size());
997  for (auto *ExitBB : ExitBlocks)
998  if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
999  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1000  ExitLoopMap[ClonedExitBB] = ExitL;
1001  ClonedExitsInLoops.push_back(ClonedExitBB);
1002  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1003  ParentL = ExitL;
1004  }
1005  assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1006  ParentL->contains(OrigL.getParentLoop())) &&
1007  "The computed parent loop should always contain (or be) the parent of "
1008  "the original loop.");
1009 
1010  // We build the set of blocks dominated by the cloned header from the set of
1011  // cloned blocks out of the original loop. While not all of these will
1012  // necessarily be in the cloned loop, it is enough to establish that they
1013  // aren't in unreachable cycles, etc.
1014  SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1015  for (auto *BB : OrigL.blocks())
1016  if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1017  ClonedLoopBlocks.insert(ClonedBB);
1018 
1019  // Rebuild the set of blocks that will end up in the cloned loop. We may have
1020  // skipped cloning some region of this loop which can in turn skip some of
1021  // the backedges so we have to rebuild the blocks in the loop based on the
1022  // backedges that remain after cloning.
1024  SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1025  for (auto *Pred : predecessors(ClonedHeader)) {
1026  // The only possible non-loop header predecessor is the preheader because
1027  // we know we cloned the loop in simplified form.
1028  if (Pred == ClonedPH)
1029  continue;
1030 
1031  // Because the loop was in simplified form, the only non-loop predecessor
1032  // should be the preheader.
1033  assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1034  "header other than the preheader "
1035  "that is not part of the loop!");
1036 
1037  // Insert this block into the loop set and on the first visit (and if it
1038  // isn't the header we're currently walking) put it into the worklist to
1039  // recurse through.
1040  if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1041  Worklist.push_back(Pred);
1042  }
1043 
1044  // If we had any backedges then there *is* a cloned loop. Put the header into
1045  // the loop set and then walk the worklist backwards to find all the blocks
1046  // that remain within the loop after cloning.
1047  if (!BlocksInClonedLoop.empty()) {
1048  BlocksInClonedLoop.insert(ClonedHeader);
1049 
1050  while (!Worklist.empty()) {
1051  BasicBlock *BB = Worklist.pop_back_val();
1052  assert(BlocksInClonedLoop.count(BB) &&
1053  "Didn't put block into the loop set!");
1054 
1055  // Insert any predecessors that are in the possible set into the cloned
1056  // set, and if the insert is successful, add them to the worklist. Note
1057  // that we filter on the blocks that are definitely reachable via the
1058  // backedge to the loop header so we may prune out dead code within the
1059  // cloned loop.
1060  for (auto *Pred : predecessors(BB))
1061  if (ClonedLoopBlocks.count(Pred) &&
1062  BlocksInClonedLoop.insert(Pred).second)
1063  Worklist.push_back(Pred);
1064  }
1065 
1066  ClonedL = LI.AllocateLoop();
1067  if (ParentL) {
1068  ParentL->addBasicBlockToLoop(ClonedPH, LI);
1069  ParentL->addChildLoop(ClonedL);
1070  } else {
1071  LI.addTopLevelLoop(ClonedL);
1072  }
1073 
1074  ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1075  // We don't want to just add the cloned loop blocks based on how we
1076  // discovered them. The original order of blocks was carefully built in
1077  // a way that doesn't rely on predecessor ordering. Rather than re-invent
1078  // that logic, we just re-walk the original blocks (and those of the child
1079  // loops) and filter them as we add them into the cloned loop.
1080  for (auto *BB : OrigL.blocks()) {
1081  auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1082  if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1083  continue;
1084 
1085  // Directly add the blocks that are only in this loop.
1086  if (LI.getLoopFor(BB) == &OrigL) {
1087  ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1088  continue;
1089  }
1090 
1091  // We want to manually add it to this loop and parents.
1092  // Registering it with LoopInfo will happen when we clone the top
1093  // loop for this block.
1094  for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1095  PL->addBlockEntry(ClonedBB);
1096  }
1097 
1098  // Now add each child loop whose header remains within the cloned loop. All
1099  // of the blocks within the loop must satisfy the same constraints as the
1100  // header so once we pass the header checks we can just clone the entire
1101  // child loop nest.
1102  for (Loop *ChildL : OrigL) {
1103  auto *ClonedChildHeader =
1104  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1105  if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1106  continue;
1107 
1108 #ifndef NDEBUG
1109  // We should never have a cloned child loop header but fail to have
1110  // all of the blocks for that child loop.
1111  for (auto *ChildLoopBB : ChildL->blocks())
1112  assert(BlocksInClonedLoop.count(
1113  cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1114  "Child cloned loop has a header within the cloned outer "
1115  "loop but not all of its blocks!");
1116 #endif
1117 
1118  cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1119  }
1120  }
1121 
1122  // Now that we've handled all the components of the original loop that were
1123  // cloned into a new loop, we still need to handle anything from the original
1124  // loop that wasn't in a cloned loop.
1125 
1126  // Figure out what blocks are left to place within any loop nest containing
1127  // the unswitched loop. If we never formed a loop, the cloned PH is one of
1128  // them.
1129  SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1130  if (BlocksInClonedLoop.empty())
1131  UnloopedBlockSet.insert(ClonedPH);
1132  for (auto *ClonedBB : ClonedLoopBlocks)
1133  if (!BlocksInClonedLoop.count(ClonedBB))
1134  UnloopedBlockSet.insert(ClonedBB);
1135 
1136  // Copy the cloned exits and sort them in ascending loop depth, we'll work
1137  // backwards across these to process them inside out. The order shouldn't
1138  // matter as we're just trying to build up the map from inside-out; we use
1139  // the map in a more stably ordered way below.
1140  auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1141  std::sort(OrderedClonedExitsInLoops.begin(), OrderedClonedExitsInLoops.end(),
1142  [&](BasicBlock *LHS, BasicBlock *RHS) {
1143  return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1144  ExitLoopMap.lookup(RHS)->getLoopDepth();
1145  });
1146 
1147  // Populate the existing ExitLoopMap with everything reachable from each
1148  // exit, starting from the inner most exit.
1149  while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1150  assert(Worklist.empty() && "Didn't clear worklist!");
1151 
1152  BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1153  Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1154 
1155  // Walk the CFG back until we hit the cloned PH adding everything reachable
1156  // and in the unlooped set to this exit block's loop.
1157  Worklist.push_back(ExitBB);
1158  do {
1159  BasicBlock *BB = Worklist.pop_back_val();
1160  // We can stop recursing at the cloned preheader (if we get there).
1161  if (BB == ClonedPH)
1162  continue;
1163 
1164  for (BasicBlock *PredBB : predecessors(BB)) {
1165  // If this pred has already been moved to our set or is part of some
1166  // (inner) loop, no update needed.
1167  if (!UnloopedBlockSet.erase(PredBB)) {
1168  assert(
1169  (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1170  "Predecessor not mapped to a loop!");
1171  continue;
1172  }
1173 
1174  // We just insert into the loop set here. We'll add these blocks to the
1175  // exit loop after we build up the set in an order that doesn't rely on
1176  // predecessor order (which in turn relies on use list order).
1177  bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1178  (void)Inserted;
1179  assert(Inserted && "Should only visit an unlooped block once!");
1180 
1181  // And recurse through to its predecessors.
1182  Worklist.push_back(PredBB);
1183  }
1184  } while (!Worklist.empty());
1185  }
1186 
1187  // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1188  // blocks to their outer loops, walk the cloned blocks and the cloned exits
1189  // in their original order adding them to the correct loop.
1190 
1191  // We need a stable insertion order. We use the order of the original loop
1192  // order and map into the correct parent loop.
1193  for (auto *BB : llvm::concat<BasicBlock *const>(
1194  makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1195  if (Loop *OuterL = ExitLoopMap.lookup(BB))
1196  OuterL->addBasicBlockToLoop(BB, LI);
1197 
1198 #ifndef NDEBUG
1199  for (auto &BBAndL : ExitLoopMap) {
1200  auto *BB = BBAndL.first;
1201  auto *OuterL = BBAndL.second;
1202  assert(LI.getLoopFor(BB) == OuterL &&
1203  "Failed to put all blocks into outer loops!");
1204  }
1205 #endif
1206 
1207  // Now that all the blocks are placed into the correct containing loop in the
1208  // absence of child loops, find all the potentially cloned child loops and
1209  // clone them into whatever outer loop we placed their header into.
1210  for (Loop *ChildL : OrigL) {
1211  auto *ClonedChildHeader =
1212  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1213  if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1214  continue;
1215 
1216 #ifndef NDEBUG
1217  for (auto *ChildLoopBB : ChildL->blocks())
1218  assert(VMap.count(ChildLoopBB) &&
1219  "Cloned a child loop header but not all of that loops blocks!");
1220 #endif
1221 
1222  NonChildClonedLoops.push_back(cloneLoopNest(
1223  *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1224  }
1225 
1226  // Return the main cloned loop if any.
1227  return ClonedL;
1228 }
1229 
1230 static void deleteDeadBlocksFromLoop(Loop &L, BasicBlock *DeadSubtreeRoot,
1231  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1232  DominatorTree &DT, LoopInfo &LI) {
1233  // Walk the dominator tree to build up the set of blocks we will delete here.
1234  // The order is designed to allow us to always delete bottom-up and avoid any
1235  // dangling uses.
1237  DeadBlocks.insert(DeadSubtreeRoot);
1238  for (int i = 0; i < (int)DeadBlocks.size(); ++i)
1239  for (DomTreeNode *ChildN : *DT[DeadBlocks[i]]) {
1240  // FIXME: This assert should pass and that means we don't change nearly
1241  // as much below! Consider rewriting all of this to avoid deleting
1242  // blocks. They are always cloned before being deleted, and so instead
1243  // could just be moved.
1244  // FIXME: This in turn means that we might actually be more able to
1245  // update the domtree.
1246  assert((L.contains(ChildN->getBlock()) ||
1247  llvm::find(ExitBlocks, ChildN->getBlock()) != ExitBlocks.end()) &&
1248  "Should never reach beyond the loop and exits when deleting!");
1249  DeadBlocks.insert(ChildN->getBlock());
1250  }
1251 
1252  // Filter out the dead blocks from the exit blocks list so that it can be
1253  // used in the caller.
1254  llvm::erase_if(ExitBlocks,
1255  [&](BasicBlock *BB) { return DeadBlocks.count(BB); });
1256 
1257  // Remove these blocks from their successors.
1258  for (auto *BB : DeadBlocks)
1259  for (BasicBlock *SuccBB : successors(BB))
1260  SuccBB->removePredecessor(BB, /*DontDeleteUselessPHIs*/ true);
1261 
1262  // Walk from this loop up through its parents removing all of the dead blocks.
1263  for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1264  for (auto *BB : DeadBlocks)
1265  ParentL->getBlocksSet().erase(BB);
1266  llvm::erase_if(ParentL->getBlocksVector(),
1267  [&](BasicBlock *BB) { return DeadBlocks.count(BB); });
1268  }
1269 
1270  // Now delete the dead child loops. This raw delete will clear them
1271  // recursively.
1272  llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1273  if (!DeadBlocks.count(ChildL->getHeader()))
1274  return false;
1275 
1276  assert(llvm::all_of(ChildL->blocks(),
1277  [&](BasicBlock *ChildBB) {
1278  return DeadBlocks.count(ChildBB);
1279  }) &&
1280  "If the child loop header is dead all blocks in the child loop must "
1281  "be dead as well!");
1282  LI.destroy(ChildL);
1283  return true;
1284  });
1285 
1286  // Remove the mappings for the dead blocks.
1287  for (auto *BB : DeadBlocks)
1288  LI.changeLoopFor(BB, nullptr);
1289 
1290  // Drop all the references from these blocks to others to handle cyclic
1291  // references as we start deleting the blocks themselves.
1292  for (auto *BB : DeadBlocks)
1293  BB->dropAllReferences();
1294 
1295  for (auto *BB : llvm::reverse(DeadBlocks)) {
1296  DT.eraseNode(BB);
1297  BB->eraseFromParent();
1298  }
1299 }
1300 
1301 /// Recompute the set of blocks in a loop after unswitching.
1302 ///
1303 /// This walks from the original headers predecessors to rebuild the loop. We
1304 /// take advantage of the fact that new blocks can't have been added, and so we
1305 /// filter by the original loop's blocks. This also handles potentially
1306 /// unreachable code that we don't want to explore but might be found examining
1307 /// the predecessors of the header.
1308 ///
1309 /// If the original loop is no longer a loop, this will return an empty set. If
1310 /// it remains a loop, all the blocks within it will be added to the set
1311 /// (including those blocks in inner loops).
1313  LoopInfo &LI) {
1315 
1316  auto *PH = L.getLoopPreheader();
1317  auto *Header = L.getHeader();
1318 
1319  // A worklist to use while walking backwards from the header.
1321 
1322  // First walk the predecessors of the header to find the backedges. This will
1323  // form the basis of our walk.
1324  for (auto *Pred : predecessors(Header)) {
1325  // Skip the preheader.
1326  if (Pred == PH)
1327  continue;
1328 
1329  // Because the loop was in simplified form, the only non-loop predecessor
1330  // is the preheader.
1331  assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1332  "than the preheader that is not part of the "
1333  "loop!");
1334 
1335  // Insert this block into the loop set and on the first visit and, if it
1336  // isn't the header we're currently walking, put it into the worklist to
1337  // recurse through.
1338  if (LoopBlockSet.insert(Pred).second && Pred != Header)
1339  Worklist.push_back(Pred);
1340  }
1341 
1342  // If no backedges were found, we're done.
1343  if (LoopBlockSet.empty())
1344  return LoopBlockSet;
1345 
1346  // Add the loop header to the set.
1347  LoopBlockSet.insert(Header);
1348 
1349  // We found backedges, recurse through them to identify the loop blocks.
1350  while (!Worklist.empty()) {
1351  BasicBlock *BB = Worklist.pop_back_val();
1352  assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1353 
1354  // Because we know the inner loop structure remains valid we can use the
1355  // loop structure to jump immediately across the entire nested loop.
1356  // Further, because it is in loop simplified form, we can directly jump
1357  // to its preheader afterward.
1358  if (Loop *InnerL = LI.getLoopFor(BB))
1359  if (InnerL != &L) {
1360  assert(L.contains(InnerL) &&
1361  "Should not reach a loop *outside* this loop!");
1362  // The preheader is the only possible predecessor of the loop so
1363  // insert it into the set and check whether it was already handled.
1364  auto *InnerPH = InnerL->getLoopPreheader();
1365  assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1366  "but not contain the inner loop "
1367  "preheader!");
1368  if (!LoopBlockSet.insert(InnerPH).second)
1369  // The only way to reach the preheader is through the loop body
1370  // itself so if it has been visited the loop is already handled.
1371  continue;
1372 
1373  // Insert all of the blocks (other than those already present) into
1374  // the loop set. The only block we expect to already be in the set is
1375  // the one we used to find this loop as we immediately handle the
1376  // others the first time we encounter the loop.
1377  for (auto *InnerBB : InnerL->blocks()) {
1378  if (InnerBB == BB) {
1379  assert(LoopBlockSet.count(InnerBB) &&
1380  "Block should already be in the set!");
1381  continue;
1382  }
1383 
1384  bool Inserted = LoopBlockSet.insert(InnerBB).second;
1385  (void)Inserted;
1386  assert(Inserted && "Should only insert an inner loop once!");
1387  }
1388 
1389  // Add the preheader to the worklist so we will continue past the
1390  // loop body.
1391  Worklist.push_back(InnerPH);
1392  continue;
1393  }
1394 
1395  // Insert any predecessors that were in the original loop into the new
1396  // set, and if the insert is successful, add them to the worklist.
1397  for (auto *Pred : predecessors(BB))
1398  if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1399  Worklist.push_back(Pred);
1400  }
1401 
1402  // We've found all the blocks participating in the loop, return our completed
1403  // set.
1404  return LoopBlockSet;
1405 }
1406 
1407 /// Rebuild a loop after unswitching removes some subset of blocks and edges.
1408 ///
1409 /// The removal may have removed some child loops entirely but cannot have
1410 /// disturbed any remaining child loops. However, they may need to be hoisted
1411 /// to the parent loop (or to be top-level loops). The original loop may be
1412 /// completely removed.
1413 ///
1414 /// The sibling loops resulting from this update are returned. If the original
1415 /// loop remains a valid loop, it will be the first entry in this list with all
1416 /// of the newly sibling loops following it.
1417 ///
1418 /// Returns true if the loop remains a loop after unswitching, and false if it
1419 /// is no longer a loop after unswitching (and should not continue to be
1420 /// referenced).
1422  LoopInfo &LI,
1423  SmallVectorImpl<Loop *> &HoistedLoops) {
1424  auto *PH = L.getLoopPreheader();
1425 
1426  // Compute the actual parent loop from the exit blocks. Because we may have
1427  // pruned some exits the loop may be different from the original parent.
1428  Loop *ParentL = nullptr;
1429  SmallVector<Loop *, 4> ExitLoops;
1430  SmallVector<BasicBlock *, 4> ExitsInLoops;
1431  ExitsInLoops.reserve(ExitBlocks.size());
1432  for (auto *ExitBB : ExitBlocks)
1433  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1434  ExitLoops.push_back(ExitL);
1435  ExitsInLoops.push_back(ExitBB);
1436  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1437  ParentL = ExitL;
1438  }
1439 
1440  // Recompute the blocks participating in this loop. This may be empty if it
1441  // is no longer a loop.
1442  auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1443 
1444  // If we still have a loop, we need to re-set the loop's parent as the exit
1445  // block set changing may have moved it within the loop nest. Note that this
1446  // can only happen when this loop has a parent as it can only hoist the loop
1447  // *up* the nest.
1448  if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1449  // Remove this loop's (original) blocks from all of the intervening loops.
1450  for (Loop *IL = L.getParentLoop(); IL != ParentL;
1451  IL = IL->getParentLoop()) {
1452  IL->getBlocksSet().erase(PH);
1453  for (auto *BB : L.blocks())
1454  IL->getBlocksSet().erase(BB);
1455  llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1456  return BB == PH || L.contains(BB);
1457  });
1458  }
1459 
1460  LI.changeLoopFor(PH, ParentL);
1461  L.getParentLoop()->removeChildLoop(&L);
1462  if (ParentL)
1463  ParentL->addChildLoop(&L);
1464  else
1465  LI.addTopLevelLoop(&L);
1466  }
1467 
1468  // Now we update all the blocks which are no longer within the loop.
1469  auto &Blocks = L.getBlocksVector();
1470  auto BlocksSplitI =
1471  LoopBlockSet.empty()
1472  ? Blocks.begin()
1473  : std::stable_partition(
1474  Blocks.begin(), Blocks.end(),
1475  [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1476 
1477  // Before we erase the list of unlooped blocks, build a set of them.
1478  SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1479  if (LoopBlockSet.empty())
1480  UnloopedBlocks.insert(PH);
1481 
1482  // Now erase these blocks from the loop.
1483  for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1484  L.getBlocksSet().erase(BB);
1485  Blocks.erase(BlocksSplitI, Blocks.end());
1486 
1487  // Sort the exits in ascending loop depth, we'll work backwards across these
1488  // to process them inside out.
1489  std::stable_sort(ExitsInLoops.begin(), ExitsInLoops.end(),
1490  [&](BasicBlock *LHS, BasicBlock *RHS) {
1491  return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1492  });
1493 
1494  // We'll build up a set for each exit loop.
1495  SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1496  Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1497 
1498  auto RemoveUnloopedBlocksFromLoop =
1499  [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1500  for (auto *BB : UnloopedBlocks)
1501  L.getBlocksSet().erase(BB);
1502  llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1503  return UnloopedBlocks.count(BB);
1504  });
1505  };
1506 
1508  while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1509  assert(Worklist.empty() && "Didn't clear worklist!");
1510  assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1511 
1512  // Grab the next exit block, in decreasing loop depth order.
1513  BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1514  Loop &ExitL = *LI.getLoopFor(ExitBB);
1515  assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1516 
1517  // Erase all of the unlooped blocks from the loops between the previous
1518  // exit loop and this exit loop. This works because the ExitInLoops list is
1519  // sorted in increasing order of loop depth and thus we visit loops in
1520  // decreasing order of loop depth.
1521  for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1522  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1523 
1524  // Walk the CFG back until we hit the cloned PH adding everything reachable
1525  // and in the unlooped set to this exit block's loop.
1526  Worklist.push_back(ExitBB);
1527  do {
1528  BasicBlock *BB = Worklist.pop_back_val();
1529  // We can stop recursing at the cloned preheader (if we get there).
1530  if (BB == PH)
1531  continue;
1532 
1533  for (BasicBlock *PredBB : predecessors(BB)) {
1534  // If this pred has already been moved to our set or is part of some
1535  // (inner) loop, no update needed.
1536  if (!UnloopedBlocks.erase(PredBB)) {
1537  assert((NewExitLoopBlocks.count(PredBB) ||
1538  ExitL.contains(LI.getLoopFor(PredBB))) &&
1539  "Predecessor not in a nested loop (or already visited)!");
1540  continue;
1541  }
1542 
1543  // We just insert into the loop set here. We'll add these blocks to the
1544  // exit loop after we build up the set in a deterministic order rather
1545  // than the predecessor-influenced visit order.
1546  bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
1547  (void)Inserted;
1548  assert(Inserted && "Should only visit an unlooped block once!");
1549 
1550  // And recurse through to its predecessors.
1551  Worklist.push_back(PredBB);
1552  }
1553  } while (!Worklist.empty());
1554 
1555  // If blocks in this exit loop were directly part of the original loop (as
1556  // opposed to a child loop) update the map to point to this exit loop. This
1557  // just updates a map and so the fact that the order is unstable is fine.
1558  for (auto *BB : NewExitLoopBlocks)
1559  if (Loop *BBL = LI.getLoopFor(BB))
1560  if (BBL == &L || !L.contains(BBL))
1561  LI.changeLoopFor(BB, &ExitL);
1562 
1563  // We will remove the remaining unlooped blocks from this loop in the next
1564  // iteration or below.
1565  NewExitLoopBlocks.clear();
1566  }
1567 
1568  // Any remaining unlooped blocks are no longer part of any loop unless they
1569  // are part of some child loop.
1570  for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
1571  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1572  for (auto *BB : UnloopedBlocks)
1573  if (Loop *BBL = LI.getLoopFor(BB))
1574  if (BBL == &L || !L.contains(BBL))
1575  LI.changeLoopFor(BB, nullptr);
1576 
1577  // Sink all the child loops whose headers are no longer in the loop set to
1578  // the parent (or to be top level loops). We reach into the loop and directly
1579  // update its subloop vector to make this batch update efficient.
1580  auto &SubLoops = L.getSubLoopsVector();
1581  auto SubLoopsSplitI =
1582  LoopBlockSet.empty()
1583  ? SubLoops.begin()
1584  : std::stable_partition(
1585  SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
1586  return LoopBlockSet.count(SubL->getHeader());
1587  });
1588  for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
1589  HoistedLoops.push_back(HoistedL);
1590  HoistedL->setParentLoop(nullptr);
1591 
1592  // To compute the new parent of this hoisted loop we look at where we
1593  // placed the preheader above. We can't lookup the header itself because we
1594  // retained the mapping from the header to the hoisted loop. But the
1595  // preheader and header should have the exact same new parent computed
1596  // based on the set of exit blocks from the original loop as the preheader
1597  // is a predecessor of the header and so reached in the reverse walk. And
1598  // because the loops were all in simplified form the preheader of the
1599  // hoisted loop can't be part of some *other* loop.
1600  if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
1601  NewParentL->addChildLoop(HoistedL);
1602  else
1603  LI.addTopLevelLoop(HoistedL);
1604  }
1605  SubLoops.erase(SubLoopsSplitI, SubLoops.end());
1606 
1607  // Actually delete the loop if nothing remained within it.
1608  if (Blocks.empty()) {
1609  assert(SubLoops.empty() &&
1610  "Failed to remove all subloops from the original loop!");
1611  if (Loop *ParentL = L.getParentLoop())
1612  ParentL->removeChildLoop(llvm::find(*ParentL, &L));
1613  else
1614  LI.removeLoop(llvm::find(LI, &L));
1615  LI.destroy(&L);
1616  return false;
1617  }
1618 
1619  return true;
1620 }
1621 
1622 /// Helper to visit a dominator subtree, invoking a callable on each node.
1623 ///
1624 /// Returning false at any point will stop walking past that node of the tree.
1625 template <typename CallableT>
1626 void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
1627  SmallVector<DomTreeNode *, 4> DomWorklist;
1628  DomWorklist.push_back(DT[BB]);
1629 #ifndef NDEBUG
1631  Visited.insert(DT[BB]);
1632 #endif
1633  do {
1634  DomTreeNode *N = DomWorklist.pop_back_val();
1635 
1636  // Visit this node.
1637  if (!Callable(N->getBlock()))
1638  continue;
1639 
1640  // Accumulate the child nodes.
1641  for (DomTreeNode *ChildN : *N) {
1642  assert(Visited.insert(ChildN).second &&
1643  "Cannot visit a node twice when walking a tree!");
1644  DomWorklist.push_back(ChildN);
1645  }
1646  } while (!DomWorklist.empty());
1647 }
1648 
1649 /// Take an invariant branch that has been determined to be safe and worthwhile
1650 /// to unswitch despite being non-trivial to do so and perform the unswitch.
1651 ///
1652 /// This directly updates the CFG to hoist the predicate out of the loop, and
1653 /// clone the necessary parts of the loop to maintain behavior.
1654 ///
1655 /// It also updates both dominator tree and loopinfo based on the unswitching.
1656 ///
1657 /// Once unswitching has been performed it runs the provided callback to report
1658 /// the new loops and no-longer valid loops to the caller.
1660  Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI,
1661  AssumptionCache &AC,
1662  function_ref<void(bool, ArrayRef<Loop *>)> NonTrivialUnswitchCB) {
1663  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
1665  "Can only unswitch an invariant branch condition!");
1666 
1667  // Constant and BBs tracking the cloned and continuing successor.
1668  const int ClonedSucc = 0;
1669  auto *ParentBB = BI.getParent();
1670  auto *UnswitchedSuccBB = BI.getSuccessor(ClonedSucc);
1671  auto *ContinueSuccBB = BI.getSuccessor(1 - ClonedSucc);
1672 
1673  assert(UnswitchedSuccBB != ContinueSuccBB &&
1674  "Should not unswitch a branch that always goes to the same place!");
1675 
1676  // The branch should be in this exact loop. Any inner loop's invariant branch
1677  // should be handled by unswitching that inner loop. The caller of this
1678  // routine should filter out any candidates that remain (but were skipped for
1679  // whatever reason).
1680  assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
1681 
1682  SmallVector<BasicBlock *, 4> ExitBlocks;
1683  L.getUniqueExitBlocks(ExitBlocks);
1684 
1685  // We cannot unswitch if exit blocks contain a cleanuppad instruction as we
1686  // don't know how to split those exit blocks.
1687  // FIXME: We should teach SplitBlock to handle this and remove this
1688  // restriction.
1689  for (auto *ExitBB : ExitBlocks)
1690  if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI()))
1691  return false;
1692 
1693  SmallPtrSet<BasicBlock *, 4> ExitBlockSet(ExitBlocks.begin(),
1694  ExitBlocks.end());
1695 
1696  // Compute the parent loop now before we start hacking on things.
1697  Loop *ParentL = L.getParentLoop();
1698 
1699  // Compute the outer-most loop containing one of our exit blocks. This is the
1700  // furthest up our loopnest which can be mutated, which we will use below to
1701  // update things.
1702  Loop *OuterExitL = &L;
1703  for (auto *ExitBB : ExitBlocks) {
1704  Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
1705  if (!NewOuterExitL) {
1706  // We exited the entire nest with this block, so we're done.
1707  OuterExitL = nullptr;
1708  break;
1709  }
1710  if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
1711  OuterExitL = NewOuterExitL;
1712  }
1713 
1714  // If the edge we *aren't* cloning in the unswitch (the continuing edge)
1715  // dominates its target, we can skip cloning the dominated region of the loop
1716  // and its exits. We compute this as a set of nodes to be skipped.
1717  SmallPtrSet<BasicBlock *, 4> SkippedLoopAndExitBlocks;
1718  if (ContinueSuccBB->getUniquePredecessor() ||
1719  llvm::all_of(predecessors(ContinueSuccBB), [&](BasicBlock *PredBB) {
1720  return PredBB == ParentBB || DT.dominates(ContinueSuccBB, PredBB);
1721  })) {
1722  visitDomSubTree(DT, ContinueSuccBB, [&](BasicBlock *BB) {
1723  SkippedLoopAndExitBlocks.insert(BB);
1724  return true;
1725  });
1726  }
1727  // Similarly, if the edge we *are* cloning in the unswitch (the unswitched
1728  // edge) dominates its target, we will end up with dead nodes in the original
1729  // loop and its exits that will need to be deleted. Here, we just retain that
1730  // the property holds and will compute the deleted set later.
1731  bool DeleteUnswitchedSucc =
1732  UnswitchedSuccBB->getUniquePredecessor() ||
1733  llvm::all_of(predecessors(UnswitchedSuccBB), [&](BasicBlock *PredBB) {
1734  return PredBB == ParentBB || DT.dominates(UnswitchedSuccBB, PredBB);
1735  });
1736 
1737  // Split the preheader, so that we know that there is a safe place to insert
1738  // the conditional branch. We will change the preheader to have a conditional
1739  // branch on LoopCond. The original preheader will become the split point
1740  // between the unswitched versions, and we will have a new preheader for the
1741  // original loop.
1742  BasicBlock *SplitBB = L.getLoopPreheader();
1743  BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI);
1744 
1745  // Keep a mapping for the cloned values.
1746  ValueToValueMapTy VMap;
1747 
1748  // Build the cloned blocks from the loop.
1749  auto *ClonedPH = buildClonedLoopBlocks(
1750  L, LoopPH, SplitBB, ExitBlocks, ParentBB, UnswitchedSuccBB,
1751  ContinueSuccBB, SkippedLoopAndExitBlocks, VMap, AC, DT, LI);
1752 
1753  // Build the cloned loop structure itself. This may be substantially
1754  // different from the original structure due to the simplified CFG. This also
1755  // handles inserting all the cloned blocks into the correct loops.
1756  SmallVector<Loop *, 4> NonChildClonedLoops;
1757  Loop *ClonedL =
1758  buildClonedLoops(L, ExitBlocks, VMap, LI, NonChildClonedLoops);
1759 
1760  // Remove the parent as a predecessor of the unswitched successor.
1761  UnswitchedSuccBB->removePredecessor(ParentBB, /*DontDeleteUselessPHIs*/ true);
1762 
1763  // Now splice the branch from the original loop and use it to select between
1764  // the two loops.
1765  SplitBB->getTerminator()->eraseFromParent();
1766  SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), BI);
1767  BI.setSuccessor(ClonedSucc, ClonedPH);
1768  BI.setSuccessor(1 - ClonedSucc, LoopPH);
1769 
1770  // Create a new unconditional branch to the continuing block (as opposed to
1771  // the one cloned).
1772  BranchInst::Create(ContinueSuccBB, ParentBB);
1773 
1774  // Delete anything that was made dead in the original loop due to
1775  // unswitching.
1776  if (DeleteUnswitchedSucc)
1777  deleteDeadBlocksFromLoop(L, UnswitchedSuccBB, ExitBlocks, DT, LI);
1778 
1779  SmallVector<Loop *, 4> HoistedLoops;
1780  bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
1781 
1782  // This will have completely invalidated the dominator tree. We can't easily
1783  // bound how much is invalid because in some cases we will refine the
1784  // predecessor set of exit blocks of the loop which can move large unrelated
1785  // regions of code into a new subtree.
1786  //
1787  // FIXME: Eventually, we should use an incremental update utility that
1788  // leverages the existing information in the dominator tree (and potentially
1789  // the nature of the change) to more efficiently update things.
1790  DT.recalculate(*SplitBB->getParent());
1791 
1792  // We can change which blocks are exit blocks of all the cloned sibling
1793  // loops, the current loop, and any parent loops which shared exit blocks
1794  // with the current loop. As a consequence, we need to re-form LCSSA for
1795  // them. But we shouldn't need to re-form LCSSA for any child loops.
1796  // FIXME: This could be made more efficient by tracking which exit blocks are
1797  // new, and focusing on them, but that isn't likely to be necessary.
1798  //
1799  // In order to reasonably rebuild LCSSA we need to walk inside-out across the
1800  // loop nest and update every loop that could have had its exits changed. We
1801  // also need to cover any intervening loops. We add all of these loops to
1802  // a list and sort them by loop depth to achieve this without updating
1803  // unnecessary loops.
1804  auto UpdateLCSSA = [&](Loop &UpdateL) {
1805 #ifndef NDEBUG
1806  for (Loop *ChildL : UpdateL)
1807  assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
1808  "Perturbed a child loop's LCSSA form!");
1809 #endif
1810  formLCSSA(UpdateL, DT, &LI, nullptr);
1811  };
1812 
1813  // For non-child cloned loops and hoisted loops, we just need to update LCSSA
1814  // and we can do it in any order as they don't nest relative to each other.
1815  for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
1816  UpdateLCSSA(*UpdatedL);
1817 
1818  // If the original loop had exit blocks, walk up through the outer most loop
1819  // of those exit blocks to update LCSSA and form updated dedicated exits.
1820  if (OuterExitL != &L) {
1821  SmallVector<Loop *, 4> OuterLoops;
1822  // We start with the cloned loop and the current loop if they are loops and
1823  // move toward OuterExitL. Also, if either the cloned loop or the current
1824  // loop have become top level loops we need to walk all the way out.
1825  if (ClonedL) {
1826  OuterLoops.push_back(ClonedL);
1827  if (!ClonedL->getParentLoop())
1828  OuterExitL = nullptr;
1829  }
1830  if (IsStillLoop) {
1831  OuterLoops.push_back(&L);
1832  if (!L.getParentLoop())
1833  OuterExitL = nullptr;
1834  }
1835  // Grab all of the enclosing loops now.
1836  for (Loop *OuterL = ParentL; OuterL != OuterExitL;
1837  OuterL = OuterL->getParentLoop())
1838  OuterLoops.push_back(OuterL);
1839 
1840  // Finally, update our list of outer loops. This is nicely ordered to work
1841  // inside-out.
1842  for (Loop *OuterL : OuterLoops) {
1843  // First build LCSSA for this loop so that we can preserve it when
1844  // forming dedicated exits. We don't want to perturb some other loop's
1845  // LCSSA while doing that CFG edit.
1846  UpdateLCSSA(*OuterL);
1847 
1848  // For loops reached by this loop's original exit blocks we may
1849  // introduced new, non-dedicated exits. At least try to re-form dedicated
1850  // exits for these loops. This may fail if they couldn't have dedicated
1851  // exits to start with.
1852  formDedicatedExitBlocks(OuterL, &DT, &LI, /*PreserveLCSSA*/ true);
1853  }
1854  }
1855 
1856 #ifndef NDEBUG
1857  // Verify the entire loop structure to catch any incorrect updates before we
1858  // progress in the pass pipeline.
1859  LI.verify(DT);
1860 #endif
1861 
1862  // Now that we've unswitched something, make callbacks to report the changes.
1863  // For that we need to merge together the updated loops and the cloned loops
1864  // and check whether the original loop survived.
1865  SmallVector<Loop *, 4> SibLoops;
1866  for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
1867  if (UpdatedL->getParentLoop() == ParentL)
1868  SibLoops.push_back(UpdatedL);
1869  NonTrivialUnswitchCB(IsStillLoop, SibLoops);
1870 
1871  ++NumBranches;
1872  return true;
1873 }
1874 
1875 /// Recursively compute the cost of a dominator subtree based on the per-block
1876 /// cost map provided.
1877 ///
1878 /// The recursive computation is memozied into the provided DT-indexed cost map
1879 /// to allow querying it for most nodes in the domtree without it becoming
1880 /// quadratic.
1881 static int
1883  const SmallDenseMap<BasicBlock *, int, 4> &BBCostMap,
1885  // Don't accumulate cost (or recurse through) blocks not in our block cost
1886  // map and thus not part of the duplication cost being considered.
1887  auto BBCostIt = BBCostMap.find(N.getBlock());
1888  if (BBCostIt == BBCostMap.end())
1889  return 0;
1890 
1891  // Lookup this node to see if we already computed its cost.
1892  auto DTCostIt = DTCostMap.find(&N);
1893  if (DTCostIt != DTCostMap.end())
1894  return DTCostIt->second;
1895 
1896  // If not, we have to compute it. We can't use insert above and update
1897  // because computing the cost may insert more things into the map.
1898  int Cost = std::accumulate(
1899  N.begin(), N.end(), BBCostIt->second, [&](int Sum, DomTreeNode *ChildN) {
1900  return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
1901  });
1902  bool Inserted = DTCostMap.insert({&N, Cost}).second;
1903  (void)Inserted;
1904  assert(Inserted && "Should not insert a node while visiting children!");
1905  return Cost;
1906 }
1907 
1908 /// Unswitch control flow predicated on loop invariant conditions.
1909 ///
1910 /// This first hoists all branches or switches which are trivial (IE, do not
1911 /// require duplicating any part of the loop) out of the loop body. It then
1912 /// looks at other loop invariant control flows and tries to unswitch those as
1913 /// well by cloning the loop if the result is small enough.
1914 static bool
1916  TargetTransformInfo &TTI, bool NonTrivial,
1917  function_ref<void(bool, ArrayRef<Loop *>)> NonTrivialUnswitchCB) {
1918  assert(L.isRecursivelyLCSSAForm(DT, LI) &&
1919  "Loops must be in LCSSA form before unswitching.");
1920  bool Changed = false;
1921 
1922  // Must be in loop simplified form: we need a preheader and dedicated exits.
1923  if (!L.isLoopSimplifyForm())
1924  return false;
1925 
1926  // Try trivial unswitch first before loop over other basic blocks in the loop.
1927  Changed |= unswitchAllTrivialConditions(L, DT, LI);
1928 
1929  // If we're not doing non-trivial unswitching, we're done. We both accept
1930  // a parameter but also check a local flag that can be used for testing
1931  // a debugging.
1932  if (!NonTrivial && !EnableNonTrivialUnswitch)
1933  return Changed;
1934 
1935  // Collect all remaining invariant branch conditions within this loop (as
1936  // opposed to an inner loop which would be handled when visiting that inner
1937  // loop).
1938  SmallVector<TerminatorInst *, 4> UnswitchCandidates;
1939  for (auto *BB : L.blocks())
1940  if (LI.getLoopFor(BB) == &L)
1941  if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator()))
1942  if (BI->isConditional() && L.isLoopInvariant(BI->getCondition()) &&
1943  BI->getSuccessor(0) != BI->getSuccessor(1))
1944  UnswitchCandidates.push_back(BI);
1945 
1946  // If we didn't find any candidates, we're done.
1947  if (UnswitchCandidates.empty())
1948  return Changed;
1949 
1950  DEBUG(dbgs() << "Considering " << UnswitchCandidates.size()
1951  << " non-trivial loop invariant conditions for unswitching.\n");
1952 
1953  // Given that unswitching these terminators will require duplicating parts of
1954  // the loop, so we need to be able to model that cost. Compute the ephemeral
1955  // values and set up a data structure to hold per-BB costs. We cache each
1956  // block's cost so that we don't recompute this when considering different
1957  // subsets of the loop for duplication during unswitching.
1959  CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
1961 
1962  // Compute the cost of each block, as well as the total loop cost. Also, bail
1963  // out if we see instructions which are incompatible with loop unswitching
1964  // (convergent, noduplicate, or cross-basic-block tokens).
1965  // FIXME: We might be able to safely handle some of these in non-duplicated
1966  // regions.
1967  int LoopCost = 0;
1968  for (auto *BB : L.blocks()) {
1969  int Cost = 0;
1970  for (auto &I : *BB) {
1971  if (EphValues.count(&I))
1972  continue;
1973 
1974  if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
1975  return Changed;
1976  if (auto CS = CallSite(&I))
1977  if (CS.isConvergent() || CS.cannotDuplicate())
1978  return Changed;
1979 
1980  Cost += TTI.getUserCost(&I);
1981  }
1982  assert(Cost >= 0 && "Must not have negative costs!");
1983  LoopCost += Cost;
1984  assert(LoopCost >= 0 && "Must not have negative loop costs!");
1985  BBCostMap[BB] = Cost;
1986  }
1987  DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
1988 
1989  // Now we find the best candidate by searching for the one with the following
1990  // properties in order:
1991  //
1992  // 1) An unswitching cost below the threshold
1993  // 2) The smallest number of duplicated unswitch candidates (to avoid
1994  // creating redundant subsequent unswitching)
1995  // 3) The smallest cost after unswitching.
1996  //
1997  // We prioritize reducing fanout of unswitch candidates provided the cost
1998  // remains below the threshold because this has a multiplicative effect.
1999  //
2000  // This requires memoizing each dominator subtree to avoid redundant work.
2001  //
2002  // FIXME: Need to actually do the number of candidates part above.
2004  // Given a terminator which might be unswitched, computes the non-duplicated
2005  // cost for that terminator.
2006  auto ComputeUnswitchedCost = [&](TerminatorInst *TI) {
2007  BasicBlock &BB = *TI->getParent();
2009 
2010  int Cost = LoopCost;
2011  for (BasicBlock *SuccBB : successors(&BB)) {
2012  // Don't count successors more than once.
2013  if (!Visited.insert(SuccBB).second)
2014  continue;
2015 
2016  // This successor's domtree will not need to be duplicated after
2017  // unswitching if the edge to the successor dominates it (and thus the
2018  // entire tree). This essentially means there is no other path into this
2019  // subtree and so it will end up live in only one clone of the loop.
2020  if (SuccBB->getUniquePredecessor() ||
2021  llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2022  return PredBB == &BB || DT.dominates(SuccBB, PredBB);
2023  })) {
2024  Cost -= computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
2025  assert(Cost >= 0 &&
2026  "Non-duplicated cost should never exceed total loop cost!");
2027  }
2028  }
2029 
2030  // Now scale the cost by the number of unique successors minus one. We
2031  // subtract one because there is already at least one copy of the entire
2032  // loop. This is computing the new cost of unswitching a condition.
2033  assert(Visited.size() > 1 &&
2034  "Cannot unswitch a condition without multiple distinct successors!");
2035  return Cost * (Visited.size() - 1);
2036  };
2037  TerminatorInst *BestUnswitchTI = nullptr;
2038  int BestUnswitchCost;
2039  for (TerminatorInst *CandidateTI : UnswitchCandidates) {
2040  int CandidateCost = ComputeUnswitchedCost(CandidateTI);
2041  DEBUG(dbgs() << " Computed cost of " << CandidateCost
2042  << " for unswitch candidate: " << *CandidateTI << "\n");
2043  if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
2044  BestUnswitchTI = CandidateTI;
2045  BestUnswitchCost = CandidateCost;
2046  }
2047  }
2048 
2049  if (BestUnswitchCost < UnswitchThreshold) {
2050  DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = "
2051  << BestUnswitchCost << ") branch: " << *BestUnswitchTI
2052  << "\n");
2053  Changed |= unswitchInvariantBranch(L, cast<BranchInst>(*BestUnswitchTI), DT,
2054  LI, AC, NonTrivialUnswitchCB);
2055  } else {
2056  DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << BestUnswitchCost
2057  << "\n");
2058  }
2059 
2060  return Changed;
2061 }
2062 
2065  LPMUpdater &U) {
2066  Function &F = *L.getHeader()->getParent();
2067  (void)F;
2068 
2069  DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n");
2070 
2071  // Save the current loop name in a variable so that we can report it even
2072  // after it has been deleted.
2073  std::string LoopName = L.getName();
2074 
2075  auto NonTrivialUnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
2076  ArrayRef<Loop *> NewLoops) {
2077  // If we did a non-trivial unswitch, we have added new (cloned) loops.
2078  U.addSiblingLoops(NewLoops);
2079 
2080  // If the current loop remains valid, we should revisit it to catch any
2081  // other unswitch opportunities. Otherwise, we need to mark it as deleted.
2082  if (CurrentLoopValid)
2083  U.revisitCurrentLoop();
2084  else
2085  U.markLoopAsDeleted(L, LoopName);
2086  };
2087 
2088  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial,
2089  NonTrivialUnswitchCB))
2090  return PreservedAnalyses::all();
2091 
2092 #ifndef NDEBUG
2093  // Historically this pass has had issues with the dominator tree so verify it
2094  // in asserts builds.
2095  AR.DT.verifyDomTree();
2096 #endif
2098 }
2099 
2100 namespace {
2101 
2102 class SimpleLoopUnswitchLegacyPass : public LoopPass {
2103  bool NonTrivial;
2104 
2105 public:
2106  static char ID; // Pass ID, replacement for typeid
2107 
2108  explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
2109  : LoopPass(ID), NonTrivial(NonTrivial) {
2112  }
2113 
2114  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
2115 
2116  void getAnalysisUsage(AnalysisUsage &AU) const override {
2120  }
2121 };
2122 
2123 } // end anonymous namespace
2124 
2125 bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
2126  if (skipLoop(L))
2127  return false;
2128 
2129  Function &F = *L->getHeader()->getParent();
2130 
2131  DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n");
2132 
2133  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2134  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2135  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2136  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2137 
2138  auto NonTrivialUnswitchCB = [&L, &LPM](bool CurrentLoopValid,
2139  ArrayRef<Loop *> NewLoops) {
2140  // If we did a non-trivial unswitch, we have added new (cloned) loops.
2141  for (auto *NewL : NewLoops)
2142  LPM.addLoop(*NewL);
2143 
2144  // If the current loop remains valid, re-add it to the queue. This is
2145  // a little wasteful as we'll finish processing the current loop as well,
2146  // but it is the best we can do in the old PM.
2147  if (CurrentLoopValid)
2148  LPM.addLoop(*L);
2149  else
2150  LPM.markLoopAsDeleted(*L);
2151  };
2152 
2153  bool Changed =
2154  unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, NonTrivialUnswitchCB);
2155 
2156  // If anything was unswitched, also clear any cached information about this
2157  // loop.
2158  LPM.deleteSimpleAnalysisLoop(L);
2159 
2160 #ifndef NDEBUG
2161  // Historically this pass has had issues with the dominator tree so verify it
2162  // in asserts builds.
2163  DT.verifyDomTree();
2164 #endif
2165  return Changed;
2166 }
2167 
2169 INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
2170  "Simple unswitch loops", false, false)
2176 INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
2177  "Simple unswitch loops", false, false)
2178 
2180  return new SimpleLoopUnswitchLegacyPass(NonTrivial);
2181 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:73
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Definition: LoopInfo.h:776
unsigned getNumCases() const
Return the number of &#39;cases&#39; in this switch instruction, excluding the default case.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:69
use_iterator use_end()
Definition: Value.h:348
This routine provides some synthesis utilities to produce sequences of values.
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI)
Unswitch a trivial branch if the condition is loop invariant.
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:523
CaseIt case_end()
Returns a read/write iterator that points one past the last in the SwitchInst.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
Definition: LoopInfo.h:170
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
simple loop unswitch
bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:182
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
Definition: LoopInfo.h:685
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:92
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
Definition: LoopInfo.h:352
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:320
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock *> ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallPtrSetImpl< BasicBlock *> &SkippedLoopAndExitBlocks, ValueToValueMapTy &VMap, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI)
Build the cloned blocks for an unswitched copy of the given loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
An immutable pass that tracks lazily created AssumptionCache objects.
Value * getCondition() const
CaseIt case_begin()
Returns a read/write iterator that points to the first case in the SwitchInst.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:89
A cache of .assume calls within a function.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:728
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
unsigned second
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:813
std::vector< LoopT * > & getSubLoopsVector()
Definition: LoopInfo.h:135
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
static int computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, int, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, int, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided...
Value * getCondition() const
This defines the Use class.
static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
void reserve(size_type N)
Definition: SmallVector.h:380
void eraseNode(NodeT *BB)
eraseNode - Removes a node from the dominator tree.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:624
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:678
SmallPtrSetImpl< const BlockT * > & getBlocksSet()
Return a direct, mutable handle to the blocks set so that we can mutate it efficiently.
Definition: LoopInfo.h:176
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
void deleteSimpleAnalysisLoop(Loop *L)
Invoke deleteAnalysisLoop hook for all passes that implement simple analysis interface.
Definition: LoopPass.cpp:118
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
BlockT * getHeader() const
Definition: LoopInfo.h:100
void getExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:63
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI)
This routine scans the loop to find a branch or switch which occurs before any side effects occur...
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:232
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:183
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:230
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition: LoopInfo.h:729
This header provides classes for managing per-loop analyses.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:430
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:167
static void replaceLoopUsesWithConstant(Loop &L, Value &LIC, Constant &Replacement)
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:292
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:73
void getUniqueExitBlocks(SmallVectorImpl< BasicBlock *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfo.cpp:393
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:211
NodeT * getBlock() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
void verifyDomTree() const
Verify the correctness of the domtree by re-computing it.
Definition: Dominators.cpp:305
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:54
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
Conditional or Unconditional Branch instruction.
static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, DominatorTree &DT)
Update the dominator tree after unswitching a particular former exit block.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
This function has undefined behavior.
DomTreeNodeBase * getIDom() const
This is an important base class in LLVM.
Definition: Constant.h:42
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
This file contains the declarations for the subclasses of Constant, which represent the different fla...
INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", "Simple unswitch loops", false, false) INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass
const Instruction & front() const
Definition: BasicBlock.h:264
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:535
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:113
BasicBlock * getDefaultDest() const
Represent the analysis usage information of a pass.
void splice(iterator where, iplist_impl &L2)
Definition: ilist.h:342
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:820
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:116
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:101
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:107
CaseIt removeCase(CaseIt I)
This method removes the specified case and its successor from the switch instruction.
static bool updateIDomWithKnownCommonDominator(BasicBlock *BB, BasicBlock *KnownDominatingBB, DominatorTree &DT)
Update the IDom for a basic block whose predecessor set has changed.
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&... args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest...
Definition: STLExtras.h:488
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:834
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:56
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
static void deleteDeadBlocksFromLoop(Loop &L, BasicBlock *DeadSubtreeRoot, SmallVectorImpl< BasicBlock *> &ExitBlocks, DominatorTree &DT, LoopInfo &LI)
bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition: LCSSA.cpp:277
size_type size() const
Definition: SmallPtrSet.h:93
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:317
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
static Loop * buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock *> ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop *> &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:378
iterator end()
Definition: BasicBlock.h:254
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:239
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
Definition: LoopInfo.h:699
LoopT * AllocateLoop(ArgsTy &&... Args)
Definition: LoopInfo.h:642
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:385
simple loop Simple unswitch loops
Pass * createSimpleLoopUnswitchLegacyPass(bool NonTrivial=false)
Create the legacy pass object for the simple loop unswitcher.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:110
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:142
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:516
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2&#39;s erase_if which is equivalent t...
Definition: STLExtras.h:925
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
CloneBasicBlock - Return a copy of the specified basic block, but without embedding the block into a ...
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM...
Definition: ValueMapper.h:251
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:482
If this flag is set, the remapper ignores missing function-local entries (Argument, Instruction, BasicBlock) that are not in the value map.
Definition: ValueMapper.h:91
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
use_iterator use_begin()
Definition: Value.h:340
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:163
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:191
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
void registerAssumption(CallInst *CI)
Add an .assume intrinsic to this function&#39;s cache.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:311
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
StringRef getName() const
Definition: LoopInfo.h:577
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, TargetTransformInfo &TTI, bool NonTrivial, function_ref< void(bool, ArrayRef< Loop *>)> NonTrivialUnswitchCB)
Unswitch control flow predicated on loop invariant conditions.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink &#39;this&#39; from the containing function and delete it.
Definition: BasicBlock.cpp:97
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
void addLoop(Loop &L)
Definition: LoopPass.cpp:76
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:1023
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:710
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI)
Unswitch a trivial switch if the condition is loop invariant.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:141
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:181
bool empty() const
Definition: LoopInfo.h:146
Multiway switch.
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: ValueMap.h:154
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(BasicBlock *BB)
Definition: CFG.h:143
void setDefaultDest(BasicBlock *DefaultCase)
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:941
#define DEBUG(X)
Definition: Debug.h:118
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Split the edge connecting specified block.
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:958
This file defines a set of templates that efficiently compute a dominator tree over a generic graph...
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, const Comparator &Comp=Comparator())
Definition: Parallel.h:199
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
static bool unswitchInvariantBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref< void(bool, ArrayRef< Loop *>)> NonTrivialUnswitchCB)
Take an invariant branch that has been determined to be safe and worthwhile to unswitch despite being...
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock *> ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop *> &HoistedLoops)
Rebuild a loop after unswitching removes some subset of blocks and edges.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:103
void appendDomFrontier(DomTreeNode *Node, SmallSetVector< BasicBlock *, 4 > &Worklist, SmallVectorImpl< DomTreeNode *> &DomNodes, SmallPtrSetImpl< BasicBlock *> &DomSet)
void dropAllReferences()
Cause all subinstructions to "let go" of all the references that said subinstructions are maintaining...
Definition: BasicBlock.cpp:210
const BasicBlock * getParent() const
Definition: Instruction.h:66
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.