LLVM  7.0.0svn
SimpleLoopUnswitch.cpp
Go to the documentation of this file.
1 //===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
11 #include "llvm/ADT/DenseMap.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/Sequence.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/ADT/SmallPtrSet.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/Twine.h"
22 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/Analysis/LoopPass.h"
24 #include "llvm/IR/BasicBlock.h"
25 #include "llvm/IR/Constant.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/InstrTypes.h"
30 #include "llvm/IR/Instruction.h"
31 #include "llvm/IR/Instructions.h"
32 #include "llvm/IR/IntrinsicInst.h"
33 #include "llvm/IR/Use.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/Debug.h"
46 #include <algorithm>
47 #include <cassert>
48 #include <iterator>
49 #include <numeric>
50 #include <utility>
51 
52 #define DEBUG_TYPE "simple-loop-unswitch"
53 
54 using namespace llvm;
55 
56 STATISTIC(NumBranches, "Number of branches unswitched");
57 STATISTIC(NumSwitches, "Number of switches unswitched");
58 STATISTIC(NumTrivial, "Number of unswitches that are trivial");
59 
61  "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
62  cl::desc("Forcibly enables non-trivial loop unswitching rather than "
63  "following the configuration passed into the pass."));
64 
65 static cl::opt<int>
66  UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
67  cl::desc("The cost threshold for unswitching a loop."));
68 
69 static void replaceLoopUsesWithConstant(Loop &L, Value &LIC,
70  Constant &Replacement) {
71  assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
72 
73  // Replace uses of LIC in the loop with the given constant.
74  for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) {
75  // Grab the use and walk past it so we can clobber it in the use list.
76  Use *U = &*UI++;
77  Instruction *UserI = dyn_cast<Instruction>(U->getUser());
78  if (!UserI || !L.contains(UserI))
79  continue;
80 
81  // Replace this use within the loop body.
82  *U = &Replacement;
83  }
84 }
85 
86 /// Update the IDom for a basic block whose predecessor set has changed.
87 ///
88 /// This routine is designed to work when the domtree update is relatively
89 /// localized by leveraging a known common dominator, often a loop header.
90 ///
91 /// FIXME: Should consider hand-rolling a slightly more efficient non-DFS
92 /// approach here as we can do that easily by persisting the candidate IDom's
93 /// dominating set between each predecessor.
94 ///
95 /// FIXME: Longer term, many uses of this can be replaced by an incremental
96 /// domtree update strategy that starts from a known dominating block and
97 /// rebuilds that subtree.
99  BasicBlock *KnownDominatingBB,
100  DominatorTree &DT) {
101  assert(pred_begin(BB) != pred_end(BB) &&
102  "This routine does not handle unreachable blocks!");
103 
104  BasicBlock *OrigIDom = DT[BB]->getIDom()->getBlock();
105 
106  BasicBlock *IDom = *pred_begin(BB);
107  assert(DT.dominates(KnownDominatingBB, IDom) &&
108  "Bad known dominating block!");
109 
110  // Walk all of the other predecessors finding the nearest common dominator
111  // until all predecessors are covered or we reach the loop header. The loop
112  // header necessarily dominates all loop exit blocks in loop simplified form
113  // so we can early-exit the moment we hit that block.
114  for (auto PI = std::next(pred_begin(BB)), PE = pred_end(BB);
115  PI != PE && IDom != KnownDominatingBB; ++PI) {
116  assert(DT.dominates(KnownDominatingBB, *PI) &&
117  "Bad known dominating block!");
118  IDom = DT.findNearestCommonDominator(IDom, *PI);
119  }
120 
121  if (IDom == OrigIDom)
122  return false;
123 
124  DT.changeImmediateDominator(BB, IDom);
125  return true;
126 }
127 
128 // Note that we don't currently use the IDFCalculator here for two reasons:
129 // 1) It computes dominator tree levels for the entire function on each run
130 // of 'compute'. While this isn't terrible, given that we expect to update
131 // relatively small subtrees of the domtree, it isn't necessarily the right
132 // tradeoff.
133 // 2) The interface doesn't fit this usage well. It doesn't operate in
134 // append-only, and builds several sets that we don't need.
135 //
136 // FIXME: Neither of these issues are a big deal and could be addressed with
137 // some amount of refactoring of IDFCalculator. That would allow us to share
138 // the core logic here (which is solving the same core problem).
139 static void appendDomFrontier(DomTreeNode *Node,
143  assert(DomNodes.empty() && "Must start with no dominator nodes.");
144  assert(DomSet.empty() && "Must start with an empty dominator set.");
145 
146  // First flatten this subtree into sequence of nodes by doing a pre-order
147  // walk.
148  DomNodes.push_back(Node);
149  // We intentionally re-evaluate the size as each node can add new children.
150  // Because this is a tree walk, this cannot add any duplicates.
151  for (int i = 0; i < (int)DomNodes.size(); ++i)
152  DomNodes.insert(DomNodes.end(), DomNodes[i]->begin(), DomNodes[i]->end());
153 
154  // Now create a set of the basic blocks so we can quickly test for
155  // dominated successors. We could in theory use the DFS numbers of the
156  // dominator tree for this, but we want this to remain predictably fast
157  // even while we mutate the dominator tree in ways that would invalidate
158  // the DFS numbering.
159  for (DomTreeNode *InnerN : DomNodes)
160  DomSet.insert(InnerN->getBlock());
161 
162  // Now re-walk the nodes, appending every successor of every node that isn't
163  // in the set. Note that we don't append the node itself, even though if it
164  // is a successor it does not strictly dominate itself and thus it would be
165  // part of the dominance frontier. The reason we don't append it is that
166  // the node passed in came *from* the worklist and so it has already been
167  // processed.
168  for (DomTreeNode *InnerN : DomNodes)
169  for (BasicBlock *SuccBB : successors(InnerN->getBlock()))
170  if (!DomSet.count(SuccBB))
171  Worklist.insert(SuccBB);
172 
173  DomNodes.clear();
174  DomSet.clear();
175 }
176 
177 /// Update the dominator tree after unswitching a particular former exit block.
178 ///
179 /// This handles the full update of the dominator tree after hoisting a block
180 /// that previously was an exit block (or split off of an exit block) up to be
181 /// reached from the new immediate dominator of the preheader.
182 ///
183 /// The common case is simple -- we just move the unswitched block to have an
184 /// immediate dominator of the old preheader. But in complex cases, there may
185 /// be other blocks reachable from the unswitched block that are immediately
186 /// dominated by some node between the unswitched one and the old preheader.
187 /// All of these also need to be hoisted in the dominator tree. We also want to
188 /// minimize queries to the dominator tree because each step of this
189 /// invalidates any DFS numbers that would make queries fast.
190 static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH,
191  DominatorTree &DT) {
192  DomTreeNode *OldPHNode = DT[OldPH];
193  DomTreeNode *UnswitchedNode = DT[UnswitchedBB];
194  // If the dominator tree has already been updated for this unswitched node,
195  // we're done. This makes it easier to use this routine if there are multiple
196  // paths to the same unswitched destination.
197  if (UnswitchedNode->getIDom() == OldPHNode)
198  return;
199 
200  // First collect the domtree nodes that we are hoisting over. These are the
201  // set of nodes which may have children that need to be hoisted as well.
203  for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode;
204  IDom = IDom->getIDom())
205  DomChain.insert(IDom);
206 
207  // The unswitched block ends up immediately dominated by the old preheader --
208  // regardless of whether it is the loop exit block or split off of the loop
209  // exit block.
210  DT.changeImmediateDominator(UnswitchedNode, OldPHNode);
211 
212  // For everything that moves up the dominator tree, we need to examine the
213  // dominator frontier to see if it additionally should move up the dominator
214  // tree. This lambda appends the dominator frontier for a node on the
215  // worklist.
217 
218  // Scratch data structures reused by domfrontier finding.
221 
222  // Append the initial dom frontier nodes.
223  appendDomFrontier(UnswitchedNode, Worklist, DomNodes, DomSet);
224 
225  // Walk the worklist. We grow the list in the loop and so must recompute size.
226  for (int i = 0; i < (int)Worklist.size(); ++i) {
227  auto *BB = Worklist[i];
228 
229  DomTreeNode *Node = DT[BB];
230  assert(!DomChain.count(Node) &&
231  "Cannot be dominated by a block you can reach!");
232 
233  // If this block had an immediate dominator somewhere in the chain
234  // we hoisted over, then its position in the domtree needs to move as it is
235  // reachable from a node hoisted over this chain.
236  if (!DomChain.count(Node->getIDom()))
237  continue;
238 
239  DT.changeImmediateDominator(Node, OldPHNode);
240 
241  // Now add this node's dominator frontier to the worklist as well.
242  appendDomFrontier(Node, Worklist, DomNodes, DomSet);
243  }
244 }
245 
246 /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
247 /// incoming values along this edge.
248 static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
249  BasicBlock &ExitBB) {
250  for (Instruction &I : ExitBB) {
251  auto *PN = dyn_cast<PHINode>(&I);
252  if (!PN)
253  // No more PHIs to check.
254  return true;
255 
256  // If the incoming value for this edge isn't loop invariant the unswitch
257  // won't be trivial.
258  if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
259  return false;
260  }
261  llvm_unreachable("Basic blocks should never be empty!");
262 }
263 
264 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
265 ///
266 /// Requires that the loop exit and unswitched basic block are the same, and
267 /// that the exiting block was a unique predecessor of that block. Rewrites the
268 /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
269 /// PHI nodes from the old preheader that now contains the unswitched
270 /// terminator.
272  BasicBlock &OldExitingBB,
273  BasicBlock &OldPH) {
274  for (PHINode &PN : UnswitchedBB.phis()) {
275  // When the loop exit is directly unswitched we just need to update the
276  // incoming basic block. We loop to handle weird cases with repeated
277  // incoming blocks, but expect to typically only have one operand here.
278  for (auto i : seq<int>(0, PN.getNumOperands())) {
279  assert(PN.getIncomingBlock(i) == &OldExitingBB &&
280  "Found incoming block different from unique predecessor!");
281  PN.setIncomingBlock(i, &OldPH);
282  }
283  }
284 }
285 
286 /// Rewrite the PHI nodes in the loop exit basic block and the split off
287 /// unswitched block.
288 ///
289 /// Because the exit block remains an exit from the loop, this rewrites the
290 /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
291 /// nodes into the unswitched basic block to select between the value in the
292 /// old preheader and the loop exit.
294  BasicBlock &UnswitchedBB,
295  BasicBlock &OldExitingBB,
296  BasicBlock &OldPH) {
297  assert(&ExitBB != &UnswitchedBB &&
298  "Must have different loop exit and unswitched blocks!");
299  Instruction *InsertPt = &*UnswitchedBB.begin();
300  for (PHINode &PN : ExitBB.phis()) {
301  auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
302  PN.getName() + ".split", InsertPt);
303 
304  // Walk backwards over the old PHI node's inputs to minimize the cost of
305  // removing each one. We have to do this weird loop manually so that we
306  // create the same number of new incoming edges in the new PHI as we expect
307  // each case-based edge to be included in the unswitched switch in some
308  // cases.
309  // FIXME: This is really, really gross. It would be much cleaner if LLVM
310  // allowed us to create a single entry for a predecessor block without
311  // having separate entries for each "edge" even though these edges are
312  // required to produce identical results.
313  for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
314  if (PN.getIncomingBlock(i) != &OldExitingBB)
315  continue;
316 
317  Value *Incoming = PN.removeIncomingValue(i);
318  NewPN->addIncoming(Incoming, &OldPH);
319  }
320 
321  // Now replace the old PHI with the new one and wire the old one in as an
322  // input to the new one.
323  PN.replaceAllUsesWith(NewPN);
324  NewPN->addIncoming(&PN, &ExitBB);
325  }
326 }
327 
328 /// Unswitch a trivial branch if the condition is loop invariant.
329 ///
330 /// This routine should only be called when loop code leading to the branch has
331 /// been validated as trivial (no side effects). This routine checks if the
332 /// condition is invariant and one of the successors is a loop exit. This
333 /// allows us to unswitch without duplicating the loop, making it trivial.
334 ///
335 /// If this routine fails to unswitch the branch it returns false.
336 ///
337 /// If the branch can be unswitched, this routine splits the preheader and
338 /// hoists the branch above that split. Preserves loop simplified form
339 /// (splitting the exit block as necessary). It simplifies the branch within
340 /// the loop to an unconditional branch but doesn't remove it entirely. Further
341 /// cleanup can be done with some simplify-cfg like pass.
343  LoopInfo &LI) {
344  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
345  DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
346 
347  Value *LoopCond = BI.getCondition();
348 
349  // Need a trivial loop condition to unswitch.
350  if (!L.isLoopInvariant(LoopCond))
351  return false;
352 
353  // FIXME: We should compute this once at the start and update it!
355  L.getExitBlocks(ExitBlocks);
356  SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(),
357  ExitBlocks.end());
358 
359  // Check to see if a successor of the branch is guaranteed to
360  // exit through a unique exit block without having any
361  // side-effects. If so, determine the value of Cond that causes
362  // it to do this.
364  ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext());
365  int LoopExitSuccIdx = 0;
366  auto *LoopExitBB = BI.getSuccessor(0);
367  if (!ExitBlockSet.count(LoopExitBB)) {
368  std::swap(CondVal, Replacement);
369  LoopExitSuccIdx = 1;
370  LoopExitBB = BI.getSuccessor(1);
371  if (!ExitBlockSet.count(LoopExitBB))
372  return false;
373  }
374  auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
375  assert(L.contains(ContinueBB) &&
376  "Cannot have both successors exit and still be in the loop!");
377 
378  auto *ParentBB = BI.getParent();
379  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB))
380  return false;
381 
382  DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal
383  << " == " << LoopCond << "\n");
384 
385  // Split the preheader, so that we know that there is a safe place to insert
386  // the conditional branch. We will change the preheader to have a conditional
387  // branch on LoopCond.
388  BasicBlock *OldPH = L.getLoopPreheader();
389  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
390 
391  // Now that we have a place to insert the conditional branch, create a place
392  // to branch to: this is the exit block out of the loop that we are
393  // unswitching. We need to split this if there are other loop predecessors.
394  // Because the loop is in simplified form, *any* other predecessor is enough.
395  BasicBlock *UnswitchedBB;
396  if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) {
397  (void)PredBB;
398  assert(PredBB == BI.getParent() &&
399  "A branch's parent isn't a predecessor!");
400  UnswitchedBB = LoopExitBB;
401  } else {
402  UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI);
403  }
404 
405  // Now splice the branch to gate reaching the new preheader and re-point its
406  // successors.
407  OldPH->getInstList().splice(std::prev(OldPH->end()),
408  BI.getParent()->getInstList(), BI);
409  OldPH->getTerminator()->eraseFromParent();
410  BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
411  BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
412 
413  // Create a new unconditional branch that will continue the loop as a new
414  // terminator.
415  BranchInst::Create(ContinueBB, ParentBB);
416 
417  // Rewrite the relevant PHI nodes.
418  if (UnswitchedBB == LoopExitBB)
419  rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
420  else
421  rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
422  *ParentBB, *OldPH);
423 
424  // Now we need to update the dominator tree.
425  updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
426  // But if we split something off of the loop exit block then we also removed
427  // one of the predecessors for the loop exit block and may need to update its
428  // idom.
429  if (UnswitchedBB != LoopExitBB)
430  updateIDomWithKnownCommonDominator(LoopExitBB, L.getHeader(), DT);
431 
432  // Since this is an i1 condition we can also trivially replace uses of it
433  // within the loop with a constant.
434  replaceLoopUsesWithConstant(L, *LoopCond, *Replacement);
435 
436  ++NumTrivial;
437  ++NumBranches;
438  return true;
439 }
440 
441 /// Unswitch a trivial switch if the condition is loop invariant.
442 ///
443 /// This routine should only be called when loop code leading to the switch has
444 /// been validated as trivial (no side effects). This routine checks if the
445 /// condition is invariant and that at least one of the successors is a loop
446 /// exit. This allows us to unswitch without duplicating the loop, making it
447 /// trivial.
448 ///
449 /// If this routine fails to unswitch the switch it returns false.
450 ///
451 /// If the switch can be unswitched, this routine splits the preheader and
452 /// copies the switch above that split. If the default case is one of the
453 /// exiting cases, it copies the non-exiting cases and points them at the new
454 /// preheader. If the default case is not exiting, it copies the exiting cases
455 /// and points the default at the preheader. It preserves loop simplified form
456 /// (splitting the exit blocks as necessary). It simplifies the switch within
457 /// the loop by removing now-dead cases. If the default case is one of those
458 /// unswitched, it replaces its destination with a new basic block containing
459 /// only unreachable. Such basic blocks, while technically loop exits, are not
460 /// considered for unswitching so this is a stable transform and the same
461 /// switch will not be revisited. If after unswitching there is only a single
462 /// in-loop successor, the switch is further simplified to an unconditional
463 /// branch. Still more cleanup can be done with some simplify-cfg like pass.
465  LoopInfo &LI) {
466  DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
467  Value *LoopCond = SI.getCondition();
468 
469  // If this isn't switching on an invariant condition, we can't unswitch it.
470  if (!L.isLoopInvariant(LoopCond))
471  return false;
472 
473  auto *ParentBB = SI.getParent();
474 
475  // FIXME: We should compute this once at the start and update it!
477  L.getExitBlocks(ExitBlocks);
478  SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(),
479  ExitBlocks.end());
480 
481  SmallVector<int, 4> ExitCaseIndices;
482  for (auto Case : SI.cases()) {
483  auto *SuccBB = Case.getCaseSuccessor();
484  if (ExitBlockSet.count(SuccBB) &&
485  areLoopExitPHIsLoopInvariant(L, *ParentBB, *SuccBB))
486  ExitCaseIndices.push_back(Case.getCaseIndex());
487  }
488  BasicBlock *DefaultExitBB = nullptr;
489  if (ExitBlockSet.count(SI.getDefaultDest()) &&
490  areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) &&
491  !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
492  DefaultExitBB = SI.getDefaultDest();
493  else if (ExitCaseIndices.empty())
494  return false;
495 
496  DEBUG(dbgs() << " unswitching trivial cases...\n");
497 
499  ExitCases.reserve(ExitCaseIndices.size());
500  // We walk the case indices backwards so that we remove the last case first
501  // and don't disrupt the earlier indices.
502  for (unsigned Index : reverse(ExitCaseIndices)) {
503  auto CaseI = SI.case_begin() + Index;
504  // Save the value of this case.
505  ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
506  // Delete the unswitched cases.
507  SI.removeCase(CaseI);
508  }
509 
510  // Check if after this all of the remaining cases point at the same
511  // successor.
512  BasicBlock *CommonSuccBB = nullptr;
513  if (SI.getNumCases() > 0 &&
514  std::all_of(std::next(SI.case_begin()), SI.case_end(),
515  [&SI](const SwitchInst::CaseHandle &Case) {
516  return Case.getCaseSuccessor() ==
517  SI.case_begin()->getCaseSuccessor();
518  }))
519  CommonSuccBB = SI.case_begin()->getCaseSuccessor();
520 
521  if (DefaultExitBB) {
522  // We can't remove the default edge so replace it with an edge to either
523  // the single common remaining successor (if we have one) or an unreachable
524  // block.
525  if (CommonSuccBB) {
526  SI.setDefaultDest(CommonSuccBB);
527  } else {
528  BasicBlock *UnreachableBB = BasicBlock::Create(
529  ParentBB->getContext(),
530  Twine(ParentBB->getName()) + ".unreachable_default",
531  ParentBB->getParent());
532  new UnreachableInst(ParentBB->getContext(), UnreachableBB);
533  SI.setDefaultDest(UnreachableBB);
534  DT.addNewBlock(UnreachableBB, ParentBB);
535  }
536  } else {
537  // If we're not unswitching the default, we need it to match any cases to
538  // have a common successor or if we have no cases it is the common
539  // successor.
540  if (SI.getNumCases() == 0)
541  CommonSuccBB = SI.getDefaultDest();
542  else if (SI.getDefaultDest() != CommonSuccBB)
543  CommonSuccBB = nullptr;
544  }
545 
546  // Split the preheader, so that we know that there is a safe place to insert
547  // the switch.
548  BasicBlock *OldPH = L.getLoopPreheader();
549  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
550  OldPH->getTerminator()->eraseFromParent();
551 
552  // Now add the unswitched switch.
553  auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
554 
555  // Rewrite the IR for the unswitched basic blocks. This requires two steps.
556  // First, we split any exit blocks with remaining in-loop predecessors. Then
557  // we update the PHIs in one of two ways depending on if there was a split.
558  // We walk in reverse so that we split in the same order as the cases
559  // appeared. This is purely for convenience of reading the resulting IR, but
560  // it doesn't cost anything really.
561  SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
563  // Handle the default exit if necessary.
564  // FIXME: It'd be great if we could merge this with the loop below but LLVM's
565  // ranges aren't quite powerful enough yet.
566  if (DefaultExitBB) {
567  if (pred_empty(DefaultExitBB)) {
568  UnswitchedExitBBs.insert(DefaultExitBB);
569  rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
570  } else {
571  auto *SplitBB =
572  SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
573  rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
574  *ParentBB, *OldPH);
575  updateIDomWithKnownCommonDominator(DefaultExitBB, L.getHeader(), DT);
576  DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
577  }
578  }
579  // Note that we must use a reference in the for loop so that we update the
580  // container.
581  for (auto &CasePair : reverse(ExitCases)) {
582  // Grab a reference to the exit block in the pair so that we can update it.
583  BasicBlock *ExitBB = CasePair.second;
584 
585  // If this case is the last edge into the exit block, we can simply reuse it
586  // as it will no longer be a loop exit. No mapping necessary.
587  if (pred_empty(ExitBB)) {
588  // Only rewrite once.
589  if (UnswitchedExitBBs.insert(ExitBB).second)
590  rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
591  continue;
592  }
593 
594  // Otherwise we need to split the exit block so that we retain an exit
595  // block from the loop and a target for the unswitched condition.
596  BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
597  if (!SplitExitBB) {
598  // If this is the first time we see this, do the split and remember it.
599  SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
600  rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
601  *ParentBB, *OldPH);
603  }
604  // Update the case pair to point to the split block.
605  CasePair.second = SplitExitBB;
606  }
607 
608  // Now add the unswitched cases. We do this in reverse order as we built them
609  // in reverse order.
610  for (auto CasePair : reverse(ExitCases)) {
611  ConstantInt *CaseVal = CasePair.first;
612  BasicBlock *UnswitchedBB = CasePair.second;
613 
614  NewSI->addCase(CaseVal, UnswitchedBB);
615  updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
616  }
617 
618  // If the default was unswitched, re-point it and add explicit cases for
619  // entering the loop.
620  if (DefaultExitBB) {
621  NewSI->setDefaultDest(DefaultExitBB);
622  updateDTAfterUnswitch(DefaultExitBB, OldPH, DT);
623 
624  // We removed all the exit cases, so we just copy the cases to the
625  // unswitched switch.
626  for (auto Case : SI.cases())
627  NewSI->addCase(Case.getCaseValue(), NewPH);
628  }
629 
630  // If we ended up with a common successor for every path through the switch
631  // after unswitching, rewrite it to an unconditional branch to make it easy
632  // to recognize. Otherwise we potentially have to recognize the default case
633  // pointing at unreachable and other complexity.
634  if (CommonSuccBB) {
635  BasicBlock *BB = SI.getParent();
636  SI.eraseFromParent();
637  BranchInst::Create(CommonSuccBB, BB);
638  }
639 
640  DT.verifyDomTree();
641  ++NumTrivial;
642  ++NumSwitches;
643  return true;
644 }
645 
646 /// This routine scans the loop to find a branch or switch which occurs before
647 /// any side effects occur. These can potentially be unswitched without
648 /// duplicating the loop. If a branch or switch is successfully unswitched the
649 /// scanning continues to see if subsequent branches or switches have become
650 /// trivial. Once all trivial candidates have been unswitched, this routine
651 /// returns.
652 ///
653 /// The return value indicates whether anything was unswitched (and therefore
654 /// changed).
656  LoopInfo &LI) {
657  bool Changed = false;
658 
659  // If loop header has only one reachable successor we should keep looking for
660  // trivial condition candidates in the successor as well. An alternative is
661  // to constant fold conditions and merge successors into loop header (then we
662  // only need to check header's terminator). The reason for not doing this in
663  // LoopUnswitch pass is that it could potentially break LoopPassManager's
664  // invariants. Folding dead branches could either eliminate the current loop
665  // or make other loops unreachable. LCSSA form might also not be preserved
666  // after deleting branches. The following code keeps traversing loop header's
667  // successors until it finds the trivial condition candidate (condition that
668  // is not a constant). Since unswitching generates branches with constant
669  // conditions, this scenario could be very common in practice.
670  BasicBlock *CurrentBB = L.getHeader();
672  Visited.insert(CurrentBB);
673  do {
674  // Check if there are any side-effecting instructions (e.g. stores, calls,
675  // volatile loads) in the part of the loop that the code *would* execute
676  // without unswitching.
677  if (llvm::any_of(*CurrentBB,
678  [](Instruction &I) { return I.mayHaveSideEffects(); }))
679  return Changed;
680 
681  TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
682 
683  if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
684  // Don't bother trying to unswitch past a switch with a constant
685  // condition. This should be removed prior to running this pass by
686  // simplify-cfg.
687  if (isa<Constant>(SI->getCondition()))
688  return Changed;
689 
690  if (!unswitchTrivialSwitch(L, *SI, DT, LI))
691  // Coludn't unswitch this one so we're done.
692  return Changed;
693 
694  // Mark that we managed to unswitch something.
695  Changed = true;
696 
697  // If unswitching turned the terminator into an unconditional branch then
698  // we can continue. The unswitching logic specifically works to fold any
699  // cases it can into an unconditional branch to make it easier to
700  // recognize here.
701  auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
702  if (!BI || BI->isConditional())
703  return Changed;
704 
705  CurrentBB = BI->getSuccessor(0);
706  continue;
707  }
708 
709  auto *BI = dyn_cast<BranchInst>(CurrentTerm);
710  if (!BI)
711  // We do not understand other terminator instructions.
712  return Changed;
713 
714  // Don't bother trying to unswitch past an unconditional branch or a branch
715  // with a constant value. These should be removed by simplify-cfg prior to
716  // running this pass.
717  if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
718  return Changed;
719 
720  // Found a trivial condition candidate: non-foldable conditional branch. If
721  // we fail to unswitch this, we can't do anything else that is trivial.
722  if (!unswitchTrivialBranch(L, *BI, DT, LI))
723  return Changed;
724 
725  // Mark that we managed to unswitch something.
726  Changed = true;
727 
728  // We unswitched the branch. This should always leave us with an
729  // unconditional branch that we can follow now.
730  BI = cast<BranchInst>(CurrentBB->getTerminator());
731  assert(!BI->isConditional() &&
732  "Cannot form a conditional branch by unswitching1");
733  CurrentBB = BI->getSuccessor(0);
734 
735  // When continuing, if we exit the loop or reach a previous visited block,
736  // then we can not reach any trivial condition candidates (unfoldable
737  // branch instructions or switch instructions) and no unswitch can happen.
738  } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
739 
740  return Changed;
741 }
742 
743 /// Build the cloned blocks for an unswitched copy of the given loop.
744 ///
745 /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
746 /// after the split block (`SplitBB`) that will be used to select between the
747 /// cloned and original loop.
748 ///
749 /// This routine handles cloning all of the necessary loop blocks and exit
750 /// blocks including rewriting their instructions and the relevant PHI nodes.
751 /// It skips loop and exit blocks that are not necessary based on the provided
752 /// set. It also correctly creates the unconditional branch in the cloned
753 /// unswitched parent block to only point at the unswitched successor.
754 ///
755 /// This does not handle most of the necessary updates to `LoopInfo`. Only exit
756 /// block splitting is correctly reflected in `LoopInfo`, essentially all of
757 /// the cloned blocks (and their loops) are left without full `LoopInfo`
758 /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
759 /// blocks to them but doesn't create the cloned `DominatorTree` structure and
760 /// instead the caller must recompute an accurate DT. It *does* correctly
761 /// update the `AssumptionCache` provided in `AC`.
763  Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
764  ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
765  BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
766  const SmallPtrSetImpl<BasicBlock *> &SkippedLoopAndExitBlocks,
768  LoopInfo &LI) {
770  NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
771 
772  // We will need to clone a bunch of blocks, wrap up the clone operation in
773  // a helper.
774  auto CloneBlock = [&](BasicBlock *OldBB) {
775  // Clone the basic block and insert it before the new preheader.
776  BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
777  NewBB->moveBefore(LoopPH);
778 
779  // Record this block and the mapping.
780  NewBlocks.push_back(NewBB);
781  VMap[OldBB] = NewBB;
782 
783  // Add the block to the domtree. We'll move it to the correct position
784  // below.
785  DT.addNewBlock(NewBB, SplitBB);
786 
787  return NewBB;
788  };
789 
790  // First, clone the preheader.
791  auto *ClonedPH = CloneBlock(LoopPH);
792 
793  // Then clone all the loop blocks, skipping the ones that aren't necessary.
794  for (auto *LoopBB : L.blocks())
795  if (!SkippedLoopAndExitBlocks.count(LoopBB))
796  CloneBlock(LoopBB);
797 
798  // Split all the loop exit edges so that when we clone the exit blocks, if
799  // any of the exit blocks are *also* a preheader for some other loop, we
800  // don't create multiple predecessors entering the loop header.
801  for (auto *ExitBB : ExitBlocks) {
802  if (SkippedLoopAndExitBlocks.count(ExitBB))
803  continue;
804 
805  // When we are going to clone an exit, we don't need to clone all the
806  // instructions in the exit block and we want to ensure we have an easy
807  // place to merge the CFG, so split the exit first. This is always safe to
808  // do because there cannot be any non-loop predecessors of a loop exit in
809  // loop simplified form.
810  auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
811 
812  // Rearrange the names to make it easier to write test cases by having the
813  // exit block carry the suffix rather than the merge block carrying the
814  // suffix.
815  MergeBB->takeName(ExitBB);
816  ExitBB->setName(Twine(MergeBB->getName()) + ".split");
817 
818  // Now clone the original exit block.
819  auto *ClonedExitBB = CloneBlock(ExitBB);
820  assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
821  "Exit block should have been split to have one successor!");
822  assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
823  "Cloned exit block has the wrong successor!");
824 
825  // Move the merge block's idom to be the split point as one exit is
826  // dominated by one header, and the other by another, so we know the split
827  // point dominates both. While the dominator tree isn't fully accurate, we
828  // want sub-trees within the original loop to be correctly reflect
829  // dominance within that original loop (at least) and that requires moving
830  // the merge block out of that subtree.
831  // FIXME: This is very brittle as we essentially have a partial contract on
832  // the dominator tree. We really need to instead update it and keep it
833  // valid or stop relying on it.
834  DT.changeImmediateDominator(MergeBB, SplitBB);
835 
836  // Remap any cloned instructions and create a merge phi node for them.
837  for (auto ZippedInsts : llvm::zip_first(
838  llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
839  llvm::make_range(ClonedExitBB->begin(),
840  std::prev(ClonedExitBB->end())))) {
841  Instruction &I = std::get<0>(ZippedInsts);
842  Instruction &ClonedI = std::get<1>(ZippedInsts);
843 
844  // The only instructions in the exit block should be PHI nodes and
845  // potentially a landing pad.
846  assert(
847  (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
848  "Bad instruction in exit block!");
849  // We should have a value map between the instruction and its clone.
850  assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
851 
852  auto *MergePN =
853  PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
854  &*MergeBB->getFirstInsertionPt());
855  I.replaceAllUsesWith(MergePN);
856  MergePN->addIncoming(&I, ExitBB);
857  MergePN->addIncoming(&ClonedI, ClonedExitBB);
858  }
859  }
860 
861  // Rewrite the instructions in the cloned blocks to refer to the instructions
862  // in the cloned blocks. We have to do this as a second pass so that we have
863  // everything available. Also, we have inserted new instructions which may
864  // include assume intrinsics, so we update the assumption cache while
865  // processing this.
866  for (auto *ClonedBB : NewBlocks)
867  for (Instruction &I : *ClonedBB) {
868  RemapInstruction(&I, VMap,
870  if (auto *II = dyn_cast<IntrinsicInst>(&I))
871  if (II->getIntrinsicID() == Intrinsic::assume)
872  AC.registerAssumption(II);
873  }
874 
875  // Remove the cloned parent as a predecessor of the cloned continue successor
876  // if we did in fact clone it.
877  auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
878  if (auto *ClonedContinueSuccBB =
879  cast_or_null<BasicBlock>(VMap.lookup(ContinueSuccBB)))
880  ClonedContinueSuccBB->removePredecessor(ClonedParentBB,
881  /*DontDeleteUselessPHIs*/ true);
882  // Replace the cloned branch with an unconditional branch to the cloneed
883  // unswitched successor.
884  auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
885  ClonedParentBB->getTerminator()->eraseFromParent();
886  BranchInst::Create(ClonedSuccBB, ClonedParentBB);
887 
888  // Update any PHI nodes in the cloned successors of the skipped blocks to not
889  // have spurious incoming values.
890  for (auto *LoopBB : L.blocks())
891  if (SkippedLoopAndExitBlocks.count(LoopBB))
892  for (auto *SuccBB : successors(LoopBB))
893  if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
894  for (PHINode &PN : ClonedSuccBB->phis())
895  PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
896 
897  return ClonedPH;
898 }
899 
900 /// Recursively clone the specified loop and all of its children.
901 ///
902 /// The target parent loop for the clone should be provided, or can be null if
903 /// the clone is a top-level loop. While cloning, all the blocks are mapped
904 /// with the provided value map. The entire original loop must be present in
905 /// the value map. The cloned loop is returned.
906 static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
907  const ValueToValueMapTy &VMap, LoopInfo &LI) {
908  auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
909  assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
910  ClonedL.reserveBlocks(OrigL.getNumBlocks());
911  for (auto *BB : OrigL.blocks()) {
912  auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
913  ClonedL.addBlockEntry(ClonedBB);
914  if (LI.getLoopFor(BB) == &OrigL) {
915  assert(!LI.getLoopFor(ClonedBB) &&
916  "Should not have an existing loop for this block!");
917  LI.changeLoopFor(ClonedBB, &ClonedL);
918  }
919  }
920  };
921 
922  // We specially handle the first loop because it may get cloned into
923  // a different parent and because we most commonly are cloning leaf loops.
924  Loop *ClonedRootL = LI.AllocateLoop();
925  if (RootParentL)
926  RootParentL->addChildLoop(ClonedRootL);
927  else
928  LI.addTopLevelLoop(ClonedRootL);
929  AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
930 
931  if (OrigRootL.empty())
932  return ClonedRootL;
933 
934  // If we have a nest, we can quickly clone the entire loop nest using an
935  // iterative approach because it is a tree. We keep the cloned parent in the
936  // data structure to avoid repeatedly querying through a map to find it.
937  SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
938  // Build up the loops to clone in reverse order as we'll clone them from the
939  // back.
940  for (Loop *ChildL : llvm::reverse(OrigRootL))
941  LoopsToClone.push_back({ClonedRootL, ChildL});
942  do {
943  Loop *ClonedParentL, *L;
944  std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
945  Loop *ClonedL = LI.AllocateLoop();
946  ClonedParentL->addChildLoop(ClonedL);
947  AddClonedBlocksToLoop(*L, *ClonedL);
948  for (Loop *ChildL : llvm::reverse(*L))
949  LoopsToClone.push_back({ClonedL, ChildL});
950  } while (!LoopsToClone.empty());
951 
952  return ClonedRootL;
953 }
954 
955 /// Build the cloned loops of an original loop from unswitching.
956 ///
957 /// Because unswitching simplifies the CFG of the loop, this isn't a trivial
958 /// operation. We need to re-verify that there even is a loop (as the backedge
959 /// may not have been cloned), and even if there are remaining backedges the
960 /// backedge set may be different. However, we know that each child loop is
961 /// undisturbed, we only need to find where to place each child loop within
962 /// either any parent loop or within a cloned version of the original loop.
963 ///
964 /// Because child loops may end up cloned outside of any cloned version of the
965 /// original loop, multiple cloned sibling loops may be created. All of them
966 /// are returned so that the newly introduced loop nest roots can be
967 /// identified.
969  const ValueToValueMapTy &VMap, LoopInfo &LI,
970  SmallVectorImpl<Loop *> &NonChildClonedLoops) {
971  Loop *ClonedL = nullptr;
972 
973  auto *OrigPH = OrigL.getLoopPreheader();
974  auto *OrigHeader = OrigL.getHeader();
975 
976  auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
977  auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
978 
979  // We need to know the loops of the cloned exit blocks to even compute the
980  // accurate parent loop. If we only clone exits to some parent of the
981  // original parent, we want to clone into that outer loop. We also keep track
982  // of the loops that our cloned exit blocks participate in.
983  Loop *ParentL = nullptr;
984  SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
986  ClonedExitsInLoops.reserve(ExitBlocks.size());
987  for (auto *ExitBB : ExitBlocks)
988  if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
989  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
990  ExitLoopMap[ClonedExitBB] = ExitL;
991  ClonedExitsInLoops.push_back(ClonedExitBB);
992  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
993  ParentL = ExitL;
994  }
995  assert((!ParentL || ParentL == OrigL.getParentLoop() ||
996  ParentL->contains(OrigL.getParentLoop())) &&
997  "The computed parent loop should always contain (or be) the parent of "
998  "the original loop.");
999 
1000  // We build the set of blocks dominated by the cloned header from the set of
1001  // cloned blocks out of the original loop. While not all of these will
1002  // necessarily be in the cloned loop, it is enough to establish that they
1003  // aren't in unreachable cycles, etc.
1004  SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1005  for (auto *BB : OrigL.blocks())
1006  if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1007  ClonedLoopBlocks.insert(ClonedBB);
1008 
1009  // Rebuild the set of blocks that will end up in the cloned loop. We may have
1010  // skipped cloning some region of this loop which can in turn skip some of
1011  // the backedges so we have to rebuild the blocks in the loop based on the
1012  // backedges that remain after cloning.
1014  SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1015  for (auto *Pred : predecessors(ClonedHeader)) {
1016  // The only possible non-loop header predecessor is the preheader because
1017  // we know we cloned the loop in simplified form.
1018  if (Pred == ClonedPH)
1019  continue;
1020 
1021  // Because the loop was in simplified form, the only non-loop predecessor
1022  // should be the preheader.
1023  assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1024  "header other than the preheader "
1025  "that is not part of the loop!");
1026 
1027  // Insert this block into the loop set and on the first visit (and if it
1028  // isn't the header we're currently walking) put it into the worklist to
1029  // recurse through.
1030  if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1031  Worklist.push_back(Pred);
1032  }
1033 
1034  // If we had any backedges then there *is* a cloned loop. Put the header into
1035  // the loop set and then walk the worklist backwards to find all the blocks
1036  // that remain within the loop after cloning.
1037  if (!BlocksInClonedLoop.empty()) {
1038  BlocksInClonedLoop.insert(ClonedHeader);
1039 
1040  while (!Worklist.empty()) {
1041  BasicBlock *BB = Worklist.pop_back_val();
1042  assert(BlocksInClonedLoop.count(BB) &&
1043  "Didn't put block into the loop set!");
1044 
1045  // Insert any predecessors that are in the possible set into the cloned
1046  // set, and if the insert is successful, add them to the worklist. Note
1047  // that we filter on the blocks that are definitely reachable via the
1048  // backedge to the loop header so we may prune out dead code within the
1049  // cloned loop.
1050  for (auto *Pred : predecessors(BB))
1051  if (ClonedLoopBlocks.count(Pred) &&
1052  BlocksInClonedLoop.insert(Pred).second)
1053  Worklist.push_back(Pred);
1054  }
1055 
1056  ClonedL = LI.AllocateLoop();
1057  if (ParentL) {
1058  ParentL->addBasicBlockToLoop(ClonedPH, LI);
1059  ParentL->addChildLoop(ClonedL);
1060  } else {
1061  LI.addTopLevelLoop(ClonedL);
1062  }
1063 
1064  ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1065  // We don't want to just add the cloned loop blocks based on how we
1066  // discovered them. The original order of blocks was carefully built in
1067  // a way that doesn't rely on predecessor ordering. Rather than re-invent
1068  // that logic, we just re-walk the original blocks (and those of the child
1069  // loops) and filter them as we add them into the cloned loop.
1070  for (auto *BB : OrigL.blocks()) {
1071  auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1072  if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1073  continue;
1074 
1075  // Directly add the blocks that are only in this loop.
1076  if (LI.getLoopFor(BB) == &OrigL) {
1077  ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1078  continue;
1079  }
1080 
1081  // We want to manually add it to this loop and parents.
1082  // Registering it with LoopInfo will happen when we clone the top
1083  // loop for this block.
1084  for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1085  PL->addBlockEntry(ClonedBB);
1086  }
1087 
1088  // Now add each child loop whose header remains within the cloned loop. All
1089  // of the blocks within the loop must satisfy the same constraints as the
1090  // header so once we pass the header checks we can just clone the entire
1091  // child loop nest.
1092  for (Loop *ChildL : OrigL) {
1093  auto *ClonedChildHeader =
1094  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1095  if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1096  continue;
1097 
1098 #ifndef NDEBUG
1099  // We should never have a cloned child loop header but fail to have
1100  // all of the blocks for that child loop.
1101  for (auto *ChildLoopBB : ChildL->blocks())
1102  assert(BlocksInClonedLoop.count(
1103  cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1104  "Child cloned loop has a header within the cloned outer "
1105  "loop but not all of its blocks!");
1106 #endif
1107 
1108  cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1109  }
1110  }
1111 
1112  // Now that we've handled all the components of the original loop that were
1113  // cloned into a new loop, we still need to handle anything from the original
1114  // loop that wasn't in a cloned loop.
1115 
1116  // Figure out what blocks are left to place within any loop nest containing
1117  // the unswitched loop. If we never formed a loop, the cloned PH is one of
1118  // them.
1119  SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1120  if (BlocksInClonedLoop.empty())
1121  UnloopedBlockSet.insert(ClonedPH);
1122  for (auto *ClonedBB : ClonedLoopBlocks)
1123  if (!BlocksInClonedLoop.count(ClonedBB))
1124  UnloopedBlockSet.insert(ClonedBB);
1125 
1126  // Copy the cloned exits and sort them in ascending loop depth, we'll work
1127  // backwards across these to process them inside out. The order shouldn't
1128  // matter as we're just trying to build up the map from inside-out; we use
1129  // the map in a more stably ordered way below.
1130  auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1131  std::sort(OrderedClonedExitsInLoops.begin(), OrderedClonedExitsInLoops.end(),
1132  [&](BasicBlock *LHS, BasicBlock *RHS) {
1133  return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1134  ExitLoopMap.lookup(RHS)->getLoopDepth();
1135  });
1136 
1137  // Populate the existing ExitLoopMap with everything reachable from each
1138  // exit, starting from the inner most exit.
1139  while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1140  assert(Worklist.empty() && "Didn't clear worklist!");
1141 
1142  BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1143  Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1144 
1145  // Walk the CFG back until we hit the cloned PH adding everything reachable
1146  // and in the unlooped set to this exit block's loop.
1147  Worklist.push_back(ExitBB);
1148  do {
1149  BasicBlock *BB = Worklist.pop_back_val();
1150  // We can stop recursing at the cloned preheader (if we get there).
1151  if (BB == ClonedPH)
1152  continue;
1153 
1154  for (BasicBlock *PredBB : predecessors(BB)) {
1155  // If this pred has already been moved to our set or is part of some
1156  // (inner) loop, no update needed.
1157  if (!UnloopedBlockSet.erase(PredBB)) {
1158  assert(
1159  (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1160  "Predecessor not mapped to a loop!");
1161  continue;
1162  }
1163 
1164  // We just insert into the loop set here. We'll add these blocks to the
1165  // exit loop after we build up the set in an order that doesn't rely on
1166  // predecessor order (which in turn relies on use list order).
1167  bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1168  (void)Inserted;
1169  assert(Inserted && "Should only visit an unlooped block once!");
1170 
1171  // And recurse through to its predecessors.
1172  Worklist.push_back(PredBB);
1173  }
1174  } while (!Worklist.empty());
1175  }
1176 
1177  // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1178  // blocks to their outer loops, walk the cloned blocks and the cloned exits
1179  // in their original order adding them to the correct loop.
1180 
1181  // We need a stable insertion order. We use the order of the original loop
1182  // order and map into the correct parent loop.
1183  for (auto *BB : llvm::concat<BasicBlock *const>(
1184  makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1185  if (Loop *OuterL = ExitLoopMap.lookup(BB))
1186  OuterL->addBasicBlockToLoop(BB, LI);
1187 
1188 #ifndef NDEBUG
1189  for (auto &BBAndL : ExitLoopMap) {
1190  auto *BB = BBAndL.first;
1191  auto *OuterL = BBAndL.second;
1192  assert(LI.getLoopFor(BB) == OuterL &&
1193  "Failed to put all blocks into outer loops!");
1194  }
1195 #endif
1196 
1197  // Now that all the blocks are placed into the correct containing loop in the
1198  // absence of child loops, find all the potentially cloned child loops and
1199  // clone them into whatever outer loop we placed their header into.
1200  for (Loop *ChildL : OrigL) {
1201  auto *ClonedChildHeader =
1202  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1203  if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1204  continue;
1205 
1206 #ifndef NDEBUG
1207  for (auto *ChildLoopBB : ChildL->blocks())
1208  assert(VMap.count(ChildLoopBB) &&
1209  "Cloned a child loop header but not all of that loops blocks!");
1210 #endif
1211 
1212  NonChildClonedLoops.push_back(cloneLoopNest(
1213  *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1214  }
1215 
1216  // Return the main cloned loop if any.
1217  return ClonedL;
1218 }
1219 
1220 static void deleteDeadBlocksFromLoop(Loop &L, BasicBlock *DeadSubtreeRoot,
1221  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1222  DominatorTree &DT, LoopInfo &LI) {
1223  // Walk the dominator tree to build up the set of blocks we will delete here.
1224  // The order is designed to allow us to always delete bottom-up and avoid any
1225  // dangling uses.
1227  DeadBlocks.insert(DeadSubtreeRoot);
1228  for (int i = 0; i < (int)DeadBlocks.size(); ++i)
1229  for (DomTreeNode *ChildN : *DT[DeadBlocks[i]]) {
1230  // FIXME: This assert should pass and that means we don't change nearly
1231  // as much below! Consider rewriting all of this to avoid deleting
1232  // blocks. They are always cloned before being deleted, and so instead
1233  // could just be moved.
1234  // FIXME: This in turn means that we might actually be more able to
1235  // update the domtree.
1236  assert((L.contains(ChildN->getBlock()) ||
1237  llvm::find(ExitBlocks, ChildN->getBlock()) != ExitBlocks.end()) &&
1238  "Should never reach beyond the loop and exits when deleting!");
1239  DeadBlocks.insert(ChildN->getBlock());
1240  }
1241 
1242  // Filter out the dead blocks from the exit blocks list so that it can be
1243  // used in the caller.
1244  llvm::erase_if(ExitBlocks,
1245  [&](BasicBlock *BB) { return DeadBlocks.count(BB); });
1246 
1247  // Remove these blocks from their successors.
1248  for (auto *BB : DeadBlocks)
1249  for (BasicBlock *SuccBB : successors(BB))
1250  SuccBB->removePredecessor(BB, /*DontDeleteUselessPHIs*/ true);
1251 
1252  // Walk from this loop up through its parents removing all of the dead blocks.
1253  for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1254  for (auto *BB : DeadBlocks)
1255  ParentL->getBlocksSet().erase(BB);
1256  llvm::erase_if(ParentL->getBlocksVector(),
1257  [&](BasicBlock *BB) { return DeadBlocks.count(BB); });
1258  }
1259 
1260  // Now delete the dead child loops. This raw delete will clear them
1261  // recursively.
1262  llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1263  if (!DeadBlocks.count(ChildL->getHeader()))
1264  return false;
1265 
1266  assert(llvm::all_of(ChildL->blocks(),
1267  [&](BasicBlock *ChildBB) {
1268  return DeadBlocks.count(ChildBB);
1269  }) &&
1270  "If the child loop header is dead all blocks in the child loop must "
1271  "be dead as well!");
1272  LI.destroy(ChildL);
1273  return true;
1274  });
1275 
1276  // Remove the mappings for the dead blocks.
1277  for (auto *BB : DeadBlocks)
1278  LI.changeLoopFor(BB, nullptr);
1279 
1280  // Drop all the references from these blocks to others to handle cyclic
1281  // references as we start deleting the blocks themselves.
1282  for (auto *BB : DeadBlocks)
1283  BB->dropAllReferences();
1284 
1285  for (auto *BB : llvm::reverse(DeadBlocks)) {
1286  DT.eraseNode(BB);
1287  BB->eraseFromParent();
1288  }
1289 }
1290 
1291 /// Recompute the set of blocks in a loop after unswitching.
1292 ///
1293 /// This walks from the original headers predecessors to rebuild the loop. We
1294 /// take advantage of the fact that new blocks can't have been added, and so we
1295 /// filter by the original loop's blocks. This also handles potentially
1296 /// unreachable code that we don't want to explore but might be found examining
1297 /// the predecessors of the header.
1298 ///
1299 /// If the original loop is no longer a loop, this will return an empty set. If
1300 /// it remains a loop, all the blocks within it will be added to the set
1301 /// (including those blocks in inner loops).
1303  LoopInfo &LI) {
1305 
1306  auto *PH = L.getLoopPreheader();
1307  auto *Header = L.getHeader();
1308 
1309  // A worklist to use while walking backwards from the header.
1311 
1312  // First walk the predecessors of the header to find the backedges. This will
1313  // form the basis of our walk.
1314  for (auto *Pred : predecessors(Header)) {
1315  // Skip the preheader.
1316  if (Pred == PH)
1317  continue;
1318 
1319  // Because the loop was in simplified form, the only non-loop predecessor
1320  // is the preheader.
1321  assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1322  "than the preheader that is not part of the "
1323  "loop!");
1324 
1325  // Insert this block into the loop set and on the first visit and, if it
1326  // isn't the header we're currently walking, put it into the worklist to
1327  // recurse through.
1328  if (LoopBlockSet.insert(Pred).second && Pred != Header)
1329  Worklist.push_back(Pred);
1330  }
1331 
1332  // If no backedges were found, we're done.
1333  if (LoopBlockSet.empty())
1334  return LoopBlockSet;
1335 
1336  // Add the loop header to the set.
1337  LoopBlockSet.insert(Header);
1338 
1339  // We found backedges, recurse through them to identify the loop blocks.
1340  while (!Worklist.empty()) {
1341  BasicBlock *BB = Worklist.pop_back_val();
1342  assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1343 
1344  // Because we know the inner loop structure remains valid we can use the
1345  // loop structure to jump immediately across the entire nested loop.
1346  // Further, because it is in loop simplified form, we can directly jump
1347  // to its preheader afterward.
1348  if (Loop *InnerL = LI.getLoopFor(BB))
1349  if (InnerL != &L) {
1350  assert(L.contains(InnerL) &&
1351  "Should not reach a loop *outside* this loop!");
1352  // The preheader is the only possible predecessor of the loop so
1353  // insert it into the set and check whether it was already handled.
1354  auto *InnerPH = InnerL->getLoopPreheader();
1355  assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1356  "but not contain the inner loop "
1357  "preheader!");
1358  if (!LoopBlockSet.insert(InnerPH).second)
1359  // The only way to reach the preheader is through the loop body
1360  // itself so if it has been visited the loop is already handled.
1361  continue;
1362 
1363  // Insert all of the blocks (other than those already present) into
1364  // the loop set. The only block we expect to already be in the set is
1365  // the one we used to find this loop as we immediately handle the
1366  // others the first time we encounter the loop.
1367  for (auto *InnerBB : InnerL->blocks()) {
1368  if (InnerBB == BB) {
1369  assert(LoopBlockSet.count(InnerBB) &&
1370  "Block should already be in the set!");
1371  continue;
1372  }
1373 
1374  bool Inserted = LoopBlockSet.insert(InnerBB).second;
1375  (void)Inserted;
1376  assert(Inserted && "Should only insert an inner loop once!");
1377  }
1378 
1379  // Add the preheader to the worklist so we will continue past the
1380  // loop body.
1381  Worklist.push_back(InnerPH);
1382  continue;
1383  }
1384 
1385  // Insert any predecessors that were in the original loop into the new
1386  // set, and if the insert is successful, add them to the worklist.
1387  for (auto *Pred : predecessors(BB))
1388  if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1389  Worklist.push_back(Pred);
1390  }
1391 
1392  // We've found all the blocks participating in the loop, return our completed
1393  // set.
1394  return LoopBlockSet;
1395 }
1396 
1397 /// Rebuild a loop after unswitching removes some subset of blocks and edges.
1398 ///
1399 /// The removal may have removed some child loops entirely but cannot have
1400 /// disturbed any remaining child loops. However, they may need to be hoisted
1401 /// to the parent loop (or to be top-level loops). The original loop may be
1402 /// completely removed.
1403 ///
1404 /// The sibling loops resulting from this update are returned. If the original
1405 /// loop remains a valid loop, it will be the first entry in this list with all
1406 /// of the newly sibling loops following it.
1407 ///
1408 /// Returns true if the loop remains a loop after unswitching, and false if it
1409 /// is no longer a loop after unswitching (and should not continue to be
1410 /// referenced).
1412  LoopInfo &LI,
1413  SmallVectorImpl<Loop *> &HoistedLoops) {
1414  auto *PH = L.getLoopPreheader();
1415 
1416  // Compute the actual parent loop from the exit blocks. Because we may have
1417  // pruned some exits the loop may be different from the original parent.
1418  Loop *ParentL = nullptr;
1419  SmallVector<Loop *, 4> ExitLoops;
1420  SmallVector<BasicBlock *, 4> ExitsInLoops;
1421  ExitsInLoops.reserve(ExitBlocks.size());
1422  for (auto *ExitBB : ExitBlocks)
1423  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1424  ExitLoops.push_back(ExitL);
1425  ExitsInLoops.push_back(ExitBB);
1426  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1427  ParentL = ExitL;
1428  }
1429 
1430  // Recompute the blocks participating in this loop. This may be empty if it
1431  // is no longer a loop.
1432  auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1433 
1434  // If we still have a loop, we need to re-set the loop's parent as the exit
1435  // block set changing may have moved it within the loop nest. Note that this
1436  // can only happen when this loop has a parent as it can only hoist the loop
1437  // *up* the nest.
1438  if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1439  // Remove this loop's (original) blocks from all of the intervening loops.
1440  for (Loop *IL = L.getParentLoop(); IL != ParentL;
1441  IL = IL->getParentLoop()) {
1442  IL->getBlocksSet().erase(PH);
1443  for (auto *BB : L.blocks())
1444  IL->getBlocksSet().erase(BB);
1445  llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1446  return BB == PH || L.contains(BB);
1447  });
1448  }
1449 
1450  LI.changeLoopFor(PH, ParentL);
1451  L.getParentLoop()->removeChildLoop(&L);
1452  if (ParentL)
1453  ParentL->addChildLoop(&L);
1454  else
1455  LI.addTopLevelLoop(&L);
1456  }
1457 
1458  // Now we update all the blocks which are no longer within the loop.
1459  auto &Blocks = L.getBlocksVector();
1460  auto BlocksSplitI =
1461  LoopBlockSet.empty()
1462  ? Blocks.begin()
1463  : std::stable_partition(
1464  Blocks.begin(), Blocks.end(),
1465  [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1466 
1467  // Before we erase the list of unlooped blocks, build a set of them.
1468  SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1469  if (LoopBlockSet.empty())
1470  UnloopedBlocks.insert(PH);
1471 
1472  // Now erase these blocks from the loop.
1473  for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1474  L.getBlocksSet().erase(BB);
1475  Blocks.erase(BlocksSplitI, Blocks.end());
1476 
1477  // Sort the exits in ascending loop depth, we'll work backwards across these
1478  // to process them inside out.
1479  std::stable_sort(ExitsInLoops.begin(), ExitsInLoops.end(),
1480  [&](BasicBlock *LHS, BasicBlock *RHS) {
1481  return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1482  });
1483 
1484  // We'll build up a set for each exit loop.
1485  SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1486  Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1487 
1488  auto RemoveUnloopedBlocksFromLoop =
1489  [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1490  for (auto *BB : UnloopedBlocks)
1491  L.getBlocksSet().erase(BB);
1492  llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1493  return UnloopedBlocks.count(BB);
1494  });
1495  };
1496 
1498  while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1499  assert(Worklist.empty() && "Didn't clear worklist!");
1500  assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1501 
1502  // Grab the next exit block, in decreasing loop depth order.
1503  BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1504  Loop &ExitL = *LI.getLoopFor(ExitBB);
1505  assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1506 
1507  // Erase all of the unlooped blocks from the loops between the previous
1508  // exit loop and this exit loop. This works because the ExitInLoops list is
1509  // sorted in increasing order of loop depth and thus we visit loops in
1510  // decreasing order of loop depth.
1511  for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1512  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1513 
1514  // Walk the CFG back until we hit the cloned PH adding everything reachable
1515  // and in the unlooped set to this exit block's loop.
1516  Worklist.push_back(ExitBB);
1517  do {
1518  BasicBlock *BB = Worklist.pop_back_val();
1519  // We can stop recursing at the cloned preheader (if we get there).
1520  if (BB == PH)
1521  continue;
1522 
1523  for (BasicBlock *PredBB : predecessors(BB)) {
1524  // If this pred has already been moved to our set or is part of some
1525  // (inner) loop, no update needed.
1526  if (!UnloopedBlocks.erase(PredBB)) {
1527  assert((NewExitLoopBlocks.count(PredBB) ||
1528  ExitL.contains(LI.getLoopFor(PredBB))) &&
1529  "Predecessor not in a nested loop (or already visited)!");
1530  continue;
1531  }
1532 
1533  // We just insert into the loop set here. We'll add these blocks to the
1534  // exit loop after we build up the set in a deterministic order rather
1535  // than the predecessor-influenced visit order.
1536  bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
1537  (void)Inserted;
1538  assert(Inserted && "Should only visit an unlooped block once!");
1539 
1540  // And recurse through to its predecessors.
1541  Worklist.push_back(PredBB);
1542  }
1543  } while (!Worklist.empty());
1544 
1545  // If blocks in this exit loop were directly part of the original loop (as
1546  // opposed to a child loop) update the map to point to this exit loop. This
1547  // just updates a map and so the fact that the order is unstable is fine.
1548  for (auto *BB : NewExitLoopBlocks)
1549  if (Loop *BBL = LI.getLoopFor(BB))
1550  if (BBL == &L || !L.contains(BBL))
1551  LI.changeLoopFor(BB, &ExitL);
1552 
1553  // We will remove the remaining unlooped blocks from this loop in the next
1554  // iteration or below.
1555  NewExitLoopBlocks.clear();
1556  }
1557 
1558  // Any remaining unlooped blocks are no longer part of any loop unless they
1559  // are part of some child loop.
1560  for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
1561  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1562  for (auto *BB : UnloopedBlocks)
1563  if (Loop *BBL = LI.getLoopFor(BB))
1564  if (BBL == &L || !L.contains(BBL))
1565  LI.changeLoopFor(BB, nullptr);
1566 
1567  // Sink all the child loops whose headers are no longer in the loop set to
1568  // the parent (or to be top level loops). We reach into the loop and directly
1569  // update its subloop vector to make this batch update efficient.
1570  auto &SubLoops = L.getSubLoopsVector();
1571  auto SubLoopsSplitI =
1572  LoopBlockSet.empty()
1573  ? SubLoops.begin()
1574  : std::stable_partition(
1575  SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
1576  return LoopBlockSet.count(SubL->getHeader());
1577  });
1578  for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
1579  HoistedLoops.push_back(HoistedL);
1580  HoistedL->setParentLoop(nullptr);
1581 
1582  // To compute the new parent of this hoisted loop we look at where we
1583  // placed the preheader above. We can't lookup the header itself because we
1584  // retained the mapping from the header to the hoisted loop. But the
1585  // preheader and header should have the exact same new parent computed
1586  // based on the set of exit blocks from the original loop as the preheader
1587  // is a predecessor of the header and so reached in the reverse walk. And
1588  // because the loops were all in simplified form the preheader of the
1589  // hoisted loop can't be part of some *other* loop.
1590  if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
1591  NewParentL->addChildLoop(HoistedL);
1592  else
1593  LI.addTopLevelLoop(HoistedL);
1594  }
1595  SubLoops.erase(SubLoopsSplitI, SubLoops.end());
1596 
1597  // Actually delete the loop if nothing remained within it.
1598  if (Blocks.empty()) {
1599  assert(SubLoops.empty() &&
1600  "Failed to remove all subloops from the original loop!");
1601  if (Loop *ParentL = L.getParentLoop())
1602  ParentL->removeChildLoop(llvm::find(*ParentL, &L));
1603  else
1604  LI.removeLoop(llvm::find(LI, &L));
1605  LI.destroy(&L);
1606  return false;
1607  }
1608 
1609  return true;
1610 }
1611 
1612 /// Helper to visit a dominator subtree, invoking a callable on each node.
1613 ///
1614 /// Returning false at any point will stop walking past that node of the tree.
1615 template <typename CallableT>
1616 void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
1617  SmallVector<DomTreeNode *, 4> DomWorklist;
1618  DomWorklist.push_back(DT[BB]);
1619 #ifndef NDEBUG
1621  Visited.insert(DT[BB]);
1622 #endif
1623  do {
1624  DomTreeNode *N = DomWorklist.pop_back_val();
1625 
1626  // Visit this node.
1627  if (!Callable(N->getBlock()))
1628  continue;
1629 
1630  // Accumulate the child nodes.
1631  for (DomTreeNode *ChildN : *N) {
1632  assert(Visited.insert(ChildN).second &&
1633  "Cannot visit a node twice when walking a tree!");
1634  DomWorklist.push_back(ChildN);
1635  }
1636  } while (!DomWorklist.empty());
1637 }
1638 
1639 /// Take an invariant branch that has been determined to be safe and worthwhile
1640 /// to unswitch despite being non-trivial to do so and perform the unswitch.
1641 ///
1642 /// This directly updates the CFG to hoist the predicate out of the loop, and
1643 /// clone the necessary parts of the loop to maintain behavior.
1644 ///
1645 /// It also updates both dominator tree and loopinfo based on the unswitching.
1646 ///
1647 /// Once unswitching has been performed it runs the provided callback to report
1648 /// the new loops and no-longer valid loops to the caller.
1650  Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI,
1651  AssumptionCache &AC,
1652  function_ref<void(bool, ArrayRef<Loop *>)> NonTrivialUnswitchCB) {
1653  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
1655  "Can only unswitch an invariant branch condition!");
1656 
1657  // Constant and BBs tracking the cloned and continuing successor.
1658  const int ClonedSucc = 0;
1659  auto *ParentBB = BI.getParent();
1660  auto *UnswitchedSuccBB = BI.getSuccessor(ClonedSucc);
1661  auto *ContinueSuccBB = BI.getSuccessor(1 - ClonedSucc);
1662 
1663  assert(UnswitchedSuccBB != ContinueSuccBB &&
1664  "Should not unswitch a branch that always goes to the same place!");
1665 
1666  // The branch should be in this exact loop. Any inner loop's invariant branch
1667  // should be handled by unswitching that inner loop. The caller of this
1668  // routine should filter out any candidates that remain (but were skipped for
1669  // whatever reason).
1670  assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
1671 
1672  SmallVector<BasicBlock *, 4> ExitBlocks;
1673  L.getUniqueExitBlocks(ExitBlocks);
1674 
1675  // We cannot unswitch if exit blocks contain a cleanuppad instruction as we
1676  // don't know how to split those exit blocks.
1677  // FIXME: We should teach SplitBlock to handle this and remove this
1678  // restriction.
1679  for (auto *ExitBB : ExitBlocks)
1680  if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI()))
1681  return false;
1682 
1683  SmallPtrSet<BasicBlock *, 4> ExitBlockSet(ExitBlocks.begin(),
1684  ExitBlocks.end());
1685 
1686  // Compute the parent loop now before we start hacking on things.
1687  Loop *ParentL = L.getParentLoop();
1688 
1689  // Compute the outer-most loop containing one of our exit blocks. This is the
1690  // furthest up our loopnest which can be mutated, which we will use below to
1691  // update things.
1692  Loop *OuterExitL = &L;
1693  for (auto *ExitBB : ExitBlocks) {
1694  Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
1695  if (!NewOuterExitL) {
1696  // We exited the entire nest with this block, so we're done.
1697  OuterExitL = nullptr;
1698  break;
1699  }
1700  if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
1701  OuterExitL = NewOuterExitL;
1702  }
1703 
1704  // If the edge we *aren't* cloning in the unswitch (the continuing edge)
1705  // dominates its target, we can skip cloning the dominated region of the loop
1706  // and its exits. We compute this as a set of nodes to be skipped.
1707  SmallPtrSet<BasicBlock *, 4> SkippedLoopAndExitBlocks;
1708  if (ContinueSuccBB->getUniquePredecessor() ||
1709  llvm::all_of(predecessors(ContinueSuccBB), [&](BasicBlock *PredBB) {
1710  return PredBB == ParentBB || DT.dominates(ContinueSuccBB, PredBB);
1711  })) {
1712  visitDomSubTree(DT, ContinueSuccBB, [&](BasicBlock *BB) {
1713  SkippedLoopAndExitBlocks.insert(BB);
1714  return true;
1715  });
1716  }
1717  // Similarly, if the edge we *are* cloning in the unswitch (the unswitched
1718  // edge) dominates its target, we will end up with dead nodes in the original
1719  // loop and its exits that will need to be deleted. Here, we just retain that
1720  // the property holds and will compute the deleted set later.
1721  bool DeleteUnswitchedSucc =
1722  UnswitchedSuccBB->getUniquePredecessor() ||
1723  llvm::all_of(predecessors(UnswitchedSuccBB), [&](BasicBlock *PredBB) {
1724  return PredBB == ParentBB || DT.dominates(UnswitchedSuccBB, PredBB);
1725  });
1726 
1727  // Split the preheader, so that we know that there is a safe place to insert
1728  // the conditional branch. We will change the preheader to have a conditional
1729  // branch on LoopCond. The original preheader will become the split point
1730  // between the unswitched versions, and we will have a new preheader for the
1731  // original loop.
1732  BasicBlock *SplitBB = L.getLoopPreheader();
1733  BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI);
1734 
1735  // Keep a mapping for the cloned values.
1736  ValueToValueMapTy VMap;
1737 
1738  // Build the cloned blocks from the loop.
1739  auto *ClonedPH = buildClonedLoopBlocks(
1740  L, LoopPH, SplitBB, ExitBlocks, ParentBB, UnswitchedSuccBB,
1741  ContinueSuccBB, SkippedLoopAndExitBlocks, VMap, AC, DT, LI);
1742 
1743  // Build the cloned loop structure itself. This may be substantially
1744  // different from the original structure due to the simplified CFG. This also
1745  // handles inserting all the cloned blocks into the correct loops.
1746  SmallVector<Loop *, 4> NonChildClonedLoops;
1747  Loop *ClonedL =
1748  buildClonedLoops(L, ExitBlocks, VMap, LI, NonChildClonedLoops);
1749 
1750  // Remove the parent as a predecessor of the unswitched successor.
1751  UnswitchedSuccBB->removePredecessor(ParentBB, /*DontDeleteUselessPHIs*/ true);
1752 
1753  // Now splice the branch from the original loop and use it to select between
1754  // the two loops.
1755  SplitBB->getTerminator()->eraseFromParent();
1756  SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), BI);
1757  BI.setSuccessor(ClonedSucc, ClonedPH);
1758  BI.setSuccessor(1 - ClonedSucc, LoopPH);
1759 
1760  // Create a new unconditional branch to the continuing block (as opposed to
1761  // the one cloned).
1762  BranchInst::Create(ContinueSuccBB, ParentBB);
1763 
1764  // Delete anything that was made dead in the original loop due to
1765  // unswitching.
1766  if (DeleteUnswitchedSucc)
1767  deleteDeadBlocksFromLoop(L, UnswitchedSuccBB, ExitBlocks, DT, LI);
1768 
1769  SmallVector<Loop *, 4> HoistedLoops;
1770  bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
1771 
1772  // This will have completely invalidated the dominator tree. We can't easily
1773  // bound how much is invalid because in some cases we will refine the
1774  // predecessor set of exit blocks of the loop which can move large unrelated
1775  // regions of code into a new subtree.
1776  //
1777  // FIXME: Eventually, we should use an incremental update utility that
1778  // leverages the existing information in the dominator tree (and potentially
1779  // the nature of the change) to more efficiently update things.
1780  DT.recalculate(*SplitBB->getParent());
1781 
1782  // We can change which blocks are exit blocks of all the cloned sibling
1783  // loops, the current loop, and any parent loops which shared exit blocks
1784  // with the current loop. As a consequence, we need to re-form LCSSA for
1785  // them. But we shouldn't need to re-form LCSSA for any child loops.
1786  // FIXME: This could be made more efficient by tracking which exit blocks are
1787  // new, and focusing on them, but that isn't likely to be necessary.
1788  //
1789  // In order to reasonably rebuild LCSSA we need to walk inside-out across the
1790  // loop nest and update every loop that could have had its exits changed. We
1791  // also need to cover any intervening loops. We add all of these loops to
1792  // a list and sort them by loop depth to achieve this without updating
1793  // unnecessary loops.
1794  auto UpdateLCSSA = [&](Loop &UpdateL) {
1795 #ifndef NDEBUG
1796  for (Loop *ChildL : UpdateL)
1797  assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
1798  "Perturbed a child loop's LCSSA form!");
1799 #endif
1800  formLCSSA(UpdateL, DT, &LI, nullptr);
1801  };
1802 
1803  // For non-child cloned loops and hoisted loops, we just need to update LCSSA
1804  // and we can do it in any order as they don't nest relative to each other.
1805  for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
1806  UpdateLCSSA(*UpdatedL);
1807 
1808  // If the original loop had exit blocks, walk up through the outer most loop
1809  // of those exit blocks to update LCSSA and form updated dedicated exits.
1810  if (OuterExitL != &L) {
1811  SmallVector<Loop *, 4> OuterLoops;
1812  // We start with the cloned loop and the current loop if they are loops and
1813  // move toward OuterExitL. Also, if either the cloned loop or the current
1814  // loop have become top level loops we need to walk all the way out.
1815  if (ClonedL) {
1816  OuterLoops.push_back(ClonedL);
1817  if (!ClonedL->getParentLoop())
1818  OuterExitL = nullptr;
1819  }
1820  if (IsStillLoop) {
1821  OuterLoops.push_back(&L);
1822  if (!L.getParentLoop())
1823  OuterExitL = nullptr;
1824  }
1825  // Grab all of the enclosing loops now.
1826  for (Loop *OuterL = ParentL; OuterL != OuterExitL;
1827  OuterL = OuterL->getParentLoop())
1828  OuterLoops.push_back(OuterL);
1829 
1830  // Finally, update our list of outer loops. This is nicely ordered to work
1831  // inside-out.
1832  for (Loop *OuterL : OuterLoops) {
1833  // First build LCSSA for this loop so that we can preserve it when
1834  // forming dedicated exits. We don't want to perturb some other loop's
1835  // LCSSA while doing that CFG edit.
1836  UpdateLCSSA(*OuterL);
1837 
1838  // For loops reached by this loop's original exit blocks we may
1839  // introduced new, non-dedicated exits. At least try to re-form dedicated
1840  // exits for these loops. This may fail if they couldn't have dedicated
1841  // exits to start with.
1842  formDedicatedExitBlocks(OuterL, &DT, &LI, /*PreserveLCSSA*/ true);
1843  }
1844  }
1845 
1846 #ifndef NDEBUG
1847  // Verify the entire loop structure to catch any incorrect updates before we
1848  // progress in the pass pipeline.
1849  LI.verify(DT);
1850 #endif
1851 
1852  // Now that we've unswitched something, make callbacks to report the changes.
1853  // For that we need to merge together the updated loops and the cloned loops
1854  // and check whether the original loop survived.
1855  SmallVector<Loop *, 4> SibLoops;
1856  for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
1857  if (UpdatedL->getParentLoop() == ParentL)
1858  SibLoops.push_back(UpdatedL);
1859  NonTrivialUnswitchCB(IsStillLoop, SibLoops);
1860 
1861  ++NumBranches;
1862  return true;
1863 }
1864 
1865 /// Recursively compute the cost of a dominator subtree based on the per-block
1866 /// cost map provided.
1867 ///
1868 /// The recursive computation is memozied into the provided DT-indexed cost map
1869 /// to allow querying it for most nodes in the domtree without it becoming
1870 /// quadratic.
1871 static int
1873  const SmallDenseMap<BasicBlock *, int, 4> &BBCostMap,
1875  // Don't accumulate cost (or recurse through) blocks not in our block cost
1876  // map and thus not part of the duplication cost being considered.
1877  auto BBCostIt = BBCostMap.find(N.getBlock());
1878  if (BBCostIt == BBCostMap.end())
1879  return 0;
1880 
1881  // Lookup this node to see if we already computed its cost.
1882  auto DTCostIt = DTCostMap.find(&N);
1883  if (DTCostIt != DTCostMap.end())
1884  return DTCostIt->second;
1885 
1886  // If not, we have to compute it. We can't use insert above and update
1887  // because computing the cost may insert more things into the map.
1888  int Cost = std::accumulate(
1889  N.begin(), N.end(), BBCostIt->second, [&](int Sum, DomTreeNode *ChildN) {
1890  return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
1891  });
1892  bool Inserted = DTCostMap.insert({&N, Cost}).second;
1893  (void)Inserted;
1894  assert(Inserted && "Should not insert a node while visiting children!");
1895  return Cost;
1896 }
1897 
1898 /// Unswitch control flow predicated on loop invariant conditions.
1899 ///
1900 /// This first hoists all branches or switches which are trivial (IE, do not
1901 /// require duplicating any part of the loop) out of the loop body. It then
1902 /// looks at other loop invariant control flows and tries to unswitch those as
1903 /// well by cloning the loop if the result is small enough.
1904 static bool
1906  TargetTransformInfo &TTI, bool NonTrivial,
1907  function_ref<void(bool, ArrayRef<Loop *>)> NonTrivialUnswitchCB) {
1908  assert(L.isRecursivelyLCSSAForm(DT, LI) &&
1909  "Loops must be in LCSSA form before unswitching.");
1910  bool Changed = false;
1911 
1912  // Must be in loop simplified form: we need a preheader and dedicated exits.
1913  if (!L.isLoopSimplifyForm())
1914  return false;
1915 
1916  // Try trivial unswitch first before loop over other basic blocks in the loop.
1917  Changed |= unswitchAllTrivialConditions(L, DT, LI);
1918 
1919  // If we're not doing non-trivial unswitching, we're done. We both accept
1920  // a parameter but also check a local flag that can be used for testing
1921  // a debugging.
1922  if (!NonTrivial && !EnableNonTrivialUnswitch)
1923  return Changed;
1924 
1925  // Collect all remaining invariant branch conditions within this loop (as
1926  // opposed to an inner loop which would be handled when visiting that inner
1927  // loop).
1928  SmallVector<TerminatorInst *, 4> UnswitchCandidates;
1929  for (auto *BB : L.blocks())
1930  if (LI.getLoopFor(BB) == &L)
1931  if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator()))
1932  if (BI->isConditional() && L.isLoopInvariant(BI->getCondition()) &&
1933  BI->getSuccessor(0) != BI->getSuccessor(1))
1934  UnswitchCandidates.push_back(BI);
1935 
1936  // If we didn't find any candidates, we're done.
1937  if (UnswitchCandidates.empty())
1938  return Changed;
1939 
1940  DEBUG(dbgs() << "Considering " << UnswitchCandidates.size()
1941  << " non-trivial loop invariant conditions for unswitching.\n");
1942 
1943  // Given that unswitching these terminators will require duplicating parts of
1944  // the loop, so we need to be able to model that cost. Compute the ephemeral
1945  // values and set up a data structure to hold per-BB costs. We cache each
1946  // block's cost so that we don't recompute this when considering different
1947  // subsets of the loop for duplication during unswitching.
1949  CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
1951 
1952  // Compute the cost of each block, as well as the total loop cost. Also, bail
1953  // out if we see instructions which are incompatible with loop unswitching
1954  // (convergent, noduplicate, or cross-basic-block tokens).
1955  // FIXME: We might be able to safely handle some of these in non-duplicated
1956  // regions.
1957  int LoopCost = 0;
1958  for (auto *BB : L.blocks()) {
1959  int Cost = 0;
1960  for (auto &I : *BB) {
1961  if (EphValues.count(&I))
1962  continue;
1963 
1964  if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
1965  return Changed;
1966  if (auto CS = CallSite(&I))
1967  if (CS.isConvergent() || CS.cannotDuplicate())
1968  return Changed;
1969 
1970  Cost += TTI.getUserCost(&I);
1971  }
1972  assert(Cost >= 0 && "Must not have negative costs!");
1973  LoopCost += Cost;
1974  assert(LoopCost >= 0 && "Must not have negative loop costs!");
1975  BBCostMap[BB] = Cost;
1976  }
1977  DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
1978 
1979  // Now we find the best candidate by searching for the one with the following
1980  // properties in order:
1981  //
1982  // 1) An unswitching cost below the threshold
1983  // 2) The smallest number of duplicated unswitch candidates (to avoid
1984  // creating redundant subsequent unswitching)
1985  // 3) The smallest cost after unswitching.
1986  //
1987  // We prioritize reducing fanout of unswitch candidates provided the cost
1988  // remains below the threshold because this has a multiplicative effect.
1989  //
1990  // This requires memoizing each dominator subtree to avoid redundant work.
1991  //
1992  // FIXME: Need to actually do the number of candidates part above.
1994  // Given a terminator which might be unswitched, computes the non-duplicated
1995  // cost for that terminator.
1996  auto ComputeUnswitchedCost = [&](TerminatorInst *TI) {
1997  BasicBlock &BB = *TI->getParent();
1999 
2000  int Cost = LoopCost;
2001  for (BasicBlock *SuccBB : successors(&BB)) {
2002  // Don't count successors more than once.
2003  if (!Visited.insert(SuccBB).second)
2004  continue;
2005 
2006  // This successor's domtree will not need to be duplicated after
2007  // unswitching if the edge to the successor dominates it (and thus the
2008  // entire tree). This essentially means there is no other path into this
2009  // subtree and so it will end up live in only one clone of the loop.
2010  if (SuccBB->getUniquePredecessor() ||
2011  llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2012  return PredBB == &BB || DT.dominates(SuccBB, PredBB);
2013  })) {
2014  Cost -= computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
2015  assert(Cost >= 0 &&
2016  "Non-duplicated cost should never exceed total loop cost!");
2017  }
2018  }
2019 
2020  // Now scale the cost by the number of unique successors minus one. We
2021  // subtract one because there is already at least one copy of the entire
2022  // loop. This is computing the new cost of unswitching a condition.
2023  assert(Visited.size() > 1 &&
2024  "Cannot unswitch a condition without multiple distinct successors!");
2025  return Cost * (Visited.size() - 1);
2026  };
2027  TerminatorInst *BestUnswitchTI = nullptr;
2028  int BestUnswitchCost;
2029  for (TerminatorInst *CandidateTI : UnswitchCandidates) {
2030  int CandidateCost = ComputeUnswitchedCost(CandidateTI);
2031  DEBUG(dbgs() << " Computed cost of " << CandidateCost
2032  << " for unswitch candidate: " << *CandidateTI << "\n");
2033  if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
2034  BestUnswitchTI = CandidateTI;
2035  BestUnswitchCost = CandidateCost;
2036  }
2037  }
2038 
2039  if (BestUnswitchCost < UnswitchThreshold) {
2040  DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = "
2041  << BestUnswitchCost << ") branch: " << *BestUnswitchTI
2042  << "\n");
2043  Changed |= unswitchInvariantBranch(L, cast<BranchInst>(*BestUnswitchTI), DT,
2044  LI, AC, NonTrivialUnswitchCB);
2045  } else {
2046  DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << BestUnswitchCost
2047  << "\n");
2048  }
2049 
2050  return Changed;
2051 }
2052 
2055  LPMUpdater &U) {
2056  Function &F = *L.getHeader()->getParent();
2057  (void)F;
2058 
2059  DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n");
2060 
2061  // Save the current loop name in a variable so that we can report it even
2062  // after it has been deleted.
2063  std::string LoopName = L.getName();
2064 
2065  auto NonTrivialUnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
2066  ArrayRef<Loop *> NewLoops) {
2067  // If we did a non-trivial unswitch, we have added new (cloned) loops.
2068  U.addSiblingLoops(NewLoops);
2069 
2070  // If the current loop remains valid, we should revisit it to catch any
2071  // other unswitch opportunities. Otherwise, we need to mark it as deleted.
2072  if (CurrentLoopValid)
2073  U.revisitCurrentLoop();
2074  else
2075  U.markLoopAsDeleted(L, LoopName);
2076  };
2077 
2078  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial,
2079  NonTrivialUnswitchCB))
2080  return PreservedAnalyses::all();
2081 
2082 #ifndef NDEBUG
2083  // Historically this pass has had issues with the dominator tree so verify it
2084  // in asserts builds.
2085  AR.DT.verifyDomTree();
2086 #endif
2088 }
2089 
2090 namespace {
2091 
2092 class SimpleLoopUnswitchLegacyPass : public LoopPass {
2093  bool NonTrivial;
2094 
2095 public:
2096  static char ID; // Pass ID, replacement for typeid
2097 
2098  explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
2099  : LoopPass(ID), NonTrivial(NonTrivial) {
2102  }
2103 
2104  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
2105 
2106  void getAnalysisUsage(AnalysisUsage &AU) const override {
2110  }
2111 };
2112 
2113 } // end anonymous namespace
2114 
2115 bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
2116  if (skipLoop(L))
2117  return false;
2118 
2119  Function &F = *L->getHeader()->getParent();
2120 
2121  DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n");
2122 
2123  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2124  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2125  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2126  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2127 
2128  auto NonTrivialUnswitchCB = [&L, &LPM](bool CurrentLoopValid,
2129  ArrayRef<Loop *> NewLoops) {
2130  // If we did a non-trivial unswitch, we have added new (cloned) loops.
2131  for (auto *NewL : NewLoops)
2132  LPM.addLoop(*NewL);
2133 
2134  // If the current loop remains valid, re-add it to the queue. This is
2135  // a little wasteful as we'll finish processing the current loop as well,
2136  // but it is the best we can do in the old PM.
2137  if (CurrentLoopValid)
2138  LPM.addLoop(*L);
2139  else
2140  LPM.markLoopAsDeleted(*L);
2141  };
2142 
2143  bool Changed =
2144  unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, NonTrivialUnswitchCB);
2145 
2146  // If anything was unswitched, also clear any cached information about this
2147  // loop.
2148  LPM.deleteSimpleAnalysisLoop(L);
2149 
2150 #ifndef NDEBUG
2151  // Historically this pass has had issues with the dominator tree so verify it
2152  // in asserts builds.
2153  DT.verifyDomTree();
2154 #endif
2155  return Changed;
2156 }
2157 
2159 INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
2160  "Simple unswitch loops", false, false)
2166 INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
2167  "Simple unswitch loops", false, false)
2168 
2170  return new SimpleLoopUnswitchLegacyPass(NonTrivial);
2171 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:72
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Definition: LoopInfo.h:776
unsigned getNumCases() const
Return the number of &#39;cases&#39; in this switch instruction, excluding the default case.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
use_iterator use_end()
Definition: Value.h:352
This routine provides some synthesis utilities to produce sequences of values.
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI)
Unswitch a trivial branch if the condition is loop invariant.
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:548
CaseIt case_end()
Returns a read/write iterator that points one past the last in the SwitchInst.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
Definition: LoopInfo.h:170
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
simple loop unswitch
bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:182
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
Definition: LoopInfo.h:685
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:92
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
Definition: LoopInfo.h:352
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:320
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock *> ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallPtrSetImpl< BasicBlock *> &SkippedLoopAndExitBlocks, ValueToValueMapTy &VMap, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI)
Build the cloned blocks for an unswitched copy of the given loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
An immutable pass that tracks lazily created AssumptionCache objects.
Value * getCondition() const
CaseIt case_begin()
Returns a read/write iterator that points to the first case in the SwitchInst.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:89
A cache of .assume calls within a function.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:738
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
unsigned second
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:814
std::vector< LoopT * > & getSubLoopsVector()
Definition: LoopInfo.h:135
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
static int computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, int, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, int, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided...
Value * getCondition() const
This defines the Use class.
static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
void reserve(size_type N)
Definition: SmallVector.h:378
void eraseNode(NodeT *BB)
eraseNode - Removes a node from the dominator tree.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:624
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:678
SmallPtrSetImpl< const BlockT * > & getBlocksSet()
Return a direct, mutable handle to the blocks set so that we can mutate it efficiently.
Definition: LoopInfo.h:176
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
void deleteSimpleAnalysisLoop(Loop *L)
Invoke deleteAnalysisLoop hook for all passes that implement simple analysis interface.
Definition: LoopPass.cpp:118
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
BlockT * getHeader() const
Definition: LoopInfo.h:100
void getExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:63
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI)
This routine scans the loop to find a branch or switch which occurs before any side effects occur...
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:233
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:183
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:230
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition: LoopInfo.h:729
This header provides classes for managing per-loop analyses.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:439
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:167
static void replaceLoopUsesWithConstant(Loop &L, Value &LIC, Constant &Replacement)
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:301
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:142
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:73
void getUniqueExitBlocks(SmallVectorImpl< BasicBlock *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfo.cpp:393
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:211
NodeT * getBlock() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
void verifyDomTree() const
Verify the correctness of the domtree by re-computing it.
Definition: Dominators.cpp:309
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:54
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
Conditional or Unconditional Branch instruction.
static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, DominatorTree &DT)
Update the dominator tree after unswitching a particular former exit block.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
This function has undefined behavior.
DomTreeNodeBase * getIDom() const
This is an important base class in LLVM.
Definition: Constant.h:42
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
This file contains the declarations for the subclasses of Constant, which represent the different fla...
INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", "Simple unswitch loops", false, false) INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass
const Instruction & front() const
Definition: BasicBlock.h:264
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:536
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:113
BasicBlock * getDefaultDest() const
Represent the analysis usage information of a pass.
void splice(iterator where, iplist_impl &L2)
Definition: ilist.h:342
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:821
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:116
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:101
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:107
CaseIt removeCase(CaseIt I)
This method removes the specified case and its successor from the switch instruction.
static bool updateIDomWithKnownCommonDominator(BasicBlock *BB, BasicBlock *KnownDominatingBB, DominatorTree &DT)
Update the IDom for a basic block whose predecessor set has changed.
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&... args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest...
Definition: STLExtras.h:489
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:835
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:56
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
static void deleteDeadBlocksFromLoop(Loop &L, BasicBlock *DeadSubtreeRoot, SmallVectorImpl< BasicBlock *> &ExitBlocks, DominatorTree &DT, LoopInfo &LI)
static void appendDomFrontier(DomTreeNode *Node, SmallSetVector< BasicBlock *, 4 > &Worklist, SmallVectorImpl< DomTreeNode *> &DomNodes, SmallPtrSetImpl< BasicBlock *> &DomSet)
bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition: LCSSA.cpp:278
size_type size() const
Definition: SmallPtrSet.h:93
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:317
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
static Loop * buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock *> ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop *> &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:378
iterator end()
Definition: BasicBlock.h:254
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:243
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
Definition: LoopInfo.h:699
LoopT * AllocateLoop(ArgsTy &&... Args)
Definition: LoopInfo.h:642
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:383
simple loop Simple unswitch loops
Pass * createSimpleLoopUnswitchLegacyPass(bool NonTrivial=false)
Create the legacy pass object for the simple loop unswitcher.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:110
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:142
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:541
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2&#39;s erase_if which is equivalent t...
Definition: STLExtras.h:931
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
CloneBasicBlock - Return a copy of the specified basic block, but without embedding the block into a ...
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM...
Definition: ValueMapper.h:251
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:480
If this flag is set, the remapper ignores missing function-local entries (Argument, Instruction, BasicBlock) that are not in the value map.
Definition: ValueMapper.h:91
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
use_iterator use_begin()
Definition: Value.h:344
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:163
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:191
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
void registerAssumption(CallInst *CI)
Add an .assume intrinsic to this function&#39;s cache.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:311
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
StringRef getName() const
Definition: LoopInfo.h:577
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, TargetTransformInfo &TTI, bool NonTrivial, function_ref< void(bool, ArrayRef< Loop *>)> NonTrivialUnswitchCB)
Unswitch control flow predicated on loop invariant conditions.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink &#39;this&#39; from the containing function and delete it.
Definition: BasicBlock.cpp:97
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
void addLoop(Loop &L)
Definition: LoopPass.cpp:76
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:1221
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:710
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:308
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI)
Unswitch a trivial switch if the condition is loop invariant.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:141
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:181
bool empty() const
Definition: LoopInfo.h:146
Multiway switch.
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: ValueMap.h:154
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(BasicBlock *BB)
Definition: CFG.h:143
void setDefaultDest(BasicBlock *DefaultCase)
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:1139
#define DEBUG(X)
Definition: Debug.h:118
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Split the edge connecting specified block.
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:958
This file defines a set of templates that efficiently compute a dominator tree over a generic graph...
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:260
void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, const Comparator &Comp=Comparator())
Definition: Parallel.h:199
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
static bool unswitchInvariantBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref< void(bool, ArrayRef< Loop *>)> NonTrivialUnswitchCB)
Take an invariant branch that has been determined to be safe and worthwhile to unswitch despite being...
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock *> ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop *> &HoistedLoops)
Rebuild a loop after unswitching removes some subset of blocks and edges.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:103
void dropAllReferences()
Cause all subinstructions to "let go" of all the references that said subinstructions are maintaining...
Definition: BasicBlock.cpp:210
const BasicBlock * getParent() const
Definition: Instruction.h:67
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.