LLVM  7.0.0svn
SimpleLoopUnswitch.cpp
Go to the documentation of this file.
1 //===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
11 #include "llvm/ADT/DenseMap.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/Sequence.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/ADT/SmallPtrSet.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/Twine.h"
20 #include "llvm/Analysis/CFG.h"
23 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/LoopPass.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/Constant.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/InstrTypes.h"
32 #include "llvm/IR/Instruction.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/IntrinsicInst.h"
35 #include "llvm/IR/Use.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Pass.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/Debug.h"
48 #include <algorithm>
49 #include <cassert>
50 #include <iterator>
51 #include <numeric>
52 #include <utility>
53 
54 #define DEBUG_TYPE "simple-loop-unswitch"
55 
56 using namespace llvm;
57 
58 STATISTIC(NumBranches, "Number of branches unswitched");
59 STATISTIC(NumSwitches, "Number of switches unswitched");
60 STATISTIC(NumTrivial, "Number of unswitches that are trivial");
61 
63  "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
64  cl::desc("Forcibly enables non-trivial loop unswitching rather than "
65  "following the configuration passed into the pass."));
66 
67 static cl::opt<int>
68  UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
69  cl::desc("The cost threshold for unswitching a loop."));
70 
71 static void replaceLoopUsesWithConstant(Loop &L, Value &LIC,
72  Constant &Replacement) {
73  assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
74 
75  // Replace uses of LIC in the loop with the given constant.
76  for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) {
77  // Grab the use and walk past it so we can clobber it in the use list.
78  Use *U = &*UI++;
79  Instruction *UserI = dyn_cast<Instruction>(U->getUser());
80  if (!UserI || !L.contains(UserI))
81  continue;
82 
83  // Replace this use within the loop body.
84  *U = &Replacement;
85  }
86 }
87 
88 /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
89 /// incoming values along this edge.
90 static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
91  BasicBlock &ExitBB) {
92  for (Instruction &I : ExitBB) {
93  auto *PN = dyn_cast<PHINode>(&I);
94  if (!PN)
95  // No more PHIs to check.
96  return true;
97 
98  // If the incoming value for this edge isn't loop invariant the unswitch
99  // won't be trivial.
100  if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
101  return false;
102  }
103  llvm_unreachable("Basic blocks should never be empty!");
104 }
105 
106 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
107 ///
108 /// Requires that the loop exit and unswitched basic block are the same, and
109 /// that the exiting block was a unique predecessor of that block. Rewrites the
110 /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
111 /// PHI nodes from the old preheader that now contains the unswitched
112 /// terminator.
114  BasicBlock &OldExitingBB,
115  BasicBlock &OldPH) {
116  for (PHINode &PN : UnswitchedBB.phis()) {
117  // When the loop exit is directly unswitched we just need to update the
118  // incoming basic block. We loop to handle weird cases with repeated
119  // incoming blocks, but expect to typically only have one operand here.
120  for (auto i : seq<int>(0, PN.getNumOperands())) {
121  assert(PN.getIncomingBlock(i) == &OldExitingBB &&
122  "Found incoming block different from unique predecessor!");
123  PN.setIncomingBlock(i, &OldPH);
124  }
125  }
126 }
127 
128 /// Rewrite the PHI nodes in the loop exit basic block and the split off
129 /// unswitched block.
130 ///
131 /// Because the exit block remains an exit from the loop, this rewrites the
132 /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
133 /// nodes into the unswitched basic block to select between the value in the
134 /// old preheader and the loop exit.
136  BasicBlock &UnswitchedBB,
137  BasicBlock &OldExitingBB,
138  BasicBlock &OldPH) {
139  assert(&ExitBB != &UnswitchedBB &&
140  "Must have different loop exit and unswitched blocks!");
141  Instruction *InsertPt = &*UnswitchedBB.begin();
142  for (PHINode &PN : ExitBB.phis()) {
143  auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
144  PN.getName() + ".split", InsertPt);
145 
146  // Walk backwards over the old PHI node's inputs to minimize the cost of
147  // removing each one. We have to do this weird loop manually so that we
148  // create the same number of new incoming edges in the new PHI as we expect
149  // each case-based edge to be included in the unswitched switch in some
150  // cases.
151  // FIXME: This is really, really gross. It would be much cleaner if LLVM
152  // allowed us to create a single entry for a predecessor block without
153  // having separate entries for each "edge" even though these edges are
154  // required to produce identical results.
155  for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
156  if (PN.getIncomingBlock(i) != &OldExitingBB)
157  continue;
158 
159  Value *Incoming = PN.removeIncomingValue(i);
160  NewPN->addIncoming(Incoming, &OldPH);
161  }
162 
163  // Now replace the old PHI with the new one and wire the old one in as an
164  // input to the new one.
165  PN.replaceAllUsesWith(NewPN);
166  NewPN->addIncoming(&PN, &ExitBB);
167  }
168 }
169 
170 /// Unswitch a trivial branch if the condition is loop invariant.
171 ///
172 /// This routine should only be called when loop code leading to the branch has
173 /// been validated as trivial (no side effects). This routine checks if the
174 /// condition is invariant and one of the successors is a loop exit. This
175 /// allows us to unswitch without duplicating the loop, making it trivial.
176 ///
177 /// If this routine fails to unswitch the branch it returns false.
178 ///
179 /// If the branch can be unswitched, this routine splits the preheader and
180 /// hoists the branch above that split. Preserves loop simplified form
181 /// (splitting the exit block as necessary). It simplifies the branch within
182 /// the loop to an unconditional branch but doesn't remove it entirely. Further
183 /// cleanup can be done with some simplify-cfg like pass.
185  LoopInfo &LI) {
186  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
187  LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
188 
189  Value *LoopCond = BI.getCondition();
190 
191  // Need a trivial loop condition to unswitch.
192  if (!L.isLoopInvariant(LoopCond))
193  return false;
194 
195  // Check to see if a successor of the branch is guaranteed to
196  // exit through a unique exit block without having any
197  // side-effects. If so, determine the value of Cond that causes
198  // it to do this.
200  ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext());
201  int LoopExitSuccIdx = 0;
202  auto *LoopExitBB = BI.getSuccessor(0);
203  if (L.contains(LoopExitBB)) {
204  std::swap(CondVal, Replacement);
205  LoopExitSuccIdx = 1;
206  LoopExitBB = BI.getSuccessor(1);
207  if (L.contains(LoopExitBB))
208  return false;
209  }
210  auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
211  auto *ParentBB = BI.getParent();
212  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB))
213  return false;
214 
215  LLVM_DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal
216  << " == " << LoopCond << "\n");
217 
218  // Split the preheader, so that we know that there is a safe place to insert
219  // the conditional branch. We will change the preheader to have a conditional
220  // branch on LoopCond.
221  BasicBlock *OldPH = L.getLoopPreheader();
222  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
223 
224  // Now that we have a place to insert the conditional branch, create a place
225  // to branch to: this is the exit block out of the loop that we are
226  // unswitching. We need to split this if there are other loop predecessors.
227  // Because the loop is in simplified form, *any* other predecessor is enough.
228  BasicBlock *UnswitchedBB;
229  if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) {
230  (void)PredBB;
231  assert(PredBB == BI.getParent() &&
232  "A branch's parent isn't a predecessor!");
233  UnswitchedBB = LoopExitBB;
234  } else {
235  UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI);
236  }
237 
238  // Now splice the branch to gate reaching the new preheader and re-point its
239  // successors.
240  OldPH->getInstList().splice(std::prev(OldPH->end()),
241  BI.getParent()->getInstList(), BI);
242  OldPH->getTerminator()->eraseFromParent();
243  BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
244  BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
245 
246  // Create a new unconditional branch that will continue the loop as a new
247  // terminator.
248  BranchInst::Create(ContinueBB, ParentBB);
249 
250  // Rewrite the relevant PHI nodes.
251  if (UnswitchedBB == LoopExitBB)
252  rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
253  else
254  rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
255  *ParentBB, *OldPH);
256 
257  // Now we need to update the dominator tree.
258  DT.applyUpdates(
259  {{DT.Delete, ParentBB, UnswitchedBB}, {DT.Insert, OldPH, UnswitchedBB}});
260 
261  // Since this is an i1 condition we can also trivially replace uses of it
262  // within the loop with a constant.
263  replaceLoopUsesWithConstant(L, *LoopCond, *Replacement);
264 
265  ++NumTrivial;
266  ++NumBranches;
267  return true;
268 }
269 
270 /// Unswitch a trivial switch if the condition is loop invariant.
271 ///
272 /// This routine should only be called when loop code leading to the switch has
273 /// been validated as trivial (no side effects). This routine checks if the
274 /// condition is invariant and that at least one of the successors is a loop
275 /// exit. This allows us to unswitch without duplicating the loop, making it
276 /// trivial.
277 ///
278 /// If this routine fails to unswitch the switch it returns false.
279 ///
280 /// If the switch can be unswitched, this routine splits the preheader and
281 /// copies the switch above that split. If the default case is one of the
282 /// exiting cases, it copies the non-exiting cases and points them at the new
283 /// preheader. If the default case is not exiting, it copies the exiting cases
284 /// and points the default at the preheader. It preserves loop simplified form
285 /// (splitting the exit blocks as necessary). It simplifies the switch within
286 /// the loop by removing now-dead cases. If the default case is one of those
287 /// unswitched, it replaces its destination with a new basic block containing
288 /// only unreachable. Such basic blocks, while technically loop exits, are not
289 /// considered for unswitching so this is a stable transform and the same
290 /// switch will not be revisited. If after unswitching there is only a single
291 /// in-loop successor, the switch is further simplified to an unconditional
292 /// branch. Still more cleanup can be done with some simplify-cfg like pass.
294  LoopInfo &LI) {
295  LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
296  Value *LoopCond = SI.getCondition();
297 
298  // If this isn't switching on an invariant condition, we can't unswitch it.
299  if (!L.isLoopInvariant(LoopCond))
300  return false;
301 
302  auto *ParentBB = SI.getParent();
303 
304  SmallVector<int, 4> ExitCaseIndices;
305  for (auto Case : SI.cases()) {
306  auto *SuccBB = Case.getCaseSuccessor();
307  if (!L.contains(SuccBB) &&
308  areLoopExitPHIsLoopInvariant(L, *ParentBB, *SuccBB))
309  ExitCaseIndices.push_back(Case.getCaseIndex());
310  }
311  BasicBlock *DefaultExitBB = nullptr;
312  if (!L.contains(SI.getDefaultDest()) &&
313  areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) &&
314  !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
315  DefaultExitBB = SI.getDefaultDest();
316  else if (ExitCaseIndices.empty())
317  return false;
318 
319  LLVM_DEBUG(dbgs() << " unswitching trivial cases...\n");
320 
322  ExitCases.reserve(ExitCaseIndices.size());
323  // We walk the case indices backwards so that we remove the last case first
324  // and don't disrupt the earlier indices.
325  for (unsigned Index : reverse(ExitCaseIndices)) {
326  auto CaseI = SI.case_begin() + Index;
327  // Save the value of this case.
328  ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
329  // Delete the unswitched cases.
330  SI.removeCase(CaseI);
331  }
332 
333  // Check if after this all of the remaining cases point at the same
334  // successor.
335  BasicBlock *CommonSuccBB = nullptr;
336  if (SI.getNumCases() > 0 &&
337  std::all_of(std::next(SI.case_begin()), SI.case_end(),
338  [&SI](const SwitchInst::CaseHandle &Case) {
339  return Case.getCaseSuccessor() ==
340  SI.case_begin()->getCaseSuccessor();
341  }))
342  CommonSuccBB = SI.case_begin()->getCaseSuccessor();
343 
344  if (DefaultExitBB) {
345  // We can't remove the default edge so replace it with an edge to either
346  // the single common remaining successor (if we have one) or an unreachable
347  // block.
348  if (CommonSuccBB) {
349  SI.setDefaultDest(CommonSuccBB);
350  } else {
351  BasicBlock *UnreachableBB = BasicBlock::Create(
352  ParentBB->getContext(),
353  Twine(ParentBB->getName()) + ".unreachable_default",
354  ParentBB->getParent());
355  new UnreachableInst(ParentBB->getContext(), UnreachableBB);
356  SI.setDefaultDest(UnreachableBB);
357  DT.addNewBlock(UnreachableBB, ParentBB);
358  }
359  } else {
360  // If we're not unswitching the default, we need it to match any cases to
361  // have a common successor or if we have no cases it is the common
362  // successor.
363  if (SI.getNumCases() == 0)
364  CommonSuccBB = SI.getDefaultDest();
365  else if (SI.getDefaultDest() != CommonSuccBB)
366  CommonSuccBB = nullptr;
367  }
368 
369  // Split the preheader, so that we know that there is a safe place to insert
370  // the switch.
371  BasicBlock *OldPH = L.getLoopPreheader();
372  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
373  OldPH->getTerminator()->eraseFromParent();
374 
375  // Now add the unswitched switch.
376  auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
377 
378  // Rewrite the IR for the unswitched basic blocks. This requires two steps.
379  // First, we split any exit blocks with remaining in-loop predecessors. Then
380  // we update the PHIs in one of two ways depending on if there was a split.
381  // We walk in reverse so that we split in the same order as the cases
382  // appeared. This is purely for convenience of reading the resulting IR, but
383  // it doesn't cost anything really.
384  SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
386  // Handle the default exit if necessary.
387  // FIXME: It'd be great if we could merge this with the loop below but LLVM's
388  // ranges aren't quite powerful enough yet.
389  if (DefaultExitBB) {
390  if (pred_empty(DefaultExitBB)) {
391  UnswitchedExitBBs.insert(DefaultExitBB);
392  rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
393  } else {
394  auto *SplitBB =
395  SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
396  rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
397  *ParentBB, *OldPH);
398  DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
399  }
400  }
401  // Note that we must use a reference in the for loop so that we update the
402  // container.
403  for (auto &CasePair : reverse(ExitCases)) {
404  // Grab a reference to the exit block in the pair so that we can update it.
405  BasicBlock *ExitBB = CasePair.second;
406 
407  // If this case is the last edge into the exit block, we can simply reuse it
408  // as it will no longer be a loop exit. No mapping necessary.
409  if (pred_empty(ExitBB)) {
410  // Only rewrite once.
411  if (UnswitchedExitBBs.insert(ExitBB).second)
412  rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
413  continue;
414  }
415 
416  // Otherwise we need to split the exit block so that we retain an exit
417  // block from the loop and a target for the unswitched condition.
418  BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
419  if (!SplitExitBB) {
420  // If this is the first time we see this, do the split and remember it.
421  SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
422  rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
423  *ParentBB, *OldPH);
424  }
425  // Update the case pair to point to the split block.
426  CasePair.second = SplitExitBB;
427  }
428 
429  // Now add the unswitched cases. We do this in reverse order as we built them
430  // in reverse order.
431  for (auto CasePair : reverse(ExitCases)) {
432  ConstantInt *CaseVal = CasePair.first;
433  BasicBlock *UnswitchedBB = CasePair.second;
434 
435  NewSI->addCase(CaseVal, UnswitchedBB);
436  }
437 
438  // If the default was unswitched, re-point it and add explicit cases for
439  // entering the loop.
440  if (DefaultExitBB) {
441  NewSI->setDefaultDest(DefaultExitBB);
442 
443  // We removed all the exit cases, so we just copy the cases to the
444  // unswitched switch.
445  for (auto Case : SI.cases())
446  NewSI->addCase(Case.getCaseValue(), NewPH);
447  }
448 
449  // If we ended up with a common successor for every path through the switch
450  // after unswitching, rewrite it to an unconditional branch to make it easy
451  // to recognize. Otherwise we potentially have to recognize the default case
452  // pointing at unreachable and other complexity.
453  if (CommonSuccBB) {
454  BasicBlock *BB = SI.getParent();
455  SI.eraseFromParent();
456  BranchInst::Create(CommonSuccBB, BB);
457  }
458 
459  // Walk the unswitched exit blocks and the unswitched split blocks and update
460  // the dominator tree based on the CFG edits. While we are walking unordered
461  // containers here, the API for applyUpdates takes an unordered list of
462  // updates and requires them to not contain duplicates.
464  for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
465  DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
466  DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
467  }
468  for (auto SplitUnswitchedPair : SplitExitBBMap) {
469  auto *UnswitchedBB = SplitUnswitchedPair.second;
470  DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedBB});
471  DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB});
472  }
473  DT.applyUpdates(DTUpdates);
474 
476  ++NumTrivial;
477  ++NumSwitches;
478  return true;
479 }
480 
481 /// This routine scans the loop to find a branch or switch which occurs before
482 /// any side effects occur. These can potentially be unswitched without
483 /// duplicating the loop. If a branch or switch is successfully unswitched the
484 /// scanning continues to see if subsequent branches or switches have become
485 /// trivial. Once all trivial candidates have been unswitched, this routine
486 /// returns.
487 ///
488 /// The return value indicates whether anything was unswitched (and therefore
489 /// changed).
491  LoopInfo &LI) {
492  bool Changed = false;
493 
494  // If loop header has only one reachable successor we should keep looking for
495  // trivial condition candidates in the successor as well. An alternative is
496  // to constant fold conditions and merge successors into loop header (then we
497  // only need to check header's terminator). The reason for not doing this in
498  // LoopUnswitch pass is that it could potentially break LoopPassManager's
499  // invariants. Folding dead branches could either eliminate the current loop
500  // or make other loops unreachable. LCSSA form might also not be preserved
501  // after deleting branches. The following code keeps traversing loop header's
502  // successors until it finds the trivial condition candidate (condition that
503  // is not a constant). Since unswitching generates branches with constant
504  // conditions, this scenario could be very common in practice.
505  BasicBlock *CurrentBB = L.getHeader();
507  Visited.insert(CurrentBB);
508  do {
509  // Check if there are any side-effecting instructions (e.g. stores, calls,
510  // volatile loads) in the part of the loop that the code *would* execute
511  // without unswitching.
512  if (llvm::any_of(*CurrentBB,
513  [](Instruction &I) { return I.mayHaveSideEffects(); }))
514  return Changed;
515 
516  TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
517 
518  if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
519  // Don't bother trying to unswitch past a switch with a constant
520  // condition. This should be removed prior to running this pass by
521  // simplify-cfg.
522  if (isa<Constant>(SI->getCondition()))
523  return Changed;
524 
525  if (!unswitchTrivialSwitch(L, *SI, DT, LI))
526  // Couldn't unswitch this one so we're done.
527  return Changed;
528 
529  // Mark that we managed to unswitch something.
530  Changed = true;
531 
532  // If unswitching turned the terminator into an unconditional branch then
533  // we can continue. The unswitching logic specifically works to fold any
534  // cases it can into an unconditional branch to make it easier to
535  // recognize here.
536  auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
537  if (!BI || BI->isConditional())
538  return Changed;
539 
540  CurrentBB = BI->getSuccessor(0);
541  continue;
542  }
543 
544  auto *BI = dyn_cast<BranchInst>(CurrentTerm);
545  if (!BI)
546  // We do not understand other terminator instructions.
547  return Changed;
548 
549  // Don't bother trying to unswitch past an unconditional branch or a branch
550  // with a constant value. These should be removed by simplify-cfg prior to
551  // running this pass.
552  if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
553  return Changed;
554 
555  // Found a trivial condition candidate: non-foldable conditional branch. If
556  // we fail to unswitch this, we can't do anything else that is trivial.
557  if (!unswitchTrivialBranch(L, *BI, DT, LI))
558  return Changed;
559 
560  // Mark that we managed to unswitch something.
561  Changed = true;
562 
563  // We unswitched the branch. This should always leave us with an
564  // unconditional branch that we can follow now.
565  BI = cast<BranchInst>(CurrentBB->getTerminator());
566  assert(!BI->isConditional() &&
567  "Cannot form a conditional branch by unswitching1");
568  CurrentBB = BI->getSuccessor(0);
569 
570  // When continuing, if we exit the loop or reach a previous visited block,
571  // then we can not reach any trivial condition candidates (unfoldable
572  // branch instructions or switch instructions) and no unswitch can happen.
573  } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
574 
575  return Changed;
576 }
577 
578 /// Build the cloned blocks for an unswitched copy of the given loop.
579 ///
580 /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
581 /// after the split block (`SplitBB`) that will be used to select between the
582 /// cloned and original loop.
583 ///
584 /// This routine handles cloning all of the necessary loop blocks and exit
585 /// blocks including rewriting their instructions and the relevant PHI nodes.
586 /// It skips loop and exit blocks that are not necessary based on the provided
587 /// set. It also correctly creates the unconditional branch in the cloned
588 /// unswitched parent block to only point at the unswitched successor.
589 ///
590 /// This does not handle most of the necessary updates to `LoopInfo`. Only exit
591 /// block splitting is correctly reflected in `LoopInfo`, essentially all of
592 /// the cloned blocks (and their loops) are left without full `LoopInfo`
593 /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
594 /// blocks to them but doesn't create the cloned `DominatorTree` structure and
595 /// instead the caller must recompute an accurate DT. It *does* correctly
596 /// update the `AssumptionCache` provided in `AC`.
598  Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
599  ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
600  BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
601  const SmallPtrSetImpl<BasicBlock *> &SkippedLoopAndExitBlocks,
602  ValueToValueMapTy &VMap,
604  DominatorTree &DT, LoopInfo &LI) {
606  NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
607 
608  // We will need to clone a bunch of blocks, wrap up the clone operation in
609  // a helper.
610  auto CloneBlock = [&](BasicBlock *OldBB) {
611  // Clone the basic block and insert it before the new preheader.
612  BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
613  NewBB->moveBefore(LoopPH);
614 
615  // Record this block and the mapping.
616  NewBlocks.push_back(NewBB);
617  VMap[OldBB] = NewBB;
618 
619  return NewBB;
620  };
621 
622  // First, clone the preheader.
623  auto *ClonedPH = CloneBlock(LoopPH);
624 
625  // Then clone all the loop blocks, skipping the ones that aren't necessary.
626  for (auto *LoopBB : L.blocks())
627  if (!SkippedLoopAndExitBlocks.count(LoopBB))
628  CloneBlock(LoopBB);
629 
630  // Split all the loop exit edges so that when we clone the exit blocks, if
631  // any of the exit blocks are *also* a preheader for some other loop, we
632  // don't create multiple predecessors entering the loop header.
633  for (auto *ExitBB : ExitBlocks) {
634  if (SkippedLoopAndExitBlocks.count(ExitBB))
635  continue;
636 
637  // When we are going to clone an exit, we don't need to clone all the
638  // instructions in the exit block and we want to ensure we have an easy
639  // place to merge the CFG, so split the exit first. This is always safe to
640  // do because there cannot be any non-loop predecessors of a loop exit in
641  // loop simplified form.
642  auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
643 
644  // Rearrange the names to make it easier to write test cases by having the
645  // exit block carry the suffix rather than the merge block carrying the
646  // suffix.
647  MergeBB->takeName(ExitBB);
648  ExitBB->setName(Twine(MergeBB->getName()) + ".split");
649 
650  // Now clone the original exit block.
651  auto *ClonedExitBB = CloneBlock(ExitBB);
652  assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
653  "Exit block should have been split to have one successor!");
654  assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
655  "Cloned exit block has the wrong successor!");
656 
657  // Remap any cloned instructions and create a merge phi node for them.
658  for (auto ZippedInsts : llvm::zip_first(
659  llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
660  llvm::make_range(ClonedExitBB->begin(),
661  std::prev(ClonedExitBB->end())))) {
662  Instruction &I = std::get<0>(ZippedInsts);
663  Instruction &ClonedI = std::get<1>(ZippedInsts);
664 
665  // The only instructions in the exit block should be PHI nodes and
666  // potentially a landing pad.
667  assert(
668  (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
669  "Bad instruction in exit block!");
670  // We should have a value map between the instruction and its clone.
671  assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
672 
673  auto *MergePN =
674  PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
675  &*MergeBB->getFirstInsertionPt());
676  I.replaceAllUsesWith(MergePN);
677  MergePN->addIncoming(&I, ExitBB);
678  MergePN->addIncoming(&ClonedI, ClonedExitBB);
679  }
680  }
681 
682  // Rewrite the instructions in the cloned blocks to refer to the instructions
683  // in the cloned blocks. We have to do this as a second pass so that we have
684  // everything available. Also, we have inserted new instructions which may
685  // include assume intrinsics, so we update the assumption cache while
686  // processing this.
687  for (auto *ClonedBB : NewBlocks)
688  for (Instruction &I : *ClonedBB) {
689  RemapInstruction(&I, VMap,
691  if (auto *II = dyn_cast<IntrinsicInst>(&I))
692  if (II->getIntrinsicID() == Intrinsic::assume)
693  AC.registerAssumption(II);
694  }
695 
696  // Remove the cloned parent as a predecessor of the cloned continue successor
697  // if we did in fact clone it.
698  auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
699  if (auto *ClonedContinueSuccBB =
700  cast_or_null<BasicBlock>(VMap.lookup(ContinueSuccBB)))
701  ClonedContinueSuccBB->removePredecessor(ClonedParentBB,
702  /*DontDeleteUselessPHIs*/ true);
703  // Replace the cloned branch with an unconditional branch to the cloned
704  // unswitched successor.
705  auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
706  ClonedParentBB->getTerminator()->eraseFromParent();
707  BranchInst::Create(ClonedSuccBB, ClonedParentBB);
708 
709  // Update any PHI nodes in the cloned successors of the skipped blocks to not
710  // have spurious incoming values.
711  for (auto *LoopBB : L.blocks())
712  if (SkippedLoopAndExitBlocks.count(LoopBB))
713  for (auto *SuccBB : successors(LoopBB))
714  if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
715  for (PHINode &PN : ClonedSuccBB->phis())
716  PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
717 
718  // Record the domtree updates for the new blocks.
720  for (auto *ClonedBB : NewBlocks) {
721  for (auto *SuccBB : successors(ClonedBB))
722  if (SuccSet.insert(SuccBB).second)
723  DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
724  SuccSet.clear();
725  }
726 
727  return ClonedPH;
728 }
729 
730 /// Recursively clone the specified loop and all of its children.
731 ///
732 /// The target parent loop for the clone should be provided, or can be null if
733 /// the clone is a top-level loop. While cloning, all the blocks are mapped
734 /// with the provided value map. The entire original loop must be present in
735 /// the value map. The cloned loop is returned.
736 static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
737  const ValueToValueMapTy &VMap, LoopInfo &LI) {
738  auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
739  assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
740  ClonedL.reserveBlocks(OrigL.getNumBlocks());
741  for (auto *BB : OrigL.blocks()) {
742  auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
743  ClonedL.addBlockEntry(ClonedBB);
744  if (LI.getLoopFor(BB) == &OrigL)
745  LI.changeLoopFor(ClonedBB, &ClonedL);
746  }
747  };
748 
749  // We specially handle the first loop because it may get cloned into
750  // a different parent and because we most commonly are cloning leaf loops.
751  Loop *ClonedRootL = LI.AllocateLoop();
752  if (RootParentL)
753  RootParentL->addChildLoop(ClonedRootL);
754  else
755  LI.addTopLevelLoop(ClonedRootL);
756  AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
757 
758  if (OrigRootL.empty())
759  return ClonedRootL;
760 
761  // If we have a nest, we can quickly clone the entire loop nest using an
762  // iterative approach because it is a tree. We keep the cloned parent in the
763  // data structure to avoid repeatedly querying through a map to find it.
764  SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
765  // Build up the loops to clone in reverse order as we'll clone them from the
766  // back.
767  for (Loop *ChildL : llvm::reverse(OrigRootL))
768  LoopsToClone.push_back({ClonedRootL, ChildL});
769  do {
770  Loop *ClonedParentL, *L;
771  std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
772  Loop *ClonedL = LI.AllocateLoop();
773  ClonedParentL->addChildLoop(ClonedL);
774  AddClonedBlocksToLoop(*L, *ClonedL);
775  for (Loop *ChildL : llvm::reverse(*L))
776  LoopsToClone.push_back({ClonedL, ChildL});
777  } while (!LoopsToClone.empty());
778 
779  return ClonedRootL;
780 }
781 
782 /// Build the cloned loops of an original loop from unswitching.
783 ///
784 /// Because unswitching simplifies the CFG of the loop, this isn't a trivial
785 /// operation. We need to re-verify that there even is a loop (as the backedge
786 /// may not have been cloned), and even if there are remaining backedges the
787 /// backedge set may be different. However, we know that each child loop is
788 /// undisturbed, we only need to find where to place each child loop within
789 /// either any parent loop or within a cloned version of the original loop.
790 ///
791 /// Because child loops may end up cloned outside of any cloned version of the
792 /// original loop, multiple cloned sibling loops may be created. All of them
793 /// are returned so that the newly introduced loop nest roots can be
794 /// identified.
795 static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
796  const ValueToValueMapTy &VMap, LoopInfo &LI,
797  SmallVectorImpl<Loop *> &NonChildClonedLoops) {
798  Loop *ClonedL = nullptr;
799 
800  auto *OrigPH = OrigL.getLoopPreheader();
801  auto *OrigHeader = OrigL.getHeader();
802 
803  auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
804  auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
805 
806  // We need to know the loops of the cloned exit blocks to even compute the
807  // accurate parent loop. If we only clone exits to some parent of the
808  // original parent, we want to clone into that outer loop. We also keep track
809  // of the loops that our cloned exit blocks participate in.
810  Loop *ParentL = nullptr;
811  SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
813  ClonedExitsInLoops.reserve(ExitBlocks.size());
814  for (auto *ExitBB : ExitBlocks)
815  if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
816  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
817  ExitLoopMap[ClonedExitBB] = ExitL;
818  ClonedExitsInLoops.push_back(ClonedExitBB);
819  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
820  ParentL = ExitL;
821  }
822  assert((!ParentL || ParentL == OrigL.getParentLoop() ||
823  ParentL->contains(OrigL.getParentLoop())) &&
824  "The computed parent loop should always contain (or be) the parent of "
825  "the original loop.");
826 
827  // We build the set of blocks dominated by the cloned header from the set of
828  // cloned blocks out of the original loop. While not all of these will
829  // necessarily be in the cloned loop, it is enough to establish that they
830  // aren't in unreachable cycles, etc.
831  SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
832  for (auto *BB : OrigL.blocks())
833  if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
834  ClonedLoopBlocks.insert(ClonedBB);
835 
836  // Rebuild the set of blocks that will end up in the cloned loop. We may have
837  // skipped cloning some region of this loop which can in turn skip some of
838  // the backedges so we have to rebuild the blocks in the loop based on the
839  // backedges that remain after cloning.
841  SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
842  for (auto *Pred : predecessors(ClonedHeader)) {
843  // The only possible non-loop header predecessor is the preheader because
844  // we know we cloned the loop in simplified form.
845  if (Pred == ClonedPH)
846  continue;
847 
848  // Because the loop was in simplified form, the only non-loop predecessor
849  // should be the preheader.
850  assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
851  "header other than the preheader "
852  "that is not part of the loop!");
853 
854  // Insert this block into the loop set and on the first visit (and if it
855  // isn't the header we're currently walking) put it into the worklist to
856  // recurse through.
857  if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
858  Worklist.push_back(Pred);
859  }
860 
861  // If we had any backedges then there *is* a cloned loop. Put the header into
862  // the loop set and then walk the worklist backwards to find all the blocks
863  // that remain within the loop after cloning.
864  if (!BlocksInClonedLoop.empty()) {
865  BlocksInClonedLoop.insert(ClonedHeader);
866 
867  while (!Worklist.empty()) {
868  BasicBlock *BB = Worklist.pop_back_val();
869  assert(BlocksInClonedLoop.count(BB) &&
870  "Didn't put block into the loop set!");
871 
872  // Insert any predecessors that are in the possible set into the cloned
873  // set, and if the insert is successful, add them to the worklist. Note
874  // that we filter on the blocks that are definitely reachable via the
875  // backedge to the loop header so we may prune out dead code within the
876  // cloned loop.
877  for (auto *Pred : predecessors(BB))
878  if (ClonedLoopBlocks.count(Pred) &&
879  BlocksInClonedLoop.insert(Pred).second)
880  Worklist.push_back(Pred);
881  }
882 
883  ClonedL = LI.AllocateLoop();
884  if (ParentL) {
885  ParentL->addBasicBlockToLoop(ClonedPH, LI);
886  ParentL->addChildLoop(ClonedL);
887  } else {
888  LI.addTopLevelLoop(ClonedL);
889  }
890  NonChildClonedLoops.push_back(ClonedL);
891 
892  ClonedL->reserveBlocks(BlocksInClonedLoop.size());
893  // We don't want to just add the cloned loop blocks based on how we
894  // discovered them. The original order of blocks was carefully built in
895  // a way that doesn't rely on predecessor ordering. Rather than re-invent
896  // that logic, we just re-walk the original blocks (and those of the child
897  // loops) and filter them as we add them into the cloned loop.
898  for (auto *BB : OrigL.blocks()) {
899  auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
900  if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
901  continue;
902 
903  // Directly add the blocks that are only in this loop.
904  if (LI.getLoopFor(BB) == &OrigL) {
905  ClonedL->addBasicBlockToLoop(ClonedBB, LI);
906  continue;
907  }
908 
909  // We want to manually add it to this loop and parents.
910  // Registering it with LoopInfo will happen when we clone the top
911  // loop for this block.
912  for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
913  PL->addBlockEntry(ClonedBB);
914  }
915 
916  // Now add each child loop whose header remains within the cloned loop. All
917  // of the blocks within the loop must satisfy the same constraints as the
918  // header so once we pass the header checks we can just clone the entire
919  // child loop nest.
920  for (Loop *ChildL : OrigL) {
921  auto *ClonedChildHeader =
922  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
923  if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
924  continue;
925 
926 #ifndef NDEBUG
927  // We should never have a cloned child loop header but fail to have
928  // all of the blocks for that child loop.
929  for (auto *ChildLoopBB : ChildL->blocks())
930  assert(BlocksInClonedLoop.count(
931  cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
932  "Child cloned loop has a header within the cloned outer "
933  "loop but not all of its blocks!");
934 #endif
935 
936  cloneLoopNest(*ChildL, ClonedL, VMap, LI);
937  }
938  }
939 
940  // Now that we've handled all the components of the original loop that were
941  // cloned into a new loop, we still need to handle anything from the original
942  // loop that wasn't in a cloned loop.
943 
944  // Figure out what blocks are left to place within any loop nest containing
945  // the unswitched loop. If we never formed a loop, the cloned PH is one of
946  // them.
947  SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
948  if (BlocksInClonedLoop.empty())
949  UnloopedBlockSet.insert(ClonedPH);
950  for (auto *ClonedBB : ClonedLoopBlocks)
951  if (!BlocksInClonedLoop.count(ClonedBB))
952  UnloopedBlockSet.insert(ClonedBB);
953 
954  // Copy the cloned exits and sort them in ascending loop depth, we'll work
955  // backwards across these to process them inside out. The order shouldn't
956  // matter as we're just trying to build up the map from inside-out; we use
957  // the map in a more stably ordered way below.
958  auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
959  llvm::sort(OrderedClonedExitsInLoops.begin(),
960  OrderedClonedExitsInLoops.end(),
961  [&](BasicBlock *LHS, BasicBlock *RHS) {
962  return ExitLoopMap.lookup(LHS)->getLoopDepth() <
963  ExitLoopMap.lookup(RHS)->getLoopDepth();
964  });
965 
966  // Populate the existing ExitLoopMap with everything reachable from each
967  // exit, starting from the inner most exit.
968  while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
969  assert(Worklist.empty() && "Didn't clear worklist!");
970 
971  BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
972  Loop *ExitL = ExitLoopMap.lookup(ExitBB);
973 
974  // Walk the CFG back until we hit the cloned PH adding everything reachable
975  // and in the unlooped set to this exit block's loop.
976  Worklist.push_back(ExitBB);
977  do {
978  BasicBlock *BB = Worklist.pop_back_val();
979  // We can stop recursing at the cloned preheader (if we get there).
980  if (BB == ClonedPH)
981  continue;
982 
983  for (BasicBlock *PredBB : predecessors(BB)) {
984  // If this pred has already been moved to our set or is part of some
985  // (inner) loop, no update needed.
986  if (!UnloopedBlockSet.erase(PredBB)) {
987  assert(
988  (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
989  "Predecessor not mapped to a loop!");
990  continue;
991  }
992 
993  // We just insert into the loop set here. We'll add these blocks to the
994  // exit loop after we build up the set in an order that doesn't rely on
995  // predecessor order (which in turn relies on use list order).
996  bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
997  (void)Inserted;
998  assert(Inserted && "Should only visit an unlooped block once!");
999 
1000  // And recurse through to its predecessors.
1001  Worklist.push_back(PredBB);
1002  }
1003  } while (!Worklist.empty());
1004  }
1005 
1006  // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1007  // blocks to their outer loops, walk the cloned blocks and the cloned exits
1008  // in their original order adding them to the correct loop.
1009 
1010  // We need a stable insertion order. We use the order of the original loop
1011  // order and map into the correct parent loop.
1012  for (auto *BB : llvm::concat<BasicBlock *const>(
1013  makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1014  if (Loop *OuterL = ExitLoopMap.lookup(BB))
1015  OuterL->addBasicBlockToLoop(BB, LI);
1016 
1017 #ifndef NDEBUG
1018  for (auto &BBAndL : ExitLoopMap) {
1019  auto *BB = BBAndL.first;
1020  auto *OuterL = BBAndL.second;
1021  assert(LI.getLoopFor(BB) == OuterL &&
1022  "Failed to put all blocks into outer loops!");
1023  }
1024 #endif
1025 
1026  // Now that all the blocks are placed into the correct containing loop in the
1027  // absence of child loops, find all the potentially cloned child loops and
1028  // clone them into whatever outer loop we placed their header into.
1029  for (Loop *ChildL : OrigL) {
1030  auto *ClonedChildHeader =
1031  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1032  if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1033  continue;
1034 
1035 #ifndef NDEBUG
1036  for (auto *ChildLoopBB : ChildL->blocks())
1037  assert(VMap.count(ChildLoopBB) &&
1038  "Cloned a child loop header but not all of that loops blocks!");
1039 #endif
1040 
1041  NonChildClonedLoops.push_back(cloneLoopNest(
1042  *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1043  }
1044 }
1045 
1046 static void
1048  const SmallVectorImpl<BasicBlock *> &DeadBlocks,
1049  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1050  DominatorTree &DT, LoopInfo &LI) {
1051  SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(),
1052  DeadBlocks.end());
1053 
1054  // Filter out the dead blocks from the exit blocks list so that it can be
1055  // used in the caller.
1056  llvm::erase_if(ExitBlocks,
1057  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1058 
1059  // Remove these blocks from their successors.
1060  for (auto *BB : DeadBlocks)
1061  for (BasicBlock *SuccBB : successors(BB))
1062  SuccBB->removePredecessor(BB, /*DontDeleteUselessPHIs*/ true);
1063 
1064  // Walk from this loop up through its parents removing all of the dead blocks.
1065  for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1066  for (auto *BB : DeadBlocks)
1067  ParentL->getBlocksSet().erase(BB);
1068  llvm::erase_if(ParentL->getBlocksVector(),
1069  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1070  }
1071 
1072  // Now delete the dead child loops. This raw delete will clear them
1073  // recursively.
1074  llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1075  if (!DeadBlockSet.count(ChildL->getHeader()))
1076  return false;
1077 
1078  assert(llvm::all_of(ChildL->blocks(),
1079  [&](BasicBlock *ChildBB) {
1080  return DeadBlockSet.count(ChildBB);
1081  }) &&
1082  "If the child loop header is dead all blocks in the child loop must "
1083  "be dead as well!");
1084  LI.destroy(ChildL);
1085  return true;
1086  });
1087 
1088  // Remove the loop mappings for the dead blocks and drop all the references
1089  // from these blocks to others to handle cyclic references as we start
1090  // deleting the blocks themselves.
1091  for (auto *BB : DeadBlocks) {
1092  // Check that the dominator tree has already been updated.
1093  assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1094  LI.changeLoopFor(BB, nullptr);
1095  BB->dropAllReferences();
1096  }
1097 
1098  // Actually delete the blocks now that they've been fully unhooked from the
1099  // IR.
1100  for (auto *BB : DeadBlocks)
1101  BB->eraseFromParent();
1102 }
1103 
1104 /// Recompute the set of blocks in a loop after unswitching.
1105 ///
1106 /// This walks from the original headers predecessors to rebuild the loop. We
1107 /// take advantage of the fact that new blocks can't have been added, and so we
1108 /// filter by the original loop's blocks. This also handles potentially
1109 /// unreachable code that we don't want to explore but might be found examining
1110 /// the predecessors of the header.
1111 ///
1112 /// If the original loop is no longer a loop, this will return an empty set. If
1113 /// it remains a loop, all the blocks within it will be added to the set
1114 /// (including those blocks in inner loops).
1116  LoopInfo &LI) {
1118 
1119  auto *PH = L.getLoopPreheader();
1120  auto *Header = L.getHeader();
1121 
1122  // A worklist to use while walking backwards from the header.
1124 
1125  // First walk the predecessors of the header to find the backedges. This will
1126  // form the basis of our walk.
1127  for (auto *Pred : predecessors(Header)) {
1128  // Skip the preheader.
1129  if (Pred == PH)
1130  continue;
1131 
1132  // Because the loop was in simplified form, the only non-loop predecessor
1133  // is the preheader.
1134  assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1135  "than the preheader that is not part of the "
1136  "loop!");
1137 
1138  // Insert this block into the loop set and on the first visit and, if it
1139  // isn't the header we're currently walking, put it into the worklist to
1140  // recurse through.
1141  if (LoopBlockSet.insert(Pred).second && Pred != Header)
1142  Worklist.push_back(Pred);
1143  }
1144 
1145  // If no backedges were found, we're done.
1146  if (LoopBlockSet.empty())
1147  return LoopBlockSet;
1148 
1149  // We found backedges, recurse through them to identify the loop blocks.
1150  while (!Worklist.empty()) {
1151  BasicBlock *BB = Worklist.pop_back_val();
1152  assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1153 
1154  // No need to walk past the header.
1155  if (BB == Header)
1156  continue;
1157 
1158  // Because we know the inner loop structure remains valid we can use the
1159  // loop structure to jump immediately across the entire nested loop.
1160  // Further, because it is in loop simplified form, we can directly jump
1161  // to its preheader afterward.
1162  if (Loop *InnerL = LI.getLoopFor(BB))
1163  if (InnerL != &L) {
1164  assert(L.contains(InnerL) &&
1165  "Should not reach a loop *outside* this loop!");
1166  // The preheader is the only possible predecessor of the loop so
1167  // insert it into the set and check whether it was already handled.
1168  auto *InnerPH = InnerL->getLoopPreheader();
1169  assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1170  "but not contain the inner loop "
1171  "preheader!");
1172  if (!LoopBlockSet.insert(InnerPH).second)
1173  // The only way to reach the preheader is through the loop body
1174  // itself so if it has been visited the loop is already handled.
1175  continue;
1176 
1177  // Insert all of the blocks (other than those already present) into
1178  // the loop set. We expect at least the block that led us to find the
1179  // inner loop to be in the block set, but we may also have other loop
1180  // blocks if they were already enqueued as predecessors of some other
1181  // outer loop block.
1182  for (auto *InnerBB : InnerL->blocks()) {
1183  if (InnerBB == BB) {
1184  assert(LoopBlockSet.count(InnerBB) &&
1185  "Block should already be in the set!");
1186  continue;
1187  }
1188 
1189  LoopBlockSet.insert(InnerBB);
1190  }
1191 
1192  // Add the preheader to the worklist so we will continue past the
1193  // loop body.
1194  Worklist.push_back(InnerPH);
1195  continue;
1196  }
1197 
1198  // Insert any predecessors that were in the original loop into the new
1199  // set, and if the insert is successful, add them to the worklist.
1200  for (auto *Pred : predecessors(BB))
1201  if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1202  Worklist.push_back(Pred);
1203  }
1204 
1205  assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1206 
1207  // We've found all the blocks participating in the loop, return our completed
1208  // set.
1209  return LoopBlockSet;
1210 }
1211 
1212 /// Rebuild a loop after unswitching removes some subset of blocks and edges.
1213 ///
1214 /// The removal may have removed some child loops entirely but cannot have
1215 /// disturbed any remaining child loops. However, they may need to be hoisted
1216 /// to the parent loop (or to be top-level loops). The original loop may be
1217 /// completely removed.
1218 ///
1219 /// The sibling loops resulting from this update are returned. If the original
1220 /// loop remains a valid loop, it will be the first entry in this list with all
1221 /// of the newly sibling loops following it.
1222 ///
1223 /// Returns true if the loop remains a loop after unswitching, and false if it
1224 /// is no longer a loop after unswitching (and should not continue to be
1225 /// referenced).
1227  LoopInfo &LI,
1228  SmallVectorImpl<Loop *> &HoistedLoops) {
1229  auto *PH = L.getLoopPreheader();
1230 
1231  // Compute the actual parent loop from the exit blocks. Because we may have
1232  // pruned some exits the loop may be different from the original parent.
1233  Loop *ParentL = nullptr;
1234  SmallVector<Loop *, 4> ExitLoops;
1235  SmallVector<BasicBlock *, 4> ExitsInLoops;
1236  ExitsInLoops.reserve(ExitBlocks.size());
1237  for (auto *ExitBB : ExitBlocks)
1238  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1239  ExitLoops.push_back(ExitL);
1240  ExitsInLoops.push_back(ExitBB);
1241  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1242  ParentL = ExitL;
1243  }
1244 
1245  // Recompute the blocks participating in this loop. This may be empty if it
1246  // is no longer a loop.
1247  auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1248 
1249  // If we still have a loop, we need to re-set the loop's parent as the exit
1250  // block set changing may have moved it within the loop nest. Note that this
1251  // can only happen when this loop has a parent as it can only hoist the loop
1252  // *up* the nest.
1253  if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1254  // Remove this loop's (original) blocks from all of the intervening loops.
1255  for (Loop *IL = L.getParentLoop(); IL != ParentL;
1256  IL = IL->getParentLoop()) {
1257  IL->getBlocksSet().erase(PH);
1258  for (auto *BB : L.blocks())
1259  IL->getBlocksSet().erase(BB);
1260  llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1261  return BB == PH || L.contains(BB);
1262  });
1263  }
1264 
1265  LI.changeLoopFor(PH, ParentL);
1266  L.getParentLoop()->removeChildLoop(&L);
1267  if (ParentL)
1268  ParentL->addChildLoop(&L);
1269  else
1270  LI.addTopLevelLoop(&L);
1271  }
1272 
1273  // Now we update all the blocks which are no longer within the loop.
1274  auto &Blocks = L.getBlocksVector();
1275  auto BlocksSplitI =
1276  LoopBlockSet.empty()
1277  ? Blocks.begin()
1278  : std::stable_partition(
1279  Blocks.begin(), Blocks.end(),
1280  [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1281 
1282  // Before we erase the list of unlooped blocks, build a set of them.
1283  SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1284  if (LoopBlockSet.empty())
1285  UnloopedBlocks.insert(PH);
1286 
1287  // Now erase these blocks from the loop.
1288  for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1289  L.getBlocksSet().erase(BB);
1290  Blocks.erase(BlocksSplitI, Blocks.end());
1291 
1292  // Sort the exits in ascending loop depth, we'll work backwards across these
1293  // to process them inside out.
1294  std::stable_sort(ExitsInLoops.begin(), ExitsInLoops.end(),
1295  [&](BasicBlock *LHS, BasicBlock *RHS) {
1296  return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1297  });
1298 
1299  // We'll build up a set for each exit loop.
1300  SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1301  Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1302 
1303  auto RemoveUnloopedBlocksFromLoop =
1304  [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1305  for (auto *BB : UnloopedBlocks)
1306  L.getBlocksSet().erase(BB);
1307  llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1308  return UnloopedBlocks.count(BB);
1309  });
1310  };
1311 
1313  while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1314  assert(Worklist.empty() && "Didn't clear worklist!");
1315  assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1316 
1317  // Grab the next exit block, in decreasing loop depth order.
1318  BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1319  Loop &ExitL = *LI.getLoopFor(ExitBB);
1320  assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1321 
1322  // Erase all of the unlooped blocks from the loops between the previous
1323  // exit loop and this exit loop. This works because the ExitInLoops list is
1324  // sorted in increasing order of loop depth and thus we visit loops in
1325  // decreasing order of loop depth.
1326  for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1327  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1328 
1329  // Walk the CFG back until we hit the cloned PH adding everything reachable
1330  // and in the unlooped set to this exit block's loop.
1331  Worklist.push_back(ExitBB);
1332  do {
1333  BasicBlock *BB = Worklist.pop_back_val();
1334  // We can stop recursing at the cloned preheader (if we get there).
1335  if (BB == PH)
1336  continue;
1337 
1338  for (BasicBlock *PredBB : predecessors(BB)) {
1339  // If this pred has already been moved to our set or is part of some
1340  // (inner) loop, no update needed.
1341  if (!UnloopedBlocks.erase(PredBB)) {
1342  assert((NewExitLoopBlocks.count(PredBB) ||
1343  ExitL.contains(LI.getLoopFor(PredBB))) &&
1344  "Predecessor not in a nested loop (or already visited)!");
1345  continue;
1346  }
1347 
1348  // We just insert into the loop set here. We'll add these blocks to the
1349  // exit loop after we build up the set in a deterministic order rather
1350  // than the predecessor-influenced visit order.
1351  bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
1352  (void)Inserted;
1353  assert(Inserted && "Should only visit an unlooped block once!");
1354 
1355  // And recurse through to its predecessors.
1356  Worklist.push_back(PredBB);
1357  }
1358  } while (!Worklist.empty());
1359 
1360  // If blocks in this exit loop were directly part of the original loop (as
1361  // opposed to a child loop) update the map to point to this exit loop. This
1362  // just updates a map and so the fact that the order is unstable is fine.
1363  for (auto *BB : NewExitLoopBlocks)
1364  if (Loop *BBL = LI.getLoopFor(BB))
1365  if (BBL == &L || !L.contains(BBL))
1366  LI.changeLoopFor(BB, &ExitL);
1367 
1368  // We will remove the remaining unlooped blocks from this loop in the next
1369  // iteration or below.
1370  NewExitLoopBlocks.clear();
1371  }
1372 
1373  // Any remaining unlooped blocks are no longer part of any loop unless they
1374  // are part of some child loop.
1375  for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
1376  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1377  for (auto *BB : UnloopedBlocks)
1378  if (Loop *BBL = LI.getLoopFor(BB))
1379  if (BBL == &L || !L.contains(BBL))
1380  LI.changeLoopFor(BB, nullptr);
1381 
1382  // Sink all the child loops whose headers are no longer in the loop set to
1383  // the parent (or to be top level loops). We reach into the loop and directly
1384  // update its subloop vector to make this batch update efficient.
1385  auto &SubLoops = L.getSubLoopsVector();
1386  auto SubLoopsSplitI =
1387  LoopBlockSet.empty()
1388  ? SubLoops.begin()
1389  : std::stable_partition(
1390  SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
1391  return LoopBlockSet.count(SubL->getHeader());
1392  });
1393  for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
1394  HoistedLoops.push_back(HoistedL);
1395  HoistedL->setParentLoop(nullptr);
1396 
1397  // To compute the new parent of this hoisted loop we look at where we
1398  // placed the preheader above. We can't lookup the header itself because we
1399  // retained the mapping from the header to the hoisted loop. But the
1400  // preheader and header should have the exact same new parent computed
1401  // based on the set of exit blocks from the original loop as the preheader
1402  // is a predecessor of the header and so reached in the reverse walk. And
1403  // because the loops were all in simplified form the preheader of the
1404  // hoisted loop can't be part of some *other* loop.
1405  if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
1406  NewParentL->addChildLoop(HoistedL);
1407  else
1408  LI.addTopLevelLoop(HoistedL);
1409  }
1410  SubLoops.erase(SubLoopsSplitI, SubLoops.end());
1411 
1412  // Actually delete the loop if nothing remained within it.
1413  if (Blocks.empty()) {
1414  assert(SubLoops.empty() &&
1415  "Failed to remove all subloops from the original loop!");
1416  if (Loop *ParentL = L.getParentLoop())
1417  ParentL->removeChildLoop(llvm::find(*ParentL, &L));
1418  else
1419  LI.removeLoop(llvm::find(LI, &L));
1420  LI.destroy(&L);
1421  return false;
1422  }
1423 
1424  return true;
1425 }
1426 
1427 /// Helper to visit a dominator subtree, invoking a callable on each node.
1428 ///
1429 /// Returning false at any point will stop walking past that node of the tree.
1430 template <typename CallableT>
1431 void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
1432  SmallVector<DomTreeNode *, 4> DomWorklist;
1433  DomWorklist.push_back(DT[BB]);
1434 #ifndef NDEBUG
1436  Visited.insert(DT[BB]);
1437 #endif
1438  do {
1439  DomTreeNode *N = DomWorklist.pop_back_val();
1440 
1441  // Visit this node.
1442  if (!Callable(N->getBlock()))
1443  continue;
1444 
1445  // Accumulate the child nodes.
1446  for (DomTreeNode *ChildN : *N) {
1447  assert(Visited.insert(ChildN).second &&
1448  "Cannot visit a node twice when walking a tree!");
1449  DomWorklist.push_back(ChildN);
1450  }
1451  } while (!DomWorklist.empty());
1452 }
1453 
1454 /// Take an invariant branch that has been determined to be safe and worthwhile
1455 /// to unswitch despite being non-trivial to do so and perform the unswitch.
1456 ///
1457 /// This directly updates the CFG to hoist the predicate out of the loop, and
1458 /// clone the necessary parts of the loop to maintain behavior.
1459 ///
1460 /// It also updates both dominator tree and loopinfo based on the unswitching.
1461 ///
1462 /// Once unswitching has been performed it runs the provided callback to report
1463 /// the new loops and no-longer valid loops to the caller.
1465  Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI,
1466  AssumptionCache &AC,
1467  function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
1468  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
1470  "Can only unswitch an invariant branch condition!");
1471 
1472  // Constant and BBs tracking the cloned and continuing successor.
1473  const int ClonedSucc = 0;
1474  auto *ParentBB = BI.getParent();
1475  auto *UnswitchedSuccBB = BI.getSuccessor(ClonedSucc);
1476  auto *ContinueSuccBB = BI.getSuccessor(1 - ClonedSucc);
1477 
1478  assert(UnswitchedSuccBB != ContinueSuccBB &&
1479  "Should not unswitch a branch that always goes to the same place!");
1480 
1481  // The branch should be in this exact loop. Any inner loop's invariant branch
1482  // should be handled by unswitching that inner loop. The caller of this
1483  // routine should filter out any candidates that remain (but were skipped for
1484  // whatever reason).
1485  assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
1486 
1487  SmallVector<BasicBlock *, 4> ExitBlocks;
1488  L.getUniqueExitBlocks(ExitBlocks);
1489 
1490  // We cannot unswitch if exit blocks contain a cleanuppad instruction as we
1491  // don't know how to split those exit blocks.
1492  // FIXME: We should teach SplitBlock to handle this and remove this
1493  // restriction.
1494  for (auto *ExitBB : ExitBlocks)
1495  if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI()))
1496  return false;
1497 
1498  SmallPtrSet<BasicBlock *, 4> ExitBlockSet(ExitBlocks.begin(),
1499  ExitBlocks.end());
1500 
1501  // Compute the parent loop now before we start hacking on things.
1502  Loop *ParentL = L.getParentLoop();
1503 
1504  // Compute the outer-most loop containing one of our exit blocks. This is the
1505  // furthest up our loopnest which can be mutated, which we will use below to
1506  // update things.
1507  Loop *OuterExitL = &L;
1508  for (auto *ExitBB : ExitBlocks) {
1509  Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
1510  if (!NewOuterExitL) {
1511  // We exited the entire nest with this block, so we're done.
1512  OuterExitL = nullptr;
1513  break;
1514  }
1515  if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
1516  OuterExitL = NewOuterExitL;
1517  }
1518 
1519  // If the edge we *aren't* cloning in the unswitch (the continuing edge)
1520  // dominates its target, we can skip cloning the dominated region of the loop
1521  // and its exits. We compute this as a set of nodes to be skipped.
1522  SmallPtrSet<BasicBlock *, 4> SkippedLoopAndExitBlocks;
1523  if (ContinueSuccBB->getUniquePredecessor() ||
1524  llvm::all_of(predecessors(ContinueSuccBB), [&](BasicBlock *PredBB) {
1525  return PredBB == ParentBB || DT.dominates(ContinueSuccBB, PredBB);
1526  })) {
1527  visitDomSubTree(DT, ContinueSuccBB, [&](BasicBlock *BB) {
1528  SkippedLoopAndExitBlocks.insert(BB);
1529  return true;
1530  });
1531  }
1532  // Similarly, if the edge we *are* cloning in the unswitch (the unswitched
1533  // edge) dominates its target, we will end up with dead nodes in the original
1534  // loop and its exits that will need to be deleted. Here, we just retain that
1535  // the property holds and will compute the deleted set later.
1536  bool DeleteUnswitchedSucc =
1537  UnswitchedSuccBB->getUniquePredecessor() ||
1538  llvm::all_of(predecessors(UnswitchedSuccBB), [&](BasicBlock *PredBB) {
1539  return PredBB == ParentBB || DT.dominates(UnswitchedSuccBB, PredBB);
1540  });
1541 
1542  // Split the preheader, so that we know that there is a safe place to insert
1543  // the conditional branch. We will change the preheader to have a conditional
1544  // branch on LoopCond. The original preheader will become the split point
1545  // between the unswitched versions, and we will have a new preheader for the
1546  // original loop.
1547  BasicBlock *SplitBB = L.getLoopPreheader();
1548  BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI);
1549 
1550  // Keep a mapping for the cloned values.
1551  ValueToValueMapTy VMap;
1552 
1553  // Keep track of the dominator tree updates needed.
1555 
1556  // Build the cloned blocks from the loop.
1557  auto *ClonedPH = buildClonedLoopBlocks(
1558  L, LoopPH, SplitBB, ExitBlocks, ParentBB, UnswitchedSuccBB,
1559  ContinueSuccBB, SkippedLoopAndExitBlocks, VMap, DTUpdates, AC, DT, LI);
1560 
1561  // Remove the parent as a predecessor of the unswitched successor.
1562  UnswitchedSuccBB->removePredecessor(ParentBB, /*DontDeleteUselessPHIs*/ true);
1563 
1564  // Now splice the branch from the original loop and use it to select between
1565  // the two loops.
1566  SplitBB->getTerminator()->eraseFromParent();
1567  SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), BI);
1568  BI.setSuccessor(ClonedSucc, ClonedPH);
1569  BI.setSuccessor(1 - ClonedSucc, LoopPH);
1570 
1571  // Create a new unconditional branch to the continuing block (as opposed to
1572  // the one cloned).
1573  BranchInst::Create(ContinueSuccBB, ParentBB);
1574 
1575  // Before we update the dominator tree, collect the dead blocks if we're going
1576  // to end up deleting the unswitched successor.
1577  SmallVector<BasicBlock *, 16> DeadBlocks;
1578  if (DeleteUnswitchedSucc) {
1579  DeadBlocks.push_back(UnswitchedSuccBB);
1580  for (int i = 0; i < (int)DeadBlocks.size(); ++i) {
1581  // If we reach an exit block, stop recursing as the unswitched loop will
1582  // end up reaching the merge block which we make the successor of the
1583  // exit.
1584  if (ExitBlockSet.count(DeadBlocks[i]))
1585  continue;
1586 
1587  // Insert the children that are within the loop or exit block set. Other
1588  // children may reach out of the loop. While we don't expect these to be
1589  // dead (as the unswitched clone should reach them) we don't try to prove
1590  // that here.
1591  for (DomTreeNode *ChildN : *DT[DeadBlocks[i]])
1592  if (L.contains(ChildN->getBlock()) ||
1593  ExitBlockSet.count(ChildN->getBlock()))
1594  DeadBlocks.push_back(ChildN->getBlock());
1595  }
1596  }
1597 
1598  // Add the remaining edges to our updates and apply them to get an up-to-date
1599  // dominator tree. Note that this will cause the dead blocks above to be
1600  // unreachable and no longer in the dominator tree.
1601  DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
1602  DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
1603  DT.applyUpdates(DTUpdates);
1604 
1605  // Build the cloned loop structure itself. This may be substantially
1606  // different from the original structure due to the simplified CFG. This also
1607  // handles inserting all the cloned blocks into the correct loops.
1608  SmallVector<Loop *, 4> NonChildClonedLoops;
1609  buildClonedLoops(L, ExitBlocks, VMap, LI, NonChildClonedLoops);
1610 
1611  // Delete anything that was made dead in the original loop due to
1612  // unswitching.
1613  if (!DeadBlocks.empty())
1614  deleteDeadBlocksFromLoop(L, DeadBlocks, ExitBlocks, DT, LI);
1615 
1616  SmallVector<Loop *, 4> HoistedLoops;
1617  bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
1618 
1619  // This transformation has a high risk of corrupting the dominator tree, and
1620  // the below steps to rebuild loop structures will result in hard to debug
1621  // errors in that case so verify that the dominator tree is sane first.
1622  // FIXME: Remove this when the bugs stop showing up and rely on existing
1623  // verification steps.
1625 
1626  // We can change which blocks are exit blocks of all the cloned sibling
1627  // loops, the current loop, and any parent loops which shared exit blocks
1628  // with the current loop. As a consequence, we need to re-form LCSSA for
1629  // them. But we shouldn't need to re-form LCSSA for any child loops.
1630  // FIXME: This could be made more efficient by tracking which exit blocks are
1631  // new, and focusing on them, but that isn't likely to be necessary.
1632  //
1633  // In order to reasonably rebuild LCSSA we need to walk inside-out across the
1634  // loop nest and update every loop that could have had its exits changed. We
1635  // also need to cover any intervening loops. We add all of these loops to
1636  // a list and sort them by loop depth to achieve this without updating
1637  // unnecessary loops.
1638  auto UpdateLoop = [&](Loop &UpdateL) {
1639 #ifndef NDEBUG
1640  UpdateL.verifyLoop();
1641  for (Loop *ChildL : UpdateL) {
1642  ChildL->verifyLoop();
1643  assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
1644  "Perturbed a child loop's LCSSA form!");
1645  }
1646 #endif
1647  // First build LCSSA for this loop so that we can preserve it when
1648  // forming dedicated exits. We don't want to perturb some other loop's
1649  // LCSSA while doing that CFG edit.
1650  formLCSSA(UpdateL, DT, &LI, nullptr);
1651 
1652  // For loops reached by this loop's original exit blocks we may
1653  // introduced new, non-dedicated exits. At least try to re-form dedicated
1654  // exits for these loops. This may fail if they couldn't have dedicated
1655  // exits to start with.
1656  formDedicatedExitBlocks(&UpdateL, &DT, &LI, /*PreserveLCSSA*/ true);
1657  };
1658 
1659  // For non-child cloned loops and hoisted loops, we just need to update LCSSA
1660  // and we can do it in any order as they don't nest relative to each other.
1661  //
1662  // Also check if any of the loops we have updated have become top-level loops
1663  // as that will necessitate widening the outer loop scope.
1664  for (Loop *UpdatedL :
1665  llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
1666  UpdateLoop(*UpdatedL);
1667  if (!UpdatedL->getParentLoop())
1668  OuterExitL = nullptr;
1669  }
1670  if (IsStillLoop) {
1671  UpdateLoop(L);
1672  if (!L.getParentLoop())
1673  OuterExitL = nullptr;
1674  }
1675 
1676  // If the original loop had exit blocks, walk up through the outer most loop
1677  // of those exit blocks to update LCSSA and form updated dedicated exits.
1678  if (OuterExitL != &L)
1679  for (Loop *OuterL = ParentL; OuterL != OuterExitL;
1680  OuterL = OuterL->getParentLoop())
1681  UpdateLoop(*OuterL);
1682 
1683 #ifndef NDEBUG
1684  // Verify the entire loop structure to catch any incorrect updates before we
1685  // progress in the pass pipeline.
1686  LI.verify(DT);
1687 #endif
1688 
1689  // Now that we've unswitched something, make callbacks to report the changes.
1690  // For that we need to merge together the updated loops and the cloned loops
1691  // and check whether the original loop survived.
1692  SmallVector<Loop *, 4> SibLoops;
1693  for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
1694  if (UpdatedL->getParentLoop() == ParentL)
1695  SibLoops.push_back(UpdatedL);
1696  UnswitchCB(IsStillLoop, SibLoops);
1697 
1698  ++NumBranches;
1699  return true;
1700 }
1701 
1702 /// Recursively compute the cost of a dominator subtree based on the per-block
1703 /// cost map provided.
1704 ///
1705 /// The recursive computation is memozied into the provided DT-indexed cost map
1706 /// to allow querying it for most nodes in the domtree without it becoming
1707 /// quadratic.
1708 static int
1710  const SmallDenseMap<BasicBlock *, int, 4> &BBCostMap,
1712  // Don't accumulate cost (or recurse through) blocks not in our block cost
1713  // map and thus not part of the duplication cost being considered.
1714  auto BBCostIt = BBCostMap.find(N.getBlock());
1715  if (BBCostIt == BBCostMap.end())
1716  return 0;
1717 
1718  // Lookup this node to see if we already computed its cost.
1719  auto DTCostIt = DTCostMap.find(&N);
1720  if (DTCostIt != DTCostMap.end())
1721  return DTCostIt->second;
1722 
1723  // If not, we have to compute it. We can't use insert above and update
1724  // because computing the cost may insert more things into the map.
1725  int Cost = std::accumulate(
1726  N.begin(), N.end(), BBCostIt->second, [&](int Sum, DomTreeNode *ChildN) {
1727  return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
1728  });
1729  bool Inserted = DTCostMap.insert({&N, Cost}).second;
1730  (void)Inserted;
1731  assert(Inserted && "Should not insert a node while visiting children!");
1732  return Cost;
1733 }
1734 
1735 /// Unswitch control flow predicated on loop invariant conditions.
1736 ///
1737 /// This first hoists all branches or switches which are trivial (IE, do not
1738 /// require duplicating any part of the loop) out of the loop body. It then
1739 /// looks at other loop invariant control flows and tries to unswitch those as
1740 /// well by cloning the loop if the result is small enough.
1741 static bool
1743  TargetTransformInfo &TTI, bool NonTrivial,
1744  function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
1745  assert(L.isRecursivelyLCSSAForm(DT, LI) &&
1746  "Loops must be in LCSSA form before unswitching.");
1747 
1748  // Must be in loop simplified form: we need a preheader and dedicated exits.
1749  if (!L.isLoopSimplifyForm())
1750  return false;
1751 
1752  // Try trivial unswitch first before loop over other basic blocks in the loop.
1753  if (unswitchAllTrivialConditions(L, DT, LI)) {
1754  // If we unswitched successfully we will want to clean up the loop before
1755  // processing it further so just mark it as unswitched and return.
1756  UnswitchCB(/*CurrentLoopValid*/ true, {});
1757  return true;
1758  }
1759 
1760  // If we're not doing non-trivial unswitching, we're done. We both accept
1761  // a parameter but also check a local flag that can be used for testing
1762  // a debugging.
1763  if (!NonTrivial && !EnableNonTrivialUnswitch)
1764  return false;
1765 
1766  // Collect all remaining invariant branch conditions within this loop (as
1767  // opposed to an inner loop which would be handled when visiting that inner
1768  // loop).
1769  SmallVector<TerminatorInst *, 4> UnswitchCandidates;
1770  for (auto *BB : L.blocks())
1771  if (LI.getLoopFor(BB) == &L)
1772  if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator()))
1773  if (BI->isConditional() && L.isLoopInvariant(BI->getCondition()) &&
1774  BI->getSuccessor(0) != BI->getSuccessor(1))
1775  UnswitchCandidates.push_back(BI);
1776 
1777  // If we didn't find any candidates, we're done.
1778  if (UnswitchCandidates.empty())
1779  return false;
1780 
1781  // Check if there are irreducible CFG cycles in this loop. If so, we cannot
1782  // easily unswitch non-trivial edges out of the loop. Doing so might turn the
1783  // irreducible control flow into reducible control flow and introduce new
1784  // loops "out of thin air". If we ever discover important use cases for doing
1785  // this, we can add support to loop unswitch, but it is a lot of complexity
1786  // for what seems little or no real world benefit.
1787  LoopBlocksRPO RPOT(&L);
1788  RPOT.perform(&LI);
1789  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
1790  return false;
1791 
1792  LLVM_DEBUG(
1793  dbgs() << "Considering " << UnswitchCandidates.size()
1794  << " non-trivial loop invariant conditions for unswitching.\n");
1795 
1796  // Given that unswitching these terminators will require duplicating parts of
1797  // the loop, so we need to be able to model that cost. Compute the ephemeral
1798  // values and set up a data structure to hold per-BB costs. We cache each
1799  // block's cost so that we don't recompute this when considering different
1800  // subsets of the loop for duplication during unswitching.
1802  CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
1804 
1805  // Compute the cost of each block, as well as the total loop cost. Also, bail
1806  // out if we see instructions which are incompatible with loop unswitching
1807  // (convergent, noduplicate, or cross-basic-block tokens).
1808  // FIXME: We might be able to safely handle some of these in non-duplicated
1809  // regions.
1810  int LoopCost = 0;
1811  for (auto *BB : L.blocks()) {
1812  int Cost = 0;
1813  for (auto &I : *BB) {
1814  if (EphValues.count(&I))
1815  continue;
1816 
1817  if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
1818  return false;
1819  if (auto CS = CallSite(&I))
1820  if (CS.isConvergent() || CS.cannotDuplicate())
1821  return false;
1822 
1823  Cost += TTI.getUserCost(&I);
1824  }
1825  assert(Cost >= 0 && "Must not have negative costs!");
1826  LoopCost += Cost;
1827  assert(LoopCost >= 0 && "Must not have negative loop costs!");
1828  BBCostMap[BB] = Cost;
1829  }
1830  LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
1831 
1832  // Now we find the best candidate by searching for the one with the following
1833  // properties in order:
1834  //
1835  // 1) An unswitching cost below the threshold
1836  // 2) The smallest number of duplicated unswitch candidates (to avoid
1837  // creating redundant subsequent unswitching)
1838  // 3) The smallest cost after unswitching.
1839  //
1840  // We prioritize reducing fanout of unswitch candidates provided the cost
1841  // remains below the threshold because this has a multiplicative effect.
1842  //
1843  // This requires memoizing each dominator subtree to avoid redundant work.
1844  //
1845  // FIXME: Need to actually do the number of candidates part above.
1847  // Given a terminator which might be unswitched, computes the non-duplicated
1848  // cost for that terminator.
1849  auto ComputeUnswitchedCost = [&](TerminatorInst *TI) {
1850  BasicBlock &BB = *TI->getParent();
1852 
1853  int Cost = LoopCost;
1854  for (BasicBlock *SuccBB : successors(&BB)) {
1855  // Don't count successors more than once.
1856  if (!Visited.insert(SuccBB).second)
1857  continue;
1858 
1859  // This successor's domtree will not need to be duplicated after
1860  // unswitching if the edge to the successor dominates it (and thus the
1861  // entire tree). This essentially means there is no other path into this
1862  // subtree and so it will end up live in only one clone of the loop.
1863  if (SuccBB->getUniquePredecessor() ||
1864  llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
1865  return PredBB == &BB || DT.dominates(SuccBB, PredBB);
1866  })) {
1867  Cost -= computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
1868  assert(Cost >= 0 &&
1869  "Non-duplicated cost should never exceed total loop cost!");
1870  }
1871  }
1872 
1873  // Now scale the cost by the number of unique successors minus one. We
1874  // subtract one because there is already at least one copy of the entire
1875  // loop. This is computing the new cost of unswitching a condition.
1876  assert(Visited.size() > 1 &&
1877  "Cannot unswitch a condition without multiple distinct successors!");
1878  return Cost * (Visited.size() - 1);
1879  };
1880  TerminatorInst *BestUnswitchTI = nullptr;
1881  int BestUnswitchCost;
1882  for (TerminatorInst *CandidateTI : UnswitchCandidates) {
1883  int CandidateCost = ComputeUnswitchedCost(CandidateTI);
1884  LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
1885  << " for unswitch candidate: " << *CandidateTI << "\n");
1886  if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
1887  BestUnswitchTI = CandidateTI;
1888  BestUnswitchCost = CandidateCost;
1889  }
1890  }
1891 
1892  if (BestUnswitchCost >= UnswitchThreshold) {
1893  LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "
1894  << BestUnswitchCost << "\n");
1895  return false;
1896  }
1897 
1898  LLVM_DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = "
1899  << BestUnswitchCost << ") branch: " << *BestUnswitchTI
1900  << "\n");
1901  return unswitchInvariantBranch(L, cast<BranchInst>(*BestUnswitchTI), DT, LI,
1902  AC, UnswitchCB);
1903 }
1904 
1907  LPMUpdater &U) {
1908  Function &F = *L.getHeader()->getParent();
1909  (void)F;
1910 
1911  LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
1912  << "\n");
1913 
1914  // Save the current loop name in a variable so that we can report it even
1915  // after it has been deleted.
1916  std::string LoopName = L.getName();
1917 
1918  auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
1919  ArrayRef<Loop *> NewLoops) {
1920  // If we did a non-trivial unswitch, we have added new (cloned) loops.
1921  if (!NewLoops.empty())
1922  U.addSiblingLoops(NewLoops);
1923 
1924  // If the current loop remains valid, we should revisit it to catch any
1925  // other unswitch opportunities. Otherwise, we need to mark it as deleted.
1926  if (CurrentLoopValid)
1927  U.revisitCurrentLoop();
1928  else
1929  U.markLoopAsDeleted(L, LoopName);
1930  };
1931 
1932  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial,
1933  UnswitchCB))
1934  return PreservedAnalyses::all();
1935 
1936  // Historically this pass has had issues with the dominator tree so verify it
1937  // in asserts builds.
1940 }
1941 
1942 namespace {
1943 
1944 class SimpleLoopUnswitchLegacyPass : public LoopPass {
1945  bool NonTrivial;
1946 
1947 public:
1948  static char ID; // Pass ID, replacement for typeid
1949 
1950  explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
1951  : LoopPass(ID), NonTrivial(NonTrivial) {
1954  }
1955 
1956  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
1957 
1958  void getAnalysisUsage(AnalysisUsage &AU) const override {
1962  }
1963 };
1964 
1965 } // end anonymous namespace
1966 
1967 bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
1968  if (skipLoop(L))
1969  return false;
1970 
1971  Function &F = *L->getHeader()->getParent();
1972 
1973  LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
1974  << "\n");
1975 
1976  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1977  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1978  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1979  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1980 
1981  auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid,
1982  ArrayRef<Loop *> NewLoops) {
1983  // If we did a non-trivial unswitch, we have added new (cloned) loops.
1984  for (auto *NewL : NewLoops)
1985  LPM.addLoop(*NewL);
1986 
1987  // If the current loop remains valid, re-add it to the queue. This is
1988  // a little wasteful as we'll finish processing the current loop as well,
1989  // but it is the best we can do in the old PM.
1990  if (CurrentLoopValid)
1991  LPM.addLoop(*L);
1992  else
1993  LPM.markLoopAsDeleted(*L);
1994  };
1995 
1996  bool Changed =
1997  unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB);
1998 
1999  // If anything was unswitched, also clear any cached information about this
2000  // loop.
2001  LPM.deleteSimpleAnalysisLoop(L);
2002 
2003  // Historically this pass has had issues with the dominator tree so verify it
2004  // in asserts builds.
2006 
2007  return Changed;
2008 }
2009 
2011 INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
2012  "Simple unswitch loops", false, false)
2018 INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
2019  "Simple unswitch loops", false, false)
2020 
2022  return new SimpleLoopUnswitchLegacyPass(NonTrivial);
2023 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:72
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Definition: LoopInfo.h:782
unsigned getNumCases() const
Return the number of &#39;cases&#39; in this switch instruction, excluding the default case.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
void push_back(const T &Elt)
Definition: SmallVector.h:213
use_iterator use_end()
Definition: Value.h:346
This routine provides some synthesis utilities to produce sequences of values.
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI)
Unswitch a trivial branch if the condition is loop invariant.
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:574
CaseIt case_end()
Returns a read/write iterator that points one past the last in the SwitchInst.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
Definition: LoopInfo.h:170
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:137
simple loop unswitch
bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:183
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
Definition: LoopInfo.h:691
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:92
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
Definition: LoopInfo.h:358
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:326
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
An immutable pass that tracks lazily created AssumptionCache objects.
Value * getCondition() const
CaseIt case_begin()
Returns a read/write iterator that points to the first case in the SwitchInst.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:93
A cache of @llvm.assume calls within a function.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:713
unsigned second
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:908
std::vector< LoopT * > & getSubLoopsVector()
Definition: LoopInfo.h:135
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
static int computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, int, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, int, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided...
Value * getCondition() const
This defines the Use class.
static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
void reserve(size_type N)
Definition: SmallVector.h:377
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:633
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:264
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &)
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr UpdateKind Delete
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:684
SmallPtrSetImpl< const BlockT * > & getBlocksSet()
Return a direct, mutable handle to the blocks set so that we can mutate it efficiently.
Definition: LoopInfo.h:176
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
void deleteSimpleAnalysisLoop(Loop *L)
Invoke deleteAnalysisLoop hook for all passes that implement simple analysis interface.
Definition: LoopPass.cpp:118
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
BlockT * getHeader() const
Definition: LoopInfo.h:100
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI)
This routine scans the loop to find a branch or switch which occurs before any side effects occur...
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:237
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:183
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:248
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition: LoopInfo.h:735
This header provides classes for managing per-loop analyses.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
static void deleteDeadBlocksFromLoop(Loop &L, const SmallVectorImpl< BasicBlock *> &DeadBlocks, SmallVectorImpl< BasicBlock *> &ExitBlocks, DominatorTree &DT, LoopInfo &LI)
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:439
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:171
static void replaceLoopUsesWithConstant(Loop &L, Value &LIC, Constant &Replacement)
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:301
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:142
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:73
void getUniqueExitBlocks(SmallVectorImpl< BasicBlock *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfo.cpp:394
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
NodeT * getBlock() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock *> ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop *> &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:55
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
static constexpr UpdateKind Insert
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
Conditional or Unconditional Branch instruction.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
This function has undefined behavior.
This is an important base class in LLVM.
Definition: Constant.h:42
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:117
This file contains the declarations for the subclasses of Constant, which represent the different fla...
INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", "Simple unswitch loops", false, false) INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass
const Instruction & front() const
Definition: BasicBlock.h:276
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:536
BasicBlock * getDefaultDest() const
Represent the analysis usage information of a pass.
void splice(iterator where, iplist_impl &L2)
Definition: ilist.h:329
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:915
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:101
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:107
CaseIt removeCase(CaseIt I)
This method removes the specified case and its successor from the switch instruction.
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&... args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest...
Definition: STLExtras.h:555
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:929
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:57
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:859
bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition: LCSSA.cpp:288
size_type size() const
Definition: SmallPtrSet.h:93
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:329
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:378
static bool unswitchInvariantBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref< void(bool, ArrayRef< Loop *>)> UnswitchCB)
Take an invariant branch that has been determined to be safe and worthwhile to unswitch despite being...
iterator end()
Definition: BasicBlock.h:266
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:244
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
Definition: LoopInfo.h:705
LoopT * AllocateLoop(ArgsTy &&... Args)
Definition: LoopInfo.h:648
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:382
simple loop Simple unswitch loops
Pass * createSimpleLoopUnswitchLegacyPass(bool NonTrivial=false)
Create the legacy pass object for the simple loop unswitcher.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:113
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:142
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:567
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2&#39;s erase_if which is equivalent t...
Definition: STLExtras.h:1025
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:924
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
CloneBasicBlock - Return a copy of the specified basic block, but without embedding the block into a ...
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM...
Definition: ValueMapper.h:251
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
If this flag is set, the remapper ignores missing function-local entries (Argument, Instruction, BasicBlock) that are not in the value map.
Definition: ValueMapper.h:91
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
use_iterator use_begin()
Definition: Value.h:338
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:163
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:192
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:121
void registerAssumption(CallInst *CI)
Add an @llvm.assume intrinsic to this function&#39;s cache.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:317
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:62
StringRef getName() const
Definition: LoopInfo.h:583
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:445
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink &#39;this&#39; from the containing function and delete it.
Definition: BasicBlock.cpp:115
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
void addLoop(Loop &L)
Definition: LoopPass.cpp:76
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock *> ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallPtrSetImpl< BasicBlock *> &SkippedLoopAndExitBlocks, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI)
Build the cloned blocks for an unswitched copy of the given loop.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:1226
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:716
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:320
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI)
Unswitch a trivial switch if the condition is loop invariant.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:141
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:181
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:173
bool empty() const
Definition: LoopInfo.h:146
Multiway switch.
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: ValueMap.h:158
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(BasicBlock *BB)
Definition: CFG.h:149
void setDefaultDest(BasicBlock *DefaultCase)
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:1143
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Split the edge connecting specified block.
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:964
This file defines a set of templates that efficiently compute a dominator tree over a generic graph...
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:254
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:181
#define LLVM_DEBUG(X)
Definition: Debug.h:119
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, TargetTransformInfo &TTI, bool NonTrivial, function_ref< void(bool, ArrayRef< Loop *>)> UnswitchCB)
Unswitch control flow predicated on loop invariant conditions.
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock *> ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop *> &HoistedLoops)
Rebuild a loop after unswitching removes some subset of blocks and edges.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:121
void dropAllReferences()
Cause all subinstructions to "let go" of all the references that said subinstructions are maintaining...
Definition: BasicBlock.cpp:228
const BasicBlock * getParent() const
Definition: Instruction.h:67
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.