LLVM  13.0.0git
SimpleLoopUnswitch.cpp
Go to the documentation of this file.
1 ///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/Sequence.h"
13 #include "llvm/ADT/SetVector.h"
14 #include "llvm/ADT/SmallPtrSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/Twine.h"
19 #include "llvm/Analysis/CFG.h"
24 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/Analysis/LoopPass.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/Constant.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/Dominators.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/IRBuilder.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/IntrinsicInst.h"
41 #include "llvm/IR/Use.h"
42 #include "llvm/IR/Value.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/Pass.h"
45 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Debug.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <iterator>
60 #include <numeric>
61 #include <utility>
62 
63 #define DEBUG_TYPE "simple-loop-unswitch"
64 
65 using namespace llvm;
66 
67 STATISTIC(NumBranches, "Number of branches unswitched");
68 STATISTIC(NumSwitches, "Number of switches unswitched");
69 STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
70 STATISTIC(NumTrivial, "Number of unswitches that are trivial");
71 STATISTIC(
72  NumCostMultiplierSkipped,
73  "Number of unswitch candidates that had their cost multiplier skipped");
74 
76  "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
77  cl::desc("Forcibly enables non-trivial loop unswitching rather than "
78  "following the configuration passed into the pass."));
79 
80 static cl::opt<int>
81  UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
82  cl::desc("The cost threshold for unswitching a loop."));
83 
85  "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
86  cl::desc("Enable unswitch cost multiplier that prohibits exponential "
87  "explosion in nontrivial unswitch."));
89  "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
90  cl::desc("Toplevel siblings divisor for cost multiplier."));
92  "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
93  cl::desc("Number of unswitch candidates that are ignored when calculating "
94  "cost multiplier."));
96  "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
97  cl::desc("If enabled, simple loop unswitching will also consider "
98  "llvm.experimental.guard intrinsics as unswitch candidates."));
100  "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
101  cl::init(false), cl::Hidden,
102  cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
103  "null checks to save time analyzing if we can keep it."));
104 
105 /// Collect all of the loop invariant input values transitively used by the
106 /// homogeneous instruction graph from a given root.
107 ///
108 /// This essentially walks from a root recursively through loop variant operands
109 /// which have the exact same opcode and finds all inputs which are loop
110 /// invariant. For some operations these can be re-associated and unswitched out
111 /// of the loop entirely.
114  LoopInfo &LI) {
115  assert(!L.isLoopInvariant(&Root) &&
116  "Only need to walk the graph if root itself is not invariant.");
117  TinyPtrVector<Value *> Invariants;
118 
119  // Build a worklist and recurse through operators collecting invariants.
122  Worklist.push_back(&Root);
123  Visited.insert(&Root);
124  do {
125  Instruction &I = *Worklist.pop_back_val();
126  for (Value *OpV : I.operand_values()) {
127  // Skip constants as unswitching isn't interesting for them.
128  if (isa<Constant>(OpV))
129  continue;
130 
131  // Add it to our result if loop invariant.
132  if (L.isLoopInvariant(OpV)) {
133  Invariants.push_back(OpV);
134  continue;
135  }
136 
137  // If not an instruction with the same opcode, nothing we can do.
138  Instruction *OpI = dyn_cast<Instruction>(OpV);
139  if (!OpI || OpI->getOpcode() != Root.getOpcode())
140  continue;
141 
142  // Visit this operand.
143  if (Visited.insert(OpI).second)
144  Worklist.push_back(OpI);
145  }
146  } while (!Worklist.empty());
147 
148  return Invariants;
149 }
150 
151 static void replaceLoopInvariantUses(Loop &L, Value *Invariant,
152  Constant &Replacement) {
153  assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
154 
155  // Replace uses of LIC in the loop with the given constant.
156  // We use make_early_inc_range as set invalidates the iterator.
157  for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
158  Instruction *UserI = dyn_cast<Instruction>(U.getUser());
159 
160  // Replace this use within the loop body.
161  if (UserI && L.contains(UserI))
162  U.set(&Replacement);
163  }
164 }
165 
166 /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
167 /// incoming values along this edge.
168 static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
169  BasicBlock &ExitBB) {
170  for (Instruction &I : ExitBB) {
171  auto *PN = dyn_cast<PHINode>(&I);
172  if (!PN)
173  // No more PHIs to check.
174  return true;
175 
176  // If the incoming value for this edge isn't loop invariant the unswitch
177  // won't be trivial.
178  if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
179  return false;
180  }
181  llvm_unreachable("Basic blocks should never be empty!");
182 }
183 
184 /// Insert code to test a set of loop invariant values, and conditionally branch
185 /// on them.
187  ArrayRef<Value *> Invariants,
188  bool Direction,
189  BasicBlock &UnswitchedSucc,
190  BasicBlock &NormalSucc) {
191  IRBuilder<> IRB(&BB);
192 
193  Value *Cond = Direction ? IRB.CreateOr(Invariants) :
194  IRB.CreateAnd(Invariants);
195  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
196  Direction ? &NormalSucc : &UnswitchedSucc);
197 }
198 
199 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
200 ///
201 /// Requires that the loop exit and unswitched basic block are the same, and
202 /// that the exiting block was a unique predecessor of that block. Rewrites the
203 /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
204 /// PHI nodes from the old preheader that now contains the unswitched
205 /// terminator.
207  BasicBlock &OldExitingBB,
208  BasicBlock &OldPH) {
209  for (PHINode &PN : UnswitchedBB.phis()) {
210  // When the loop exit is directly unswitched we just need to update the
211  // incoming basic block. We loop to handle weird cases with repeated
212  // incoming blocks, but expect to typically only have one operand here.
213  for (auto i : seq<int>(0, PN.getNumOperands())) {
214  assert(PN.getIncomingBlock(i) == &OldExitingBB &&
215  "Found incoming block different from unique predecessor!");
216  PN.setIncomingBlock(i, &OldPH);
217  }
218  }
219 }
220 
221 /// Rewrite the PHI nodes in the loop exit basic block and the split off
222 /// unswitched block.
223 ///
224 /// Because the exit block remains an exit from the loop, this rewrites the
225 /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
226 /// nodes into the unswitched basic block to select between the value in the
227 /// old preheader and the loop exit.
229  BasicBlock &UnswitchedBB,
230  BasicBlock &OldExitingBB,
231  BasicBlock &OldPH,
232  bool FullUnswitch) {
233  assert(&ExitBB != &UnswitchedBB &&
234  "Must have different loop exit and unswitched blocks!");
235  Instruction *InsertPt = &*UnswitchedBB.begin();
236  for (PHINode &PN : ExitBB.phis()) {
237  auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
238  PN.getName() + ".split", InsertPt);
239 
240  // Walk backwards over the old PHI node's inputs to minimize the cost of
241  // removing each one. We have to do this weird loop manually so that we
242  // create the same number of new incoming edges in the new PHI as we expect
243  // each case-based edge to be included in the unswitched switch in some
244  // cases.
245  // FIXME: This is really, really gross. It would be much cleaner if LLVM
246  // allowed us to create a single entry for a predecessor block without
247  // having separate entries for each "edge" even though these edges are
248  // required to produce identical results.
249  for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
250  if (PN.getIncomingBlock(i) != &OldExitingBB)
251  continue;
252 
253  Value *Incoming = PN.getIncomingValue(i);
254  if (FullUnswitch)
255  // No more edge from the old exiting block to the exit block.
256  PN.removeIncomingValue(i);
257 
258  NewPN->addIncoming(Incoming, &OldPH);
259  }
260 
261  // Now replace the old PHI with the new one and wire the old one in as an
262  // input to the new one.
263  PN.replaceAllUsesWith(NewPN);
264  NewPN->addIncoming(&PN, &ExitBB);
265  }
266 }
267 
268 /// Hoist the current loop up to the innermost loop containing a remaining exit.
269 ///
270 /// Because we've removed an exit from the loop, we may have changed the set of
271 /// loops reachable and need to move the current loop up the loop nest or even
272 /// to an entirely separate nest.
273 static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
274  DominatorTree &DT, LoopInfo &LI,
275  MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
276  // If the loop is already at the top level, we can't hoist it anywhere.
277  Loop *OldParentL = L.getParentLoop();
278  if (!OldParentL)
279  return;
280 
282  L.getExitBlocks(Exits);
283  Loop *NewParentL = nullptr;
284  for (auto *ExitBB : Exits)
285  if (Loop *ExitL = LI.getLoopFor(ExitBB))
286  if (!NewParentL || NewParentL->contains(ExitL))
287  NewParentL = ExitL;
288 
289  if (NewParentL == OldParentL)
290  return;
291 
292  // The new parent loop (if different) should always contain the old one.
293  if (NewParentL)
294  assert(NewParentL->contains(OldParentL) &&
295  "Can only hoist this loop up the nest!");
296 
297  // The preheader will need to move with the body of this loop. However,
298  // because it isn't in this loop we also need to update the primary loop map.
299  assert(OldParentL == LI.getLoopFor(&Preheader) &&
300  "Parent loop of this loop should contain this loop's preheader!");
301  LI.changeLoopFor(&Preheader, NewParentL);
302 
303  // Remove this loop from its old parent.
304  OldParentL->removeChildLoop(&L);
305 
306  // Add the loop either to the new parent or as a top-level loop.
307  if (NewParentL)
308  NewParentL->addChildLoop(&L);
309  else
310  LI.addTopLevelLoop(&L);
311 
312  // Remove this loops blocks from the old parent and every other loop up the
313  // nest until reaching the new parent. Also update all of these
314  // no-longer-containing loops to reflect the nesting change.
315  for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
316  OldContainingL = OldContainingL->getParentLoop()) {
317  llvm::erase_if(OldContainingL->getBlocksVector(),
318  [&](const BasicBlock *BB) {
319  return BB == &Preheader || L.contains(BB);
320  });
321 
322  OldContainingL->getBlocksSet().erase(&Preheader);
323  for (BasicBlock *BB : L.blocks())
324  OldContainingL->getBlocksSet().erase(BB);
325 
326  // Because we just hoisted a loop out of this one, we have essentially
327  // created new exit paths from it. That means we need to form LCSSA PHI
328  // nodes for values used in the no-longer-nested loop.
329  formLCSSA(*OldContainingL, DT, &LI, SE);
330 
331  // We shouldn't need to form dedicated exits because the exit introduced
332  // here is the (just split by unswitching) preheader. However, after trivial
333  // unswitching it is possible to get new non-dedicated exits out of parent
334  // loop so let's conservatively form dedicated exit blocks and figure out
335  // if we can optimize later.
336  formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
337  /*PreserveLCSSA*/ true);
338  }
339 }
340 
341 // Return the top-most loop containing ExitBB and having ExitBB as exiting block
342 // or the loop containing ExitBB, if there is no parent loop containing ExitBB
343 // as exiting block.
345  Loop *TopMost = LI.getLoopFor(ExitBB);
346  Loop *Current = TopMost;
347  while (Current) {
348  if (Current->isLoopExiting(ExitBB))
349  TopMost = Current;
350  Current = Current->getParentLoop();
351  }
352  return TopMost;
353 }
354 
355 /// Unswitch a trivial branch if the condition is loop invariant.
356 ///
357 /// This routine should only be called when loop code leading to the branch has
358 /// been validated as trivial (no side effects). This routine checks if the
359 /// condition is invariant and one of the successors is a loop exit. This
360 /// allows us to unswitch without duplicating the loop, making it trivial.
361 ///
362 /// If this routine fails to unswitch the branch it returns false.
363 ///
364 /// If the branch can be unswitched, this routine splits the preheader and
365 /// hoists the branch above that split. Preserves loop simplified form
366 /// (splitting the exit block as necessary). It simplifies the branch within
367 /// the loop to an unconditional branch but doesn't remove it entirely. Further
368 /// cleanup can be done with some simplify-cfg like pass.
369 ///
370 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
371 /// invalidated by this.
373  LoopInfo &LI, ScalarEvolution *SE,
374  MemorySSAUpdater *MSSAU) {
375  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
376  LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
377 
378  // The loop invariant values that we want to unswitch.
379  TinyPtrVector<Value *> Invariants;
380 
381  // When true, we're fully unswitching the branch rather than just unswitching
382  // some input conditions to the branch.
383  bool FullUnswitch = false;
384 
385  if (L.isLoopInvariant(BI.getCondition())) {
386  Invariants.push_back(BI.getCondition());
387  FullUnswitch = true;
388  } else {
389  if (auto *CondInst = dyn_cast<Instruction>(BI.getCondition()))
390  Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
391  if (Invariants.empty())
392  // Couldn't find invariant inputs!
393  return false;
394  }
395 
396  // Check that one of the branch's successors exits, and which one.
397  bool ExitDirection = true;
398  int LoopExitSuccIdx = 0;
399  auto *LoopExitBB = BI.getSuccessor(0);
400  if (L.contains(LoopExitBB)) {
401  ExitDirection = false;
402  LoopExitSuccIdx = 1;
403  LoopExitBB = BI.getSuccessor(1);
404  if (L.contains(LoopExitBB))
405  return false;
406  }
407  auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
408  auto *ParentBB = BI.getParent();
409  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB))
410  return false;
411 
412  // When unswitching only part of the branch's condition, we need the exit
413  // block to be reached directly from the partially unswitched input. This can
414  // be done when the exit block is along the true edge and the branch condition
415  // is a graph of `or` operations, or the exit block is along the false edge
416  // and the condition is a graph of `and` operations.
417  if (!FullUnswitch) {
418  if (ExitDirection) {
419  if (cast<Instruction>(BI.getCondition())->getOpcode() != Instruction::Or)
420  return false;
421  } else {
422  if (cast<Instruction>(BI.getCondition())->getOpcode() != Instruction::And)
423  return false;
424  }
425  }
426 
427  LLVM_DEBUG({
428  dbgs() << " unswitching trivial invariant conditions for: " << BI
429  << "\n";
430  for (Value *Invariant : Invariants) {
431  dbgs() << " " << *Invariant << " == true";
432  if (Invariant != Invariants.back())
433  dbgs() << " ||";
434  dbgs() << "\n";
435  }
436  });
437 
438  // If we have scalar evolutions, we need to invalidate them including this
439  // loop, the loop containing the exit block and the topmost parent loop
440  // exiting via LoopExitBB.
441  if (SE) {
442  if (Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
443  SE->forgetLoop(ExitL);
444  else
445  // Forget the entire nest as this exits the entire nest.
446  SE->forgetTopmostLoop(&L);
447  }
448 
449  if (MSSAU && VerifyMemorySSA)
450  MSSAU->getMemorySSA()->verifyMemorySSA();
451 
452  // Split the preheader, so that we know that there is a safe place to insert
453  // the conditional branch. We will change the preheader to have a conditional
454  // branch on LoopCond.
455  BasicBlock *OldPH = L.getLoopPreheader();
456  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
457 
458  // Now that we have a place to insert the conditional branch, create a place
459  // to branch to: this is the exit block out of the loop that we are
460  // unswitching. We need to split this if there are other loop predecessors.
461  // Because the loop is in simplified form, *any* other predecessor is enough.
462  BasicBlock *UnswitchedBB;
463  if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
464  assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
465  "A branch's parent isn't a predecessor!");
466  UnswitchedBB = LoopExitBB;
467  } else {
468  UnswitchedBB =
469  SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
470  }
471 
472  if (MSSAU && VerifyMemorySSA)
473  MSSAU->getMemorySSA()->verifyMemorySSA();
474 
475  // Actually move the invariant uses into the unswitched position. If possible,
476  // we do this by moving the instructions, but when doing partial unswitching
477  // we do it by building a new merge of the values in the unswitched position.
478  OldPH->getTerminator()->eraseFromParent();
479  if (FullUnswitch) {
480  // If fully unswitching, we can use the existing branch instruction.
481  // Splice it into the old PH to gate reaching the new preheader and re-point
482  // its successors.
483  OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
484  BI);
485  if (MSSAU) {
486  // Temporarily clone the terminator, to make MSSA update cheaper by
487  // separating "insert edge" updates from "remove edge" ones.
488  ParentBB->getInstList().push_back(BI.clone());
489  } else {
490  // Create a new unconditional branch that will continue the loop as a new
491  // terminator.
492  BranchInst::Create(ContinueBB, ParentBB);
493  }
494  BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
495  BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
496  } else {
497  // Only unswitching a subset of inputs to the condition, so we will need to
498  // build a new branch that merges the invariant inputs.
499  if (ExitDirection)
500  assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
501  Instruction::Or &&
502  "Must have an `or` of `i1`s for the condition!");
503  else
504  assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
505  Instruction::And &&
506  "Must have an `and` of `i1`s for the condition!");
507  buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
508  *UnswitchedBB, *NewPH);
509  }
510 
511  // Update the dominator tree with the added edge.
512  DT.insertEdge(OldPH, UnswitchedBB);
513 
514  // After the dominator tree was updated with the added edge, update MemorySSA
515  // if available.
516  if (MSSAU) {
518  Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
519  MSSAU->applyInsertUpdates(Updates, DT);
520  }
521 
522  // Finish updating dominator tree and memory ssa for full unswitch.
523  if (FullUnswitch) {
524  if (MSSAU) {
525  // Remove the cloned branch instruction.
526  ParentBB->getTerminator()->eraseFromParent();
527  // Create unconditional branch now.
528  BranchInst::Create(ContinueBB, ParentBB);
529  MSSAU->removeEdge(ParentBB, LoopExitBB);
530  }
531  DT.deleteEdge(ParentBB, LoopExitBB);
532  }
533 
534  if (MSSAU && VerifyMemorySSA)
535  MSSAU->getMemorySSA()->verifyMemorySSA();
536 
537  // Rewrite the relevant PHI nodes.
538  if (UnswitchedBB == LoopExitBB)
539  rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
540  else
541  rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
542  *ParentBB, *OldPH, FullUnswitch);
543 
544  // The constant we can replace all of our invariants with inside the loop
545  // body. If any of the invariants have a value other than this the loop won't
546  // be entered.
547  ConstantInt *Replacement = ExitDirection
550 
551  // Since this is an i1 condition we can also trivially replace uses of it
552  // within the loop with a constant.
553  for (Value *Invariant : Invariants)
554  replaceLoopInvariantUses(L, Invariant, *Replacement);
555 
556  // If this was full unswitching, we may have changed the nesting relationship
557  // for this loop so hoist it to its correct parent if needed.
558  if (FullUnswitch)
559  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
560 
561  if (MSSAU && VerifyMemorySSA)
562  MSSAU->getMemorySSA()->verifyMemorySSA();
563 
564  LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
565  ++NumTrivial;
566  ++NumBranches;
567  return true;
568 }
569 
570 /// Unswitch a trivial switch if the condition is loop invariant.
571 ///
572 /// This routine should only be called when loop code leading to the switch has
573 /// been validated as trivial (no side effects). This routine checks if the
574 /// condition is invariant and that at least one of the successors is a loop
575 /// exit. This allows us to unswitch without duplicating the loop, making it
576 /// trivial.
577 ///
578 /// If this routine fails to unswitch the switch it returns false.
579 ///
580 /// If the switch can be unswitched, this routine splits the preheader and
581 /// copies the switch above that split. If the default case is one of the
582 /// exiting cases, it copies the non-exiting cases and points them at the new
583 /// preheader. If the default case is not exiting, it copies the exiting cases
584 /// and points the default at the preheader. It preserves loop simplified form
585 /// (splitting the exit blocks as necessary). It simplifies the switch within
586 /// the loop by removing now-dead cases. If the default case is one of those
587 /// unswitched, it replaces its destination with a new basic block containing
588 /// only unreachable. Such basic blocks, while technically loop exits, are not
589 /// considered for unswitching so this is a stable transform and the same
590 /// switch will not be revisited. If after unswitching there is only a single
591 /// in-loop successor, the switch is further simplified to an unconditional
592 /// branch. Still more cleanup can be done with some simplify-cfg like pass.
593 ///
594 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
595 /// invalidated by this.
597  LoopInfo &LI, ScalarEvolution *SE,
598  MemorySSAUpdater *MSSAU) {
599  LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
600  Value *LoopCond = SI.getCondition();
601 
602  // If this isn't switching on an invariant condition, we can't unswitch it.
603  if (!L.isLoopInvariant(LoopCond))
604  return false;
605 
606  auto *ParentBB = SI.getParent();
607 
608  // The same check must be used both for the default and the exit cases. We
609  // should never leave edges from the switch instruction to a basic block that
610  // we are unswitching, hence the condition used to determine the default case
611  // needs to also be used to populate ExitCaseIndices, which is then used to
612  // remove cases from the switch.
613  auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
614  // BBToCheck is not an exit block if it is inside loop L.
615  if (L.contains(&BBToCheck))
616  return false;
617  // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
618  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
619  return false;
620  // We do not unswitch a block that only has an unreachable statement, as
621  // it's possible this is a previously unswitched block. Only unswitch if
622  // either the terminator is not unreachable, or, if it is, it's not the only
623  // instruction in the block.
624  auto *TI = BBToCheck.getTerminator();
625  bool isUnreachable = isa<UnreachableInst>(TI);
626  return !isUnreachable ||
627  (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
628  };
629 
630  SmallVector<int, 4> ExitCaseIndices;
631  for (auto Case : SI.cases())
632  if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
633  ExitCaseIndices.push_back(Case.getCaseIndex());
634  BasicBlock *DefaultExitBB = nullptr;
637  if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
638  DefaultExitBB = SI.getDefaultDest();
639  } else if (ExitCaseIndices.empty())
640  return false;
641 
642  LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
643 
644  if (MSSAU && VerifyMemorySSA)
645  MSSAU->getMemorySSA()->verifyMemorySSA();
646 
647  // We may need to invalidate SCEVs for the outermost loop reached by any of
648  // the exits.
649  Loop *OuterL = &L;
650 
651  if (DefaultExitBB) {
652  // Clear out the default destination temporarily to allow accurate
653  // predecessor lists to be examined below.
654  SI.setDefaultDest(nullptr);
655  // Check the loop containing this exit.
656  Loop *ExitL = LI.getLoopFor(DefaultExitBB);
657  if (!ExitL || ExitL->contains(OuterL))
658  OuterL = ExitL;
659  }
660 
661  // Store the exit cases into a separate data structure and remove them from
662  // the switch.
663  SmallVector<std::tuple<ConstantInt *, BasicBlock *,
665  4> ExitCases;
666  ExitCases.reserve(ExitCaseIndices.size());
668  // We walk the case indices backwards so that we remove the last case first
669  // and don't disrupt the earlier indices.
670  for (unsigned Index : reverse(ExitCaseIndices)) {
671  auto CaseI = SI.case_begin() + Index;
672  // Compute the outer loop from this exit.
673  Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
674  if (!ExitL || ExitL->contains(OuterL))
675  OuterL = ExitL;
676  // Save the value of this case.
677  auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
678  ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
679  // Delete the unswitched cases.
680  SIW.removeCase(CaseI);
681  }
682 
683  if (SE) {
684  if (OuterL)
685  SE->forgetLoop(OuterL);
686  else
687  SE->forgetTopmostLoop(&L);
688  }
689 
690  // Check if after this all of the remaining cases point at the same
691  // successor.
692  BasicBlock *CommonSuccBB = nullptr;
693  if (SI.getNumCases() > 0 &&
694  all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
695  return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
696  }))
697  CommonSuccBB = SI.case_begin()->getCaseSuccessor();
698  if (!DefaultExitBB) {
699  // If we're not unswitching the default, we need it to match any cases to
700  // have a common successor or if we have no cases it is the common
701  // successor.
702  if (SI.getNumCases() == 0)
703  CommonSuccBB = SI.getDefaultDest();
704  else if (SI.getDefaultDest() != CommonSuccBB)
705  CommonSuccBB = nullptr;
706  }
707 
708  // Split the preheader, so that we know that there is a safe place to insert
709  // the switch.
710  BasicBlock *OldPH = L.getLoopPreheader();
711  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
712  OldPH->getTerminator()->eraseFromParent();
713 
714  // Now add the unswitched switch.
715  auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
716  SwitchInstProfUpdateWrapper NewSIW(*NewSI);
717 
718  // Rewrite the IR for the unswitched basic blocks. This requires two steps.
719  // First, we split any exit blocks with remaining in-loop predecessors. Then
720  // we update the PHIs in one of two ways depending on if there was a split.
721  // We walk in reverse so that we split in the same order as the cases
722  // appeared. This is purely for convenience of reading the resulting IR, but
723  // it doesn't cost anything really.
724  SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
726  // Handle the default exit if necessary.
727  // FIXME: It'd be great if we could merge this with the loop below but LLVM's
728  // ranges aren't quite powerful enough yet.
729  if (DefaultExitBB) {
730  if (pred_empty(DefaultExitBB)) {
731  UnswitchedExitBBs.insert(DefaultExitBB);
732  rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
733  } else {
734  auto *SplitBB =
735  SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
736  rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
737  *ParentBB, *OldPH,
738  /*FullUnswitch*/ true);
739  DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
740  }
741  }
742  // Note that we must use a reference in the for loop so that we update the
743  // container.
744  for (auto &ExitCase : reverse(ExitCases)) {
745  // Grab a reference to the exit block in the pair so that we can update it.
746  BasicBlock *ExitBB = std::get<1>(ExitCase);
747 
748  // If this case is the last edge into the exit block, we can simply reuse it
749  // as it will no longer be a loop exit. No mapping necessary.
750  if (pred_empty(ExitBB)) {
751  // Only rewrite once.
752  if (UnswitchedExitBBs.insert(ExitBB).second)
753  rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
754  continue;
755  }
756 
757  // Otherwise we need to split the exit block so that we retain an exit
758  // block from the loop and a target for the unswitched condition.
759  BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
760  if (!SplitExitBB) {
761  // If this is the first time we see this, do the split and remember it.
762  SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
763  rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
764  *ParentBB, *OldPH,
765  /*FullUnswitch*/ true);
766  }
767  // Update the case pair to point to the split block.
768  std::get<1>(ExitCase) = SplitExitBB;
769  }
770 
771  // Now add the unswitched cases. We do this in reverse order as we built them
772  // in reverse order.
773  for (auto &ExitCase : reverse(ExitCases)) {
774  ConstantInt *CaseVal = std::get<0>(ExitCase);
775  BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
776 
777  NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
778  }
779 
780  // If the default was unswitched, re-point it and add explicit cases for
781  // entering the loop.
782  if (DefaultExitBB) {
783  NewSIW->setDefaultDest(DefaultExitBB);
784  NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
785 
786  // We removed all the exit cases, so we just copy the cases to the
787  // unswitched switch.
788  for (const auto &Case : SI.cases())
789  NewSIW.addCase(Case.getCaseValue(), NewPH,
791  } else if (DefaultCaseWeight) {
792  // We have to set branch weight of the default case.
793  uint64_t SW = *DefaultCaseWeight;
794  for (const auto &Case : SI.cases()) {
795  auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
796  assert(W &&
797  "case weight must be defined as default case weight is defined");
798  SW += *W;
799  }
800  NewSIW.setSuccessorWeight(0, SW);
801  }
802 
803  // If we ended up with a common successor for every path through the switch
804  // after unswitching, rewrite it to an unconditional branch to make it easy
805  // to recognize. Otherwise we potentially have to recognize the default case
806  // pointing at unreachable and other complexity.
807  if (CommonSuccBB) {
808  BasicBlock *BB = SI.getParent();
809  // We may have had multiple edges to this common successor block, so remove
810  // them as predecessors. We skip the first one, either the default or the
811  // actual first case.
812  bool SkippedFirst = DefaultExitBB == nullptr;
813  for (auto Case : SI.cases()) {
814  assert(Case.getCaseSuccessor() == CommonSuccBB &&
815  "Non-common successor!");
816  (void)Case;
817  if (!SkippedFirst) {
818  SkippedFirst = true;
819  continue;
820  }
821  CommonSuccBB->removePredecessor(BB,
822  /*KeepOneInputPHIs*/ true);
823  }
824  // Now nuke the switch and replace it with a direct branch.
825  SIW.eraseFromParent();
826  BranchInst::Create(CommonSuccBB, BB);
827  } else if (DefaultExitBB) {
828  assert(SI.getNumCases() > 0 &&
829  "If we had no cases we'd have a common successor!");
830  // Move the last case to the default successor. This is valid as if the
831  // default got unswitched it cannot be reached. This has the advantage of
832  // being simple and keeping the number of edges from this switch to
833  // successors the same, and avoiding any PHI update complexity.
834  auto LastCaseI = std::prev(SI.case_end());
835 
836  SI.setDefaultDest(LastCaseI->getCaseSuccessor());
837  SIW.setSuccessorWeight(
838  0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
839  SIW.removeCase(LastCaseI);
840  }
841 
842  // Walk the unswitched exit blocks and the unswitched split blocks and update
843  // the dominator tree based on the CFG edits. While we are walking unordered
844  // containers here, the API for applyUpdates takes an unordered list of
845  // updates and requires them to not contain duplicates.
847  for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
848  DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
849  DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
850  }
851  for (auto SplitUnswitchedPair : SplitExitBBMap) {
852  DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
853  DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
854  }
855 
856  if (MSSAU) {
857  MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
858  if (VerifyMemorySSA)
859  MSSAU->getMemorySSA()->verifyMemorySSA();
860  } else {
861  DT.applyUpdates(DTUpdates);
862  }
863 
865 
866  // We may have changed the nesting relationship for this loop so hoist it to
867  // its correct parent if needed.
868  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
869 
870  if (MSSAU && VerifyMemorySSA)
871  MSSAU->getMemorySSA()->verifyMemorySSA();
872 
873  ++NumTrivial;
874  ++NumSwitches;
875  LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
876  return true;
877 }
878 
879 /// This routine scans the loop to find a branch or switch which occurs before
880 /// any side effects occur. These can potentially be unswitched without
881 /// duplicating the loop. If a branch or switch is successfully unswitched the
882 /// scanning continues to see if subsequent branches or switches have become
883 /// trivial. Once all trivial candidates have been unswitched, this routine
884 /// returns.
885 ///
886 /// The return value indicates whether anything was unswitched (and therefore
887 /// changed).
888 ///
889 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
890 /// invalidated by this.
892  LoopInfo &LI, ScalarEvolution *SE,
893  MemorySSAUpdater *MSSAU) {
894  bool Changed = false;
895 
896  // If loop header has only one reachable successor we should keep looking for
897  // trivial condition candidates in the successor as well. An alternative is
898  // to constant fold conditions and merge successors into loop header (then we
899  // only need to check header's terminator). The reason for not doing this in
900  // LoopUnswitch pass is that it could potentially break LoopPassManager's
901  // invariants. Folding dead branches could either eliminate the current loop
902  // or make other loops unreachable. LCSSA form might also not be preserved
903  // after deleting branches. The following code keeps traversing loop header's
904  // successors until it finds the trivial condition candidate (condition that
905  // is not a constant). Since unswitching generates branches with constant
906  // conditions, this scenario could be very common in practice.
907  BasicBlock *CurrentBB = L.getHeader();
909  Visited.insert(CurrentBB);
910  do {
911  // Check if there are any side-effecting instructions (e.g. stores, calls,
912  // volatile loads) in the part of the loop that the code *would* execute
913  // without unswitching.
914  if (MSSAU) // Possible early exit with MSSA
915  if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
916  if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
917  return Changed;
918  if (llvm::any_of(*CurrentBB,
919  [](Instruction &I) { return I.mayHaveSideEffects(); }))
920  return Changed;
921 
922  Instruction *CurrentTerm = CurrentBB->getTerminator();
923 
924  if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
925  // Don't bother trying to unswitch past a switch with a constant
926  // condition. This should be removed prior to running this pass by
927  // simplify-cfg.
928  if (isa<Constant>(SI->getCondition()))
929  return Changed;
930 
931  if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
932  // Couldn't unswitch this one so we're done.
933  return Changed;
934 
935  // Mark that we managed to unswitch something.
936  Changed = true;
937 
938  // If unswitching turned the terminator into an unconditional branch then
939  // we can continue. The unswitching logic specifically works to fold any
940  // cases it can into an unconditional branch to make it easier to
941  // recognize here.
942  auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
943  if (!BI || BI->isConditional())
944  return Changed;
945 
946  CurrentBB = BI->getSuccessor(0);
947  continue;
948  }
949 
950  auto *BI = dyn_cast<BranchInst>(CurrentTerm);
951  if (!BI)
952  // We do not understand other terminator instructions.
953  return Changed;
954 
955  // Don't bother trying to unswitch past an unconditional branch or a branch
956  // with a constant value. These should be removed by simplify-cfg prior to
957  // running this pass.
958  if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
959  return Changed;
960 
961  // Found a trivial condition candidate: non-foldable conditional branch. If
962  // we fail to unswitch this, we can't do anything else that is trivial.
963  if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
964  return Changed;
965 
966  // Mark that we managed to unswitch something.
967  Changed = true;
968 
969  // If we only unswitched some of the conditions feeding the branch, we won't
970  // have collapsed it to a single successor.
971  BI = cast<BranchInst>(CurrentBB->getTerminator());
972  if (BI->isConditional())
973  return Changed;
974 
975  // Follow the newly unconditional branch into its successor.
976  CurrentBB = BI->getSuccessor(0);
977 
978  // When continuing, if we exit the loop or reach a previous visited block,
979  // then we can not reach any trivial condition candidates (unfoldable
980  // branch instructions or switch instructions) and no unswitch can happen.
981  } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
982 
983  return Changed;
984 }
985 
986 /// Build the cloned blocks for an unswitched copy of the given loop.
987 ///
988 /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
989 /// after the split block (`SplitBB`) that will be used to select between the
990 /// cloned and original loop.
991 ///
992 /// This routine handles cloning all of the necessary loop blocks and exit
993 /// blocks including rewriting their instructions and the relevant PHI nodes.
994 /// Any loop blocks or exit blocks which are dominated by a different successor
995 /// than the one for this clone of the loop blocks can be trivially skipped. We
996 /// use the `DominatingSucc` map to determine whether a block satisfies that
997 /// property with a simple map lookup.
998 ///
999 /// It also correctly creates the unconditional branch in the cloned
1000 /// unswitched parent block to only point at the unswitched successor.
1001 ///
1002 /// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1003 /// block splitting is correctly reflected in `LoopInfo`, essentially all of
1004 /// the cloned blocks (and their loops) are left without full `LoopInfo`
1005 /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1006 /// blocks to them but doesn't create the cloned `DominatorTree` structure and
1007 /// instead the caller must recompute an accurate DT. It *does* correctly
1008 /// update the `AssumptionCache` provided in `AC`.
1010  Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1011  ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1012  BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1013  const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1014  ValueToValueMapTy &VMap,
1016  DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
1017  SmallVector<BasicBlock *, 4> NewBlocks;
1018  NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1019 
1020  // We will need to clone a bunch of blocks, wrap up the clone operation in
1021  // a helper.
1022  auto CloneBlock = [&](BasicBlock *OldBB) {
1023  // Clone the basic block and insert it before the new preheader.
1024  BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1025  NewBB->moveBefore(LoopPH);
1026 
1027  // Record this block and the mapping.
1028  NewBlocks.push_back(NewBB);
1029  VMap[OldBB] = NewBB;
1030 
1031  return NewBB;
1032  };
1033 
1034  // We skip cloning blocks when they have a dominating succ that is not the
1035  // succ we are cloning for.
1036  auto SkipBlock = [&](BasicBlock *BB) {
1037  auto It = DominatingSucc.find(BB);
1038  return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1039  };
1040 
1041  // First, clone the preheader.
1042  auto *ClonedPH = CloneBlock(LoopPH);
1043 
1044  // Then clone all the loop blocks, skipping the ones that aren't necessary.
1045  for (auto *LoopBB : L.blocks())
1046  if (!SkipBlock(LoopBB))
1047  CloneBlock(LoopBB);
1048 
1049  // Split all the loop exit edges so that when we clone the exit blocks, if
1050  // any of the exit blocks are *also* a preheader for some other loop, we
1051  // don't create multiple predecessors entering the loop header.
1052  for (auto *ExitBB : ExitBlocks) {
1053  if (SkipBlock(ExitBB))
1054  continue;
1055 
1056  // When we are going to clone an exit, we don't need to clone all the
1057  // instructions in the exit block and we want to ensure we have an easy
1058  // place to merge the CFG, so split the exit first. This is always safe to
1059  // do because there cannot be any non-loop predecessors of a loop exit in
1060  // loop simplified form.
1061  auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
1062 
1063  // Rearrange the names to make it easier to write test cases by having the
1064  // exit block carry the suffix rather than the merge block carrying the
1065  // suffix.
1066  MergeBB->takeName(ExitBB);
1067  ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1068 
1069  // Now clone the original exit block.
1070  auto *ClonedExitBB = CloneBlock(ExitBB);
1071  assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1072  "Exit block should have been split to have one successor!");
1073  assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1074  "Cloned exit block has the wrong successor!");
1075 
1076  // Remap any cloned instructions and create a merge phi node for them.
1077  for (auto ZippedInsts : llvm::zip_first(
1078  llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1079  llvm::make_range(ClonedExitBB->begin(),
1080  std::prev(ClonedExitBB->end())))) {
1081  Instruction &I = std::get<0>(ZippedInsts);
1082  Instruction &ClonedI = std::get<1>(ZippedInsts);
1083 
1084  // The only instructions in the exit block should be PHI nodes and
1085  // potentially a landing pad.
1086  assert(
1087  (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
1088  "Bad instruction in exit block!");
1089  // We should have a value map between the instruction and its clone.
1090  assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1091 
1092  auto *MergePN =
1093  PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
1094  &*MergeBB->getFirstInsertionPt());
1095  I.replaceAllUsesWith(MergePN);
1096  MergePN->addIncoming(&I, ExitBB);
1097  MergePN->addIncoming(&ClonedI, ClonedExitBB);
1098  }
1099  }
1100 
1101  // Rewrite the instructions in the cloned blocks to refer to the instructions
1102  // in the cloned blocks. We have to do this as a second pass so that we have
1103  // everything available. Also, we have inserted new instructions which may
1104  // include assume intrinsics, so we update the assumption cache while
1105  // processing this.
1106  for (auto *ClonedBB : NewBlocks)
1107  for (Instruction &I : *ClonedBB) {
1108  RemapInstruction(&I, VMap,
1110  if (auto *II = dyn_cast<IntrinsicInst>(&I))
1111  if (II->getIntrinsicID() == Intrinsic::assume)
1112  AC.registerAssumption(II);
1113  }
1114 
1115  // Update any PHI nodes in the cloned successors of the skipped blocks to not
1116  // have spurious incoming values.
1117  for (auto *LoopBB : L.blocks())
1118  if (SkipBlock(LoopBB))
1119  for (auto *SuccBB : successors(LoopBB))
1120  if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1121  for (PHINode &PN : ClonedSuccBB->phis())
1122  PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1123 
1124  // Remove the cloned parent as a predecessor of any successor we ended up
1125  // cloning other than the unswitched one.
1126  auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1127  for (auto *SuccBB : successors(ParentBB)) {
1128  if (SuccBB == UnswitchedSuccBB)
1129  continue;
1130 
1131  auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1132  if (!ClonedSuccBB)
1133  continue;
1134 
1135  ClonedSuccBB->removePredecessor(ClonedParentBB,
1136  /*KeepOneInputPHIs*/ true);
1137  }
1138 
1139  // Replace the cloned branch with an unconditional branch to the cloned
1140  // unswitched successor.
1141  auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1142  Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1143  // Trivial Simplification. If Terminator is a conditional branch and
1144  // condition becomes dead - erase it.
1145  Value *ClonedConditionToErase = nullptr;
1146  if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1147  ClonedConditionToErase = BI->getCondition();
1148  else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1149  ClonedConditionToErase = SI->getCondition();
1150 
1151  ClonedTerminator->eraseFromParent();
1152  BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1153 
1154  if (ClonedConditionToErase)
1155  RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1156  MSSAU);
1157 
1158  // If there are duplicate entries in the PHI nodes because of multiple edges
1159  // to the unswitched successor, we need to nuke all but one as we replaced it
1160  // with a direct branch.
1161  for (PHINode &PN : ClonedSuccBB->phis()) {
1162  bool Found = false;
1163  // Loop over the incoming operands backwards so we can easily delete as we
1164  // go without invalidating the index.
1165  for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1166  if (PN.getIncomingBlock(i) != ClonedParentBB)
1167  continue;
1168  if (!Found) {
1169  Found = true;
1170  continue;
1171  }
1172  PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1173  }
1174  }
1175 
1176  // Record the domtree updates for the new blocks.
1178  for (auto *ClonedBB : NewBlocks) {
1179  for (auto *SuccBB : successors(ClonedBB))
1180  if (SuccSet.insert(SuccBB).second)
1181  DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1182  SuccSet.clear();
1183  }
1184 
1185  return ClonedPH;
1186 }
1187 
1188 /// Recursively clone the specified loop and all of its children.
1189 ///
1190 /// The target parent loop for the clone should be provided, or can be null if
1191 /// the clone is a top-level loop. While cloning, all the blocks are mapped
1192 /// with the provided value map. The entire original loop must be present in
1193 /// the value map. The cloned loop is returned.
1194 static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1195  const ValueToValueMapTy &VMap, LoopInfo &LI) {
1196  auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1197  assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1198  ClonedL.reserveBlocks(OrigL.getNumBlocks());
1199  for (auto *BB : OrigL.blocks()) {
1200  auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1201  ClonedL.addBlockEntry(ClonedBB);
1202  if (LI.getLoopFor(BB) == &OrigL)
1203  LI.changeLoopFor(ClonedBB, &ClonedL);
1204  }
1205  };
1206 
1207  // We specially handle the first loop because it may get cloned into
1208  // a different parent and because we most commonly are cloning leaf loops.
1209  Loop *ClonedRootL = LI.AllocateLoop();
1210  if (RootParentL)
1211  RootParentL->addChildLoop(ClonedRootL);
1212  else
1213  LI.addTopLevelLoop(ClonedRootL);
1214  AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1215 
1216  if (OrigRootL.isInnermost())
1217  return ClonedRootL;
1218 
1219  // If we have a nest, we can quickly clone the entire loop nest using an
1220  // iterative approach because it is a tree. We keep the cloned parent in the
1221  // data structure to avoid repeatedly querying through a map to find it.
1222  SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1223  // Build up the loops to clone in reverse order as we'll clone them from the
1224  // back.
1225  for (Loop *ChildL : llvm::reverse(OrigRootL))
1226  LoopsToClone.push_back({ClonedRootL, ChildL});
1227  do {
1228  Loop *ClonedParentL, *L;
1229  std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1230  Loop *ClonedL = LI.AllocateLoop();
1231  ClonedParentL->addChildLoop(ClonedL);
1232  AddClonedBlocksToLoop(*L, *ClonedL);
1233  for (Loop *ChildL : llvm::reverse(*L))
1234  LoopsToClone.push_back({ClonedL, ChildL});
1235  } while (!LoopsToClone.empty());
1236 
1237  return ClonedRootL;
1238 }
1239 
1240 /// Build the cloned loops of an original loop from unswitching.
1241 ///
1242 /// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1243 /// operation. We need to re-verify that there even is a loop (as the backedge
1244 /// may not have been cloned), and even if there are remaining backedges the
1245 /// backedge set may be different. However, we know that each child loop is
1246 /// undisturbed, we only need to find where to place each child loop within
1247 /// either any parent loop or within a cloned version of the original loop.
1248 ///
1249 /// Because child loops may end up cloned outside of any cloned version of the
1250 /// original loop, multiple cloned sibling loops may be created. All of them
1251 /// are returned so that the newly introduced loop nest roots can be
1252 /// identified.
1253 static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1254  const ValueToValueMapTy &VMap, LoopInfo &LI,
1255  SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1256  Loop *ClonedL = nullptr;
1257 
1258  auto *OrigPH = OrigL.getLoopPreheader();
1259  auto *OrigHeader = OrigL.getHeader();
1260 
1261  auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1262  auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1263 
1264  // We need to know the loops of the cloned exit blocks to even compute the
1265  // accurate parent loop. If we only clone exits to some parent of the
1266  // original parent, we want to clone into that outer loop. We also keep track
1267  // of the loops that our cloned exit blocks participate in.
1268  Loop *ParentL = nullptr;
1269  SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1271  ClonedExitsInLoops.reserve(ExitBlocks.size());
1272  for (auto *ExitBB : ExitBlocks)
1273  if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1274  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1275  ExitLoopMap[ClonedExitBB] = ExitL;
1276  ClonedExitsInLoops.push_back(ClonedExitBB);
1277  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1278  ParentL = ExitL;
1279  }
1280  assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1281  ParentL->contains(OrigL.getParentLoop())) &&
1282  "The computed parent loop should always contain (or be) the parent of "
1283  "the original loop.");
1284 
1285  // We build the set of blocks dominated by the cloned header from the set of
1286  // cloned blocks out of the original loop. While not all of these will
1287  // necessarily be in the cloned loop, it is enough to establish that they
1288  // aren't in unreachable cycles, etc.
1289  SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1290  for (auto *BB : OrigL.blocks())
1291  if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1292  ClonedLoopBlocks.insert(ClonedBB);
1293 
1294  // Rebuild the set of blocks that will end up in the cloned loop. We may have
1295  // skipped cloning some region of this loop which can in turn skip some of
1296  // the backedges so we have to rebuild the blocks in the loop based on the
1297  // backedges that remain after cloning.
1299  SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1300  for (auto *Pred : predecessors(ClonedHeader)) {
1301  // The only possible non-loop header predecessor is the preheader because
1302  // we know we cloned the loop in simplified form.
1303  if (Pred == ClonedPH)
1304  continue;
1305 
1306  // Because the loop was in simplified form, the only non-loop predecessor
1307  // should be the preheader.
1308  assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1309  "header other than the preheader "
1310  "that is not part of the loop!");
1311 
1312  // Insert this block into the loop set and on the first visit (and if it
1313  // isn't the header we're currently walking) put it into the worklist to
1314  // recurse through.
1315  if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1316  Worklist.push_back(Pred);
1317  }
1318 
1319  // If we had any backedges then there *is* a cloned loop. Put the header into
1320  // the loop set and then walk the worklist backwards to find all the blocks
1321  // that remain within the loop after cloning.
1322  if (!BlocksInClonedLoop.empty()) {
1323  BlocksInClonedLoop.insert(ClonedHeader);
1324 
1325  while (!Worklist.empty()) {
1326  BasicBlock *BB = Worklist.pop_back_val();
1327  assert(BlocksInClonedLoop.count(BB) &&
1328  "Didn't put block into the loop set!");
1329 
1330  // Insert any predecessors that are in the possible set into the cloned
1331  // set, and if the insert is successful, add them to the worklist. Note
1332  // that we filter on the blocks that are definitely reachable via the
1333  // backedge to the loop header so we may prune out dead code within the
1334  // cloned loop.
1335  for (auto *Pred : predecessors(BB))
1336  if (ClonedLoopBlocks.count(Pred) &&
1337  BlocksInClonedLoop.insert(Pred).second)
1338  Worklist.push_back(Pred);
1339  }
1340 
1341  ClonedL = LI.AllocateLoop();
1342  if (ParentL) {
1343  ParentL->addBasicBlockToLoop(ClonedPH, LI);
1344  ParentL->addChildLoop(ClonedL);
1345  } else {
1346  LI.addTopLevelLoop(ClonedL);
1347  }
1348  NonChildClonedLoops.push_back(ClonedL);
1349 
1350  ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1351  // We don't want to just add the cloned loop blocks based on how we
1352  // discovered them. The original order of blocks was carefully built in
1353  // a way that doesn't rely on predecessor ordering. Rather than re-invent
1354  // that logic, we just re-walk the original blocks (and those of the child
1355  // loops) and filter them as we add them into the cloned loop.
1356  for (auto *BB : OrigL.blocks()) {
1357  auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1358  if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1359  continue;
1360 
1361  // Directly add the blocks that are only in this loop.
1362  if (LI.getLoopFor(BB) == &OrigL) {
1363  ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1364  continue;
1365  }
1366 
1367  // We want to manually add it to this loop and parents.
1368  // Registering it with LoopInfo will happen when we clone the top
1369  // loop for this block.
1370  for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1371  PL->addBlockEntry(ClonedBB);
1372  }
1373 
1374  // Now add each child loop whose header remains within the cloned loop. All
1375  // of the blocks within the loop must satisfy the same constraints as the
1376  // header so once we pass the header checks we can just clone the entire
1377  // child loop nest.
1378  for (Loop *ChildL : OrigL) {
1379  auto *ClonedChildHeader =
1380  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1381  if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1382  continue;
1383 
1384 #ifndef NDEBUG
1385  // We should never have a cloned child loop header but fail to have
1386  // all of the blocks for that child loop.
1387  for (auto *ChildLoopBB : ChildL->blocks())
1388  assert(BlocksInClonedLoop.count(
1389  cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1390  "Child cloned loop has a header within the cloned outer "
1391  "loop but not all of its blocks!");
1392 #endif
1393 
1394  cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1395  }
1396  }
1397 
1398  // Now that we've handled all the components of the original loop that were
1399  // cloned into a new loop, we still need to handle anything from the original
1400  // loop that wasn't in a cloned loop.
1401 
1402  // Figure out what blocks are left to place within any loop nest containing
1403  // the unswitched loop. If we never formed a loop, the cloned PH is one of
1404  // them.
1405  SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1406  if (BlocksInClonedLoop.empty())
1407  UnloopedBlockSet.insert(ClonedPH);
1408  for (auto *ClonedBB : ClonedLoopBlocks)
1409  if (!BlocksInClonedLoop.count(ClonedBB))
1410  UnloopedBlockSet.insert(ClonedBB);
1411 
1412  // Copy the cloned exits and sort them in ascending loop depth, we'll work
1413  // backwards across these to process them inside out. The order shouldn't
1414  // matter as we're just trying to build up the map from inside-out; we use
1415  // the map in a more stably ordered way below.
1416  auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1417  llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1418  return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1419  ExitLoopMap.lookup(RHS)->getLoopDepth();
1420  });
1421 
1422  // Populate the existing ExitLoopMap with everything reachable from each
1423  // exit, starting from the inner most exit.
1424  while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1425  assert(Worklist.empty() && "Didn't clear worklist!");
1426 
1427  BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1428  Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1429 
1430  // Walk the CFG back until we hit the cloned PH adding everything reachable
1431  // and in the unlooped set to this exit block's loop.
1432  Worklist.push_back(ExitBB);
1433  do {
1434  BasicBlock *BB = Worklist.pop_back_val();
1435  // We can stop recursing at the cloned preheader (if we get there).
1436  if (BB == ClonedPH)
1437  continue;
1438 
1439  for (BasicBlock *PredBB : predecessors(BB)) {
1440  // If this pred has already been moved to our set or is part of some
1441  // (inner) loop, no update needed.
1442  if (!UnloopedBlockSet.erase(PredBB)) {
1443  assert(
1444  (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1445  "Predecessor not mapped to a loop!");
1446  continue;
1447  }
1448 
1449  // We just insert into the loop set here. We'll add these blocks to the
1450  // exit loop after we build up the set in an order that doesn't rely on
1451  // predecessor order (which in turn relies on use list order).
1452  bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1453  (void)Inserted;
1454  assert(Inserted && "Should only visit an unlooped block once!");
1455 
1456  // And recurse through to its predecessors.
1457  Worklist.push_back(PredBB);
1458  }
1459  } while (!Worklist.empty());
1460  }
1461 
1462  // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1463  // blocks to their outer loops, walk the cloned blocks and the cloned exits
1464  // in their original order adding them to the correct loop.
1465 
1466  // We need a stable insertion order. We use the order of the original loop
1467  // order and map into the correct parent loop.
1468  for (auto *BB : llvm::concat<BasicBlock *const>(
1469  makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1470  if (Loop *OuterL = ExitLoopMap.lookup(BB))
1471  OuterL->addBasicBlockToLoop(BB, LI);
1472 
1473 #ifndef NDEBUG
1474  for (auto &BBAndL : ExitLoopMap) {
1475  auto *BB = BBAndL.first;
1476  auto *OuterL = BBAndL.second;
1477  assert(LI.getLoopFor(BB) == OuterL &&
1478  "Failed to put all blocks into outer loops!");
1479  }
1480 #endif
1481 
1482  // Now that all the blocks are placed into the correct containing loop in the
1483  // absence of child loops, find all the potentially cloned child loops and
1484  // clone them into whatever outer loop we placed their header into.
1485  for (Loop *ChildL : OrigL) {
1486  auto *ClonedChildHeader =
1487  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1488  if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1489  continue;
1490 
1491 #ifndef NDEBUG
1492  for (auto *ChildLoopBB : ChildL->blocks())
1493  assert(VMap.count(ChildLoopBB) &&
1494  "Cloned a child loop header but not all of that loops blocks!");
1495 #endif
1496 
1497  NonChildClonedLoops.push_back(cloneLoopNest(
1498  *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1499  }
1500 }
1501 
1502 static void
1504  ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1505  DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1506  // Find all the dead clones, and remove them from their successors.
1507  SmallVector<BasicBlock *, 16> DeadBlocks;
1508  for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1509  for (auto &VMap : VMaps)
1510  if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1511  if (!DT.isReachableFromEntry(ClonedBB)) {
1512  for (BasicBlock *SuccBB : successors(ClonedBB))
1513  SuccBB->removePredecessor(ClonedBB);
1514  DeadBlocks.push_back(ClonedBB);
1515  }
1516 
1517  // Remove all MemorySSA in the dead blocks
1518  if (MSSAU) {
1519  SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1520  DeadBlocks.end());
1521  MSSAU->removeBlocks(DeadBlockSet);
1522  }
1523 
1524  // Drop any remaining references to break cycles.
1525  for (BasicBlock *BB : DeadBlocks)
1526  BB->dropAllReferences();
1527  // Erase them from the IR.
1528  for (BasicBlock *BB : DeadBlocks)
1529  BB->eraseFromParent();
1530 }
1531 
1533  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1534  DominatorTree &DT, LoopInfo &LI,
1535  MemorySSAUpdater *MSSAU) {
1536  // Find all the dead blocks tied to this loop, and remove them from their
1537  // successors.
1538  SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1539 
1540  // Start with loop/exit blocks and get a transitive closure of reachable dead
1541  // blocks.
1542  SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1543  ExitBlocks.end());
1544  DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1545  while (!DeathCandidates.empty()) {
1546  auto *BB = DeathCandidates.pop_back_val();
1547  if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1548  for (BasicBlock *SuccBB : successors(BB)) {
1549  SuccBB->removePredecessor(BB);
1550  DeathCandidates.push_back(SuccBB);
1551  }
1552  DeadBlockSet.insert(BB);
1553  }
1554  }
1555 
1556  // Remove all MemorySSA in the dead blocks
1557  if (MSSAU)
1558  MSSAU->removeBlocks(DeadBlockSet);
1559 
1560  // Filter out the dead blocks from the exit blocks list so that it can be
1561  // used in the caller.
1562  llvm::erase_if(ExitBlocks,
1563  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1564 
1565  // Walk from this loop up through its parents removing all of the dead blocks.
1566  for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1567  for (auto *BB : DeadBlockSet)
1568  ParentL->getBlocksSet().erase(BB);
1569  llvm::erase_if(ParentL->getBlocksVector(),
1570  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1571  }
1572 
1573  // Now delete the dead child loops. This raw delete will clear them
1574  // recursively.
1575  llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1576  if (!DeadBlockSet.count(ChildL->getHeader()))
1577  return false;
1578 
1579  assert(llvm::all_of(ChildL->blocks(),
1580  [&](BasicBlock *ChildBB) {
1581  return DeadBlockSet.count(ChildBB);
1582  }) &&
1583  "If the child loop header is dead all blocks in the child loop must "
1584  "be dead as well!");
1585  LI.destroy(ChildL);
1586  return true;
1587  });
1588 
1589  // Remove the loop mappings for the dead blocks and drop all the references
1590  // from these blocks to others to handle cyclic references as we start
1591  // deleting the blocks themselves.
1592  for (auto *BB : DeadBlockSet) {
1593  // Check that the dominator tree has already been updated.
1594  assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1595  LI.changeLoopFor(BB, nullptr);
1596  // Drop all uses of the instructions to make sure we won't have dangling
1597  // uses in other blocks.
1598  for (auto &I : *BB)
1599  if (!I.use_empty())
1600  I.replaceAllUsesWith(UndefValue::get(I.getType()));
1601  BB->dropAllReferences();
1602  }
1603 
1604  // Actually delete the blocks now that they've been fully unhooked from the
1605  // IR.
1606  for (auto *BB : DeadBlockSet)
1607  BB->eraseFromParent();
1608 }
1609 
1610 /// Recompute the set of blocks in a loop after unswitching.
1611 ///
1612 /// This walks from the original headers predecessors to rebuild the loop. We
1613 /// take advantage of the fact that new blocks can't have been added, and so we
1614 /// filter by the original loop's blocks. This also handles potentially
1615 /// unreachable code that we don't want to explore but might be found examining
1616 /// the predecessors of the header.
1617 ///
1618 /// If the original loop is no longer a loop, this will return an empty set. If
1619 /// it remains a loop, all the blocks within it will be added to the set
1620 /// (including those blocks in inner loops).
1622  LoopInfo &LI) {
1624 
1625  auto *PH = L.getLoopPreheader();
1626  auto *Header = L.getHeader();
1627 
1628  // A worklist to use while walking backwards from the header.
1630 
1631  // First walk the predecessors of the header to find the backedges. This will
1632  // form the basis of our walk.
1633  for (auto *Pred : predecessors(Header)) {
1634  // Skip the preheader.
1635  if (Pred == PH)
1636  continue;
1637 
1638  // Because the loop was in simplified form, the only non-loop predecessor
1639  // is the preheader.
1640  assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1641  "than the preheader that is not part of the "
1642  "loop!");
1643 
1644  // Insert this block into the loop set and on the first visit and, if it
1645  // isn't the header we're currently walking, put it into the worklist to
1646  // recurse through.
1647  if (LoopBlockSet.insert(Pred).second && Pred != Header)
1648  Worklist.push_back(Pred);
1649  }
1650 
1651  // If no backedges were found, we're done.
1652  if (LoopBlockSet.empty())
1653  return LoopBlockSet;
1654 
1655  // We found backedges, recurse through them to identify the loop blocks.
1656  while (!Worklist.empty()) {
1657  BasicBlock *BB = Worklist.pop_back_val();
1658  assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1659 
1660  // No need to walk past the header.
1661  if (BB == Header)
1662  continue;
1663 
1664  // Because we know the inner loop structure remains valid we can use the
1665  // loop structure to jump immediately across the entire nested loop.
1666  // Further, because it is in loop simplified form, we can directly jump
1667  // to its preheader afterward.
1668  if (Loop *InnerL = LI.getLoopFor(BB))
1669  if (InnerL != &L) {
1670  assert(L.contains(InnerL) &&
1671  "Should not reach a loop *outside* this loop!");
1672  // The preheader is the only possible predecessor of the loop so
1673  // insert it into the set and check whether it was already handled.
1674  auto *InnerPH = InnerL->getLoopPreheader();
1675  assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1676  "but not contain the inner loop "
1677  "preheader!");
1678  if (!LoopBlockSet.insert(InnerPH).second)
1679  // The only way to reach the preheader is through the loop body
1680  // itself so if it has been visited the loop is already handled.
1681  continue;
1682 
1683  // Insert all of the blocks (other than those already present) into
1684  // the loop set. We expect at least the block that led us to find the
1685  // inner loop to be in the block set, but we may also have other loop
1686  // blocks if they were already enqueued as predecessors of some other
1687  // outer loop block.
1688  for (auto *InnerBB : InnerL->blocks()) {
1689  if (InnerBB == BB) {
1690  assert(LoopBlockSet.count(InnerBB) &&
1691  "Block should already be in the set!");
1692  continue;
1693  }
1694 
1695  LoopBlockSet.insert(InnerBB);
1696  }
1697 
1698  // Add the preheader to the worklist so we will continue past the
1699  // loop body.
1700  Worklist.push_back(InnerPH);
1701  continue;
1702  }
1703 
1704  // Insert any predecessors that were in the original loop into the new
1705  // set, and if the insert is successful, add them to the worklist.
1706  for (auto *Pred : predecessors(BB))
1707  if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1708  Worklist.push_back(Pred);
1709  }
1710 
1711  assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1712 
1713  // We've found all the blocks participating in the loop, return our completed
1714  // set.
1715  return LoopBlockSet;
1716 }
1717 
1718 /// Rebuild a loop after unswitching removes some subset of blocks and edges.
1719 ///
1720 /// The removal may have removed some child loops entirely but cannot have
1721 /// disturbed any remaining child loops. However, they may need to be hoisted
1722 /// to the parent loop (or to be top-level loops). The original loop may be
1723 /// completely removed.
1724 ///
1725 /// The sibling loops resulting from this update are returned. If the original
1726 /// loop remains a valid loop, it will be the first entry in this list with all
1727 /// of the newly sibling loops following it.
1728 ///
1729 /// Returns true if the loop remains a loop after unswitching, and false if it
1730 /// is no longer a loop after unswitching (and should not continue to be
1731 /// referenced).
1733  LoopInfo &LI,
1734  SmallVectorImpl<Loop *> &HoistedLoops) {
1735  auto *PH = L.getLoopPreheader();
1736 
1737  // Compute the actual parent loop from the exit blocks. Because we may have
1738  // pruned some exits the loop may be different from the original parent.
1739  Loop *ParentL = nullptr;
1740  SmallVector<Loop *, 4> ExitLoops;
1741  SmallVector<BasicBlock *, 4> ExitsInLoops;
1742  ExitsInLoops.reserve(ExitBlocks.size());
1743  for (auto *ExitBB : ExitBlocks)
1744  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1745  ExitLoops.push_back(ExitL);
1746  ExitsInLoops.push_back(ExitBB);
1747  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1748  ParentL = ExitL;
1749  }
1750 
1751  // Recompute the blocks participating in this loop. This may be empty if it
1752  // is no longer a loop.
1753  auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1754 
1755  // If we still have a loop, we need to re-set the loop's parent as the exit
1756  // block set changing may have moved it within the loop nest. Note that this
1757  // can only happen when this loop has a parent as it can only hoist the loop
1758  // *up* the nest.
1759  if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1760  // Remove this loop's (original) blocks from all of the intervening loops.
1761  for (Loop *IL = L.getParentLoop(); IL != ParentL;
1762  IL = IL->getParentLoop()) {
1763  IL->getBlocksSet().erase(PH);
1764  for (auto *BB : L.blocks())
1765  IL->getBlocksSet().erase(BB);
1766  llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1767  return BB == PH || L.contains(BB);
1768  });
1769  }
1770 
1771  LI.changeLoopFor(PH, ParentL);
1772  L.getParentLoop()->removeChildLoop(&L);
1773  if (ParentL)
1774  ParentL->addChildLoop(&L);
1775  else
1776  LI.addTopLevelLoop(&L);
1777  }
1778 
1779  // Now we update all the blocks which are no longer within the loop.
1780  auto &Blocks = L.getBlocksVector();
1781  auto BlocksSplitI =
1782  LoopBlockSet.empty()
1783  ? Blocks.begin()
1784  : std::stable_partition(
1785  Blocks.begin(), Blocks.end(),
1786  [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1787 
1788  // Before we erase the list of unlooped blocks, build a set of them.
1789  SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1790  if (LoopBlockSet.empty())
1791  UnloopedBlocks.insert(PH);
1792 
1793  // Now erase these blocks from the loop.
1794  for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1795  L.getBlocksSet().erase(BB);
1796  Blocks.erase(BlocksSplitI, Blocks.end());
1797 
1798  // Sort the exits in ascending loop depth, we'll work backwards across these
1799  // to process them inside out.
1800  llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1801  return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1802  });
1803 
1804  // We'll build up a set for each exit loop.
1805  SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1806  Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1807 
1808  auto RemoveUnloopedBlocksFromLoop =
1809  [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1810  for (auto *BB : UnloopedBlocks)
1811  L.getBlocksSet().erase(BB);
1813  return UnloopedBlocks.count(BB);
1814  });
1815  };
1816 
1818  while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1819  assert(Worklist.empty() && "Didn't clear worklist!");
1820  assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1821 
1822  // Grab the next exit block, in decreasing loop depth order.
1823  BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1824  Loop &ExitL = *LI.getLoopFor(ExitBB);
1825  assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1826 
1827  // Erase all of the unlooped blocks from the loops between the previous
1828  // exit loop and this exit loop. This works because the ExitInLoops list is
1829  // sorted in increasing order of loop depth and thus we visit loops in
1830  // decreasing order of loop depth.
1831  for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1832  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1833 
1834  // Walk the CFG back until we hit the cloned PH adding everything reachable
1835  // and in the unlooped set to this exit block's loop.
1836  Worklist.push_back(ExitBB);
1837  do {
1838  BasicBlock *BB = Worklist.pop_back_val();
1839  // We can stop recursing at the cloned preheader (if we get there).
1840  if (BB == PH)
1841  continue;
1842 
1843  for (BasicBlock *PredBB : predecessors(BB)) {
1844  // If this pred has already been moved to our set or is part of some
1845  // (inner) loop, no update needed.
1846  if (!UnloopedBlocks.erase(PredBB)) {
1847  assert((NewExitLoopBlocks.count(PredBB) ||
1848  ExitL.contains(LI.getLoopFor(PredBB))) &&
1849  "Predecessor not in a nested loop (or already visited)!");
1850  continue;
1851  }
1852 
1853  // We just insert into the loop set here. We'll add these blocks to the
1854  // exit loop after we build up the set in a deterministic order rather
1855  // than the predecessor-influenced visit order.
1856  bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
1857  (void)Inserted;
1858  assert(Inserted && "Should only visit an unlooped block once!");
1859 
1860  // And recurse through to its predecessors.
1861  Worklist.push_back(PredBB);
1862  }
1863  } while (!Worklist.empty());
1864 
1865  // If blocks in this exit loop were directly part of the original loop (as
1866  // opposed to a child loop) update the map to point to this exit loop. This
1867  // just updates a map and so the fact that the order is unstable is fine.
1868  for (auto *BB : NewExitLoopBlocks)
1869  if (Loop *BBL = LI.getLoopFor(BB))
1870  if (BBL == &L || !L.contains(BBL))
1871  LI.changeLoopFor(BB, &ExitL);
1872 
1873  // We will remove the remaining unlooped blocks from this loop in the next
1874  // iteration or below.
1875  NewExitLoopBlocks.clear();
1876  }
1877 
1878  // Any remaining unlooped blocks are no longer part of any loop unless they
1879  // are part of some child loop.
1880  for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
1881  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1882  for (auto *BB : UnloopedBlocks)
1883  if (Loop *BBL = LI.getLoopFor(BB))
1884  if (BBL == &L || !L.contains(BBL))
1885  LI.changeLoopFor(BB, nullptr);
1886 
1887  // Sink all the child loops whose headers are no longer in the loop set to
1888  // the parent (or to be top level loops). We reach into the loop and directly
1889  // update its subloop vector to make this batch update efficient.
1890  auto &SubLoops = L.getSubLoopsVector();
1891  auto SubLoopsSplitI =
1892  LoopBlockSet.empty()
1893  ? SubLoops.begin()
1894  : std::stable_partition(
1895  SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
1896  return LoopBlockSet.count(SubL->getHeader());
1897  });
1898  for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
1899  HoistedLoops.push_back(HoistedL);
1900  HoistedL->setParentLoop(nullptr);
1901 
1902  // To compute the new parent of this hoisted loop we look at where we
1903  // placed the preheader above. We can't lookup the header itself because we
1904  // retained the mapping from the header to the hoisted loop. But the
1905  // preheader and header should have the exact same new parent computed
1906  // based on the set of exit blocks from the original loop as the preheader
1907  // is a predecessor of the header and so reached in the reverse walk. And
1908  // because the loops were all in simplified form the preheader of the
1909  // hoisted loop can't be part of some *other* loop.
1910  if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
1911  NewParentL->addChildLoop(HoistedL);
1912  else
1913  LI.addTopLevelLoop(HoistedL);
1914  }
1915  SubLoops.erase(SubLoopsSplitI, SubLoops.end());
1916 
1917  // Actually delete the loop if nothing remained within it.
1918  if (Blocks.empty()) {
1919  assert(SubLoops.empty() &&
1920  "Failed to remove all subloops from the original loop!");
1921  if (Loop *ParentL = L.getParentLoop())
1922  ParentL->removeChildLoop(llvm::find(*ParentL, &L));
1923  else
1924  LI.removeLoop(llvm::find(LI, &L));
1925  LI.destroy(&L);
1926  return false;
1927  }
1928 
1929  return true;
1930 }
1931 
1932 /// Helper to visit a dominator subtree, invoking a callable on each node.
1933 ///
1934 /// Returning false at any point will stop walking past that node of the tree.
1935 template <typename CallableT>
1936 void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
1937  SmallVector<DomTreeNode *, 4> DomWorklist;
1938  DomWorklist.push_back(DT[BB]);
1939 #ifndef NDEBUG
1941  Visited.insert(DT[BB]);
1942 #endif
1943  do {
1944  DomTreeNode *N = DomWorklist.pop_back_val();
1945 
1946  // Visit this node.
1947  if (!Callable(N->getBlock()))
1948  continue;
1949 
1950  // Accumulate the child nodes.
1951  for (DomTreeNode *ChildN : *N) {
1952  assert(Visited.insert(ChildN).second &&
1953  "Cannot visit a node twice when walking a tree!");
1954  DomWorklist.push_back(ChildN);
1955  }
1956  } while (!DomWorklist.empty());
1957 }
1958 
1960  Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
1962  AssumptionCache &AC, function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
1963  ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
1964  auto *ParentBB = TI.getParent();
1965  BranchInst *BI = dyn_cast<BranchInst>(&TI);
1966  SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
1967 
1968  // We can only unswitch switches, conditional branches with an invariant
1969  // condition, or combining invariant conditions with an instruction.
1970  assert((SI || (BI && BI->isConditional())) &&
1971  "Can only unswitch switches and conditional branch!");
1972  bool FullUnswitch = SI || BI->getCondition() == Invariants[0];
1973  if (FullUnswitch)
1974  assert(Invariants.size() == 1 &&
1975  "Cannot have other invariants with full unswitching!");
1976  else
1977  assert(isa<Instruction>(BI->getCondition()) &&
1978  "Partial unswitching requires an instruction as the condition!");
1979 
1980  if (MSSAU && VerifyMemorySSA)
1981  MSSAU->getMemorySSA()->verifyMemorySSA();
1982 
1983  // Constant and BBs tracking the cloned and continuing successor. When we are
1984  // unswitching the entire condition, this can just be trivially chosen to
1985  // unswitch towards `true`. However, when we are unswitching a set of
1986  // invariants combined with `and` or `or`, the combining operation determines
1987  // the best direction to unswitch: we want to unswitch the direction that will
1988  // collapse the branch.
1989  bool Direction = true;
1990  int ClonedSucc = 0;
1991  if (!FullUnswitch) {
1992  if (cast<Instruction>(BI->getCondition())->getOpcode() != Instruction::Or) {
1993  assert(cast<Instruction>(BI->getCondition())->getOpcode() ==
1994  Instruction::And &&
1995  "Only `or` and `and` instructions can combine invariants being "
1996  "unswitched.");
1997  Direction = false;
1998  ClonedSucc = 1;
1999  }
2000  }
2001 
2002  BasicBlock *RetainedSuccBB =
2003  BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2004  SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2005  if (BI)
2006  UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2007  else
2008  for (auto Case : SI->cases())
2009  if (Case.getCaseSuccessor() != RetainedSuccBB)
2010  UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2011 
2012  assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2013  "Should not unswitch the same successor we are retaining!");
2014 
2015  // The branch should be in this exact loop. Any inner loop's invariant branch
2016  // should be handled by unswitching that inner loop. The caller of this
2017  // routine should filter out any candidates that remain (but were skipped for
2018  // whatever reason).
2019  assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2020 
2021  // Compute the parent loop now before we start hacking on things.
2022  Loop *ParentL = L.getParentLoop();
2023  // Get blocks in RPO order for MSSA update, before changing the CFG.
2024  LoopBlocksRPO LBRPO(&L);
2025  if (MSSAU)
2026  LBRPO.perform(&LI);
2027 
2028  // Compute the outer-most loop containing one of our exit blocks. This is the
2029  // furthest up our loopnest which can be mutated, which we will use below to
2030  // update things.
2031  Loop *OuterExitL = &L;
2032  for (auto *ExitBB : ExitBlocks) {
2033  Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
2034  if (!NewOuterExitL) {
2035  // We exited the entire nest with this block, so we're done.
2036  OuterExitL = nullptr;
2037  break;
2038  }
2039  if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2040  OuterExitL = NewOuterExitL;
2041  }
2042 
2043  // At this point, we're definitely going to unswitch something so invalidate
2044  // any cached information in ScalarEvolution for the outer most loop
2045  // containing an exit block and all nested loops.
2046  if (SE) {
2047  if (OuterExitL)
2048  SE->forgetLoop(OuterExitL);
2049  else
2050  SE->forgetTopmostLoop(&L);
2051  }
2052 
2053  // If the edge from this terminator to a successor dominates that successor,
2054  // store a map from each block in its dominator subtree to it. This lets us
2055  // tell when cloning for a particular successor if a block is dominated by
2056  // some *other* successor with a single data structure. We use this to
2057  // significantly reduce cloning.
2059  for (auto *SuccBB : llvm::concat<BasicBlock *const>(
2060  makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs))
2061  if (SuccBB->getUniquePredecessor() ||
2062  llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2063  return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2064  }))
2065  visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2066  DominatingSucc[BB] = SuccBB;
2067  return true;
2068  });
2069 
2070  // Split the preheader, so that we know that there is a safe place to insert
2071  // the conditional branch. We will change the preheader to have a conditional
2072  // branch on LoopCond. The original preheader will become the split point
2073  // between the unswitched versions, and we will have a new preheader for the
2074  // original loop.
2075  BasicBlock *SplitBB = L.getLoopPreheader();
2076  BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2077 
2078  // Keep track of the dominator tree updates needed.
2080 
2081  // Clone the loop for each unswitched successor.
2083  VMaps.reserve(UnswitchedSuccBBs.size());
2085  for (auto *SuccBB : UnswitchedSuccBBs) {
2086  VMaps.emplace_back(new ValueToValueMapTy());
2087  ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2088  L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2089  DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
2090  }
2091 
2092  // Drop metadata if we may break its semantics by moving this instr into the
2093  // split block.
2094  if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2096  // Do not spend time trying to understand if we can keep it, just drop it
2097  // to save compile time.
2098  TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2099  else {
2100  // It is only legal to preserve make.implicit metadata if we are
2101  // guaranteed no reach implicit null check after following this branch.
2102  ICFLoopSafetyInfo SafetyInfo;
2103  SafetyInfo.computeLoopSafetyInfo(&L);
2104  if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2105  TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2106  }
2107  }
2108 
2109  // The stitching of the branched code back together depends on whether we're
2110  // doing full unswitching or not with the exception that we always want to
2111  // nuke the initial terminator placed in the split block.
2112  SplitBB->getTerminator()->eraseFromParent();
2113  if (FullUnswitch) {
2114  // Splice the terminator from the original loop and rewrite its
2115  // successors.
2116  SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
2117 
2118  // Keep a clone of the terminator for MSSA updates.
2119  Instruction *NewTI = TI.clone();
2120  ParentBB->getInstList().push_back(NewTI);
2121 
2122  // First wire up the moved terminator to the preheaders.
2123  if (BI) {
2124  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2125  BI->setSuccessor(ClonedSucc, ClonedPH);
2126  BI->setSuccessor(1 - ClonedSucc, LoopPH);
2127  DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2128  } else {
2129  assert(SI && "Must either be a branch or switch!");
2130 
2131  // Walk the cases and directly update their successors.
2132  assert(SI->getDefaultDest() == RetainedSuccBB &&
2133  "Not retaining default successor!");
2134  SI->setDefaultDest(LoopPH);
2135  for (auto &Case : SI->cases())
2136  if (Case.getCaseSuccessor() == RetainedSuccBB)
2137  Case.setSuccessor(LoopPH);
2138  else
2139  Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2140 
2141  // We need to use the set to populate domtree updates as even when there
2142  // are multiple cases pointing at the same successor we only want to
2143  // remove and insert one edge in the domtree.
2144  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2145  DTUpdates.push_back(
2146  {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2147  }
2148 
2149  if (MSSAU) {
2150  DT.applyUpdates(DTUpdates);
2151  DTUpdates.clear();
2152 
2153  // Remove all but one edge to the retained block and all unswitched
2154  // blocks. This is to avoid having duplicate entries in the cloned Phis,
2155  // when we know we only keep a single edge for each case.
2156  MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2157  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2158  MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2159 
2160  for (auto &VMap : VMaps)
2161  MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2162  /*IgnoreIncomingWithNoClones=*/true);
2163  MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2164 
2165  // Remove all edges to unswitched blocks.
2166  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2167  MSSAU->removeEdge(ParentBB, SuccBB);
2168  }
2169 
2170  // Now unhook the successor relationship as we'll be replacing
2171  // the terminator with a direct branch. This is much simpler for branches
2172  // than switches so we handle those first.
2173  if (BI) {
2174  // Remove the parent as a predecessor of the unswitched successor.
2175  assert(UnswitchedSuccBBs.size() == 1 &&
2176  "Only one possible unswitched block for a branch!");
2177  BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2178  UnswitchedSuccBB->removePredecessor(ParentBB,
2179  /*KeepOneInputPHIs*/ true);
2180  DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2181  } else {
2182  // Note that we actually want to remove the parent block as a predecessor
2183  // of *every* case successor. The case successor is either unswitched,
2184  // completely eliminating an edge from the parent to that successor, or it
2185  // is a duplicate edge to the retained successor as the retained successor
2186  // is always the default successor and as we'll replace this with a direct
2187  // branch we no longer need the duplicate entries in the PHI nodes.
2188  SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2189  assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2190  "Not retaining default successor!");
2191  for (auto &Case : NewSI->cases())
2192  Case.getCaseSuccessor()->removePredecessor(
2193  ParentBB,
2194  /*KeepOneInputPHIs*/ true);
2195 
2196  // We need to use the set to populate domtree updates as even when there
2197  // are multiple cases pointing at the same successor we only want to
2198  // remove and insert one edge in the domtree.
2199  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2200  DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2201  }
2202 
2203  // After MSSAU update, remove the cloned terminator instruction NewTI.
2204  ParentBB->getTerminator()->eraseFromParent();
2205 
2206  // Create a new unconditional branch to the continuing block (as opposed to
2207  // the one cloned).
2208  BranchInst::Create(RetainedSuccBB, ParentBB);
2209  } else {
2210  assert(BI && "Only branches have partial unswitching.");
2211  assert(UnswitchedSuccBBs.size() == 1 &&
2212  "Only one possible unswitched block for a branch!");
2213  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2214  // When doing a partial unswitch, we have to do a bit more work to build up
2215  // the branch in the split block.
2216  buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
2217  *ClonedPH, *LoopPH);
2218  DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2219 
2220  if (MSSAU) {
2221  DT.applyUpdates(DTUpdates);
2222  DTUpdates.clear();
2223 
2224  // Perform MSSA cloning updates.
2225  for (auto &VMap : VMaps)
2226  MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2227  /*IgnoreIncomingWithNoClones=*/true);
2228  MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2229  }
2230  }
2231 
2232  // Apply the updates accumulated above to get an up-to-date dominator tree.
2233  DT.applyUpdates(DTUpdates);
2234 
2235  // Now that we have an accurate dominator tree, first delete the dead cloned
2236  // blocks so that we can accurately build any cloned loops. It is important to
2237  // not delete the blocks from the original loop yet because we still want to
2238  // reference the original loop to understand the cloned loop's structure.
2239  deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2240 
2241  // Build the cloned loop structure itself. This may be substantially
2242  // different from the original structure due to the simplified CFG. This also
2243  // handles inserting all the cloned blocks into the correct loops.
2244  SmallVector<Loop *, 4> NonChildClonedLoops;
2245  for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2246  buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2247 
2248  // Now that our cloned loops have been built, we can update the original loop.
2249  // First we delete the dead blocks from it and then we rebuild the loop
2250  // structure taking these deletions into account.
2251  deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU);
2252 
2253  if (MSSAU && VerifyMemorySSA)
2254  MSSAU->getMemorySSA()->verifyMemorySSA();
2255 
2256  SmallVector<Loop *, 4> HoistedLoops;
2257  bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
2258 
2259  if (MSSAU && VerifyMemorySSA)
2260  MSSAU->getMemorySSA()->verifyMemorySSA();
2261 
2262  // This transformation has a high risk of corrupting the dominator tree, and
2263  // the below steps to rebuild loop structures will result in hard to debug
2264  // errors in that case so verify that the dominator tree is sane first.
2265  // FIXME: Remove this when the bugs stop showing up and rely on existing
2266  // verification steps.
2268 
2269  if (BI) {
2270  // If we unswitched a branch which collapses the condition to a known
2271  // constant we want to replace all the uses of the invariants within both
2272  // the original and cloned blocks. We do this here so that we can use the
2273  // now updated dominator tree to identify which side the users are on.
2274  assert(UnswitchedSuccBBs.size() == 1 &&
2275  "Only one possible unswitched block for a branch!");
2276  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2277 
2278  // When considering multiple partially-unswitched invariants
2279  // we cant just go replace them with constants in both branches.
2280  //
2281  // For 'AND' we infer that true branch ("continue") means true
2282  // for each invariant operand.
2283  // For 'OR' we can infer that false branch ("continue") means false
2284  // for each invariant operand.
2285  // So it happens that for multiple-partial case we dont replace
2286  // in the unswitched branch.
2287  bool ReplaceUnswitched = FullUnswitch || (Invariants.size() == 1);
2288 
2289  ConstantInt *UnswitchedReplacement =
2292  ConstantInt *ContinueReplacement =
2295  for (Value *Invariant : Invariants)
2296  // Use make_early_inc_range here as set invalidates the iterator.
2297  for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2298  Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2299  if (!UserI)
2300  continue;
2301 
2302  // Replace it with the 'continue' side if in the main loop body, and the
2303  // unswitched if in the cloned blocks.
2304  if (DT.dominates(LoopPH, UserI->getParent()))
2305  U.set(ContinueReplacement);
2306  else if (ReplaceUnswitched &&
2307  DT.dominates(ClonedPH, UserI->getParent()))
2308  U.set(UnswitchedReplacement);
2309  }
2310  }
2311 
2312  // We can change which blocks are exit blocks of all the cloned sibling
2313  // loops, the current loop, and any parent loops which shared exit blocks
2314  // with the current loop. As a consequence, we need to re-form LCSSA for
2315  // them. But we shouldn't need to re-form LCSSA for any child loops.
2316  // FIXME: This could be made more efficient by tracking which exit blocks are
2317  // new, and focusing on them, but that isn't likely to be necessary.
2318  //
2319  // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2320  // loop nest and update every loop that could have had its exits changed. We
2321  // also need to cover any intervening loops. We add all of these loops to
2322  // a list and sort them by loop depth to achieve this without updating
2323  // unnecessary loops.
2324  auto UpdateLoop = [&](Loop &UpdateL) {
2325 #ifndef NDEBUG
2326  UpdateL.verifyLoop();
2327  for (Loop *ChildL : UpdateL) {
2328  ChildL->verifyLoop();
2329  assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2330  "Perturbed a child loop's LCSSA form!");
2331  }
2332 #endif
2333  // First build LCSSA for this loop so that we can preserve it when
2334  // forming dedicated exits. We don't want to perturb some other loop's
2335  // LCSSA while doing that CFG edit.
2336  formLCSSA(UpdateL, DT, &LI, SE);
2337 
2338  // For loops reached by this loop's original exit blocks we may
2339  // introduced new, non-dedicated exits. At least try to re-form dedicated
2340  // exits for these loops. This may fail if they couldn't have dedicated
2341  // exits to start with.
2342  formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2343  };
2344 
2345  // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2346  // and we can do it in any order as they don't nest relative to each other.
2347  //
2348  // Also check if any of the loops we have updated have become top-level loops
2349  // as that will necessitate widening the outer loop scope.
2350  for (Loop *UpdatedL :
2351  llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2352  UpdateLoop(*UpdatedL);
2353  if (UpdatedL->isOutermost())
2354  OuterExitL = nullptr;
2355  }
2356  if (IsStillLoop) {
2357  UpdateLoop(L);
2358  if (L.isOutermost())
2359  OuterExitL = nullptr;
2360  }
2361 
2362  // If the original loop had exit blocks, walk up through the outer most loop
2363  // of those exit blocks to update LCSSA and form updated dedicated exits.
2364  if (OuterExitL != &L)
2365  for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2366  OuterL = OuterL->getParentLoop())
2367  UpdateLoop(*OuterL);
2368 
2369 #ifndef NDEBUG
2370  // Verify the entire loop structure to catch any incorrect updates before we
2371  // progress in the pass pipeline.
2372  LI.verify(DT);
2373 #endif
2374 
2375  // Now that we've unswitched something, make callbacks to report the changes.
2376  // For that we need to merge together the updated loops and the cloned loops
2377  // and check whether the original loop survived.
2378  SmallVector<Loop *, 4> SibLoops;
2379  for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2380  if (UpdatedL->getParentLoop() == ParentL)
2381  SibLoops.push_back(UpdatedL);
2382  UnswitchCB(IsStillLoop, SibLoops);
2383 
2384  if (MSSAU && VerifyMemorySSA)
2385  MSSAU->getMemorySSA()->verifyMemorySSA();
2386 
2387  if (BI)
2388  ++NumBranches;
2389  else
2390  ++NumSwitches;
2391 }
2392 
2393 /// Recursively compute the cost of a dominator subtree based on the per-block
2394 /// cost map provided.
2395 ///
2396 /// The recursive computation is memozied into the provided DT-indexed cost map
2397 /// to allow querying it for most nodes in the domtree without it becoming
2398 /// quadratic.
2400  DomTreeNode &N,
2403  // Don't accumulate cost (or recurse through) blocks not in our block cost
2404  // map and thus not part of the duplication cost being considered.
2405  auto BBCostIt = BBCostMap.find(N.getBlock());
2406  if (BBCostIt == BBCostMap.end())
2407  return 0;
2408 
2409  // Lookup this node to see if we already computed its cost.
2410  auto DTCostIt = DTCostMap.find(&N);
2411  if (DTCostIt != DTCostMap.end())
2412  return DTCostIt->second;
2413 
2414  // If not, we have to compute it. We can't use insert above and update
2415  // because computing the cost may insert more things into the map.
2416  InstructionCost Cost = std::accumulate(
2417  N.begin(), N.end(), BBCostIt->second,
2418  [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2419  return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2420  });
2421  bool Inserted = DTCostMap.insert({&N, Cost}).second;
2422  (void)Inserted;
2423  assert(Inserted && "Should not insert a node while visiting children!");
2424  return Cost;
2425 }
2426 
2427 /// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2428 /// making the following replacement:
2429 ///
2430 /// --code before guard--
2431 /// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2432 /// --code after guard--
2433 ///
2434 /// into
2435 ///
2436 /// --code before guard--
2437 /// br i1 %cond, label %guarded, label %deopt
2438 ///
2439 /// guarded:
2440 /// --code after guard--
2441 ///
2442 /// deopt:
2443 /// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2444 /// unreachable
2445 ///
2446 /// It also makes all relevant DT and LI updates, so that all structures are in
2447 /// valid state after this transform.
2448 static BranchInst *
2450  SmallVectorImpl<BasicBlock *> &ExitBlocks,
2451  DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
2453  LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2454  BasicBlock *CheckBB = GI->getParent();
2455 
2456  if (MSSAU && VerifyMemorySSA)
2457  MSSAU->getMemorySSA()->verifyMemorySSA();
2458 
2459  // Remove all CheckBB's successors from DomTree. A block can be seen among
2460  // successors more than once, but for DomTree it should be added only once.
2461  SmallPtrSet<BasicBlock *, 4> Successors;
2462  for (auto *Succ : successors(CheckBB))
2463  if (Successors.insert(Succ).second)
2464  DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2465 
2466  Instruction *DeoptBlockTerm =
2467  SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
2468  BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2469  // SplitBlockAndInsertIfThen inserts control flow that branches to
2470  // DeoptBlockTerm if the condition is true. We want the opposite.
2471  CheckBI->swapSuccessors();
2472 
2473  BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2474  GuardedBlock->setName("guarded");
2475  CheckBI->getSuccessor(1)->setName("deopt");
2476  BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2477 
2478  // We now have a new exit block.
2479  ExitBlocks.push_back(CheckBI->getSuccessor(1));
2480 
2481  if (MSSAU)
2482  MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2483 
2484  GI->moveBefore(DeoptBlockTerm);
2486 
2487  // Add new successors of CheckBB into DomTree.
2488  for (auto *Succ : successors(CheckBB))
2489  DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
2490 
2491  // Now the blocks that used to be CheckBB's successors are GuardedBlock's
2492  // successors.
2493  for (auto *Succ : Successors)
2494  DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
2495 
2496  // Make proper changes to DT.
2497  DT.applyUpdates(DTUpdates);
2498  // Inform LI of a new loop block.
2499  L.addBasicBlockToLoop(GuardedBlock, LI);
2500 
2501  if (MSSAU) {
2502  MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2503  MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2504  if (VerifyMemorySSA)
2505  MSSAU->getMemorySSA()->verifyMemorySSA();
2506  }
2507 
2508  ++NumGuards;
2509  return CheckBI;
2510 }
2511 
2512 /// Cost multiplier is a way to limit potentially exponential behavior
2513 /// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
2514 /// candidates available. Also accounting for the number of "sibling" loops with
2515 /// the idea to account for previous unswitches that already happened on this
2516 /// cluster of loops. There was an attempt to keep this formula simple,
2517 /// just enough to limit the worst case behavior. Even if it is not that simple
2518 /// now it is still not an attempt to provide a detailed heuristic size
2519 /// prediction.
2520 ///
2521 /// TODO: Make a proper accounting of "explosion" effect for all kinds of
2522 /// unswitch candidates, making adequate predictions instead of wild guesses.
2523 /// That requires knowing not just the number of "remaining" candidates but
2524 /// also costs of unswitching for each of these candidates.
2526  Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
2528  UnswitchCandidates) {
2529 
2530  // Guards and other exiting conditions do not contribute to exponential
2531  // explosion as soon as they dominate the latch (otherwise there might be
2532  // another path to the latch remaining that does not allow to eliminate the
2533  // loop copy on unswitch).
2534  BasicBlock *Latch = L.getLoopLatch();
2535  BasicBlock *CondBlock = TI.getParent();
2536  if (DT.dominates(CondBlock, Latch) &&
2537  (isGuard(&TI) ||
2538  llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
2539  return L.contains(SuccBB);
2540  }) <= 1)) {
2541  NumCostMultiplierSkipped++;
2542  return 1;
2543  }
2544 
2545  auto *ParentL = L.getParentLoop();
2546  int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2547  : std::distance(LI.begin(), LI.end()));
2548  // Count amount of clones that all the candidates might cause during
2549  // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
2550  int UnswitchedClones = 0;
2551  for (auto Candidate : UnswitchCandidates) {
2552  Instruction *CI = Candidate.first;
2553  BasicBlock *CondBlock = CI->getParent();
2554  bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2555  if (isGuard(CI)) {
2556  if (!SkipExitingSuccessors)
2557  UnswitchedClones++;
2558  continue;
2559  }
2560  int NonExitingSuccessors = llvm::count_if(
2561  successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
2562  return !SkipExitingSuccessors || L.contains(SuccBB);
2563  });
2564  UnswitchedClones += Log2_32(NonExitingSuccessors);
2565  }
2566 
2567  // Ignore up to the "unscaled candidates" number of unswitch candidates
2568  // when calculating the power-of-two scaling of the cost. The main idea
2569  // with this control is to allow a small number of unswitches to happen
2570  // and rely more on siblings multiplier (see below) when the number
2571  // of candidates is small.
2572  unsigned ClonesPower =
2573  std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2574 
2575  // Allowing top-level loops to spread a bit more than nested ones.
2576  int SiblingsMultiplier =
2577  std::max((ParentL ? SiblingsCount
2578  : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2579  1);
2580  // Compute the cost multiplier in a way that won't overflow by saturating
2581  // at an upper bound.
2582  int CostMultiplier;
2583  if (ClonesPower > Log2_32(UnswitchThreshold) ||
2584  SiblingsMultiplier > UnswitchThreshold)
2585  CostMultiplier = UnswitchThreshold;
2586  else
2587  CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2588  (int)UnswitchThreshold);
2589 
2590  LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2591  << " (siblings " << SiblingsMultiplier << " * clones "
2592  << (1 << ClonesPower) << ")"
2593  << " for unswitch candidate: " << TI << "\n");
2594  return CostMultiplier;
2595 }
2596 
2597 static bool
2600  function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
2601  ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
2602  // Collect all invariant conditions within this loop (as opposed to an inner
2603  // loop which would be handled when visiting that inner loop).
2605  UnswitchCandidates;
2606 
2607  // Whether or not we should also collect guards in the loop.
2608  bool CollectGuards = false;
2609  if (UnswitchGuards) {
2610  auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2611  Intrinsic::getName(Intrinsic::experimental_guard));
2612  if (GuardDecl && !GuardDecl->use_empty())
2613  CollectGuards = true;
2614  }
2615 
2616  for (auto *BB : L.blocks()) {
2617  if (LI.getLoopFor(BB) != &L)
2618  continue;
2619 
2620  if (CollectGuards)
2621  for (auto &I : *BB)
2622  if (isGuard(&I)) {
2623  auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
2624  // TODO: Support AND, OR conditions and partial unswitching.
2625  if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2626  UnswitchCandidates.push_back({&I, {Cond}});
2627  }
2628 
2629  if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2630  // We can only consider fully loop-invariant switch conditions as we need
2631  // to completely eliminate the switch after unswitching.
2632  if (!isa<Constant>(SI->getCondition()) &&
2633  L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2634  UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2635  continue;
2636  }
2637 
2638  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2639  if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
2640  BI->getSuccessor(0) == BI->getSuccessor(1))
2641  continue;
2642 
2643  if (L.isLoopInvariant(BI->getCondition())) {
2644  UnswitchCandidates.push_back({BI, {BI->getCondition()}});
2645  continue;
2646  }
2647 
2648  Instruction &CondI = *cast<Instruction>(BI->getCondition());
2649  if (CondI.getOpcode() != Instruction::And &&
2650  CondI.getOpcode() != Instruction::Or)
2651  continue;
2652 
2653  TinyPtrVector<Value *> Invariants =
2655  if (Invariants.empty())
2656  continue;
2657 
2658  UnswitchCandidates.push_back({BI, std::move(Invariants)});
2659  }
2660 
2661  // If we didn't find any candidates, we're done.
2662  if (UnswitchCandidates.empty())
2663  return false;
2664 
2665  // Check if there are irreducible CFG cycles in this loop. If so, we cannot
2666  // easily unswitch non-trivial edges out of the loop. Doing so might turn the
2667  // irreducible control flow into reducible control flow and introduce new
2668  // loops "out of thin air". If we ever discover important use cases for doing
2669  // this, we can add support to loop unswitch, but it is a lot of complexity
2670  // for what seems little or no real world benefit.
2671  LoopBlocksRPO RPOT(&L);
2672  RPOT.perform(&LI);
2673  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
2674  return false;
2675 
2676  SmallVector<BasicBlock *, 4> ExitBlocks;
2677  L.getUniqueExitBlocks(ExitBlocks);
2678 
2679  // We cannot unswitch if exit blocks contain a cleanuppad instruction as we
2680  // don't know how to split those exit blocks.
2681  // FIXME: We should teach SplitBlock to handle this and remove this
2682  // restriction.
2683  for (auto *ExitBB : ExitBlocks)
2684  if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI())) {
2685  dbgs() << "Cannot unswitch because of cleanuppad in exit block\n";
2686  return false;
2687  }
2688 
2689  LLVM_DEBUG(
2690  dbgs() << "Considering " << UnswitchCandidates.size()
2691  << " non-trivial loop invariant conditions for unswitching.\n");
2692 
2693  // Given that unswitching these terminators will require duplicating parts of
2694  // the loop, so we need to be able to model that cost. Compute the ephemeral
2695  // values and set up a data structure to hold per-BB costs. We cache each
2696  // block's cost so that we don't recompute this when considering different
2697  // subsets of the loop for duplication during unswitching.
2699  CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
2701 
2702  // Compute the cost of each block, as well as the total loop cost. Also, bail
2703  // out if we see instructions which are incompatible with loop unswitching
2704  // (convergent, noduplicate, or cross-basic-block tokens).
2705  // FIXME: We might be able to safely handle some of these in non-duplicated
2706  // regions.
2708  L.getHeader()->getParent()->hasMinSize()
2709  ? TargetTransformInfo::TCK_CodeSize
2710  : TargetTransformInfo::TCK_SizeAndLatency;
2711  InstructionCost LoopCost = 0;
2712  for (auto *BB : L.blocks()) {
2713  InstructionCost Cost = 0;
2714  for (auto &I : *BB) {
2715  if (EphValues.count(&I))
2716  continue;
2717 
2718  if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
2719  return false;
2720  if (auto *CB = dyn_cast<CallBase>(&I))
2721  if (CB->isConvergent() || CB->cannotDuplicate())
2722  return false;
2723 
2724  Cost += TTI.getUserCost(&I, CostKind);
2725  }
2726  assert(Cost >= 0 && "Must not have negative costs!");
2727  LoopCost += Cost;
2728  assert(LoopCost >= 0 && "Must not have negative loop costs!");
2729  BBCostMap[BB] = Cost;
2730  }
2731  LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
2732 
2733  // Now we find the best candidate by searching for the one with the following
2734  // properties in order:
2735  //
2736  // 1) An unswitching cost below the threshold
2737  // 2) The smallest number of duplicated unswitch candidates (to avoid
2738  // creating redundant subsequent unswitching)
2739  // 3) The smallest cost after unswitching.
2740  //
2741  // We prioritize reducing fanout of unswitch candidates provided the cost
2742  // remains below the threshold because this has a multiplicative effect.
2743  //
2744  // This requires memoizing each dominator subtree to avoid redundant work.
2745  //
2746  // FIXME: Need to actually do the number of candidates part above.
2748  // Given a terminator which might be unswitched, computes the non-duplicated
2749  // cost for that terminator.
2750  auto ComputeUnswitchedCost = [&](Instruction &TI,
2751  bool FullUnswitch) -> InstructionCost {
2752  BasicBlock &BB = *TI.getParent();
2754 
2755  InstructionCost Cost = 0;
2756  for (BasicBlock *SuccBB : successors(&BB)) {
2757  // Don't count successors more than once.
2758  if (!Visited.insert(SuccBB).second)
2759  continue;
2760 
2761  // If this is a partial unswitch candidate, then it must be a conditional
2762  // branch with a condition of either `or` or `and`. In that case, one of
2763  // the successors is necessarily duplicated, so don't even try to remove
2764  // its cost.
2765  if (!FullUnswitch) {
2766  auto &BI = cast<BranchInst>(TI);
2767  if (cast<Instruction>(BI.getCondition())->getOpcode() ==
2768  Instruction::And) {
2769  if (SuccBB == BI.getSuccessor(1))
2770  continue;
2771  } else {
2772  assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
2773  Instruction::Or &&
2774  "Only `and` and `or` conditions can result in a partial "
2775  "unswitch!");
2776  if (SuccBB == BI.getSuccessor(0))
2777  continue;
2778  }
2779  }
2780 
2781  // This successor's domtree will not need to be duplicated after
2782  // unswitching if the edge to the successor dominates it (and thus the
2783  // entire tree). This essentially means there is no other path into this
2784  // subtree and so it will end up live in only one clone of the loop.
2785  if (SuccBB->getUniquePredecessor() ||
2786  llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2787  return PredBB == &BB || DT.dominates(SuccBB, PredBB);
2788  })) {
2789  Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
2790  assert(Cost <= LoopCost &&
2791  "Non-duplicated cost should never exceed total loop cost!");
2792  }
2793  }
2794 
2795  // Now scale the cost by the number of unique successors minus one. We
2796  // subtract one because there is already at least one copy of the entire
2797  // loop. This is computing the new cost of unswitching a condition.
2798  // Note that guards always have 2 unique successors that are implicit and
2799  // will be materialized if we decide to unswitch it.
2800  int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
2801  assert(SuccessorsCount > 1 &&
2802  "Cannot unswitch a condition without multiple distinct successors!");
2803  return (LoopCost - Cost) * (SuccessorsCount - 1);
2804  };
2805  Instruction *BestUnswitchTI = nullptr;
2806  InstructionCost BestUnswitchCost = 0;
2807  ArrayRef<Value *> BestUnswitchInvariants;
2808  for (auto &TerminatorAndInvariants : UnswitchCandidates) {
2809  Instruction &TI = *TerminatorAndInvariants.first;
2810  ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
2811  BranchInst *BI = dyn_cast<BranchInst>(&TI);
2812  InstructionCost CandidateCost = ComputeUnswitchedCost(
2813  TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 &&
2814  Invariants[0] == BI->getCondition()));
2815  // Calculate cost multiplier which is a tool to limit potentially
2816  // exponential behavior of loop-unswitch.
2818  int CostMultiplier =
2819  CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
2820  assert(
2821  (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
2822  "cost multiplier needs to be in the range of 1..UnswitchThreshold");
2823  CandidateCost *= CostMultiplier;
2824  LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
2825  << " (multiplier: " << CostMultiplier << ")"
2826  << " for unswitch candidate: " << TI << "\n");
2827  } else {
2828  LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
2829  << " for unswitch candidate: " << TI << "\n");
2830  }
2831 
2832  if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
2833  BestUnswitchTI = &TI;
2834  BestUnswitchCost = CandidateCost;
2835  BestUnswitchInvariants = Invariants;
2836  }
2837  }
2838  assert(BestUnswitchTI && "Failed to find loop unswitch candidate");
2839 
2840  if (BestUnswitchCost >= UnswitchThreshold) {
2841  LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "
2842  << BestUnswitchCost << "\n");
2843  return false;
2844  }
2845 
2846  // If the best candidate is a guard, turn it into a branch.
2847  if (isGuard(BestUnswitchTI))
2848  BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
2849  ExitBlocks, DT, LI, MSSAU);
2850 
2851  LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = "
2852  << BestUnswitchCost << ") terminator: " << *BestUnswitchTI
2853  << "\n");
2854  unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
2855  ExitBlocks, DT, LI, AC, UnswitchCB, SE, MSSAU);
2856  return true;
2857 }
2858 
2859 /// Unswitch control flow predicated on loop invariant conditions.
2860 ///
2861 /// This first hoists all branches or switches which are trivial (IE, do not
2862 /// require duplicating any part of the loop) out of the loop body. It then
2863 /// looks at other loop invariant control flows and tries to unswitch those as
2864 /// well by cloning the loop if the result is small enough.
2865 ///
2866 /// The `DT`, `LI`, `AC`, `TTI` parameters are required analyses that are also
2867 /// updated based on the unswitch.
2868 /// The `MSSA` analysis is also updated if valid (i.e. its use is enabled).
2869 ///
2870 /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
2871 /// true, we will attempt to do non-trivial unswitching as well as trivial
2872 /// unswitching.
2873 ///
2874 /// The `UnswitchCB` callback provided will be run after unswitching is
2875 /// complete, with the first parameter set to `true` if the provided loop
2876 /// remains a loop, and a list of new sibling loops created.
2877 ///
2878 /// If `SE` is non-null, we will update that analysis based on the unswitching
2879 /// done.
2880 static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
2882  bool NonTrivial,
2883  function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
2884  ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
2885  assert(L.isRecursivelyLCSSAForm(DT, LI) &&
2886  "Loops must be in LCSSA form before unswitching.");
2887 
2888  // Must be in loop simplified form: we need a preheader and dedicated exits.
2889  if (!L.isLoopSimplifyForm())
2890  return false;
2891 
2892  // Try trivial unswitch first before loop over other basic blocks in the loop.
2893  if (unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
2894  // If we unswitched successfully we will want to clean up the loop before
2895  // processing it further so just mark it as unswitched and return.
2896  UnswitchCB(/*CurrentLoopValid*/ true, {});
2897  return true;
2898  }
2899 
2900  // If we're not doing non-trivial unswitching, we're done. We both accept
2901  // a parameter but also check a local flag that can be used for testing
2902  // a debugging.
2903  if (!NonTrivial && !EnableNonTrivialUnswitch)
2904  return false;
2905 
2906  // Skip non-trivial unswitching for optsize functions.
2907  if (L.getHeader()->getParent()->hasOptSize())
2908  return false;
2909 
2910  // Skip non-trivial unswitching for loops that cannot be cloned.
2911  if (!L.isSafeToClone())
2912  return false;
2913 
2914  // For non-trivial unswitching, because it often creates new loops, we rely on
2915  // the pass manager to iterate on the loops rather than trying to immediately
2916  // reach a fixed point. There is no substantial advantage to iterating
2917  // internally, and if any of the new loops are simplified enough to contain
2918  // trivial unswitching we want to prefer those.
2919 
2920  // Try to unswitch the best invariant condition. We prefer this full unswitch to
2921  // a partial unswitch when possible below the threshold.
2922  if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB, SE, MSSAU))
2923  return true;
2924 
2925  // No other opportunities to unswitch.
2926  return false;
2927 }
2928 
2929 PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
2931  LPMUpdater &U) {
2932  Function &F = *L.getHeader()->getParent();
2933  (void)F;
2934 
2935  LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
2936  << "\n");
2937 
2938  // Save the current loop name in a variable so that we can report it even
2939  // after it has been deleted.
2940  std::string LoopName = std::string(L.getName());
2941 
2942  auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
2943  ArrayRef<Loop *> NewLoops) {
2944  // If we did a non-trivial unswitch, we have added new (cloned) loops.
2945  if (!NewLoops.empty())
2946  U.addSiblingLoops(NewLoops);
2947 
2948  // If the current loop remains valid, we should revisit it to catch any
2949  // other unswitch opportunities. Otherwise, we need to mark it as deleted.
2950  if (CurrentLoopValid)
2951  U.revisitCurrentLoop();
2952  else
2953  U.markLoopAsDeleted(L, LoopName);
2954  };
2955 
2957  if (AR.MSSA) {
2958  MSSAU = MemorySSAUpdater(AR.MSSA);
2959  if (VerifyMemorySSA)
2960  AR.MSSA->verifyMemorySSA();
2961  }
2962  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial, UnswitchCB,
2963  &AR.SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
2964  return PreservedAnalyses::all();
2965 
2966  if (AR.MSSA && VerifyMemorySSA)
2967  AR.MSSA->verifyMemorySSA();
2968 
2969  // Historically this pass has had issues with the dominator tree so verify it
2970  // in asserts builds.
2972 
2973  auto PA = getLoopPassPreservedAnalyses();
2974  if (AR.MSSA)
2975  PA.preserve<MemorySSAAnalysis>();
2976  return PA;
2977 }
2978 
2979 namespace {
2980 
2981 class SimpleLoopUnswitchLegacyPass : public LoopPass {
2982  bool NonTrivial;
2983 
2984 public:
2985  static char ID; // Pass ID, replacement for typeid
2986 
2987  explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
2988  : LoopPass(ID), NonTrivial(NonTrivial) {
2991  }
2992 
2993  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
2994 
2995  void getAnalysisUsage(AnalysisUsage &AU) const override {
3001  }
3003  }
3004 };
3005 
3006 } // end anonymous namespace
3007 
3008 bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
3009  if (skipLoop(L))
3010  return false;
3011 
3012  Function &F = *L->getHeader()->getParent();
3013 
3014  LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
3015  << "\n");
3016 
3017  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3018  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3019  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3020  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3021  MemorySSA *MSSA = nullptr;
3024  MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3025  MSSAU = MemorySSAUpdater(MSSA);
3026  }
3027 
3028  auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3029  auto *SE = SEWP ? &SEWP->getSE() : nullptr;
3030 
3031  auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid,
3032  ArrayRef<Loop *> NewLoops) {
3033  // If we did a non-trivial unswitch, we have added new (cloned) loops.
3034  for (auto *NewL : NewLoops)
3035  LPM.addLoop(*NewL);
3036 
3037  // If the current loop remains valid, re-add it to the queue. This is
3038  // a little wasteful as we'll finish processing the current loop as well,
3039  // but it is the best we can do in the old PM.
3040  if (CurrentLoopValid)
3041  LPM.addLoop(*L);
3042  else
3043  LPM.markLoopAsDeleted(*L);
3044  };
3045 
3046  if (MSSA && VerifyMemorySSA)
3047  MSSA->verifyMemorySSA();
3048 
3049  bool Changed = unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB, SE,
3050  MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
3051 
3052  if (MSSA && VerifyMemorySSA)
3053  MSSA->verifyMemorySSA();
3054 
3055  // Historically this pass has had issues with the dominator tree so verify it
3056  // in asserts builds.
3058 
3059  return Changed;
3060 }
3061 
3063 INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3064  "Simple unswitch loops", false, false)
3071 INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3073 
3075  return new SimpleLoopUnswitchLegacyPass(NonTrivial);
3076 }
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:26
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::formLCSSA
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition: LCSSA.cpp:337
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:487
cloneLoopNest
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
Definition: SimpleLoopUnswitch.cpp:1194
AssumptionCache.h
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
llvm::createSimpleLoopUnswitchLegacyPass
Pass * createSimpleLoopUnswitchLegacyPass(bool NonTrivial=false)
Create the legacy pass object for the simple loop unswitcher.
Definition: SimpleLoopUnswitch.cpp:3074
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:125
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:210
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:63
getName
static StringRef getName(Value *V)
Definition: ProvenanceAnalysisEvaluator.cpp:42
UnswitchNumInitialUnscaledCandidates
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
llvm
This class represents lattice values for constants.
Definition: AllocatorList.h:23
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:54
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:275
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:62
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:378
ValueMapper.h
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::SwitchInstProfUpdateWrapper
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
Definition: Instructions.h:3495
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
deleteDeadBlocksFromLoop
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Definition: SimpleLoopUnswitch.cpp:1532
llvm::SwitchInstProfUpdateWrapper::getSuccessorWeight
CaseWeightOpt getSuccessorWeight(unsigned idx)
Definition: Instructions.cpp:4177
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:529
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
llvm::cfg::UpdateKind::Insert
@ Insert
unswitchTrivialBranch
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
Definition: SimpleLoopUnswitch.cpp:372
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:686
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:130
llvm::SwitchInst::Create
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3339
recomputeLoopBlockSet
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
Definition: SimpleLoopUnswitch.cpp:1621
ErrorHandling.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::IRBuilder<>
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1667
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::LoopInfoBase::changeLoopFor
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:1001
Local.h
llvm::IRBuilderBase::CreateOr
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1363
llvm::MemorySSAUpdater::removeBlocks
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
Definition: MemorySSAUpdater.cpp:1375
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:150
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
unswitchBestCondition
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, TargetTransformInfo &TTI, function_ref< void(bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Definition: SimpleLoopUnswitch.cpp:2598
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::begin
iterator begin()
Definition: DenseMap.h:74
llvm::DominatorTree::dominates
bool dominates(const Value *Def, const Use &U) const
Return true if value Def dominates use U, in the sense that Def is available at U,...
Definition: Dominators.cpp:258
CalculateUnswitchCostMultiplier
static int CalculateUnswitchCostMultiplier(Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT, ArrayRef< std::pair< Instruction *, TinyPtrVector< Value * >>> UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
Definition: SimpleLoopUnswitch.cpp:2525
ScalarEvolution.h
DenseMap.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:338
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1249
areLoopExitPHIsLoopInvariant
static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
Definition: SimpleLoopUnswitch.cpp:168
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::DominatorTreeBase::Insert
static constexpr UpdateKind Insert
Definition: GenericDomTree.h:242
llvm::MemorySSAUpdater::moveAllAfterSpliceBlocks
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
Definition: MemorySSAUpdater.cpp:1250
turnGuardIntoBranch
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
Definition: SimpleLoopUnswitch.cpp:2449
unswitch
simple loop unswitch
Definition: SimpleLoopUnswitch.cpp:3071
llvm::Optional< uint32_t >
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::SwitchInstProfUpdateWrapper::removeCase
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Definition: Instructions.cpp:4135
unswitchLoop
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, TargetTransformInfo &TTI, bool NonTrivial, function_ref< void(bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch control flow predicated on loop invariant conditions.
Definition: SimpleLoopUnswitch.cpp:2880
STLExtras.h
llvm::EnableMSSALoopDependency
cl::opt< bool > EnableMSSALoopDependency
Enables memory ssa as a dependency for loop passes.
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::SwitchInst::cases
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
Definition: Instructions.h:3396
llvm::SwitchInst::getDefaultDest
BasicBlock * getDefaultDest() const
Definition: Instructions.h:3357
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:260
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
Sequence.h
llvm::Module::getFunction
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:174
llvm::zip_first
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&... args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition: STLExtras.h:737
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1588
computeDomSubtreeCost
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
Definition: SimpleLoopUnswitch.cpp:2399
INITIALIZE_PASS_END
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Definition: RegBankSelect.cpp:69
Use.h
MustExecute.h
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:185
llvm::MemorySSAUpdater::applyUpdates
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
Definition: MemorySSAUpdater.cpp:813
llvm::DominatorTreeBase::applyUpdates
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Definition: GenericDomTree.h:544
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::BranchInst::swapSuccessors
void swapSuccessors()
Swap the successors of this branch instruction.
Definition: Instructions.cpp:1271
llvm::LPPassManager::addLoop
void addLoop(Loop &L)
Definition: LoopPass.cpp:79
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BranchInst::setSuccessor
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Definition: Instructions.h:3103
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1330
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::Optional::getPointer
constexpr const T * getPointer() const
Definition: Optional.h:278
LoopAnalysisManager.h
llvm::LoopBlocksRPO
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:286
llvm::isGuard
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
DropNonTrivialImplicitNullChecks
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
Instruction.h
CommandLine.h
CodeMetrics.h
replaceLoopInvariantUses
static void replaceLoopInvariantUses(Loop &L, Value *Invariant, Constant &Replacement)
Definition: SimpleLoopUnswitch.cpp:151
llvm::SwitchInst::CaseHandleImpl::getCaseSuccessor
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
Definition: Instructions.h:3222
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:160
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1498
llvm::RemapInstruction
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:253
hoistLoopToNewParent
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
Definition: SimpleLoopUnswitch.cpp:273
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:960
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::DominatorTreeBase::insertEdge
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
Definition: GenericDomTree.h:584
Twine.h
InstrTypes.h
llvm::LoopBase::addChildLoop
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:395
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::SwitchInst::CaseHandleImpl::getSuccessorIndex
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
Definition: Instructions.h:3233
SI
@ SI
Definition: SIInstrInfo.cpp:7382
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::TinyPtrVector::push_back
void push_back(EltTy NewVal)
Definition: TinyPtrVector.h:244
rewritePHINodesForExitAndUnswitchedBlocks
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
Definition: SimpleLoopUnswitch.cpp:228
llvm::LoopInfoBase::addTopLevelLoop
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition: LoopInfo.h:1020
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::LoopInfoBase::destroy
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Definition: LoopInfo.h:1067
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:379
UnswitchThreshold
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
false
Definition: StackSlotColoring.cpp:142
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:597
llvm::MemorySSAUpdater::moveToPlace
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
Definition: MemorySSAUpdater.cpp:1203
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:278
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::AssumptionCache::registerAssumption
void registerAssumption(CallInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Definition: AssumptionCache.cpp:217
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:367
llvm::SwitchInstProfUpdateWrapper::setSuccessorWeight
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
Definition: Instructions.cpp:4183
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1751
LoopUtils.h
llvm::AArch64CC::PL
@ PL
Definition: AArch64BaseInfo.h:241
llvm::LPPassManager
Definition: LoopPass.h:75
SmallPtrSet.h
visitDomSubTree
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
Definition: SimpleLoopUnswitch.cpp:1936
llvm::ValueMap::count
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: ValueMap.h:152
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:862
llvm::RF_NoModuleLevelChanges
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:72
LoopIterator.h
deleteDeadClonedBlocks
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy >> VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
Definition: SimpleLoopUnswitch.cpp:1503
llvm::IRBuilderBase::CreateAnd
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1337
llvm::Loop::LoopBounds::Direction
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:697
llvm::MemorySSAUpdater::removeDuplicatePhiEdgesBetween
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
Definition: MemorySSAUpdater.cpp:542
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:277
LoopInfo.h
llvm::LoopInfoBase::AllocateLoop
LoopT * AllocateLoop(ArgsTy &&... Args)
Definition: LoopInfo.h:927
collectHomogenousInstGraphLoopInvariants
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root, LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
Definition: SimpleLoopUnswitch.cpp:113
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3086
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:176
llvm::MemorySSAUpdater::applyInsertUpdates
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
Definition: MemorySSAUpdater.cpp:859
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:471
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:43
buildPartialUnswitchConditionalBranch
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc)
Insert code to test a set of loop invariant values, and conditionally branch on them.
Definition: SimpleLoopUnswitch.cpp:186
BasicBlock.h
llvm::cl::opt< bool >
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
llvm::MemorySSAUpdater::removeEdge
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
Definition: MemorySSAUpdater.cpp:535
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::SwitchInst::setDefaultDest
void setDefaultDest(BasicBlock *DefaultCase)
Definition: Instructions.h:3361
llvm::LoopPass
Definition: LoopPass.h:27
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2281
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:56
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:243
llvm::MemorySSAUpdater::getMemorySSA
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
Definition: MemorySSAUpdater.h:249
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1518
llvm::LoopBase::getUniqueExitBlocks
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:122
llvm::CallingConv::Fast
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
llvm::MemorySSAUpdater::updateForClonedLoop
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
Definition: MemorySSAUpdater.cpp:680
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3061
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:703
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:963
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::MemorySSA::getMemoryAccess
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:720
Cloning.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:581
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::MemoryDef
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:378
SimpleLoopUnswitch.h
UnswitchGuards
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::ValueToValueMapTy
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
Definition: MemorySSAUpdater.h:52
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", "Simple unswitch loops", false, false) INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass
getFalse
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Definition: InstructionSimplify.cpp:116
llvm::LoopBase::getLoopDepth
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:96
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:318
llvm::LoopBase::getBlocksVector
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
Definition: LoopInfo.h:192
llvm::LoopInfoBase::begin
iterator begin() const
Definition: LoopInfo.h:938
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:7150
llvm::MemorySSA::verifyMemorySSA
void verifyMemorySSA() const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1866
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:921
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
getTopMostExitingLoop
static Loop * getTopMostExitingLoop(BasicBlock *ExitBB, LoopInfo &LI)
Definition: SimpleLoopUnswitch.cpp:344
CFG.h
llvm::Instruction::clone
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
Definition: Instruction.cpp:781
llvm::LoopInfoBase::getLoopDepth
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
Definition: LoopInfo.h:970
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::LoopBase::removeChildLoop
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:404
rebuildLoopAfterUnswitch
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops)
Rebuild a loop after unswitching removes some subset of blocks and edges.
Definition: SimpleLoopUnswitch.cpp:1732
llvm::LoopBase::getSubLoopsVector
std::vector< LoopT * > & getSubLoopsVector()
Definition: LoopInfo.h:147
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1079
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:339
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1505
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
LoopPass.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::MemorySSA::getBlockDefs
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:768
llvm::initializeSimpleLoopUnswitchLegacyPassPass
void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &)
llvm::SwitchInst::CaseHandleImpl::getCaseValue
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Definition: Instructions.h:3215
llvm::LoopBase::isOutermost
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
Definition: LoopInfo.h:168
llvm::LoopBase::reserveBlocks
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
Definition: LoopInfo.h:436
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:523
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:871
loops
simple loop Simple unswitch loops
Definition: SimpleLoopUnswitch.cpp:3072
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
unswitchTrivialSwitch
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
Definition: SimpleLoopUnswitch.cpp:596
llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:117
llvm::IRBuilderBase::CreateCondBr
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:970
llvm::DomTreeNodeBase< BasicBlock >
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::CallBase::setArgOperand
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1346
llvm::LoopInfoBase::removeLoop
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
Definition: LoopInfo.h:990
llvm::ConstantInt::getFalse
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:831
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:92
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:685
llvm::MemorySSAUpdater::updateExitBlocksForClonedLoop
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Definition: MemorySSAUpdater.cpp:791
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:308
llvm::RF_IgnoreMissingLocals
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:90
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
Constant.h
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition: BasicBlockUtils.cpp:499
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
llvm::formDedicatedExitBlocks
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:63
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1633
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:7084
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:824
llvm::SwitchInstProfUpdateWrapper::addCase
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
Definition: Instructions.cpp:4149
llvm::TargetTransformInfo::getUserCost
int getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:222
llvm::LPPassManager::markLoopAsDeleted
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:113
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:59
unswitchNontrivialInvariants
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref< void(bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Definition: SimpleLoopUnswitch.cpp:1959
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::ICFLoopSafetyInfo
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:132
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2612
GenericDomTree.h
buildClonedLoops
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
Definition: SimpleLoopUnswitch.cpp:1253
llvm::LoopBase::getBlocksSet
SmallPtrSetImpl< const BlockT * > & getBlocksSet()
Return a direct, mutable handle to the blocks set so that we can mutate it efficiently.
Definition: LoopInfo.h:198
Casting.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1439
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::Loop::isRecursivelyLCSSAForm
bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:461
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::SwitchInstProfUpdateWrapper::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
Definition: Instructions.cpp:4168
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
GuardUtils.h
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:474
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
llvm::TinyPtrVector::empty
bool empty() const
Definition: TinyPtrVector.h:163
llvm::LoopInfoBase::end
iterator end() const
Definition: LoopInfo.h:939
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
MemorySSA.h
Instructions.h
llvm::BasicBlock::moveBefore
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:133
SmallVector.h
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:242
Dominators.h
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
N
#define N
rewritePHINodesForUnswitchedExitBlock
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
Definition: SimpleLoopUnswitch.cpp:206
llvm::SwitchInst::CaseHandle
Definition: Instructions.h:3249
llvm::BasicBlock::size
size_t size() const
Definition: BasicBlock.h:306
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::Loop::isSafeToClone
bool isSafeToClone() const
Return true if the loop body is safe to clone in practice.
Definition: LoopInfo.cpp:478
InstructionSimplify.h
llvm::ICFLoopSafetyInfo::computeLoopSafetyInfo
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:81
getTrue
static Constant * getTrue(Type *Ty)
For a boolean type or a vector of boolean type, return true or a vector with every element true.
Definition: InstructionSimplify.cpp:122
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition: LoopAnalysisManager.h:61
llvm::PHINode
Definition: Instructions.h:2572
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:321
llvm::SmallVectorImpl< DominatorTree::UpdateType >
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:43
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:225
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ICFLoopSafetyInfo::isGuaranteedToExecute
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
Definition: MustExecute.cpp:270
llvm::ValueMap::lookup
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:165
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3149
llvm::SplitBlockAndInsertIfThen
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
Definition: BasicBlockUtils.cpp:1203
llvm::cl::desc
Definition: CommandLine.h:411
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3005
raw_ostream.h
llvm::LoopBlocksRPO::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
llvm::TinyPtrVector
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:624
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:599
Value.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::DominatorTreeBase::deleteEdge
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Definition: GenericDomTree.h:602
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3084
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3098
SetVector.h
llvm::DominatorTreeBase::Delete
static constexpr UpdateKind Delete
Definition: GenericDomTree.h:243
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:96
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
unswitchAllTrivialConditions
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
Definition: SimpleLoopUnswitch.cpp:891
UnswitchSiblingsToplevelDiv
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
EnableNonTrivialUnswitch
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
EnableUnswitchCostMultiplier
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
buildClonedLoopBlocks
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Build the cloned blocks for an unswitched copy of the given loop.
Definition: SimpleLoopUnswitch.cpp:1009