LLVM  13.0.0git
SimpleLoopUnswitch.cpp
Go to the documentation of this file.
1 ///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/Sequence.h"
13 #include "llvm/ADT/SetVector.h"
14 #include "llvm/ADT/SmallPtrSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/Twine.h"
19 #include "llvm/Analysis/CFG.h"
24 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/Analysis/LoopPass.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/Constant.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/Dominators.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/IRBuilder.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/IntrinsicInst.h"
41 #include "llvm/IR/PatternMatch.h"
42 #include "llvm/IR/Use.h"
43 #include "llvm/IR/Value.h"
44 #include "llvm/InitializePasses.h"
45 #include "llvm/Pass.h"
46 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Debug.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <iterator>
61 #include <numeric>
62 #include <utility>
63 
64 #define DEBUG_TYPE "simple-loop-unswitch"
65 
66 using namespace llvm;
67 using namespace llvm::PatternMatch;
68 
69 STATISTIC(NumBranches, "Number of branches unswitched");
70 STATISTIC(NumSwitches, "Number of switches unswitched");
71 STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
72 STATISTIC(NumTrivial, "Number of unswitches that are trivial");
73 STATISTIC(
74  NumCostMultiplierSkipped,
75  "Number of unswitch candidates that had their cost multiplier skipped");
76 
78  "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
79  cl::desc("Forcibly enables non-trivial loop unswitching rather than "
80  "following the configuration passed into the pass."));
81 
82 static cl::opt<int>
83  UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
84  cl::desc("The cost threshold for unswitching a loop."));
85 
87  "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
88  cl::desc("Enable unswitch cost multiplier that prohibits exponential "
89  "explosion in nontrivial unswitch."));
91  "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
92  cl::desc("Toplevel siblings divisor for cost multiplier."));
94  "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
95  cl::desc("Number of unswitch candidates that are ignored when calculating "
96  "cost multiplier."));
98  "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
99  cl::desc("If enabled, simple loop unswitching will also consider "
100  "llvm.experimental.guard intrinsics as unswitch candidates."));
102  "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
103  cl::init(false), cl::Hidden,
104  cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
105  "null checks to save time analyzing if we can keep it."));
106 static cl::opt<unsigned>
107  MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
108  cl::desc("Max number of memory uses to explore during "
109  "partial unswitching analysis"),
110  cl::init(100), cl::Hidden);
111 
112 /// Collect all of the loop invariant input values transitively used by the
113 /// homogeneous instruction graph from a given root.
114 ///
115 /// This essentially walks from a root recursively through loop variant operands
116 /// which have the exact same opcode and finds all inputs which are loop
117 /// invariant. For some operations these can be re-associated and unswitched out
118 /// of the loop entirely.
121  LoopInfo &LI) {
122  assert(!L.isLoopInvariant(&Root) &&
123  "Only need to walk the graph if root itself is not invariant.");
124  TinyPtrVector<Value *> Invariants;
125 
126  bool IsRootAnd = match(&Root, m_LogicalAnd());
127  bool IsRootOr = match(&Root, m_LogicalOr());
128 
129  // Build a worklist and recurse through operators collecting invariants.
132  Worklist.push_back(&Root);
133  Visited.insert(&Root);
134  do {
135  Instruction &I = *Worklist.pop_back_val();
136  for (Value *OpV : I.operand_values()) {
137  // Skip constants as unswitching isn't interesting for them.
138  if (isa<Constant>(OpV))
139  continue;
140 
141  // Add it to our result if loop invariant.
142  if (L.isLoopInvariant(OpV)) {
143  Invariants.push_back(OpV);
144  continue;
145  }
146 
147  // If not an instruction with the same opcode, nothing we can do.
148  Instruction *OpI = dyn_cast<Instruction>(OpV);
149 
150  if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
151  (IsRootOr && match(OpI, m_LogicalOr())))) {
152  // Visit this operand.
153  if (Visited.insert(OpI).second)
154  Worklist.push_back(OpI);
155  }
156  }
157  } while (!Worklist.empty());
158 
159  return Invariants;
160 }
161 
162 static void replaceLoopInvariantUses(Loop &L, Value *Invariant,
163  Constant &Replacement) {
164  assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
165 
166  // Replace uses of LIC in the loop with the given constant.
167  // We use make_early_inc_range as set invalidates the iterator.
168  for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
169  Instruction *UserI = dyn_cast<Instruction>(U.getUser());
170 
171  // Replace this use within the loop body.
172  if (UserI && L.contains(UserI))
173  U.set(&Replacement);
174  }
175 }
176 
177 /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
178 /// incoming values along this edge.
179 static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
180  BasicBlock &ExitBB) {
181  for (Instruction &I : ExitBB) {
182  auto *PN = dyn_cast<PHINode>(&I);
183  if (!PN)
184  // No more PHIs to check.
185  return true;
186 
187  // If the incoming value for this edge isn't loop invariant the unswitch
188  // won't be trivial.
189  if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
190  return false;
191  }
192  llvm_unreachable("Basic blocks should never be empty!");
193 }
194 
195 /// Copy a set of loop invariant values \p ToDuplicate and insert them at the
196 /// end of \p BB and conditionally branch on the copied condition. We only
197 /// branch on a single value.
199  ArrayRef<Value *> Invariants,
200  bool Direction,
201  BasicBlock &UnswitchedSucc,
202  BasicBlock &NormalSucc) {
203  IRBuilder<> IRB(&BB);
204 
205  Value *Cond = Direction ? IRB.CreateOr(Invariants) :
206  IRB.CreateAnd(Invariants);
207  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
208  Direction ? &NormalSucc : &UnswitchedSucc);
209 }
210 
211 /// Copy a set of loop invariant values, and conditionally branch on them.
213  BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
214  BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
215  MemorySSAUpdater *MSSAU) {
216  ValueToValueMapTy VMap;
217  for (auto *Val : reverse(ToDuplicate)) {
218  Instruction *Inst = cast<Instruction>(Val);
219  Instruction *NewInst = Inst->clone();
220  BB.getInstList().insert(BB.end(), NewInst);
221  RemapInstruction(NewInst, VMap,
223  VMap[Val] = NewInst;
224 
225  if (!MSSAU)
226  continue;
227 
228  MemorySSA *MSSA = MSSAU->getMemorySSA();
229  if (auto *MemUse =
230  dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
231  auto *DefiningAccess = MemUse->getDefiningAccess();
232  // Get the first defining access before the loop.
233  while (L.contains(DefiningAccess->getBlock())) {
234  // If the defining access is a MemoryPhi, get the incoming
235  // value for the pre-header as defining access.
236  if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
237  DefiningAccess =
238  MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
239  else
240  DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
241  }
242  MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
243  NewInst->getParent(),
245  }
246  }
247 
248  IRBuilder<> IRB(&BB);
249  Value *Cond = VMap[ToDuplicate[0]];
250  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
251  Direction ? &NormalSucc : &UnswitchedSucc);
252 }
253 
254 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
255 ///
256 /// Requires that the loop exit and unswitched basic block are the same, and
257 /// that the exiting block was a unique predecessor of that block. Rewrites the
258 /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
259 /// PHI nodes from the old preheader that now contains the unswitched
260 /// terminator.
262  BasicBlock &OldExitingBB,
263  BasicBlock &OldPH) {
264  for (PHINode &PN : UnswitchedBB.phis()) {
265  // When the loop exit is directly unswitched we just need to update the
266  // incoming basic block. We loop to handle weird cases with repeated
267  // incoming blocks, but expect to typically only have one operand here.
268  for (auto i : seq<int>(0, PN.getNumOperands())) {
269  assert(PN.getIncomingBlock(i) == &OldExitingBB &&
270  "Found incoming block different from unique predecessor!");
271  PN.setIncomingBlock(i, &OldPH);
272  }
273  }
274 }
275 
276 /// Rewrite the PHI nodes in the loop exit basic block and the split off
277 /// unswitched block.
278 ///
279 /// Because the exit block remains an exit from the loop, this rewrites the
280 /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
281 /// nodes into the unswitched basic block to select between the value in the
282 /// old preheader and the loop exit.
284  BasicBlock &UnswitchedBB,
285  BasicBlock &OldExitingBB,
286  BasicBlock &OldPH,
287  bool FullUnswitch) {
288  assert(&ExitBB != &UnswitchedBB &&
289  "Must have different loop exit and unswitched blocks!");
290  Instruction *InsertPt = &*UnswitchedBB.begin();
291  for (PHINode &PN : ExitBB.phis()) {
292  auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
293  PN.getName() + ".split", InsertPt);
294 
295  // Walk backwards over the old PHI node's inputs to minimize the cost of
296  // removing each one. We have to do this weird loop manually so that we
297  // create the same number of new incoming edges in the new PHI as we expect
298  // each case-based edge to be included in the unswitched switch in some
299  // cases.
300  // FIXME: This is really, really gross. It would be much cleaner if LLVM
301  // allowed us to create a single entry for a predecessor block without
302  // having separate entries for each "edge" even though these edges are
303  // required to produce identical results.
304  for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
305  if (PN.getIncomingBlock(i) != &OldExitingBB)
306  continue;
307 
308  Value *Incoming = PN.getIncomingValue(i);
309  if (FullUnswitch)
310  // No more edge from the old exiting block to the exit block.
311  PN.removeIncomingValue(i);
312 
313  NewPN->addIncoming(Incoming, &OldPH);
314  }
315 
316  // Now replace the old PHI with the new one and wire the old one in as an
317  // input to the new one.
318  PN.replaceAllUsesWith(NewPN);
319  NewPN->addIncoming(&PN, &ExitBB);
320  }
321 }
322 
323 /// Hoist the current loop up to the innermost loop containing a remaining exit.
324 ///
325 /// Because we've removed an exit from the loop, we may have changed the set of
326 /// loops reachable and need to move the current loop up the loop nest or even
327 /// to an entirely separate nest.
328 static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
329  DominatorTree &DT, LoopInfo &LI,
330  MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
331  // If the loop is already at the top level, we can't hoist it anywhere.
332  Loop *OldParentL = L.getParentLoop();
333  if (!OldParentL)
334  return;
335 
337  L.getExitBlocks(Exits);
338  Loop *NewParentL = nullptr;
339  for (auto *ExitBB : Exits)
340  if (Loop *ExitL = LI.getLoopFor(ExitBB))
341  if (!NewParentL || NewParentL->contains(ExitL))
342  NewParentL = ExitL;
343 
344  if (NewParentL == OldParentL)
345  return;
346 
347  // The new parent loop (if different) should always contain the old one.
348  if (NewParentL)
349  assert(NewParentL->contains(OldParentL) &&
350  "Can only hoist this loop up the nest!");
351 
352  // The preheader will need to move with the body of this loop. However,
353  // because it isn't in this loop we also need to update the primary loop map.
354  assert(OldParentL == LI.getLoopFor(&Preheader) &&
355  "Parent loop of this loop should contain this loop's preheader!");
356  LI.changeLoopFor(&Preheader, NewParentL);
357 
358  // Remove this loop from its old parent.
359  OldParentL->removeChildLoop(&L);
360 
361  // Add the loop either to the new parent or as a top-level loop.
362  if (NewParentL)
363  NewParentL->addChildLoop(&L);
364  else
365  LI.addTopLevelLoop(&L);
366 
367  // Remove this loops blocks from the old parent and every other loop up the
368  // nest until reaching the new parent. Also update all of these
369  // no-longer-containing loops to reflect the nesting change.
370  for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
371  OldContainingL = OldContainingL->getParentLoop()) {
372  llvm::erase_if(OldContainingL->getBlocksVector(),
373  [&](const BasicBlock *BB) {
374  return BB == &Preheader || L.contains(BB);
375  });
376 
377  OldContainingL->getBlocksSet().erase(&Preheader);
378  for (BasicBlock *BB : L.blocks())
379  OldContainingL->getBlocksSet().erase(BB);
380 
381  // Because we just hoisted a loop out of this one, we have essentially
382  // created new exit paths from it. That means we need to form LCSSA PHI
383  // nodes for values used in the no-longer-nested loop.
384  formLCSSA(*OldContainingL, DT, &LI, SE);
385 
386  // We shouldn't need to form dedicated exits because the exit introduced
387  // here is the (just split by unswitching) preheader. However, after trivial
388  // unswitching it is possible to get new non-dedicated exits out of parent
389  // loop so let's conservatively form dedicated exit blocks and figure out
390  // if we can optimize later.
391  formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
392  /*PreserveLCSSA*/ true);
393  }
394 }
395 
396 // Return the top-most loop containing ExitBB and having ExitBB as exiting block
397 // or the loop containing ExitBB, if there is no parent loop containing ExitBB
398 // as exiting block.
400  Loop *TopMost = LI.getLoopFor(ExitBB);
401  Loop *Current = TopMost;
402  while (Current) {
403  if (Current->isLoopExiting(ExitBB))
404  TopMost = Current;
405  Current = Current->getParentLoop();
406  }
407  return TopMost;
408 }
409 
410 /// Unswitch a trivial branch if the condition is loop invariant.
411 ///
412 /// This routine should only be called when loop code leading to the branch has
413 /// been validated as trivial (no side effects). This routine checks if the
414 /// condition is invariant and one of the successors is a loop exit. This
415 /// allows us to unswitch without duplicating the loop, making it trivial.
416 ///
417 /// If this routine fails to unswitch the branch it returns false.
418 ///
419 /// If the branch can be unswitched, this routine splits the preheader and
420 /// hoists the branch above that split. Preserves loop simplified form
421 /// (splitting the exit block as necessary). It simplifies the branch within
422 /// the loop to an unconditional branch but doesn't remove it entirely. Further
423 /// cleanup can be done with some simplify-cfg like pass.
424 ///
425 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
426 /// invalidated by this.
428  LoopInfo &LI, ScalarEvolution *SE,
429  MemorySSAUpdater *MSSAU) {
430  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
431  LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
432 
433  // The loop invariant values that we want to unswitch.
434  TinyPtrVector<Value *> Invariants;
435 
436  // When true, we're fully unswitching the branch rather than just unswitching
437  // some input conditions to the branch.
438  bool FullUnswitch = false;
439 
440  if (L.isLoopInvariant(BI.getCondition())) {
441  Invariants.push_back(BI.getCondition());
442  FullUnswitch = true;
443  } else {
444  if (auto *CondInst = dyn_cast<Instruction>(BI.getCondition()))
445  Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
446  if (Invariants.empty())
447  // Couldn't find invariant inputs!
448  return false;
449  }
450 
451  // Check that one of the branch's successors exits, and which one.
452  bool ExitDirection = true;
453  int LoopExitSuccIdx = 0;
454  auto *LoopExitBB = BI.getSuccessor(0);
455  if (L.contains(LoopExitBB)) {
456  ExitDirection = false;
457  LoopExitSuccIdx = 1;
458  LoopExitBB = BI.getSuccessor(1);
459  if (L.contains(LoopExitBB))
460  return false;
461  }
462  auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
463  auto *ParentBB = BI.getParent();
464  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB))
465  return false;
466 
467  // When unswitching only part of the branch's condition, we need the exit
468  // block to be reached directly from the partially unswitched input. This can
469  // be done when the exit block is along the true edge and the branch condition
470  // is a graph of `or` operations, or the exit block is along the false edge
471  // and the condition is a graph of `and` operations.
472  if (!FullUnswitch) {
473  if (ExitDirection) {
474  if (!match(BI.getCondition(), m_LogicalOr()))
475  return false;
476  } else {
477  if (!match(BI.getCondition(), m_LogicalAnd()))
478  return false;
479  }
480  }
481 
482  LLVM_DEBUG({
483  dbgs() << " unswitching trivial invariant conditions for: " << BI
484  << "\n";
485  for (Value *Invariant : Invariants) {
486  dbgs() << " " << *Invariant << " == true";
487  if (Invariant != Invariants.back())
488  dbgs() << " ||";
489  dbgs() << "\n";
490  }
491  });
492 
493  // If we have scalar evolutions, we need to invalidate them including this
494  // loop, the loop containing the exit block and the topmost parent loop
495  // exiting via LoopExitBB.
496  if (SE) {
497  if (Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
498  SE->forgetLoop(ExitL);
499  else
500  // Forget the entire nest as this exits the entire nest.
501  SE->forgetTopmostLoop(&L);
502  }
503 
504  if (MSSAU && VerifyMemorySSA)
505  MSSAU->getMemorySSA()->verifyMemorySSA();
506 
507  // Split the preheader, so that we know that there is a safe place to insert
508  // the conditional branch. We will change the preheader to have a conditional
509  // branch on LoopCond.
510  BasicBlock *OldPH = L.getLoopPreheader();
511  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
512 
513  // Now that we have a place to insert the conditional branch, create a place
514  // to branch to: this is the exit block out of the loop that we are
515  // unswitching. We need to split this if there are other loop predecessors.
516  // Because the loop is in simplified form, *any* other predecessor is enough.
517  BasicBlock *UnswitchedBB;
518  if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
519  assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
520  "A branch's parent isn't a predecessor!");
521  UnswitchedBB = LoopExitBB;
522  } else {
523  UnswitchedBB =
524  SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
525  }
526 
527  if (MSSAU && VerifyMemorySSA)
528  MSSAU->getMemorySSA()->verifyMemorySSA();
529 
530  // Actually move the invariant uses into the unswitched position. If possible,
531  // we do this by moving the instructions, but when doing partial unswitching
532  // we do it by building a new merge of the values in the unswitched position.
533  OldPH->getTerminator()->eraseFromParent();
534  if (FullUnswitch) {
535  // If fully unswitching, we can use the existing branch instruction.
536  // Splice it into the old PH to gate reaching the new preheader and re-point
537  // its successors.
538  OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
539  BI);
540  if (MSSAU) {
541  // Temporarily clone the terminator, to make MSSA update cheaper by
542  // separating "insert edge" updates from "remove edge" ones.
543  ParentBB->getInstList().push_back(BI.clone());
544  } else {
545  // Create a new unconditional branch that will continue the loop as a new
546  // terminator.
547  BranchInst::Create(ContinueBB, ParentBB);
548  }
549  BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
550  BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
551  } else {
552  // Only unswitching a subset of inputs to the condition, so we will need to
553  // build a new branch that merges the invariant inputs.
554  if (ExitDirection)
556  "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
557  "condition!");
558  else
560  "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
561  " condition!");
562  buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
563  *UnswitchedBB, *NewPH);
564  }
565 
566  // Update the dominator tree with the added edge.
567  DT.insertEdge(OldPH, UnswitchedBB);
568 
569  // After the dominator tree was updated with the added edge, update MemorySSA
570  // if available.
571  if (MSSAU) {
573  Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
574  MSSAU->applyInsertUpdates(Updates, DT);
575  }
576 
577  // Finish updating dominator tree and memory ssa for full unswitch.
578  if (FullUnswitch) {
579  if (MSSAU) {
580  // Remove the cloned branch instruction.
581  ParentBB->getTerminator()->eraseFromParent();
582  // Create unconditional branch now.
583  BranchInst::Create(ContinueBB, ParentBB);
584  MSSAU->removeEdge(ParentBB, LoopExitBB);
585  }
586  DT.deleteEdge(ParentBB, LoopExitBB);
587  }
588 
589  if (MSSAU && VerifyMemorySSA)
590  MSSAU->getMemorySSA()->verifyMemorySSA();
591 
592  // Rewrite the relevant PHI nodes.
593  if (UnswitchedBB == LoopExitBB)
594  rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
595  else
596  rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
597  *ParentBB, *OldPH, FullUnswitch);
598 
599  // The constant we can replace all of our invariants with inside the loop
600  // body. If any of the invariants have a value other than this the loop won't
601  // be entered.
602  ConstantInt *Replacement = ExitDirection
605 
606  // Since this is an i1 condition we can also trivially replace uses of it
607  // within the loop with a constant.
608  for (Value *Invariant : Invariants)
609  replaceLoopInvariantUses(L, Invariant, *Replacement);
610 
611  // If this was full unswitching, we may have changed the nesting relationship
612  // for this loop so hoist it to its correct parent if needed.
613  if (FullUnswitch)
614  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
615 
616  if (MSSAU && VerifyMemorySSA)
617  MSSAU->getMemorySSA()->verifyMemorySSA();
618 
619  LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
620  ++NumTrivial;
621  ++NumBranches;
622  return true;
623 }
624 
625 /// Unswitch a trivial switch if the condition is loop invariant.
626 ///
627 /// This routine should only be called when loop code leading to the switch has
628 /// been validated as trivial (no side effects). This routine checks if the
629 /// condition is invariant and that at least one of the successors is a loop
630 /// exit. This allows us to unswitch without duplicating the loop, making it
631 /// trivial.
632 ///
633 /// If this routine fails to unswitch the switch it returns false.
634 ///
635 /// If the switch can be unswitched, this routine splits the preheader and
636 /// copies the switch above that split. If the default case is one of the
637 /// exiting cases, it copies the non-exiting cases and points them at the new
638 /// preheader. If the default case is not exiting, it copies the exiting cases
639 /// and points the default at the preheader. It preserves loop simplified form
640 /// (splitting the exit blocks as necessary). It simplifies the switch within
641 /// the loop by removing now-dead cases. If the default case is one of those
642 /// unswitched, it replaces its destination with a new basic block containing
643 /// only unreachable. Such basic blocks, while technically loop exits, are not
644 /// considered for unswitching so this is a stable transform and the same
645 /// switch will not be revisited. If after unswitching there is only a single
646 /// in-loop successor, the switch is further simplified to an unconditional
647 /// branch. Still more cleanup can be done with some simplify-cfg like pass.
648 ///
649 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
650 /// invalidated by this.
652  LoopInfo &LI, ScalarEvolution *SE,
653  MemorySSAUpdater *MSSAU) {
654  LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
655  Value *LoopCond = SI.getCondition();
656 
657  // If this isn't switching on an invariant condition, we can't unswitch it.
658  if (!L.isLoopInvariant(LoopCond))
659  return false;
660 
661  auto *ParentBB = SI.getParent();
662 
663  // The same check must be used both for the default and the exit cases. We
664  // should never leave edges from the switch instruction to a basic block that
665  // we are unswitching, hence the condition used to determine the default case
666  // needs to also be used to populate ExitCaseIndices, which is then used to
667  // remove cases from the switch.
668  auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
669  // BBToCheck is not an exit block if it is inside loop L.
670  if (L.contains(&BBToCheck))
671  return false;
672  // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
673  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
674  return false;
675  // We do not unswitch a block that only has an unreachable statement, as
676  // it's possible this is a previously unswitched block. Only unswitch if
677  // either the terminator is not unreachable, or, if it is, it's not the only
678  // instruction in the block.
679  auto *TI = BBToCheck.getTerminator();
680  bool isUnreachable = isa<UnreachableInst>(TI);
681  return !isUnreachable ||
682  (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
683  };
684 
685  SmallVector<int, 4> ExitCaseIndices;
686  for (auto Case : SI.cases())
687  if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
688  ExitCaseIndices.push_back(Case.getCaseIndex());
689  BasicBlock *DefaultExitBB = nullptr;
692  if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
693  DefaultExitBB = SI.getDefaultDest();
694  } else if (ExitCaseIndices.empty())
695  return false;
696 
697  LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
698 
699  if (MSSAU && VerifyMemorySSA)
700  MSSAU->getMemorySSA()->verifyMemorySSA();
701 
702  // We may need to invalidate SCEVs for the outermost loop reached by any of
703  // the exits.
704  Loop *OuterL = &L;
705 
706  if (DefaultExitBB) {
707  // Clear out the default destination temporarily to allow accurate
708  // predecessor lists to be examined below.
709  SI.setDefaultDest(nullptr);
710  // Check the loop containing this exit.
711  Loop *ExitL = LI.getLoopFor(DefaultExitBB);
712  if (!ExitL || ExitL->contains(OuterL))
713  OuterL = ExitL;
714  }
715 
716  // Store the exit cases into a separate data structure and remove them from
717  // the switch.
718  SmallVector<std::tuple<ConstantInt *, BasicBlock *,
720  4> ExitCases;
721  ExitCases.reserve(ExitCaseIndices.size());
723  // We walk the case indices backwards so that we remove the last case first
724  // and don't disrupt the earlier indices.
725  for (unsigned Index : reverse(ExitCaseIndices)) {
726  auto CaseI = SI.case_begin() + Index;
727  // Compute the outer loop from this exit.
728  Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
729  if (!ExitL || ExitL->contains(OuterL))
730  OuterL = ExitL;
731  // Save the value of this case.
732  auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
733  ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
734  // Delete the unswitched cases.
735  SIW.removeCase(CaseI);
736  }
737 
738  if (SE) {
739  if (OuterL)
740  SE->forgetLoop(OuterL);
741  else
742  SE->forgetTopmostLoop(&L);
743  }
744 
745  // Check if after this all of the remaining cases point at the same
746  // successor.
747  BasicBlock *CommonSuccBB = nullptr;
748  if (SI.getNumCases() > 0 &&
749  all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
750  return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
751  }))
752  CommonSuccBB = SI.case_begin()->getCaseSuccessor();
753  if (!DefaultExitBB) {
754  // If we're not unswitching the default, we need it to match any cases to
755  // have a common successor or if we have no cases it is the common
756  // successor.
757  if (SI.getNumCases() == 0)
758  CommonSuccBB = SI.getDefaultDest();
759  else if (SI.getDefaultDest() != CommonSuccBB)
760  CommonSuccBB = nullptr;
761  }
762 
763  // Split the preheader, so that we know that there is a safe place to insert
764  // the switch.
765  BasicBlock *OldPH = L.getLoopPreheader();
766  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
767  OldPH->getTerminator()->eraseFromParent();
768 
769  // Now add the unswitched switch.
770  auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
771  SwitchInstProfUpdateWrapper NewSIW(*NewSI);
772 
773  // Rewrite the IR for the unswitched basic blocks. This requires two steps.
774  // First, we split any exit blocks with remaining in-loop predecessors. Then
775  // we update the PHIs in one of two ways depending on if there was a split.
776  // We walk in reverse so that we split in the same order as the cases
777  // appeared. This is purely for convenience of reading the resulting IR, but
778  // it doesn't cost anything really.
779  SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
781  // Handle the default exit if necessary.
782  // FIXME: It'd be great if we could merge this with the loop below but LLVM's
783  // ranges aren't quite powerful enough yet.
784  if (DefaultExitBB) {
785  if (pred_empty(DefaultExitBB)) {
786  UnswitchedExitBBs.insert(DefaultExitBB);
787  rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
788  } else {
789  auto *SplitBB =
790  SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
791  rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
792  *ParentBB, *OldPH,
793  /*FullUnswitch*/ true);
794  DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
795  }
796  }
797  // Note that we must use a reference in the for loop so that we update the
798  // container.
799  for (auto &ExitCase : reverse(ExitCases)) {
800  // Grab a reference to the exit block in the pair so that we can update it.
801  BasicBlock *ExitBB = std::get<1>(ExitCase);
802 
803  // If this case is the last edge into the exit block, we can simply reuse it
804  // as it will no longer be a loop exit. No mapping necessary.
805  if (pred_empty(ExitBB)) {
806  // Only rewrite once.
807  if (UnswitchedExitBBs.insert(ExitBB).second)
808  rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
809  continue;
810  }
811 
812  // Otherwise we need to split the exit block so that we retain an exit
813  // block from the loop and a target for the unswitched condition.
814  BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
815  if (!SplitExitBB) {
816  // If this is the first time we see this, do the split and remember it.
817  SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
818  rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
819  *ParentBB, *OldPH,
820  /*FullUnswitch*/ true);
821  }
822  // Update the case pair to point to the split block.
823  std::get<1>(ExitCase) = SplitExitBB;
824  }
825 
826  // Now add the unswitched cases. We do this in reverse order as we built them
827  // in reverse order.
828  for (auto &ExitCase : reverse(ExitCases)) {
829  ConstantInt *CaseVal = std::get<0>(ExitCase);
830  BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
831 
832  NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
833  }
834 
835  // If the default was unswitched, re-point it and add explicit cases for
836  // entering the loop.
837  if (DefaultExitBB) {
838  NewSIW->setDefaultDest(DefaultExitBB);
839  NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
840 
841  // We removed all the exit cases, so we just copy the cases to the
842  // unswitched switch.
843  for (const auto &Case : SI.cases())
844  NewSIW.addCase(Case.getCaseValue(), NewPH,
846  } else if (DefaultCaseWeight) {
847  // We have to set branch weight of the default case.
848  uint64_t SW = *DefaultCaseWeight;
849  for (const auto &Case : SI.cases()) {
850  auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
851  assert(W &&
852  "case weight must be defined as default case weight is defined");
853  SW += *W;
854  }
855  NewSIW.setSuccessorWeight(0, SW);
856  }
857 
858  // If we ended up with a common successor for every path through the switch
859  // after unswitching, rewrite it to an unconditional branch to make it easy
860  // to recognize. Otherwise we potentially have to recognize the default case
861  // pointing at unreachable and other complexity.
862  if (CommonSuccBB) {
863  BasicBlock *BB = SI.getParent();
864  // We may have had multiple edges to this common successor block, so remove
865  // them as predecessors. We skip the first one, either the default or the
866  // actual first case.
867  bool SkippedFirst = DefaultExitBB == nullptr;
868  for (auto Case : SI.cases()) {
869  assert(Case.getCaseSuccessor() == CommonSuccBB &&
870  "Non-common successor!");
871  (void)Case;
872  if (!SkippedFirst) {
873  SkippedFirst = true;
874  continue;
875  }
876  CommonSuccBB->removePredecessor(BB,
877  /*KeepOneInputPHIs*/ true);
878  }
879  // Now nuke the switch and replace it with a direct branch.
880  SIW.eraseFromParent();
881  BranchInst::Create(CommonSuccBB, BB);
882  } else if (DefaultExitBB) {
883  assert(SI.getNumCases() > 0 &&
884  "If we had no cases we'd have a common successor!");
885  // Move the last case to the default successor. This is valid as if the
886  // default got unswitched it cannot be reached. This has the advantage of
887  // being simple and keeping the number of edges from this switch to
888  // successors the same, and avoiding any PHI update complexity.
889  auto LastCaseI = std::prev(SI.case_end());
890 
891  SI.setDefaultDest(LastCaseI->getCaseSuccessor());
892  SIW.setSuccessorWeight(
893  0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
894  SIW.removeCase(LastCaseI);
895  }
896 
897  // Walk the unswitched exit blocks and the unswitched split blocks and update
898  // the dominator tree based on the CFG edits. While we are walking unordered
899  // containers here, the API for applyUpdates takes an unordered list of
900  // updates and requires them to not contain duplicates.
902  for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
903  DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
904  DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
905  }
906  for (auto SplitUnswitchedPair : SplitExitBBMap) {
907  DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
908  DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
909  }
910 
911  if (MSSAU) {
912  MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
913  if (VerifyMemorySSA)
914  MSSAU->getMemorySSA()->verifyMemorySSA();
915  } else {
916  DT.applyUpdates(DTUpdates);
917  }
918 
920 
921  // We may have changed the nesting relationship for this loop so hoist it to
922  // its correct parent if needed.
923  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
924 
925  if (MSSAU && VerifyMemorySSA)
926  MSSAU->getMemorySSA()->verifyMemorySSA();
927 
928  ++NumTrivial;
929  ++NumSwitches;
930  LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
931  return true;
932 }
933 
934 /// This routine scans the loop to find a branch or switch which occurs before
935 /// any side effects occur. These can potentially be unswitched without
936 /// duplicating the loop. If a branch or switch is successfully unswitched the
937 /// scanning continues to see if subsequent branches or switches have become
938 /// trivial. Once all trivial candidates have been unswitched, this routine
939 /// returns.
940 ///
941 /// The return value indicates whether anything was unswitched (and therefore
942 /// changed).
943 ///
944 /// If `SE` is not null, it will be updated based on the potential loop SCEVs
945 /// invalidated by this.
947  LoopInfo &LI, ScalarEvolution *SE,
948  MemorySSAUpdater *MSSAU) {
949  bool Changed = false;
950 
951  // If loop header has only one reachable successor we should keep looking for
952  // trivial condition candidates in the successor as well. An alternative is
953  // to constant fold conditions and merge successors into loop header (then we
954  // only need to check header's terminator). The reason for not doing this in
955  // LoopUnswitch pass is that it could potentially break LoopPassManager's
956  // invariants. Folding dead branches could either eliminate the current loop
957  // or make other loops unreachable. LCSSA form might also not be preserved
958  // after deleting branches. The following code keeps traversing loop header's
959  // successors until it finds the trivial condition candidate (condition that
960  // is not a constant). Since unswitching generates branches with constant
961  // conditions, this scenario could be very common in practice.
962  BasicBlock *CurrentBB = L.getHeader();
964  Visited.insert(CurrentBB);
965  do {
966  // Check if there are any side-effecting instructions (e.g. stores, calls,
967  // volatile loads) in the part of the loop that the code *would* execute
968  // without unswitching.
969  if (MSSAU) // Possible early exit with MSSA
970  if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
971  if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
972  return Changed;
973  if (llvm::any_of(*CurrentBB,
974  [](Instruction &I) { return I.mayHaveSideEffects(); }))
975  return Changed;
976 
977  Instruction *CurrentTerm = CurrentBB->getTerminator();
978 
979  if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
980  // Don't bother trying to unswitch past a switch with a constant
981  // condition. This should be removed prior to running this pass by
982  // simplify-cfg.
983  if (isa<Constant>(SI->getCondition()))
984  return Changed;
985 
986  if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
987  // Couldn't unswitch this one so we're done.
988  return Changed;
989 
990  // Mark that we managed to unswitch something.
991  Changed = true;
992 
993  // If unswitching turned the terminator into an unconditional branch then
994  // we can continue. The unswitching logic specifically works to fold any
995  // cases it can into an unconditional branch to make it easier to
996  // recognize here.
997  auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
998  if (!BI || BI->isConditional())
999  return Changed;
1000 
1001  CurrentBB = BI->getSuccessor(0);
1002  continue;
1003  }
1004 
1005  auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1006  if (!BI)
1007  // We do not understand other terminator instructions.
1008  return Changed;
1009 
1010  // Don't bother trying to unswitch past an unconditional branch or a branch
1011  // with a constant value. These should be removed by simplify-cfg prior to
1012  // running this pass.
1013  if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
1014  return Changed;
1015 
1016  // Found a trivial condition candidate: non-foldable conditional branch. If
1017  // we fail to unswitch this, we can't do anything else that is trivial.
1018  if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1019  return Changed;
1020 
1021  // Mark that we managed to unswitch something.
1022  Changed = true;
1023 
1024  // If we only unswitched some of the conditions feeding the branch, we won't
1025  // have collapsed it to a single successor.
1026  BI = cast<BranchInst>(CurrentBB->getTerminator());
1027  if (BI->isConditional())
1028  return Changed;
1029 
1030  // Follow the newly unconditional branch into its successor.
1031  CurrentBB = BI->getSuccessor(0);
1032 
1033  // When continuing, if we exit the loop or reach a previous visited block,
1034  // then we can not reach any trivial condition candidates (unfoldable
1035  // branch instructions or switch instructions) and no unswitch can happen.
1036  } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1037 
1038  return Changed;
1039 }
1040 
1041 /// Build the cloned blocks for an unswitched copy of the given loop.
1042 ///
1043 /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1044 /// after the split block (`SplitBB`) that will be used to select between the
1045 /// cloned and original loop.
1046 ///
1047 /// This routine handles cloning all of the necessary loop blocks and exit
1048 /// blocks including rewriting their instructions and the relevant PHI nodes.
1049 /// Any loop blocks or exit blocks which are dominated by a different successor
1050 /// than the one for this clone of the loop blocks can be trivially skipped. We
1051 /// use the `DominatingSucc` map to determine whether a block satisfies that
1052 /// property with a simple map lookup.
1053 ///
1054 /// It also correctly creates the unconditional branch in the cloned
1055 /// unswitched parent block to only point at the unswitched successor.
1056 ///
1057 /// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1058 /// block splitting is correctly reflected in `LoopInfo`, essentially all of
1059 /// the cloned blocks (and their loops) are left without full `LoopInfo`
1060 /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1061 /// blocks to them but doesn't create the cloned `DominatorTree` structure and
1062 /// instead the caller must recompute an accurate DT. It *does* correctly
1063 /// update the `AssumptionCache` provided in `AC`.
1065  Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1066  ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1067  BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1068  const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1069  ValueToValueMapTy &VMap,
1071  DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
1072  SmallVector<BasicBlock *, 4> NewBlocks;
1073  NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1074 
1075  // We will need to clone a bunch of blocks, wrap up the clone operation in
1076  // a helper.
1077  auto CloneBlock = [&](BasicBlock *OldBB) {
1078  // Clone the basic block and insert it before the new preheader.
1079  BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1080  NewBB->moveBefore(LoopPH);
1081 
1082  // Record this block and the mapping.
1083  NewBlocks.push_back(NewBB);
1084  VMap[OldBB] = NewBB;
1085 
1086  return NewBB;
1087  };
1088 
1089  // We skip cloning blocks when they have a dominating succ that is not the
1090  // succ we are cloning for.
1091  auto SkipBlock = [&](BasicBlock *BB) {
1092  auto It = DominatingSucc.find(BB);
1093  return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1094  };
1095 
1096  // First, clone the preheader.
1097  auto *ClonedPH = CloneBlock(LoopPH);
1098 
1099  // Then clone all the loop blocks, skipping the ones that aren't necessary.
1100  for (auto *LoopBB : L.blocks())
1101  if (!SkipBlock(LoopBB))
1102  CloneBlock(LoopBB);
1103 
1104  // Split all the loop exit edges so that when we clone the exit blocks, if
1105  // any of the exit blocks are *also* a preheader for some other loop, we
1106  // don't create multiple predecessors entering the loop header.
1107  for (auto *ExitBB : ExitBlocks) {
1108  if (SkipBlock(ExitBB))
1109  continue;
1110 
1111  // When we are going to clone an exit, we don't need to clone all the
1112  // instructions in the exit block and we want to ensure we have an easy
1113  // place to merge the CFG, so split the exit first. This is always safe to
1114  // do because there cannot be any non-loop predecessors of a loop exit in
1115  // loop simplified form.
1116  auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
1117 
1118  // Rearrange the names to make it easier to write test cases by having the
1119  // exit block carry the suffix rather than the merge block carrying the
1120  // suffix.
1121  MergeBB->takeName(ExitBB);
1122  ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1123 
1124  // Now clone the original exit block.
1125  auto *ClonedExitBB = CloneBlock(ExitBB);
1126  assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1127  "Exit block should have been split to have one successor!");
1128  assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1129  "Cloned exit block has the wrong successor!");
1130 
1131  // Remap any cloned instructions and create a merge phi node for them.
1132  for (auto ZippedInsts : llvm::zip_first(
1133  llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1134  llvm::make_range(ClonedExitBB->begin(),
1135  std::prev(ClonedExitBB->end())))) {
1136  Instruction &I = std::get<0>(ZippedInsts);
1137  Instruction &ClonedI = std::get<1>(ZippedInsts);
1138 
1139  // The only instructions in the exit block should be PHI nodes and
1140  // potentially a landing pad.
1141  assert(
1142  (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
1143  "Bad instruction in exit block!");
1144  // We should have a value map between the instruction and its clone.
1145  assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1146 
1147  auto *MergePN =
1148  PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
1149  &*MergeBB->getFirstInsertionPt());
1150  I.replaceAllUsesWith(MergePN);
1151  MergePN->addIncoming(&I, ExitBB);
1152  MergePN->addIncoming(&ClonedI, ClonedExitBB);
1153  }
1154  }
1155 
1156  // Rewrite the instructions in the cloned blocks to refer to the instructions
1157  // in the cloned blocks. We have to do this as a second pass so that we have
1158  // everything available. Also, we have inserted new instructions which may
1159  // include assume intrinsics, so we update the assumption cache while
1160  // processing this.
1161  for (auto *ClonedBB : NewBlocks)
1162  for (Instruction &I : *ClonedBB) {
1163  RemapInstruction(&I, VMap,
1165  if (auto *II = dyn_cast<AssumeInst>(&I))
1166  AC.registerAssumption(II);
1167  }
1168 
1169  // Update any PHI nodes in the cloned successors of the skipped blocks to not
1170  // have spurious incoming values.
1171  for (auto *LoopBB : L.blocks())
1172  if (SkipBlock(LoopBB))
1173  for (auto *SuccBB : successors(LoopBB))
1174  if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1175  for (PHINode &PN : ClonedSuccBB->phis())
1176  PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1177 
1178  // Remove the cloned parent as a predecessor of any successor we ended up
1179  // cloning other than the unswitched one.
1180  auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1181  for (auto *SuccBB : successors(ParentBB)) {
1182  if (SuccBB == UnswitchedSuccBB)
1183  continue;
1184 
1185  auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1186  if (!ClonedSuccBB)
1187  continue;
1188 
1189  ClonedSuccBB->removePredecessor(ClonedParentBB,
1190  /*KeepOneInputPHIs*/ true);
1191  }
1192 
1193  // Replace the cloned branch with an unconditional branch to the cloned
1194  // unswitched successor.
1195  auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1196  Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1197  // Trivial Simplification. If Terminator is a conditional branch and
1198  // condition becomes dead - erase it.
1199  Value *ClonedConditionToErase = nullptr;
1200  if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1201  ClonedConditionToErase = BI->getCondition();
1202  else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1203  ClonedConditionToErase = SI->getCondition();
1204 
1205  ClonedTerminator->eraseFromParent();
1206  BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1207 
1208  if (ClonedConditionToErase)
1209  RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1210  MSSAU);
1211 
1212  // If there are duplicate entries in the PHI nodes because of multiple edges
1213  // to the unswitched successor, we need to nuke all but one as we replaced it
1214  // with a direct branch.
1215  for (PHINode &PN : ClonedSuccBB->phis()) {
1216  bool Found = false;
1217  // Loop over the incoming operands backwards so we can easily delete as we
1218  // go without invalidating the index.
1219  for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1220  if (PN.getIncomingBlock(i) != ClonedParentBB)
1221  continue;
1222  if (!Found) {
1223  Found = true;
1224  continue;
1225  }
1226  PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1227  }
1228  }
1229 
1230  // Record the domtree updates for the new blocks.
1232  for (auto *ClonedBB : NewBlocks) {
1233  for (auto *SuccBB : successors(ClonedBB))
1234  if (SuccSet.insert(SuccBB).second)
1235  DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1236  SuccSet.clear();
1237  }
1238 
1239  return ClonedPH;
1240 }
1241 
1242 /// Recursively clone the specified loop and all of its children.
1243 ///
1244 /// The target parent loop for the clone should be provided, or can be null if
1245 /// the clone is a top-level loop. While cloning, all the blocks are mapped
1246 /// with the provided value map. The entire original loop must be present in
1247 /// the value map. The cloned loop is returned.
1248 static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1249  const ValueToValueMapTy &VMap, LoopInfo &LI) {
1250  auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1251  assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1252  ClonedL.reserveBlocks(OrigL.getNumBlocks());
1253  for (auto *BB : OrigL.blocks()) {
1254  auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1255  ClonedL.addBlockEntry(ClonedBB);
1256  if (LI.getLoopFor(BB) == &OrigL)
1257  LI.changeLoopFor(ClonedBB, &ClonedL);
1258  }
1259  };
1260 
1261  // We specially handle the first loop because it may get cloned into
1262  // a different parent and because we most commonly are cloning leaf loops.
1263  Loop *ClonedRootL = LI.AllocateLoop();
1264  if (RootParentL)
1265  RootParentL->addChildLoop(ClonedRootL);
1266  else
1267  LI.addTopLevelLoop(ClonedRootL);
1268  AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1269 
1270  if (OrigRootL.isInnermost())
1271  return ClonedRootL;
1272 
1273  // If we have a nest, we can quickly clone the entire loop nest using an
1274  // iterative approach because it is a tree. We keep the cloned parent in the
1275  // data structure to avoid repeatedly querying through a map to find it.
1276  SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1277  // Build up the loops to clone in reverse order as we'll clone them from the
1278  // back.
1279  for (Loop *ChildL : llvm::reverse(OrigRootL))
1280  LoopsToClone.push_back({ClonedRootL, ChildL});
1281  do {
1282  Loop *ClonedParentL, *L;
1283  std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1284  Loop *ClonedL = LI.AllocateLoop();
1285  ClonedParentL->addChildLoop(ClonedL);
1286  AddClonedBlocksToLoop(*L, *ClonedL);
1287  for (Loop *ChildL : llvm::reverse(*L))
1288  LoopsToClone.push_back({ClonedL, ChildL});
1289  } while (!LoopsToClone.empty());
1290 
1291  return ClonedRootL;
1292 }
1293 
1294 /// Build the cloned loops of an original loop from unswitching.
1295 ///
1296 /// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1297 /// operation. We need to re-verify that there even is a loop (as the backedge
1298 /// may not have been cloned), and even if there are remaining backedges the
1299 /// backedge set may be different. However, we know that each child loop is
1300 /// undisturbed, we only need to find where to place each child loop within
1301 /// either any parent loop or within a cloned version of the original loop.
1302 ///
1303 /// Because child loops may end up cloned outside of any cloned version of the
1304 /// original loop, multiple cloned sibling loops may be created. All of them
1305 /// are returned so that the newly introduced loop nest roots can be
1306 /// identified.
1307 static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1308  const ValueToValueMapTy &VMap, LoopInfo &LI,
1309  SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1310  Loop *ClonedL = nullptr;
1311 
1312  auto *OrigPH = OrigL.getLoopPreheader();
1313  auto *OrigHeader = OrigL.getHeader();
1314 
1315  auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1316  auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1317 
1318  // We need to know the loops of the cloned exit blocks to even compute the
1319  // accurate parent loop. If we only clone exits to some parent of the
1320  // original parent, we want to clone into that outer loop. We also keep track
1321  // of the loops that our cloned exit blocks participate in.
1322  Loop *ParentL = nullptr;
1323  SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1325  ClonedExitsInLoops.reserve(ExitBlocks.size());
1326  for (auto *ExitBB : ExitBlocks)
1327  if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1328  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1329  ExitLoopMap[ClonedExitBB] = ExitL;
1330  ClonedExitsInLoops.push_back(ClonedExitBB);
1331  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1332  ParentL = ExitL;
1333  }
1334  assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1335  ParentL->contains(OrigL.getParentLoop())) &&
1336  "The computed parent loop should always contain (or be) the parent of "
1337  "the original loop.");
1338 
1339  // We build the set of blocks dominated by the cloned header from the set of
1340  // cloned blocks out of the original loop. While not all of these will
1341  // necessarily be in the cloned loop, it is enough to establish that they
1342  // aren't in unreachable cycles, etc.
1343  SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1344  for (auto *BB : OrigL.blocks())
1345  if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1346  ClonedLoopBlocks.insert(ClonedBB);
1347 
1348  // Rebuild the set of blocks that will end up in the cloned loop. We may have
1349  // skipped cloning some region of this loop which can in turn skip some of
1350  // the backedges so we have to rebuild the blocks in the loop based on the
1351  // backedges that remain after cloning.
1353  SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1354  for (auto *Pred : predecessors(ClonedHeader)) {
1355  // The only possible non-loop header predecessor is the preheader because
1356  // we know we cloned the loop in simplified form.
1357  if (Pred == ClonedPH)
1358  continue;
1359 
1360  // Because the loop was in simplified form, the only non-loop predecessor
1361  // should be the preheader.
1362  assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1363  "header other than the preheader "
1364  "that is not part of the loop!");
1365 
1366  // Insert this block into the loop set and on the first visit (and if it
1367  // isn't the header we're currently walking) put it into the worklist to
1368  // recurse through.
1369  if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1370  Worklist.push_back(Pred);
1371  }
1372 
1373  // If we had any backedges then there *is* a cloned loop. Put the header into
1374  // the loop set and then walk the worklist backwards to find all the blocks
1375  // that remain within the loop after cloning.
1376  if (!BlocksInClonedLoop.empty()) {
1377  BlocksInClonedLoop.insert(ClonedHeader);
1378 
1379  while (!Worklist.empty()) {
1380  BasicBlock *BB = Worklist.pop_back_val();
1381  assert(BlocksInClonedLoop.count(BB) &&
1382  "Didn't put block into the loop set!");
1383 
1384  // Insert any predecessors that are in the possible set into the cloned
1385  // set, and if the insert is successful, add them to the worklist. Note
1386  // that we filter on the blocks that are definitely reachable via the
1387  // backedge to the loop header so we may prune out dead code within the
1388  // cloned loop.
1389  for (auto *Pred : predecessors(BB))
1390  if (ClonedLoopBlocks.count(Pred) &&
1391  BlocksInClonedLoop.insert(Pred).second)
1392  Worklist.push_back(Pred);
1393  }
1394 
1395  ClonedL = LI.AllocateLoop();
1396  if (ParentL) {
1397  ParentL->addBasicBlockToLoop(ClonedPH, LI);
1398  ParentL->addChildLoop(ClonedL);
1399  } else {
1400  LI.addTopLevelLoop(ClonedL);
1401  }
1402  NonChildClonedLoops.push_back(ClonedL);
1403 
1404  ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1405  // We don't want to just add the cloned loop blocks based on how we
1406  // discovered them. The original order of blocks was carefully built in
1407  // a way that doesn't rely on predecessor ordering. Rather than re-invent
1408  // that logic, we just re-walk the original blocks (and those of the child
1409  // loops) and filter them as we add them into the cloned loop.
1410  for (auto *BB : OrigL.blocks()) {
1411  auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1412  if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1413  continue;
1414 
1415  // Directly add the blocks that are only in this loop.
1416  if (LI.getLoopFor(BB) == &OrigL) {
1417  ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1418  continue;
1419  }
1420 
1421  // We want to manually add it to this loop and parents.
1422  // Registering it with LoopInfo will happen when we clone the top
1423  // loop for this block.
1424  for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1425  PL->addBlockEntry(ClonedBB);
1426  }
1427 
1428  // Now add each child loop whose header remains within the cloned loop. All
1429  // of the blocks within the loop must satisfy the same constraints as the
1430  // header so once we pass the header checks we can just clone the entire
1431  // child loop nest.
1432  for (Loop *ChildL : OrigL) {
1433  auto *ClonedChildHeader =
1434  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1435  if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1436  continue;
1437 
1438 #ifndef NDEBUG
1439  // We should never have a cloned child loop header but fail to have
1440  // all of the blocks for that child loop.
1441  for (auto *ChildLoopBB : ChildL->blocks())
1442  assert(BlocksInClonedLoop.count(
1443  cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1444  "Child cloned loop has a header within the cloned outer "
1445  "loop but not all of its blocks!");
1446 #endif
1447 
1448  cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1449  }
1450  }
1451 
1452  // Now that we've handled all the components of the original loop that were
1453  // cloned into a new loop, we still need to handle anything from the original
1454  // loop that wasn't in a cloned loop.
1455 
1456  // Figure out what blocks are left to place within any loop nest containing
1457  // the unswitched loop. If we never formed a loop, the cloned PH is one of
1458  // them.
1459  SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1460  if (BlocksInClonedLoop.empty())
1461  UnloopedBlockSet.insert(ClonedPH);
1462  for (auto *ClonedBB : ClonedLoopBlocks)
1463  if (!BlocksInClonedLoop.count(ClonedBB))
1464  UnloopedBlockSet.insert(ClonedBB);
1465 
1466  // Copy the cloned exits and sort them in ascending loop depth, we'll work
1467  // backwards across these to process them inside out. The order shouldn't
1468  // matter as we're just trying to build up the map from inside-out; we use
1469  // the map in a more stably ordered way below.
1470  auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1471  llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1472  return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1473  ExitLoopMap.lookup(RHS)->getLoopDepth();
1474  });
1475 
1476  // Populate the existing ExitLoopMap with everything reachable from each
1477  // exit, starting from the inner most exit.
1478  while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1479  assert(Worklist.empty() && "Didn't clear worklist!");
1480 
1481  BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1482  Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1483 
1484  // Walk the CFG back until we hit the cloned PH adding everything reachable
1485  // and in the unlooped set to this exit block's loop.
1486  Worklist.push_back(ExitBB);
1487  do {
1488  BasicBlock *BB = Worklist.pop_back_val();
1489  // We can stop recursing at the cloned preheader (if we get there).
1490  if (BB == ClonedPH)
1491  continue;
1492 
1493  for (BasicBlock *PredBB : predecessors(BB)) {
1494  // If this pred has already been moved to our set or is part of some
1495  // (inner) loop, no update needed.
1496  if (!UnloopedBlockSet.erase(PredBB)) {
1497  assert(
1498  (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1499  "Predecessor not mapped to a loop!");
1500  continue;
1501  }
1502 
1503  // We just insert into the loop set here. We'll add these blocks to the
1504  // exit loop after we build up the set in an order that doesn't rely on
1505  // predecessor order (which in turn relies on use list order).
1506  bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1507  (void)Inserted;
1508  assert(Inserted && "Should only visit an unlooped block once!");
1509 
1510  // And recurse through to its predecessors.
1511  Worklist.push_back(PredBB);
1512  }
1513  } while (!Worklist.empty());
1514  }
1515 
1516  // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1517  // blocks to their outer loops, walk the cloned blocks and the cloned exits
1518  // in their original order adding them to the correct loop.
1519 
1520  // We need a stable insertion order. We use the order of the original loop
1521  // order and map into the correct parent loop.
1522  for (auto *BB : llvm::concat<BasicBlock *const>(
1523  makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1524  if (Loop *OuterL = ExitLoopMap.lookup(BB))
1525  OuterL->addBasicBlockToLoop(BB, LI);
1526 
1527 #ifndef NDEBUG
1528  for (auto &BBAndL : ExitLoopMap) {
1529  auto *BB = BBAndL.first;
1530  auto *OuterL = BBAndL.second;
1531  assert(LI.getLoopFor(BB) == OuterL &&
1532  "Failed to put all blocks into outer loops!");
1533  }
1534 #endif
1535 
1536  // Now that all the blocks are placed into the correct containing loop in the
1537  // absence of child loops, find all the potentially cloned child loops and
1538  // clone them into whatever outer loop we placed their header into.
1539  for (Loop *ChildL : OrigL) {
1540  auto *ClonedChildHeader =
1541  cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1542  if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1543  continue;
1544 
1545 #ifndef NDEBUG
1546  for (auto *ChildLoopBB : ChildL->blocks())
1547  assert(VMap.count(ChildLoopBB) &&
1548  "Cloned a child loop header but not all of that loops blocks!");
1549 #endif
1550 
1551  NonChildClonedLoops.push_back(cloneLoopNest(
1552  *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1553  }
1554 }
1555 
1556 static void
1558  ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1559  DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1560  // Find all the dead clones, and remove them from their successors.
1561  SmallVector<BasicBlock *, 16> DeadBlocks;
1562  for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1563  for (auto &VMap : VMaps)
1564  if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1565  if (!DT.isReachableFromEntry(ClonedBB)) {
1566  for (BasicBlock *SuccBB : successors(ClonedBB))
1567  SuccBB->removePredecessor(ClonedBB);
1568  DeadBlocks.push_back(ClonedBB);
1569  }
1570 
1571  // Remove all MemorySSA in the dead blocks
1572  if (MSSAU) {
1573  SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1574  DeadBlocks.end());
1575  MSSAU->removeBlocks(DeadBlockSet);
1576  }
1577 
1578  // Drop any remaining references to break cycles.
1579  for (BasicBlock *BB : DeadBlocks)
1580  BB->dropAllReferences();
1581  // Erase them from the IR.
1582  for (BasicBlock *BB : DeadBlocks)
1583  BB->eraseFromParent();
1584 }
1585 
1587  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1588  DominatorTree &DT, LoopInfo &LI,
1589  MemorySSAUpdater *MSSAU) {
1590  // Find all the dead blocks tied to this loop, and remove them from their
1591  // successors.
1592  SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1593 
1594  // Start with loop/exit blocks and get a transitive closure of reachable dead
1595  // blocks.
1596  SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1597  ExitBlocks.end());
1598  DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1599  while (!DeathCandidates.empty()) {
1600  auto *BB = DeathCandidates.pop_back_val();
1601  if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1602  for (BasicBlock *SuccBB : successors(BB)) {
1603  SuccBB->removePredecessor(BB);
1604  DeathCandidates.push_back(SuccBB);
1605  }
1606  DeadBlockSet.insert(BB);
1607  }
1608  }
1609 
1610  // Remove all MemorySSA in the dead blocks
1611  if (MSSAU)
1612  MSSAU->removeBlocks(DeadBlockSet);
1613 
1614  // Filter out the dead blocks from the exit blocks list so that it can be
1615  // used in the caller.
1616  llvm::erase_if(ExitBlocks,
1617  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1618 
1619  // Walk from this loop up through its parents removing all of the dead blocks.
1620  for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1621  for (auto *BB : DeadBlockSet)
1622  ParentL->getBlocksSet().erase(BB);
1623  llvm::erase_if(ParentL->getBlocksVector(),
1624  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1625  }
1626 
1627  // Now delete the dead child loops. This raw delete will clear them
1628  // recursively.
1629  llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1630  if (!DeadBlockSet.count(ChildL->getHeader()))
1631  return false;
1632 
1633  assert(llvm::all_of(ChildL->blocks(),
1634  [&](BasicBlock *ChildBB) {
1635  return DeadBlockSet.count(ChildBB);
1636  }) &&
1637  "If the child loop header is dead all blocks in the child loop must "
1638  "be dead as well!");
1639  LI.destroy(ChildL);
1640  return true;
1641  });
1642 
1643  // Remove the loop mappings for the dead blocks and drop all the references
1644  // from these blocks to others to handle cyclic references as we start
1645  // deleting the blocks themselves.
1646  for (auto *BB : DeadBlockSet) {
1647  // Check that the dominator tree has already been updated.
1648  assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1649  LI.changeLoopFor(BB, nullptr);
1650  // Drop all uses of the instructions to make sure we won't have dangling
1651  // uses in other blocks.
1652  for (auto &I : *BB)
1653  if (!I.use_empty())
1654  I.replaceAllUsesWith(UndefValue::get(I.getType()));
1655  BB->dropAllReferences();
1656  }
1657 
1658  // Actually delete the blocks now that they've been fully unhooked from the
1659  // IR.
1660  for (auto *BB : DeadBlockSet)
1661  BB->eraseFromParent();
1662 }
1663 
1664 /// Recompute the set of blocks in a loop after unswitching.
1665 ///
1666 /// This walks from the original headers predecessors to rebuild the loop. We
1667 /// take advantage of the fact that new blocks can't have been added, and so we
1668 /// filter by the original loop's blocks. This also handles potentially
1669 /// unreachable code that we don't want to explore but might be found examining
1670 /// the predecessors of the header.
1671 ///
1672 /// If the original loop is no longer a loop, this will return an empty set. If
1673 /// it remains a loop, all the blocks within it will be added to the set
1674 /// (including those blocks in inner loops).
1676  LoopInfo &LI) {
1678 
1679  auto *PH = L.getLoopPreheader();
1680  auto *Header = L.getHeader();
1681 
1682  // A worklist to use while walking backwards from the header.
1684 
1685  // First walk the predecessors of the header to find the backedges. This will
1686  // form the basis of our walk.
1687  for (auto *Pred : predecessors(Header)) {
1688  // Skip the preheader.
1689  if (Pred == PH)
1690  continue;
1691 
1692  // Because the loop was in simplified form, the only non-loop predecessor
1693  // is the preheader.
1694  assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1695  "than the preheader that is not part of the "
1696  "loop!");
1697 
1698  // Insert this block into the loop set and on the first visit and, if it
1699  // isn't the header we're currently walking, put it into the worklist to
1700  // recurse through.
1701  if (LoopBlockSet.insert(Pred).second && Pred != Header)
1702  Worklist.push_back(Pred);
1703  }
1704 
1705  // If no backedges were found, we're done.
1706  if (LoopBlockSet.empty())
1707  return LoopBlockSet;
1708 
1709  // We found backedges, recurse through them to identify the loop blocks.
1710  while (!Worklist.empty()) {
1711  BasicBlock *BB = Worklist.pop_back_val();
1712  assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1713 
1714  // No need to walk past the header.
1715  if (BB == Header)
1716  continue;
1717 
1718  // Because we know the inner loop structure remains valid we can use the
1719  // loop structure to jump immediately across the entire nested loop.
1720  // Further, because it is in loop simplified form, we can directly jump
1721  // to its preheader afterward.
1722  if (Loop *InnerL = LI.getLoopFor(BB))
1723  if (InnerL != &L) {
1724  assert(L.contains(InnerL) &&
1725  "Should not reach a loop *outside* this loop!");
1726  // The preheader is the only possible predecessor of the loop so
1727  // insert it into the set and check whether it was already handled.
1728  auto *InnerPH = InnerL->getLoopPreheader();
1729  assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1730  "but not contain the inner loop "
1731  "preheader!");
1732  if (!LoopBlockSet.insert(InnerPH).second)
1733  // The only way to reach the preheader is through the loop body
1734  // itself so if it has been visited the loop is already handled.
1735  continue;
1736 
1737  // Insert all of the blocks (other than those already present) into
1738  // the loop set. We expect at least the block that led us to find the
1739  // inner loop to be in the block set, but we may also have other loop
1740  // blocks if they were already enqueued as predecessors of some other
1741  // outer loop block.
1742  for (auto *InnerBB : InnerL->blocks()) {
1743  if (InnerBB == BB) {
1744  assert(LoopBlockSet.count(InnerBB) &&
1745  "Block should already be in the set!");
1746  continue;
1747  }
1748 
1749  LoopBlockSet.insert(InnerBB);
1750  }
1751 
1752  // Add the preheader to the worklist so we will continue past the
1753  // loop body.
1754  Worklist.push_back(InnerPH);
1755  continue;
1756  }
1757 
1758  // Insert any predecessors that were in the original loop into the new
1759  // set, and if the insert is successful, add them to the worklist.
1760  for (auto *Pred : predecessors(BB))
1761  if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1762  Worklist.push_back(Pred);
1763  }
1764 
1765  assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1766 
1767  // We've found all the blocks participating in the loop, return our completed
1768  // set.
1769  return LoopBlockSet;
1770 }
1771 
1772 /// Rebuild a loop after unswitching removes some subset of blocks and edges.
1773 ///
1774 /// The removal may have removed some child loops entirely but cannot have
1775 /// disturbed any remaining child loops. However, they may need to be hoisted
1776 /// to the parent loop (or to be top-level loops). The original loop may be
1777 /// completely removed.
1778 ///
1779 /// The sibling loops resulting from this update are returned. If the original
1780 /// loop remains a valid loop, it will be the first entry in this list with all
1781 /// of the newly sibling loops following it.
1782 ///
1783 /// Returns true if the loop remains a loop after unswitching, and false if it
1784 /// is no longer a loop after unswitching (and should not continue to be
1785 /// referenced).
1787  LoopInfo &LI,
1788  SmallVectorImpl<Loop *> &HoistedLoops) {
1789  auto *PH = L.getLoopPreheader();
1790 
1791  // Compute the actual parent loop from the exit blocks. Because we may have
1792  // pruned some exits the loop may be different from the original parent.
1793  Loop *ParentL = nullptr;
1794  SmallVector<Loop *, 4> ExitLoops;
1795  SmallVector<BasicBlock *, 4> ExitsInLoops;
1796  ExitsInLoops.reserve(ExitBlocks.size());
1797  for (auto *ExitBB : ExitBlocks)
1798  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1799  ExitLoops.push_back(ExitL);
1800  ExitsInLoops.push_back(ExitBB);
1801  if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1802  ParentL = ExitL;
1803  }
1804 
1805  // Recompute the blocks participating in this loop. This may be empty if it
1806  // is no longer a loop.
1807  auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1808 
1809  // If we still have a loop, we need to re-set the loop's parent as the exit
1810  // block set changing may have moved it within the loop nest. Note that this
1811  // can only happen when this loop has a parent as it can only hoist the loop
1812  // *up* the nest.
1813  if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1814  // Remove this loop's (original) blocks from all of the intervening loops.
1815  for (Loop *IL = L.getParentLoop(); IL != ParentL;
1816  IL = IL->getParentLoop()) {
1817  IL->getBlocksSet().erase(PH);
1818  for (auto *BB : L.blocks())
1819  IL->getBlocksSet().erase(BB);
1820  llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1821  return BB == PH || L.contains(BB);
1822  });
1823  }
1824 
1825  LI.changeLoopFor(PH, ParentL);
1826  L.getParentLoop()->removeChildLoop(&L);
1827  if (ParentL)
1828  ParentL->addChildLoop(&L);
1829  else
1830  LI.addTopLevelLoop(&L);
1831  }
1832 
1833  // Now we update all the blocks which are no longer within the loop.
1834  auto &Blocks = L.getBlocksVector();
1835  auto BlocksSplitI =
1836  LoopBlockSet.empty()
1837  ? Blocks.begin()
1838  : std::stable_partition(
1839  Blocks.begin(), Blocks.end(),
1840  [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1841 
1842  // Before we erase the list of unlooped blocks, build a set of them.
1843  SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1844  if (LoopBlockSet.empty())
1845  UnloopedBlocks.insert(PH);
1846 
1847  // Now erase these blocks from the loop.
1848  for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1849  L.getBlocksSet().erase(BB);
1850  Blocks.erase(BlocksSplitI, Blocks.end());
1851 
1852  // Sort the exits in ascending loop depth, we'll work backwards across these
1853  // to process them inside out.
1854  llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1855  return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1856  });
1857 
1858  // We'll build up a set for each exit loop.
1859  SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1860  Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1861 
1862  auto RemoveUnloopedBlocksFromLoop =
1863  [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1864  for (auto *BB : UnloopedBlocks)
1865  L.getBlocksSet().erase(BB);
1867  return UnloopedBlocks.count(BB);
1868  });
1869  };
1870 
1872  while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1873  assert(Worklist.empty() && "Didn't clear worklist!");
1874  assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1875 
1876  // Grab the next exit block, in decreasing loop depth order.
1877  BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1878  Loop &ExitL = *LI.getLoopFor(ExitBB);
1879  assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1880 
1881  // Erase all of the unlooped blocks from the loops between the previous
1882  // exit loop and this exit loop. This works because the ExitInLoops list is
1883  // sorted in increasing order of loop depth and thus we visit loops in
1884  // decreasing order of loop depth.
1885  for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1886  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1887 
1888  // Walk the CFG back until we hit the cloned PH adding everything reachable
1889  // and in the unlooped set to this exit block's loop.
1890  Worklist.push_back(ExitBB);
1891  do {
1892  BasicBlock *BB = Worklist.pop_back_val();
1893  // We can stop recursing at the cloned preheader (if we get there).
1894  if (BB == PH)
1895  continue;
1896 
1897  for (BasicBlock *PredBB : predecessors(BB)) {
1898  // If this pred has already been moved to our set or is part of some
1899  // (inner) loop, no update needed.
1900  if (!UnloopedBlocks.erase(PredBB)) {
1901  assert((NewExitLoopBlocks.count(PredBB) ||
1902  ExitL.contains(LI.getLoopFor(PredBB))) &&
1903  "Predecessor not in a nested loop (or already visited)!");
1904  continue;
1905  }
1906 
1907  // We just insert into the loop set here. We'll add these blocks to the
1908  // exit loop after we build up the set in a deterministic order rather
1909  // than the predecessor-influenced visit order.
1910  bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
1911  (void)Inserted;
1912  assert(Inserted && "Should only visit an unlooped block once!");
1913 
1914  // And recurse through to its predecessors.
1915  Worklist.push_back(PredBB);
1916  }
1917  } while (!Worklist.empty());
1918 
1919  // If blocks in this exit loop were directly part of the original loop (as
1920  // opposed to a child loop) update the map to point to this exit loop. This
1921  // just updates a map and so the fact that the order is unstable is fine.
1922  for (auto *BB : NewExitLoopBlocks)
1923  if (Loop *BBL = LI.getLoopFor(BB))
1924  if (BBL == &L || !L.contains(BBL))
1925  LI.changeLoopFor(BB, &ExitL);
1926 
1927  // We will remove the remaining unlooped blocks from this loop in the next
1928  // iteration or below.
1929  NewExitLoopBlocks.clear();
1930  }
1931 
1932  // Any remaining unlooped blocks are no longer part of any loop unless they
1933  // are part of some child loop.
1934  for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
1935  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1936  for (auto *BB : UnloopedBlocks)
1937  if (Loop *BBL = LI.getLoopFor(BB))
1938  if (BBL == &L || !L.contains(BBL))
1939  LI.changeLoopFor(BB, nullptr);
1940 
1941  // Sink all the child loops whose headers are no longer in the loop set to
1942  // the parent (or to be top level loops). We reach into the loop and directly
1943  // update its subloop vector to make this batch update efficient.
1944  auto &SubLoops = L.getSubLoopsVector();
1945  auto SubLoopsSplitI =
1946  LoopBlockSet.empty()
1947  ? SubLoops.begin()
1948  : std::stable_partition(
1949  SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
1950  return LoopBlockSet.count(SubL->getHeader());
1951  });
1952  for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
1953  HoistedLoops.push_back(HoistedL);
1954  HoistedL->setParentLoop(nullptr);
1955 
1956  // To compute the new parent of this hoisted loop we look at where we
1957  // placed the preheader above. We can't lookup the header itself because we
1958  // retained the mapping from the header to the hoisted loop. But the
1959  // preheader and header should have the exact same new parent computed
1960  // based on the set of exit blocks from the original loop as the preheader
1961  // is a predecessor of the header and so reached in the reverse walk. And
1962  // because the loops were all in simplified form the preheader of the
1963  // hoisted loop can't be part of some *other* loop.
1964  if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
1965  NewParentL->addChildLoop(HoistedL);
1966  else
1967  LI.addTopLevelLoop(HoistedL);
1968  }
1969  SubLoops.erase(SubLoopsSplitI, SubLoops.end());
1970 
1971  // Actually delete the loop if nothing remained within it.
1972  if (Blocks.empty()) {
1973  assert(SubLoops.empty() &&
1974  "Failed to remove all subloops from the original loop!");
1975  if (Loop *ParentL = L.getParentLoop())
1976  ParentL->removeChildLoop(llvm::find(*ParentL, &L));
1977  else
1978  LI.removeLoop(llvm::find(LI, &L));
1979  LI.destroy(&L);
1980  return false;
1981  }
1982 
1983  return true;
1984 }
1985 
1986 /// Helper to visit a dominator subtree, invoking a callable on each node.
1987 ///
1988 /// Returning false at any point will stop walking past that node of the tree.
1989 template <typename CallableT>
1990 void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
1991  SmallVector<DomTreeNode *, 4> DomWorklist;
1992  DomWorklist.push_back(DT[BB]);
1993 #ifndef NDEBUG
1995  Visited.insert(DT[BB]);
1996 #endif
1997  do {
1998  DomTreeNode *N = DomWorklist.pop_back_val();
1999 
2000  // Visit this node.
2001  if (!Callable(N->getBlock()))
2002  continue;
2003 
2004  // Accumulate the child nodes.
2005  for (DomTreeNode *ChildN : *N) {
2006  assert(Visited.insert(ChildN).second &&
2007  "Cannot visit a node twice when walking a tree!");
2008  DomWorklist.push_back(ChildN);
2009  }
2010  } while (!DomWorklist.empty());
2011 }
2012 
2014  Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2015  SmallVectorImpl<BasicBlock *> &ExitBlocks, IVConditionInfo &PartialIVInfo,
2016  DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2017  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2018  ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
2019  auto *ParentBB = TI.getParent();
2020  BranchInst *BI = dyn_cast<BranchInst>(&TI);
2021  SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2022 
2023  // We can only unswitch switches, conditional branches with an invariant
2024  // condition, or combining invariant conditions with an instruction or
2025  // partially invariant instructions.
2026  assert((SI || (BI && BI->isConditional())) &&
2027  "Can only unswitch switches and conditional branch!");
2028  bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2029  bool FullUnswitch =
2030  SI || (BI->getCondition() == Invariants[0] && !PartiallyInvariant);
2031  if (FullUnswitch)
2032  assert(Invariants.size() == 1 &&
2033  "Cannot have other invariants with full unswitching!");
2034  else
2035  assert(isa<Instruction>(BI->getCondition()) &&
2036  "Partial unswitching requires an instruction as the condition!");
2037 
2038  if (MSSAU && VerifyMemorySSA)
2039  MSSAU->getMemorySSA()->verifyMemorySSA();
2040 
2041  // Constant and BBs tracking the cloned and continuing successor. When we are
2042  // unswitching the entire condition, this can just be trivially chosen to
2043  // unswitch towards `true`. However, when we are unswitching a set of
2044  // invariants combined with `and` or `or` or partially invariant instructions,
2045  // the combining operation determines the best direction to unswitch: we want
2046  // to unswitch the direction that will collapse the branch.
2047  bool Direction = true;
2048  int ClonedSucc = 0;
2049  if (!FullUnswitch) {
2050  Value *Cond = BI->getCondition();
2051  (void)Cond;
2053  PartiallyInvariant) &&
2054  "Only `or`, `and`, an `select`, partially invariant instructions "
2055  "can combine invariants being unswitched.");
2056  if (!match(BI->getCondition(), m_LogicalOr())) {
2057  if (match(BI->getCondition(), m_LogicalAnd()) ||
2058  (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2059  Direction = false;
2060  ClonedSucc = 1;
2061  }
2062  }
2063  }
2064 
2065  BasicBlock *RetainedSuccBB =
2066  BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2067  SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2068  if (BI)
2069  UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2070  else
2071  for (auto Case : SI->cases())
2072  if (Case.getCaseSuccessor() != RetainedSuccBB)
2073  UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2074 
2075  assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2076  "Should not unswitch the same successor we are retaining!");
2077 
2078  // The branch should be in this exact loop. Any inner loop's invariant branch
2079  // should be handled by unswitching that inner loop. The caller of this
2080  // routine should filter out any candidates that remain (but were skipped for
2081  // whatever reason).
2082  assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2083 
2084  // Compute the parent loop now before we start hacking on things.
2085  Loop *ParentL = L.getParentLoop();
2086  // Get blocks in RPO order for MSSA update, before changing the CFG.
2087  LoopBlocksRPO LBRPO(&L);
2088  if (MSSAU)
2089  LBRPO.perform(&LI);
2090 
2091  // Compute the outer-most loop containing one of our exit blocks. This is the
2092  // furthest up our loopnest which can be mutated, which we will use below to
2093  // update things.
2094  Loop *OuterExitL = &L;
2095  for (auto *ExitBB : ExitBlocks) {
2096  Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
2097  if (!NewOuterExitL) {
2098  // We exited the entire nest with this block, so we're done.
2099  OuterExitL = nullptr;
2100  break;
2101  }
2102  if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2103  OuterExitL = NewOuterExitL;
2104  }
2105 
2106  // At this point, we're definitely going to unswitch something so invalidate
2107  // any cached information in ScalarEvolution for the outer most loop
2108  // containing an exit block and all nested loops.
2109  if (SE) {
2110  if (OuterExitL)
2111  SE->forgetLoop(OuterExitL);
2112  else
2113  SE->forgetTopmostLoop(&L);
2114  }
2115 
2116  // If the edge from this terminator to a successor dominates that successor,
2117  // store a map from each block in its dominator subtree to it. This lets us
2118  // tell when cloning for a particular successor if a block is dominated by
2119  // some *other* successor with a single data structure. We use this to
2120  // significantly reduce cloning.
2122  for (auto *SuccBB : llvm::concat<BasicBlock *const>(
2123  makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs))
2124  if (SuccBB->getUniquePredecessor() ||
2125  llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2126  return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2127  }))
2128  visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2129  DominatingSucc[BB] = SuccBB;
2130  return true;
2131  });
2132 
2133  // Split the preheader, so that we know that there is a safe place to insert
2134  // the conditional branch. We will change the preheader to have a conditional
2135  // branch on LoopCond. The original preheader will become the split point
2136  // between the unswitched versions, and we will have a new preheader for the
2137  // original loop.
2138  BasicBlock *SplitBB = L.getLoopPreheader();
2139  BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2140 
2141  // Keep track of the dominator tree updates needed.
2143 
2144  // Clone the loop for each unswitched successor.
2146  VMaps.reserve(UnswitchedSuccBBs.size());
2148  for (auto *SuccBB : UnswitchedSuccBBs) {
2149  VMaps.emplace_back(new ValueToValueMapTy());
2150  ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2151  L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2152  DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
2153  }
2154 
2155  // Drop metadata if we may break its semantics by moving this instr into the
2156  // split block.
2157  if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2159  // Do not spend time trying to understand if we can keep it, just drop it
2160  // to save compile time.
2161  TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2162  else {
2163  // It is only legal to preserve make.implicit metadata if we are
2164  // guaranteed no reach implicit null check after following this branch.
2165  ICFLoopSafetyInfo SafetyInfo;
2166  SafetyInfo.computeLoopSafetyInfo(&L);
2167  if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2168  TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2169  }
2170  }
2171 
2172  // The stitching of the branched code back together depends on whether we're
2173  // doing full unswitching or not with the exception that we always want to
2174  // nuke the initial terminator placed in the split block.
2175  SplitBB->getTerminator()->eraseFromParent();
2176  if (FullUnswitch) {
2177  // Splice the terminator from the original loop and rewrite its
2178  // successors.
2179  SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
2180 
2181  // Keep a clone of the terminator for MSSA updates.
2182  Instruction *NewTI = TI.clone();
2183  ParentBB->getInstList().push_back(NewTI);
2184 
2185  // First wire up the moved terminator to the preheaders.
2186  if (BI) {
2187  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2188  BI->setSuccessor(ClonedSucc, ClonedPH);
2189  BI->setSuccessor(1 - ClonedSucc, LoopPH);
2190  DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2191  } else {
2192  assert(SI && "Must either be a branch or switch!");
2193 
2194  // Walk the cases and directly update their successors.
2195  assert(SI->getDefaultDest() == RetainedSuccBB &&
2196  "Not retaining default successor!");
2197  SI->setDefaultDest(LoopPH);
2198  for (auto &Case : SI->cases())
2199  if (Case.getCaseSuccessor() == RetainedSuccBB)
2200  Case.setSuccessor(LoopPH);
2201  else
2202  Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2203 
2204  // We need to use the set to populate domtree updates as even when there
2205  // are multiple cases pointing at the same successor we only want to
2206  // remove and insert one edge in the domtree.
2207  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2208  DTUpdates.push_back(
2209  {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2210  }
2211 
2212  if (MSSAU) {
2213  DT.applyUpdates(DTUpdates);
2214  DTUpdates.clear();
2215 
2216  // Remove all but one edge to the retained block and all unswitched
2217  // blocks. This is to avoid having duplicate entries in the cloned Phis,
2218  // when we know we only keep a single edge for each case.
2219  MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2220  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2221  MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2222 
2223  for (auto &VMap : VMaps)
2224  MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2225  /*IgnoreIncomingWithNoClones=*/true);
2226  MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2227 
2228  // Remove all edges to unswitched blocks.
2229  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2230  MSSAU->removeEdge(ParentBB, SuccBB);
2231  }
2232 
2233  // Now unhook the successor relationship as we'll be replacing
2234  // the terminator with a direct branch. This is much simpler for branches
2235  // than switches so we handle those first.
2236  if (BI) {
2237  // Remove the parent as a predecessor of the unswitched successor.
2238  assert(UnswitchedSuccBBs.size() == 1 &&
2239  "Only one possible unswitched block for a branch!");
2240  BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2241  UnswitchedSuccBB->removePredecessor(ParentBB,
2242  /*KeepOneInputPHIs*/ true);
2243  DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2244  } else {
2245  // Note that we actually want to remove the parent block as a predecessor
2246  // of *every* case successor. The case successor is either unswitched,
2247  // completely eliminating an edge from the parent to that successor, or it
2248  // is a duplicate edge to the retained successor as the retained successor
2249  // is always the default successor and as we'll replace this with a direct
2250  // branch we no longer need the duplicate entries in the PHI nodes.
2251  SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2252  assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2253  "Not retaining default successor!");
2254  for (auto &Case : NewSI->cases())
2255  Case.getCaseSuccessor()->removePredecessor(
2256  ParentBB,
2257  /*KeepOneInputPHIs*/ true);
2258 
2259  // We need to use the set to populate domtree updates as even when there
2260  // are multiple cases pointing at the same successor we only want to
2261  // remove and insert one edge in the domtree.
2262  for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2263  DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2264  }
2265 
2266  // After MSSAU update, remove the cloned terminator instruction NewTI.
2267  ParentBB->getTerminator()->eraseFromParent();
2268 
2269  // Create a new unconditional branch to the continuing block (as opposed to
2270  // the one cloned).
2271  BranchInst::Create(RetainedSuccBB, ParentBB);
2272  } else {
2273  assert(BI && "Only branches have partial unswitching.");
2274  assert(UnswitchedSuccBBs.size() == 1 &&
2275  "Only one possible unswitched block for a branch!");
2276  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2277  // When doing a partial unswitch, we have to do a bit more work to build up
2278  // the branch in the split block.
2279  if (PartiallyInvariant)
2281  *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2282  else
2283  buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
2284  *ClonedPH, *LoopPH);
2285  DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2286 
2287  if (MSSAU) {
2288  DT.applyUpdates(DTUpdates);
2289  DTUpdates.clear();
2290 
2291  // Perform MSSA cloning updates.
2292  for (auto &VMap : VMaps)
2293  MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2294  /*IgnoreIncomingWithNoClones=*/true);
2295  MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2296  }
2297  }
2298 
2299  // Apply the updates accumulated above to get an up-to-date dominator tree.
2300  DT.applyUpdates(DTUpdates);
2301 
2302  // Now that we have an accurate dominator tree, first delete the dead cloned
2303  // blocks so that we can accurately build any cloned loops. It is important to
2304  // not delete the blocks from the original loop yet because we still want to
2305  // reference the original loop to understand the cloned loop's structure.
2306  deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2307 
2308  // Build the cloned loop structure itself. This may be substantially
2309  // different from the original structure due to the simplified CFG. This also
2310  // handles inserting all the cloned blocks into the correct loops.
2311  SmallVector<Loop *, 4> NonChildClonedLoops;
2312  for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2313  buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2314 
2315  // Now that our cloned loops have been built, we can update the original loop.
2316  // First we delete the dead blocks from it and then we rebuild the loop
2317  // structure taking these deletions into account.
2318  deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU);
2319 
2320  if (MSSAU && VerifyMemorySSA)
2321  MSSAU->getMemorySSA()->verifyMemorySSA();
2322 
2323  SmallVector<Loop *, 4> HoistedLoops;
2324  bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
2325 
2326  if (MSSAU && VerifyMemorySSA)
2327  MSSAU->getMemorySSA()->verifyMemorySSA();
2328 
2329  // This transformation has a high risk of corrupting the dominator tree, and
2330  // the below steps to rebuild loop structures will result in hard to debug
2331  // errors in that case so verify that the dominator tree is sane first.
2332  // FIXME: Remove this when the bugs stop showing up and rely on existing
2333  // verification steps.
2335 
2336  if (BI && !PartiallyInvariant) {
2337  // If we unswitched a branch which collapses the condition to a known
2338  // constant we want to replace all the uses of the invariants within both
2339  // the original and cloned blocks. We do this here so that we can use the
2340  // now updated dominator tree to identify which side the users are on.
2341  assert(UnswitchedSuccBBs.size() == 1 &&
2342  "Only one possible unswitched block for a branch!");
2343  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2344 
2345  // When considering multiple partially-unswitched invariants
2346  // we cant just go replace them with constants in both branches.
2347  //
2348  // For 'AND' we infer that true branch ("continue") means true
2349  // for each invariant operand.
2350  // For 'OR' we can infer that false branch ("continue") means false
2351  // for each invariant operand.
2352  // So it happens that for multiple-partial case we dont replace
2353  // in the unswitched branch.
2354  bool ReplaceUnswitched =
2355  FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2356 
2357  ConstantInt *UnswitchedReplacement =
2360  ConstantInt *ContinueReplacement =
2363  for (Value *Invariant : Invariants)
2364  // Use make_early_inc_range here as set invalidates the iterator.
2365  for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2366  Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2367  if (!UserI)
2368  continue;
2369 
2370  // Replace it with the 'continue' side if in the main loop body, and the
2371  // unswitched if in the cloned blocks.
2372  if (DT.dominates(LoopPH, UserI->getParent()))
2373  U.set(ContinueReplacement);
2374  else if (ReplaceUnswitched &&
2375  DT.dominates(ClonedPH, UserI->getParent()))
2376  U.set(UnswitchedReplacement);
2377  }
2378  }
2379 
2380  // We can change which blocks are exit blocks of all the cloned sibling
2381  // loops, the current loop, and any parent loops which shared exit blocks
2382  // with the current loop. As a consequence, we need to re-form LCSSA for
2383  // them. But we shouldn't need to re-form LCSSA for any child loops.
2384  // FIXME: This could be made more efficient by tracking which exit blocks are
2385  // new, and focusing on them, but that isn't likely to be necessary.
2386  //
2387  // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2388  // loop nest and update every loop that could have had its exits changed. We
2389  // also need to cover any intervening loops. We add all of these loops to
2390  // a list and sort them by loop depth to achieve this without updating
2391  // unnecessary loops.
2392  auto UpdateLoop = [&](Loop &UpdateL) {
2393 #ifndef NDEBUG
2394  UpdateL.verifyLoop();
2395  for (Loop *ChildL : UpdateL) {
2396  ChildL->verifyLoop();
2397  assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2398  "Perturbed a child loop's LCSSA form!");
2399  }
2400 #endif
2401  // First build LCSSA for this loop so that we can preserve it when
2402  // forming dedicated exits. We don't want to perturb some other loop's
2403  // LCSSA while doing that CFG edit.
2404  formLCSSA(UpdateL, DT, &LI, SE);
2405 
2406  // For loops reached by this loop's original exit blocks we may
2407  // introduced new, non-dedicated exits. At least try to re-form dedicated
2408  // exits for these loops. This may fail if they couldn't have dedicated
2409  // exits to start with.
2410  formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2411  };
2412 
2413  // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2414  // and we can do it in any order as they don't nest relative to each other.
2415  //
2416  // Also check if any of the loops we have updated have become top-level loops
2417  // as that will necessitate widening the outer loop scope.
2418  for (Loop *UpdatedL :
2419  llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2420  UpdateLoop(*UpdatedL);
2421  if (UpdatedL->isOutermost())
2422  OuterExitL = nullptr;
2423  }
2424  if (IsStillLoop) {
2425  UpdateLoop(L);
2426  if (L.isOutermost())
2427  OuterExitL = nullptr;
2428  }
2429 
2430  // If the original loop had exit blocks, walk up through the outer most loop
2431  // of those exit blocks to update LCSSA and form updated dedicated exits.
2432  if (OuterExitL != &L)
2433  for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2434  OuterL = OuterL->getParentLoop())
2435  UpdateLoop(*OuterL);
2436 
2437 #ifndef NDEBUG
2438  // Verify the entire loop structure to catch any incorrect updates before we
2439  // progress in the pass pipeline.
2440  LI.verify(DT);
2441 #endif
2442 
2443  // Now that we've unswitched something, make callbacks to report the changes.
2444  // For that we need to merge together the updated loops and the cloned loops
2445  // and check whether the original loop survived.
2446  SmallVector<Loop *, 4> SibLoops;
2447  for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2448  if (UpdatedL->getParentLoop() == ParentL)
2449  SibLoops.push_back(UpdatedL);
2450  UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);
2451 
2452  if (MSSAU && VerifyMemorySSA)
2453  MSSAU->getMemorySSA()->verifyMemorySSA();
2454 
2455  if (BI)
2456  ++NumBranches;
2457  else
2458  ++NumSwitches;
2459 }
2460 
2461 /// Recursively compute the cost of a dominator subtree based on the per-block
2462 /// cost map provided.
2463 ///
2464 /// The recursive computation is memozied into the provided DT-indexed cost map
2465 /// to allow querying it for most nodes in the domtree without it becoming
2466 /// quadratic.
2468  DomTreeNode &N,
2471  // Don't accumulate cost (or recurse through) blocks not in our block cost
2472  // map and thus not part of the duplication cost being considered.
2473  auto BBCostIt = BBCostMap.find(N.getBlock());
2474  if (BBCostIt == BBCostMap.end())
2475  return 0;
2476 
2477  // Lookup this node to see if we already computed its cost.
2478  auto DTCostIt = DTCostMap.find(&N);
2479  if (DTCostIt != DTCostMap.end())
2480  return DTCostIt->second;
2481 
2482  // If not, we have to compute it. We can't use insert above and update
2483  // because computing the cost may insert more things into the map.
2484  InstructionCost Cost = std::accumulate(
2485  N.begin(), N.end(), BBCostIt->second,
2486  [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2487  return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2488  });
2489  bool Inserted = DTCostMap.insert({&N, Cost}).second;
2490  (void)Inserted;
2491  assert(Inserted && "Should not insert a node while visiting children!");
2492  return Cost;
2493 }
2494 
2495 /// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2496 /// making the following replacement:
2497 ///
2498 /// --code before guard--
2499 /// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2500 /// --code after guard--
2501 ///
2502 /// into
2503 ///
2504 /// --code before guard--
2505 /// br i1 %cond, label %guarded, label %deopt
2506 ///
2507 /// guarded:
2508 /// --code after guard--
2509 ///
2510 /// deopt:
2511 /// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2512 /// unreachable
2513 ///
2514 /// It also makes all relevant DT and LI updates, so that all structures are in
2515 /// valid state after this transform.
2516 static BranchInst *
2518  SmallVectorImpl<BasicBlock *> &ExitBlocks,
2519  DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
2521  LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2522  BasicBlock *CheckBB = GI->getParent();
2523 
2524  if (MSSAU && VerifyMemorySSA)
2525  MSSAU->getMemorySSA()->verifyMemorySSA();
2526 
2527  // Remove all CheckBB's successors from DomTree. A block can be seen among
2528  // successors more than once, but for DomTree it should be added only once.
2529  SmallPtrSet<BasicBlock *, 4> Successors;
2530  for (auto *Succ : successors(CheckBB))
2531  if (Successors.insert(Succ).second)
2532  DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2533 
2534  Instruction *DeoptBlockTerm =
2535  SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
2536  BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2537  // SplitBlockAndInsertIfThen inserts control flow that branches to
2538  // DeoptBlockTerm if the condition is true. We want the opposite.
2539  CheckBI->swapSuccessors();
2540 
2541  BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2542  GuardedBlock->setName("guarded");
2543  CheckBI->getSuccessor(1)->setName("deopt");
2544  BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2545 
2546  // We now have a new exit block.
2547  ExitBlocks.push_back(CheckBI->getSuccessor(1));
2548 
2549  if (MSSAU)
2550  MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2551 
2552  GI->moveBefore(DeoptBlockTerm);
2554 
2555  // Add new successors of CheckBB into DomTree.
2556  for (auto *Succ : successors(CheckBB))
2557  DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
2558 
2559  // Now the blocks that used to be CheckBB's successors are GuardedBlock's
2560  // successors.
2561  for (auto *Succ : Successors)
2562  DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
2563 
2564  // Make proper changes to DT.
2565  DT.applyUpdates(DTUpdates);
2566  // Inform LI of a new loop block.
2567  L.addBasicBlockToLoop(GuardedBlock, LI);
2568 
2569  if (MSSAU) {
2570  MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2571  MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2572  if (VerifyMemorySSA)
2573  MSSAU->getMemorySSA()->verifyMemorySSA();
2574  }
2575 
2576  ++NumGuards;
2577  return CheckBI;
2578 }
2579 
2580 /// Cost multiplier is a way to limit potentially exponential behavior
2581 /// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
2582 /// candidates available. Also accounting for the number of "sibling" loops with
2583 /// the idea to account for previous unswitches that already happened on this
2584 /// cluster of loops. There was an attempt to keep this formula simple,
2585 /// just enough to limit the worst case behavior. Even if it is not that simple
2586 /// now it is still not an attempt to provide a detailed heuristic size
2587 /// prediction.
2588 ///
2589 /// TODO: Make a proper accounting of "explosion" effect for all kinds of
2590 /// unswitch candidates, making adequate predictions instead of wild guesses.
2591 /// That requires knowing not just the number of "remaining" candidates but
2592 /// also costs of unswitching for each of these candidates.
2594  Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
2596  UnswitchCandidates) {
2597 
2598  // Guards and other exiting conditions do not contribute to exponential
2599  // explosion as soon as they dominate the latch (otherwise there might be
2600  // another path to the latch remaining that does not allow to eliminate the
2601  // loop copy on unswitch).
2602  BasicBlock *Latch = L.getLoopLatch();
2603  BasicBlock *CondBlock = TI.getParent();
2604  if (DT.dominates(CondBlock, Latch) &&
2605  (isGuard(&TI) ||
2606  llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
2607  return L.contains(SuccBB);
2608  }) <= 1)) {
2609  NumCostMultiplierSkipped++;
2610  return 1;
2611  }
2612 
2613  auto *ParentL = L.getParentLoop();
2614  int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2615  : std::distance(LI.begin(), LI.end()));
2616  // Count amount of clones that all the candidates might cause during
2617  // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
2618  int UnswitchedClones = 0;
2619  for (auto Candidate : UnswitchCandidates) {
2620  Instruction *CI = Candidate.first;
2621  BasicBlock *CondBlock = CI->getParent();
2622  bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2623  if (isGuard(CI)) {
2624  if (!SkipExitingSuccessors)
2625  UnswitchedClones++;
2626  continue;
2627  }
2628  int NonExitingSuccessors = llvm::count_if(
2629  successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
2630  return !SkipExitingSuccessors || L.contains(SuccBB);
2631  });
2632  UnswitchedClones += Log2_32(NonExitingSuccessors);
2633  }
2634 
2635  // Ignore up to the "unscaled candidates" number of unswitch candidates
2636  // when calculating the power-of-two scaling of the cost. The main idea
2637  // with this control is to allow a small number of unswitches to happen
2638  // and rely more on siblings multiplier (see below) when the number
2639  // of candidates is small.
2640  unsigned ClonesPower =
2641  std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2642 
2643  // Allowing top-level loops to spread a bit more than nested ones.
2644  int SiblingsMultiplier =
2645  std::max((ParentL ? SiblingsCount
2646  : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2647  1);
2648  // Compute the cost multiplier in a way that won't overflow by saturating
2649  // at an upper bound.
2650  int CostMultiplier;
2651  if (ClonesPower > Log2_32(UnswitchThreshold) ||
2652  SiblingsMultiplier > UnswitchThreshold)
2653  CostMultiplier = UnswitchThreshold;
2654  else
2655  CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2656  (int)UnswitchThreshold);
2657 
2658  LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2659  << " (siblings " << SiblingsMultiplier << " * clones "
2660  << (1 << ClonesPower) << ")"
2661  << " for unswitch candidate: " << TI << "\n");
2662  return CostMultiplier;
2663 }
2664 
2666  Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2668  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2669  ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
2670  // Collect all invariant conditions within this loop (as opposed to an inner
2671  // loop which would be handled when visiting that inner loop).
2673  UnswitchCandidates;
2674 
2675  // Whether or not we should also collect guards in the loop.
2676  bool CollectGuards = false;
2677  if (UnswitchGuards) {
2678  auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2679  Intrinsic::getName(Intrinsic::experimental_guard));
2680  if (GuardDecl && !GuardDecl->use_empty())
2681  CollectGuards = true;
2682  }
2683 
2684  IVConditionInfo PartialIVInfo;
2685  for (auto *BB : L.blocks()) {
2686  if (LI.getLoopFor(BB) != &L)
2687  continue;
2688 
2689  if (CollectGuards)
2690  for (auto &I : *BB)
2691  if (isGuard(&I)) {
2692  auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
2693  // TODO: Support AND, OR conditions and partial unswitching.
2694  if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2695  UnswitchCandidates.push_back({&I, {Cond}});
2696  }
2697 
2698  if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2699  // We can only consider fully loop-invariant switch conditions as we need
2700  // to completely eliminate the switch after unswitching.
2701  if (!isa<Constant>(SI->getCondition()) &&
2702  L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2703  UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2704  continue;
2705  }
2706 
2707  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2708  if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
2709  BI->getSuccessor(0) == BI->getSuccessor(1))
2710  continue;
2711 
2712  // If BI's condition is 'select _, true, false', simplify it to confuse
2713  // matchers
2714  Value *Cond = BI->getCondition(), *CondNext;
2715  while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
2716  Cond = CondNext;
2717  BI->setCondition(Cond);
2718 
2719  if (L.isLoopInvariant(BI->getCondition())) {
2720  UnswitchCandidates.push_back({BI, {BI->getCondition()}});
2721  continue;
2722  }
2723 
2724  Instruction &CondI = *cast<Instruction>(BI->getCondition());
2725  if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
2726  TinyPtrVector<Value *> Invariants =
2728  if (Invariants.empty())
2729  continue;
2730 
2731  UnswitchCandidates.push_back({BI, std::move(Invariants)});
2732  continue;
2733  }
2734  }
2735 
2736  Instruction *PartialIVCondBranch = nullptr;
2737  if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
2738  !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2739  return TerminatorAndInvariants.first == L.getHeader()->getTerminator();
2740  })) {
2741  MemorySSA *MSSA = MSSAU->getMemorySSA();
2742  if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2743  LLVM_DEBUG(
2744  dbgs() << "simple-loop-unswitch: Found partially invariant condition "
2745  << *Info->InstToDuplicate[0] << "\n");
2746  PartialIVInfo = *Info;
2747  PartialIVCondBranch = L.getHeader()->getTerminator();
2748  TinyPtrVector<Value *> ValsToDuplicate;
2749  for (auto *Inst : Info->InstToDuplicate)
2750  ValsToDuplicate.push_back(Inst);
2751  UnswitchCandidates.push_back(
2752  {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2753  }
2754  }
2755 
2756  // If we didn't find any candidates, we're done.
2757  if (UnswitchCandidates.empty())
2758  return false;
2759 
2760  // Check if there are irreducible CFG cycles in this loop. If so, we cannot
2761  // easily unswitch non-trivial edges out of the loop. Doing so might turn the
2762  // irreducible control flow into reducible control flow and introduce new
2763  // loops "out of thin air". If we ever discover important use cases for doing
2764  // this, we can add support to loop unswitch, but it is a lot of complexity
2765  // for what seems little or no real world benefit.
2766  LoopBlocksRPO RPOT(&L);
2767  RPOT.perform(&LI);
2768  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
2769  return false;
2770 
2771  SmallVector<BasicBlock *, 4> ExitBlocks;
2772  L.getUniqueExitBlocks(ExitBlocks);
2773 
2774  // We cannot unswitch if exit blocks contain a cleanuppad instruction as we
2775  // don't know how to split those exit blocks.
2776  // FIXME: We should teach SplitBlock to handle this and remove this
2777  // restriction.
2778  for (auto *ExitBB : ExitBlocks) {
2779  if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI())) {
2780  LLVM_DEBUG(
2781  dbgs() << "Cannot unswitch because of cleanuppad in exit block\n");
2782  return false;
2783  }
2784  }
2785 
2786  LLVM_DEBUG(
2787  dbgs() << "Considering " << UnswitchCandidates.size()
2788  << " non-trivial loop invariant conditions for unswitching.\n");
2789 
2790  // Given that unswitching these terminators will require duplicating parts of
2791  // the loop, so we need to be able to model that cost. Compute the ephemeral
2792  // values and set up a data structure to hold per-BB costs. We cache each
2793  // block's cost so that we don't recompute this when considering different
2794  // subsets of the loop for duplication during unswitching.
2796  CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
2798 
2799  // Compute the cost of each block, as well as the total loop cost. Also, bail
2800  // out if we see instructions which are incompatible with loop unswitching
2801  // (convergent, noduplicate, or cross-basic-block tokens).
2802  // FIXME: We might be able to safely handle some of these in non-duplicated
2803  // regions.
2805  L.getHeader()->getParent()->hasMinSize()
2808  InstructionCost LoopCost = 0;
2809  for (auto *BB : L.blocks()) {
2810  InstructionCost Cost = 0;
2811  for (auto &I : *BB) {
2812  if (EphValues.count(&I))
2813  continue;
2814 
2815  if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
2816  return false;
2817  if (auto *CB = dyn_cast<CallBase>(&I))
2818  if (CB->isConvergent() || CB->cannotDuplicate())
2819  return false;
2820 
2821  Cost += TTI.getUserCost(&I, CostKind);
2822  }
2823  assert(Cost >= 0 && "Must not have negative costs!");
2824  LoopCost += Cost;
2825  assert(LoopCost >= 0 && "Must not have negative loop costs!");
2826  BBCostMap[BB] = Cost;
2827  }
2828  LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
2829 
2830  // Now we find the best candidate by searching for the one with the following
2831  // properties in order:
2832  //
2833  // 1) An unswitching cost below the threshold
2834  // 2) The smallest number of duplicated unswitch candidates (to avoid
2835  // creating redundant subsequent unswitching)
2836  // 3) The smallest cost after unswitching.
2837  //
2838  // We prioritize reducing fanout of unswitch candidates provided the cost
2839  // remains below the threshold because this has a multiplicative effect.
2840  //
2841  // This requires memoizing each dominator subtree to avoid redundant work.
2842  //
2843  // FIXME: Need to actually do the number of candidates part above.
2845  // Given a terminator which might be unswitched, computes the non-duplicated
2846  // cost for that terminator.
2847  auto ComputeUnswitchedCost = [&](Instruction &TI,
2848  bool FullUnswitch) -> InstructionCost {
2849  BasicBlock &BB = *TI.getParent();
2851 
2852  InstructionCost Cost = 0;
2853  for (BasicBlock *SuccBB : successors(&BB)) {
2854  // Don't count successors more than once.
2855  if (!Visited.insert(SuccBB).second)
2856  continue;
2857 
2858  // If this is a partial unswitch candidate, then it must be a conditional
2859  // branch with a condition of either `or`, `and`, their corresponding
2860  // select forms or partially invariant instructions. In that case, one of
2861  // the successors is necessarily duplicated, so don't even try to remove
2862  // its cost.
2863  if (!FullUnswitch) {
2864  auto &BI = cast<BranchInst>(TI);
2865  if (match(BI.getCondition(), m_LogicalAnd())) {
2866  if (SuccBB == BI.getSuccessor(1))
2867  continue;
2868  } else if (match(BI.getCondition(), m_LogicalOr())) {
2869  if (SuccBB == BI.getSuccessor(0))
2870  continue;
2871  } else if (!PartialIVInfo.InstToDuplicate.empty()) {
2872  if (PartialIVInfo.KnownValue->isOneValue() &&
2873  SuccBB == BI.getSuccessor(1))
2874  continue;
2875  else if (!PartialIVInfo.KnownValue->isOneValue() &&
2876  SuccBB == BI.getSuccessor(0))
2877  continue;
2878  }
2879  }
2880 
2881  // This successor's domtree will not need to be duplicated after
2882  // unswitching if the edge to the successor dominates it (and thus the
2883  // entire tree). This essentially means there is no other path into this
2884  // subtree and so it will end up live in only one clone of the loop.
2885  if (SuccBB->getUniquePredecessor() ||
2886  llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2887  return PredBB == &BB || DT.dominates(SuccBB, PredBB);
2888  })) {
2889  Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
2890  assert(Cost <= LoopCost &&
2891  "Non-duplicated cost should never exceed total loop cost!");
2892  }
2893  }
2894 
2895  // Now scale the cost by the number of unique successors minus one. We
2896  // subtract one because there is already at least one copy of the entire
2897  // loop. This is computing the new cost of unswitching a condition.
2898  // Note that guards always have 2 unique successors that are implicit and
2899  // will be materialized if we decide to unswitch it.
2900  int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
2901  assert(SuccessorsCount > 1 &&
2902  "Cannot unswitch a condition without multiple distinct successors!");
2903  return (LoopCost - Cost) * (SuccessorsCount - 1);
2904  };
2905  Instruction *BestUnswitchTI = nullptr;
2906  InstructionCost BestUnswitchCost = 0;
2907  ArrayRef<Value *> BestUnswitchInvariants;
2908  for (auto &TerminatorAndInvariants : UnswitchCandidates) {
2909  Instruction &TI = *TerminatorAndInvariants.first;
2910  ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
2911  BranchInst *BI = dyn_cast<BranchInst>(&TI);
2912  InstructionCost CandidateCost = ComputeUnswitchedCost(
2913  TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 &&
2914  Invariants[0] == BI->getCondition()));
2915  // Calculate cost multiplier which is a tool to limit potentially
2916  // exponential behavior of loop-unswitch.
2918  int CostMultiplier =
2919  CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
2920  assert(
2921  (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
2922  "cost multiplier needs to be in the range of 1..UnswitchThreshold");
2923  CandidateCost *= CostMultiplier;
2924  LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
2925  << " (multiplier: " << CostMultiplier << ")"
2926  << " for unswitch candidate: " << TI << "\n");
2927  } else {
2928  LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
2929  << " for unswitch candidate: " << TI << "\n");
2930  }
2931 
2932  if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
2933  BestUnswitchTI = &TI;
2934  BestUnswitchCost = CandidateCost;
2935  BestUnswitchInvariants = Invariants;
2936  }
2937  }
2938  assert(BestUnswitchTI && "Failed to find loop unswitch candidate");
2939 
2940  if (BestUnswitchCost >= UnswitchThreshold) {
2941  LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "
2942  << BestUnswitchCost << "\n");
2943  return false;
2944  }
2945 
2946  if (BestUnswitchTI != PartialIVCondBranch)
2947  PartialIVInfo.InstToDuplicate.clear();
2948 
2949  // If the best candidate is a guard, turn it into a branch.
2950  if (isGuard(BestUnswitchTI))
2951  BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
2952  ExitBlocks, DT, LI, MSSAU);
2953 
2954  LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = "
2955  << BestUnswitchCost << ") terminator: " << *BestUnswitchTI
2956  << "\n");
2957  unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
2958  ExitBlocks, PartialIVInfo, DT, LI, AC,
2959  UnswitchCB, SE, MSSAU);
2960  return true;
2961 }
2962 
2963 /// Unswitch control flow predicated on loop invariant conditions.
2964 ///
2965 /// This first hoists all branches or switches which are trivial (IE, do not
2966 /// require duplicating any part of the loop) out of the loop body. It then
2967 /// looks at other loop invariant control flows and tries to unswitch those as
2968 /// well by cloning the loop if the result is small enough.
2969 ///
2970 /// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
2971 /// also updated based on the unswitch. The `MSSA` analysis is also updated if
2972 /// valid (i.e. its use is enabled).
2973 ///
2974 /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
2975 /// true, we will attempt to do non-trivial unswitching as well as trivial
2976 /// unswitching.
2977 ///
2978 /// The `UnswitchCB` callback provided will be run after unswitching is
2979 /// complete, with the first parameter set to `true` if the provided loop
2980 /// remains a loop, and a list of new sibling loops created.
2981 ///
2982 /// If `SE` is non-null, we will update that analysis based on the unswitching
2983 /// done.
2984 static bool
2986  AAResults &AA, TargetTransformInfo &TTI, bool NonTrivial,
2987  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2988  ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
2989  assert(L.isRecursivelyLCSSAForm(DT, LI) &&
2990  "Loops must be in LCSSA form before unswitching.");
2991 
2992  // Must be in loop simplified form: we need a preheader and dedicated exits.
2993  if (!L.isLoopSimplifyForm())
2994  return false;
2995 
2996  // Try trivial unswitch first before loop over other basic blocks in the loop.
2997  if (unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
2998  // If we unswitched successfully we will want to clean up the loop before
2999  // processing it further so just mark it as unswitched and return.
3000  UnswitchCB(/*CurrentLoopValid*/ true, false, {});
3001  return true;
3002  }
3003 
3004  // Check whether we should continue with non-trivial conditions.
3005  // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3006  // unswitching for testing and debugging.
3007  // NonTrivial: Parameter that enables non-trivial unswitching for this
3008  // invocation of the transform. But this should be allowed only
3009  // for targets without branch divergence.
3010  //
3011  // FIXME: If divergence analysis becomes available to a loop
3012  // transform, we should allow unswitching for non-trivial uniform
3013  // branches even on targets that have divergence.
3014  // https://bugs.llvm.org/show_bug.cgi?id=48819
3015  bool ContinueWithNonTrivial =
3016  EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
3017  if (!ContinueWithNonTrivial)
3018  return false;
3019 
3020  // Skip non-trivial unswitching for optsize functions.
3021  if (L.getHeader()->getParent()->hasOptSize())
3022  return false;
3023 
3024  // Skip non-trivial unswitching for loops that cannot be cloned.
3025  if (!L.isSafeToClone())
3026  return false;
3027 
3028  // For non-trivial unswitching, because it often creates new loops, we rely on
3029  // the pass manager to iterate on the loops rather than trying to immediately
3030  // reach a fixed point. There is no substantial advantage to iterating
3031  // internally, and if any of the new loops are simplified enough to contain
3032  // trivial unswitching we want to prefer those.
3033 
3034  // Try to unswitch the best invariant condition. We prefer this full unswitch to
3035  // a partial unswitch when possible below the threshold.
3036  if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU))
3037  return true;
3038 
3039  // No other opportunities to unswitch.
3040  return false;
3041 }
3042 
3045  LPMUpdater &U) {
3046  Function &F = *L.getHeader()->getParent();
3047  (void)F;
3048 
3049  LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3050  << "\n");
3051 
3052  // Save the current loop name in a variable so that we can report it even
3053  // after it has been deleted.
3054  std::string LoopName = std::string(L.getName());
3055 
3056  auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
3057  bool PartiallyInvariant,
3058  ArrayRef<Loop *> NewLoops) {
3059  // If we did a non-trivial unswitch, we have added new (cloned) loops.
3060  if (!NewLoops.empty())
3061  U.addSiblingLoops(NewLoops);
3062 
3063  // If the current loop remains valid, we should revisit it to catch any
3064  // other unswitch opportunities. Otherwise, we need to mark it as deleted.
3065  if (CurrentLoopValid) {
3066  if (PartiallyInvariant) {
3067  // Mark the new loop as partially unswitched, to avoid unswitching on
3068  // the same condition again.
3069  auto &Context = L.getHeader()->getContext();
3070  MDNode *DisableUnswitchMD = MDNode::get(
3071  Context,
3072  MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
3074  Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
3075  {DisableUnswitchMD});
3076  L.setLoopID(NewLoopID);
3077  } else
3078  U.revisitCurrentLoop();
3079  } else
3080  U.markLoopAsDeleted(L, LoopName);
3081  };
3082 
3084  if (AR.MSSA) {
3085  MSSAU = MemorySSAUpdater(AR.MSSA);
3086  if (VerifyMemorySSA)
3087  AR.MSSA->verifyMemorySSA();
3088  }
3089  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, NonTrivial,
3090  UnswitchCB, &AR.SE,
3091  MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
3092  return PreservedAnalyses::all();
3093 
3094  if (AR.MSSA && VerifyMemorySSA)
3095  AR.MSSA->verifyMemorySSA();
3096 
3097  // Historically this pass has had issues with the dominator tree so verify it
3098  // in asserts builds.
3100 
3101  auto PA = getLoopPassPreservedAnalyses();
3102  if (AR.MSSA)
3103  PA.preserve<MemorySSAAnalysis>();
3104  return PA;
3105 }
3106 
3107 namespace {
3108 
3109 class SimpleLoopUnswitchLegacyPass : public LoopPass {
3110  bool NonTrivial;
3111 
3112 public:
3113  static char ID; // Pass ID, replacement for typeid
3114 
3115  explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
3116  : LoopPass(ID), NonTrivial(NonTrivial) {
3119  }
3120 
3121  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
3122 
3123  void getAnalysisUsage(AnalysisUsage &AU) const override {
3129  }
3131  }
3132 };
3133 
3134 } // end anonymous namespace
3135 
3136 bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
3137  if (skipLoop(L))
3138  return false;
3139 
3140  Function &F = *L->getHeader()->getParent();
3141 
3142  LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
3143  << "\n");
3144 
3145  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3146  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3147  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3148  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3149  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3150  MemorySSA *MSSA = nullptr;
3153  MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3154  MSSAU = MemorySSAUpdater(MSSA);
3155  }
3156 
3157  auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3158  auto *SE = SEWP ? &SEWP->getSE() : nullptr;
3159 
3160  auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
3161  ArrayRef<Loop *> NewLoops) {
3162  // If we did a non-trivial unswitch, we have added new (cloned) loops.
3163  for (auto *NewL : NewLoops)
3164  LPM.addLoop(*NewL);
3165 
3166  // If the current loop remains valid, re-add it to the queue. This is
3167  // a little wasteful as we'll finish processing the current loop as well,
3168  // but it is the best we can do in the old PM.
3169  if (CurrentLoopValid) {
3170  // If the current loop has been unswitched using a partially invariant
3171  // condition, we should not re-add the current loop to avoid unswitching
3172  // on the same condition again.
3173  if (!PartiallyInvariant)
3174  LPM.addLoop(*L);
3175  } else
3176  LPM.markLoopAsDeleted(*L);
3177  };
3178 
3179  if (MSSA && VerifyMemorySSA)
3180  MSSA->verifyMemorySSA();
3181 
3182  bool Changed =
3183  unswitchLoop(*L, DT, LI, AC, AA, TTI, NonTrivial, UnswitchCB, SE,
3184  MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
3185 
3186  if (MSSA && VerifyMemorySSA)
3187  MSSA->verifyMemorySSA();
3188 
3189  // Historically this pass has had issues with the dominator tree so verify it
3190  // in asserts builds.
3192 
3193  return Changed;
3194 }
3195 
3197 INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3198  "Simple unswitch loops", false, false)
3205 INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3207 
3209  return new SimpleLoopUnswitchLegacyPass(NonTrivial);
3210 }
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:26
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::formLCSSA
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition: LCSSA.cpp:336
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:496
cloneLoopNest
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
Definition: SimpleLoopUnswitch.cpp:1248
AssumptionCache.h
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
llvm::createSimpleLoopUnswitchLegacyPass
Pass * createSimpleLoopUnswitchLegacyPass(bool NonTrivial=false)
Create the legacy pass object for the simple loop unswitcher.
Definition: SimpleLoopUnswitch.cpp:3208
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:210
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
UnswitchNumInitialUnscaledCandidates
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
llvm
Definition: AllocatorList.h:23
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:54
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:266
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:62
llvm::AssumptionCache::registerAssumption
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Definition: AssumptionCache.cpp:217
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:378
ValueMapper.h
llvm::SwitchInstProfUpdateWrapper
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
Definition: Instructions.h:3525
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
deleteDeadBlocksFromLoop
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Definition: SimpleLoopUnswitch.cpp:1586
llvm::SwitchInstProfUpdateWrapper::getSuccessorWeight
CaseWeightOpt getSuccessorWeight(unsigned idx)
Definition: Instructions.cpp:4191
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
llvm::cfg::UpdateKind::Insert
@ Insert
unswitchTrivialBranch
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
Definition: SimpleLoopUnswitch.cpp:427
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1167
llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:689
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:181
llvm::SwitchInst::Create
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3369
recomputeLoopBlockSet
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
Definition: SimpleLoopUnswitch.cpp:1675
ErrorHandling.h
llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:210
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::Intrinsic::getName
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:840
llvm::IRBuilder<>
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1656
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::LoopInfoBase::changeLoopFor
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Definition: LoopInfo.h:1002
Local.h
llvm::IRBuilderBase::CreateOr
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1374
llvm::MemorySSAUpdater::removeBlocks
void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
Definition: MemorySSAUpdater.cpp:1372
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:150
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:213
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::begin
iterator begin()
Definition: DenseMap.h:74
CalculateUnswitchCostMultiplier
static int CalculateUnswitchCostMultiplier(Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT, ArrayRef< std::pair< Instruction *, TinyPtrVector< Value * >>> UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
Definition: SimpleLoopUnswitch.cpp:2593
ScalarEvolution.h
DenseMap.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:329
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1258
areLoopExitPHIsLoopInvariant
static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
Definition: SimpleLoopUnswitch.cpp:179
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::DominatorTreeBase::Insert
static constexpr UpdateKind Insert
Definition: GenericDomTree.h:242
llvm::MemorySSAUpdater::moveAllAfterSpliceBlocks
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
Definition: MemorySSAUpdater.cpp:1247
turnGuardIntoBranch
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
Definition: SimpleLoopUnswitch.cpp:2517
unswitch
simple loop unswitch
Definition: SimpleLoopUnswitch.cpp:3205
llvm::Optional< uint32_t >
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::SwitchInstProfUpdateWrapper::removeCase
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Definition: Instructions.cpp:4149
STLExtras.h
llvm::EnableMSSALoopDependency
cl::opt< bool > EnableMSSALoopDependency
Enables memory ssa as a dependency for loop passes.
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:634
llvm::SwitchInst::cases
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
Definition: Instructions.h:3426
llvm::SwitchInst::getDefaultDest
BasicBlock * getDefaultDest() const
Definition: Instructions.h:3387
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
Sequence.h
llvm::Constant::isOneValue
bool isOneValue() const
Returns true if the value is one.
Definition: Constants.cpp:108
llvm::Module::getFunction
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:174
llvm::zip_first
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&... args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition: STLExtras.h:728
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1572
computeDomSubtreeCost
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
Definition: SimpleLoopUnswitch.cpp:2467
Use.h
MustExecute.h
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:185
llvm::MemorySSAUpdater::applyUpdates
void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
Definition: MemorySSAUpdater.cpp:810
llvm::MemorySSAUpdater::createMemoryAccessInBB
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering defin...
Definition: MemorySSAUpdater.cpp:1445
llvm::DominatorTreeBase::applyUpdates
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Definition: GenericDomTree.h:544
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::BranchInst::swapSuccessors
void swapSuccessors()
Swap the successors of this branch instruction.
Definition: Instructions.cpp:1284
llvm::LPPassManager::addLoop
void addLoop(Loop &L)
Definition: LoopPass.cpp:79
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1198
buildPartialInvariantUnswitchConditionalBranch
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
Definition: SimpleLoopUnswitch.cpp:212
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BranchInst::setSuccessor
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Definition: Instructions.h:3133
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1330
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::Optional::getPointer
constexpr const T * getPointer() const
Definition: Optional.h:280
LoopAnalysisManager.h
llvm::makePostTransformationMetadata
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1053
llvm::LoopBlocksRPO
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::isGuard
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:115
DropNonTrivialImplicitNullChecks
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
Instruction.h
CommandLine.h
CodeMetrics.h
replaceLoopInvariantUses
static void replaceLoopInvariantUses(Loop &L, Value *Invariant, Constant &Replacement)
Definition: SimpleLoopUnswitch.cpp:162
llvm::SwitchInst::CaseHandleImpl::getCaseSuccessor
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
Definition: Instructions.h:3252
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1482
llvm::RemapInstruction
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:253
hoistLoopToNewParent
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
Definition: SimpleLoopUnswitch.cpp:328
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:961
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1472
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:456
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:647
llvm::DominatorTreeBase::insertEdge
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
Definition: GenericDomTree.h:584
Twine.h
InstrTypes.h
llvm::LoopBase::addChildLoop
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:395
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:236
llvm::SwitchInst::CaseHandleImpl::getSuccessorIndex
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
Definition: Instructions.h:3263
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::TinyPtrVector::push_back
void push_back(EltTy NewVal)
Definition: TinyPtrVector.h:244
rewritePHINodesForExitAndUnswitchedBlocks
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
Definition: SimpleLoopUnswitch.cpp:283
llvm::LoopInfoBase::addTopLevelLoop
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition: LoopInfo.h:1021
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::LoopInfoBase::destroy
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Definition: LoopInfo.h:1068
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:377
unswitchBestCondition
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Definition: SimpleLoopUnswitch.cpp:2665
UnswitchThreshold
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
false
Definition: StackSlotColoring.cpp:142
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:596
llvm::MemorySSAUpdater::moveToPlace
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
Definition: MemorySSAUpdater.cpp:1200
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:281
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:361
llvm::SwitchInstProfUpdateWrapper::setSuccessorWeight
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
Definition: Instructions.cpp:4197
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1783
LoopUtils.h
llvm::AArch64CC::PL
@ PL
Definition: AArch64BaseInfo.h:241
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::LPPassManager
Definition: LoopPass.h:75
SmallPtrSet.h
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
PatternMatch.h
visitDomSubTree
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
Definition: SimpleLoopUnswitch.cpp:1990
llvm::ValueMap::count
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: ValueMap.h:152
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:863
llvm::RF_NoModuleLevelChanges
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:72
LoopIterator.h
deleteDeadClonedBlocks
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy >> VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
Definition: SimpleLoopUnswitch.cpp:1557
llvm::IRBuilderBase::CreateAnd
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1348
llvm::Loop::LoopBounds::Direction
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:698
llvm::MemorySSAUpdater::removeDuplicatePhiEdgesBetween
void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
Definition: MemorySSAUpdater.cpp:539
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:513
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:282
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::LoopInfoBase::AllocateLoop
LoopT * AllocateLoop(ArgsTy &&... Args)
Definition: LoopInfo.h:928
collectHomogenousInstGraphLoopInvariants
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root, LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
Definition: SimpleLoopUnswitch.cpp:120
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3116
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
llvm::MemorySSAUpdater::applyInsertUpdates
void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
Definition: MemorySSAUpdater.cpp:856
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:478
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:43
buildPartialUnswitchConditionalBranch
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
Definition: SimpleLoopUnswitch.cpp:198
BasicBlock.h
llvm::cl::opt< bool >
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:535
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::MemorySSAUpdater::removeEdge
void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
Definition: MemorySSAUpdater.cpp:532
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::SwitchInst::setDefaultDest
void setDefaultDest(BasicBlock *DefaultCase)
Definition: Instructions.h:3391
llvm::LoopPass
Definition: LoopPass.h:27
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2376
llvm::PatternMatch::m_LogicalOr
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ? true : R.
Definition: PatternMatch.h:2506
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:56
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:240
llvm::MemorySSAUpdater::getMemorySSA
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
Definition: MemorySSAUpdater.h:249
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1502
llvm::LoopBase::getUniqueExitBlocks
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:122
llvm::MemorySSAUpdater::updateForClonedLoop
void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
Definition: MemorySSAUpdater.cpp:677
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3091
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:704
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:964
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::MemorySSA::getMemoryAccess
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:721
Cloning.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:572
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::MemoryDef
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:379
SimpleLoopUnswitch.h
UnswitchGuards
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:467
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::ValueToValueMapTy
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
Definition: MemorySSAUpdater.h:52
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", "Simple unswitch loops", false, false) INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass
llvm::LoopBase::getLoopDepth
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:96
llvm::IVConditionInfo::KnownValue
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition: LoopUtils.h:500
unswitchLoop
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool NonTrivial, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch control flow predicated on loop invariant conditions.
Definition: SimpleLoopUnswitch.cpp:2985
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:328
llvm::LoopBase::getBlocksVector
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
Definition: LoopInfo.h:192
llvm::IVConditionInfo::InstToDuplicate
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition: LoopUtils.h:497
llvm::LoopInfoBase::begin
iterator begin() const
Definition: LoopInfo.h:939
llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:7373
llvm::MemorySSA::verifyMemorySSA
void verifyMemorySSA() const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1867
llvm::MDNode
Metadata node.
Definition: Metadata.h:897
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:922
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
getTopMostExitingLoop
static Loop * getTopMostExitingLoop(BasicBlock *ExitBB, LoopInfo &LI)
Definition: SimpleLoopUnswitch.cpp:399
CFG.h
llvm::Instruction::clone
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
Definition: Instruction.cpp:825
llvm::LoopInfoBase::getLoopDepth
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
Definition: LoopInfo.h:971
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::LoopBase::removeChildLoop
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:404
rebuildLoopAfterUnswitch
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops)
Rebuild a loop after unswitching removes some subset of blocks and edges.
Definition: SimpleLoopUnswitch.cpp:1786
llvm::LoopBase::getSubLoopsVector
std::vector< LoopT * > & getSubLoopsVector()
Definition: LoopInfo.h:147
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1080
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1489
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:214
LoopPass.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::MemorySSA::getBlockDefs
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:769
llvm::initializeSimpleLoopUnswitchLegacyPassPass
void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &)
llvm::SwitchInst::CaseHandleImpl::getCaseValue
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Definition: Instructions.h:3245
llvm::LoopBase::isOutermost
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
Definition: LoopInfo.h:168
llvm::LoopBase::reserveBlocks
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
Definition: LoopInfo.h:436
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:223
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:517
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:965
loops
simple loop Simple unswitch loops
Definition: SimpleLoopUnswitch.cpp:3206
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
unswitchTrivialSwitch
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
Definition: SimpleLoopUnswitch.cpp:651
llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:119
llvm::findOptionMDForLoop
MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Definition: LoopInfo.cpp:1045
llvm::IRBuilderBase::CreateCondBr
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:981
llvm::DomTreeNodeBase< BasicBlock >
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::CallBase::setArgOperand
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1346
llvm::LoopInfoBase::removeLoop
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
Definition: LoopInfo.h:991
llvm::ConstantInt::getFalse
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:853
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:92
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:716
llvm::MemorySSAUpdater::updateExitBlocksForClonedLoop
void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Definition: MemorySSAUpdater.cpp:788
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:308
llvm::RF_IgnoreMissingLocals
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:90
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
llvm::SimpleLoopUnswitchPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: SimpleLoopUnswitch.cpp:3043
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
Constant.h
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition: BasicBlockUtils.cpp:495
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
llvm::formDedicatedExitBlocks
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:63
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1617
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:7316
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:846
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:527
llvm::SwitchInstProfUpdateWrapper::addCase
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
Definition: Instructions.cpp:4163
MSSAThreshold
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
llvm::LPPassManager::markLoopAsDeleted
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:113
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:59
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::ICFLoopSafetyInfo
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:132
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2642
GenericDomTree.h
buildClonedLoops
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
Definition: SimpleLoopUnswitch.cpp:1307
llvm::LoopBase::getBlocksSet
SmallPtrSetImpl< const BlockT * > & getBlocksSet()
Return a direct, mutable handle to the blocks set so that we can mutate it efficiently.
Definition: LoopInfo.h:198
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:503
llvm::MemorySSA::BeforeTerminator
@ BeforeTerminator
Definition: MemorySSA.h:791
Casting.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1423
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::IVConditionInfo
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:494
llvm::Loop::isRecursivelyLCSSAForm
bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:468
llvm::hasPartialIVCondition
Optional< IVConditionInfo > hasPartialIVCondition(Loop &L, unsigned MSSAThreshold, MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition: LoopUtils.cpp:1720
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:584
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
llvm::SwitchInstProfUpdateWrapper::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
Definition: Instructions.cpp:4182
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
GuardUtils.h
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:476
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
llvm::TinyPtrVector::empty
bool empty() const
Definition: TinyPtrVector.h:163
llvm::LoopInfoBase::end
iterator end() const
Definition: LoopInfo.h:940
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
MemorySSA.h
Instructions.h
llvm::BasicBlock::moveBefore
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:133
SmallVector.h
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:242
Dominators.h
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
N
#define N
rewritePHINodesForUnswitchedExitBlock
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
Definition: SimpleLoopUnswitch.cpp:261
llvm::SwitchInst::CaseHandle
Definition: Instructions.h:3279
llvm::BasicBlock::size
size_t size() const
Definition: BasicBlock.h:306
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::Loop::isSafeToClone
bool isSafeToClone() const
Return true if the loop body is safe to clone in practice.
Definition: LoopInfo.cpp:485
InstructionSimplify.h
llvm::ICFLoopSafetyInfo::computeLoopSafetyInfo
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:81
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition: LoopAnalysisManager.h:61
llvm::PHINode
Definition: Instructions.h:2600
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:321
unswitchNontrivialInvariants
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, SmallVectorImpl< BasicBlock * > &ExitBlocks, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref< void(bool, bool, ArrayRef< Loop * >)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Definition: SimpleLoopUnswitch.cpp:2013
llvm::SmallVectorImpl< DominatorTree::UpdateType >
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:713
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::LoopBase::isLoopExiting
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:225
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ICFLoopSafetyInfo::isGuaranteedToExecute
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
Definition: MustExecute.cpp:270
llvm::ValueMap::lookup
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:165
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3179
llvm::SplitBlockAndInsertIfThen
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
Definition: BasicBlockUtils.cpp:1418
llvm::cl::desc
Definition: CommandLine.h:414
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3035
raw_ostream.h
llvm::LoopBlocksRPO::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
llvm::TinyPtrVector
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:623
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:814
Value.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::DominatorTreeBase::deleteEdge
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Definition: GenericDomTree.h:602
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3114
llvm::PatternMatch::m_LogicalAnd
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
Definition: PatternMatch.h:2495
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3128
SetVector.h
llvm::DominatorTreeBase::Delete
static constexpr UpdateKind Delete
Definition: GenericDomTree.h:243
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:97
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:907
unswitchAllTrivialConditions
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
Definition: SimpleLoopUnswitch.cpp:946
UnswitchSiblingsToplevelDiv
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
EnableNonTrivialUnswitch
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
EnableUnswitchCostMultiplier
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
buildClonedLoopBlocks
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Build the cloned blocks for an unswitched copy of the given loop.
Definition: SimpleLoopUnswitch.cpp:1064