// LLVM 22.0.0git — llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
// NOTE(review): this text was recovered from a doxygen "documentation of this
// file" page; unedited lines may still carry fused doxygen line numbers.
//===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <numeric>
#include <optional>
#include <utility>
67
68#define DEBUG_TYPE "simple-loop-unswitch"
69
70using namespace llvm;
71using namespace llvm::PatternMatch;
72
73STATISTIC(NumBranches, "Number of branches unswitched");
74STATISTIC(NumSwitches, "Number of switches unswitched");
75STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
76STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
77STATISTIC(NumTrivial, "Number of unswitches that are trivial");
79 NumCostMultiplierSkipped,
80 "Number of unswitch candidates that had their cost multiplier skipped");
81STATISTIC(NumInvariantConditionsInjected,
82 "Number of invariant conditions injected and unswitched");
83
85 "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
86 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
87 "following the configuration passed into the pass."));
88
89static cl::opt<int>
90 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
91 cl::desc("The cost threshold for unswitching a loop."));
92
94 "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
95 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
96 "explosion in nontrivial unswitch."));
98 "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
99 cl::desc("Toplevel siblings divisor for cost multiplier."));
101 "unswitch-parent-blocks-div", cl::init(8), cl::Hidden,
102 cl::desc("Outer loop size divisor for cost multiplier."));
104 "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
105 cl::desc("Number of unswitch candidates that are ignored when calculating "
106 "cost multiplier."));
108 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
109 cl::desc("If enabled, simple loop unswitching will also consider "
110 "llvm.experimental.guard intrinsics as unswitch candidates."));
112 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
113 cl::init(false), cl::Hidden,
114 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
115 "null checks to save time analyzing if we can keep it."));
117 MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
118 cl::desc("Max number of memory uses to explore during "
119 "partial unswitching analysis"),
120 cl::init(100), cl::Hidden);
122 "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
123 cl::desc("If enabled, the freeze instruction will be added to condition "
124 "of loop unswitch to prevent miscompilation."));
125
127 "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
128 cl::desc("Whether we should inject new invariants and unswitch them to "
129 "eliminate some existing (non-invariant) conditions."),
130 cl::init(true));
131
133 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
134 cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
135 "unswitch on them to eliminate branches that are "
136 "not-taken 1/<this option> times or less."),
137 cl::init(16));
138
140namespace {
/// Plain descriptor bundling a conditional branch with the loop-invariant
/// value its condition involves and the branch successor that stays inside
/// the loop. (Presumably used by the invariant-condition-injection logic
/// elsewhere in this file — uses are not visible in this chunk.)
struct CompareDesc {
  BranchInst *Term;       // The conditional branch being described.
  Value *Invariant;       // Loop-invariant operand of the comparison.
  BasicBlock *InLoopSucc; // Successor of Term that remains in the loop.

  CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
      : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
};
149
/// A loop-invariant comparison `Pred LHS RHS` that has not been materialized
/// as IR yet, together with the successor that should remain inside the loop
/// once a branch on it is created.
struct InjectedInvariant {
  ICmpInst::Predicate Pred; // Predicate of the condition to be injected.
  Value *LHS;               // Left operand (loop invariant).
  Value *RHS;               // Right operand (loop invariant).
  BasicBlock *InLoopSucc;   // In-loop successor for the eventual branch.

  InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
                    BasicBlock *InLoopSucc)
      : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
};
160
161struct NonTrivialUnswitchCandidate {
162 Instruction *TI = nullptr;
163 TinyPtrVector<Value *> Invariants;
164 std::optional<InstructionCost> Cost;
165 std::optional<InjectedInvariant> PendingInjection;
166 NonTrivialUnswitchCandidate(
167 Instruction *TI, ArrayRef<Value *> Invariants,
168 std::optional<InstructionCost> Cost = std::nullopt,
169 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
170 : TI(TI), Invariants(Invariants), Cost(Cost),
171 PendingInjection(PendingInjection) {};
172
173 bool hasPendingInjection() const { return PendingInjection.has_value(); }
174};
175} // end anonymous namespace.
176
177// Helper to skip (select x, true, false), which matches both a logical AND and
178// OR and can confuse code that tries to determine if \p Cond is either a
179// logical AND or OR but not both.
181 Value *CondNext;
182 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
183 Cond = CondNext;
184 return Cond;
185}
186
187/// Collect all of the loop invariant input values transitively used by the
188/// homogeneous instruction graph from a given root.
189///
190/// This essentially walks from a root recursively through loop variant operands
191/// which have perform the same logical operation (AND or OR) and finds all
192/// inputs which are loop invariant. For some operations these can be
193/// re-associated and unswitched out of the loop entirely.
196 const LoopInfo &LI) {
197 assert(!L.isLoopInvariant(&Root) &&
198 "Only need to walk the graph if root itself is not invariant.");
199 TinyPtrVector<Value *> Invariants;
200
201 bool IsRootAnd = match(&Root, m_LogicalAnd());
202 bool IsRootOr = match(&Root, m_LogicalOr());
203
204 // Build a worklist and recurse through operators collecting invariants.
207 Worklist.push_back(&Root);
208 Visited.insert(&Root);
209 do {
210 Instruction &I = *Worklist.pop_back_val();
211 for (Value *OpV : I.operand_values()) {
212 // Skip constants as unswitching isn't interesting for them.
213 if (isa<Constant>(OpV))
214 continue;
215
216 // Add it to our result if loop invariant.
217 if (L.isLoopInvariant(OpV)) {
218 Invariants.push_back(OpV);
219 continue;
220 }
221
222 // If not an instruction with the same opcode, nothing we can do.
224
225 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
226 (IsRootOr && match(OpI, m_LogicalOr())))) {
227 // Visit this operand.
228 if (Visited.insert(OpI).second)
229 Worklist.push_back(OpI);
230 }
231 }
232 } while (!Worklist.empty());
233
234 return Invariants;
235}
236
237static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
238 Constant &Replacement) {
239 assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
240
241 // Replace uses of LIC in the loop with the given constant.
242 // We use make_early_inc_range as set invalidates the iterator.
243 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
244 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
245
246 // Replace this use within the loop body.
247 if (UserI && L.contains(UserI))
248 U.set(&Replacement);
249 }
250}
251
252/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
253/// incoming values along this edge.
255 const BasicBlock &ExitingBB,
256 const BasicBlock &ExitBB) {
257 for (const Instruction &I : ExitBB) {
258 auto *PN = dyn_cast<PHINode>(&I);
259 if (!PN)
260 // No more PHIs to check.
261 return true;
262
263 // If the incoming value for this edge isn't loop invariant the unswitch
264 // won't be trivial.
265 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
266 return false;
267 }
268 llvm_unreachable("Basic blocks should never be empty!");
269}
270
271/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
272/// end of \p BB and conditionally branch on the copied condition. We only
273/// branch on a single value.
275 BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
276 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
277 const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
278 IRBuilder<> IRB(&BB);
280
281 SmallVector<Value *> FrozenInvariants;
282 for (Value *Inv : Invariants) {
283 if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
284 Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
285 FrozenInvariants.push_back(Inv);
286 }
287
288 Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
289 : IRB.CreateAnd(FrozenInvariants);
290 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
291 Direction ? &NormalSucc : &UnswitchedSucc);
292}
293
294/// Copy a set of loop invariant values, and conditionally branch on them.
296 BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
297 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
298 MemorySSAUpdater *MSSAU) {
300 for (auto *Val : reverse(ToDuplicate)) {
301 Instruction *Inst = cast<Instruction>(Val);
302 Instruction *NewInst = Inst->clone();
303
304 if (const DebugLoc &DL = Inst->getDebugLoc())
305 mapAtomInstance(DL, VMap);
306
307 NewInst->insertInto(&BB, BB.end());
308 RemapInstruction(NewInst, VMap,
310 VMap[Val] = NewInst;
311
312 if (!MSSAU)
313 continue;
314
315 MemorySSA *MSSA = MSSAU->getMemorySSA();
316 if (auto *MemUse =
318 auto *DefiningAccess = MemUse->getDefiningAccess();
319 // Get the first defining access before the loop.
320 while (L.contains(DefiningAccess->getBlock())) {
321 // If the defining access is a MemoryPhi, get the incoming
322 // value for the pre-header as defining access.
323 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
324 DefiningAccess =
325 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
326 else
327 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
328 }
329 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
330 NewInst->getParent(),
332 }
333 }
334
335 IRBuilder<> IRB(&BB);
337 Value *Cond = VMap[ToDuplicate[0]];
338 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
339 Direction ? &NormalSucc : &UnswitchedSucc);
340}
341
342/// Rewrite the PHI nodes in an unswitched loop exit basic block.
343///
344/// Requires that the loop exit and unswitched basic block are the same, and
345/// that the exiting block was a unique predecessor of that block. Rewrites the
346/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
347/// PHI nodes from the old preheader that now contains the unswitched
348/// terminator.
350 BasicBlock &OldExitingBB,
351 BasicBlock &OldPH) {
352 for (PHINode &PN : UnswitchedBB.phis()) {
353 // When the loop exit is directly unswitched we just need to update the
354 // incoming basic block. We loop to handle weird cases with repeated
355 // incoming blocks, but expect to typically only have one operand here.
356 for (auto i : seq<int>(0, PN.getNumOperands())) {
357 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
358 "Found incoming block different from unique predecessor!");
359 PN.setIncomingBlock(i, &OldPH);
360 }
361 }
362}
363
364/// Rewrite the PHI nodes in the loop exit basic block and the split off
365/// unswitched block.
366///
367/// Because the exit block remains an exit from the loop, this rewrites the
368/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
369/// nodes into the unswitched basic block to select between the value in the
370/// old preheader and the loop exit.
372 BasicBlock &UnswitchedBB,
373 BasicBlock &OldExitingBB,
374 BasicBlock &OldPH,
375 bool FullUnswitch) {
376 assert(&ExitBB != &UnswitchedBB &&
377 "Must have different loop exit and unswitched blocks!");
378 BasicBlock::iterator InsertPt = UnswitchedBB.begin();
379 for (PHINode &PN : ExitBB.phis()) {
380 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
381 PN.getName() + ".split");
382 NewPN->insertBefore(InsertPt);
383
384 // Walk backwards over the old PHI node's inputs to minimize the cost of
385 // removing each one. We have to do this weird loop manually so that we
386 // create the same number of new incoming edges in the new PHI as we expect
387 // each case-based edge to be included in the unswitched switch in some
388 // cases.
389 // FIXME: This is really, really gross. It would be much cleaner if LLVM
390 // allowed us to create a single entry for a predecessor block without
391 // having separate entries for each "edge" even though these edges are
392 // required to produce identical results.
393 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
394 if (PN.getIncomingBlock(i) != &OldExitingBB)
395 continue;
396
397 Value *Incoming = PN.getIncomingValue(i);
398 if (FullUnswitch)
399 // No more edge from the old exiting block to the exit block.
400 PN.removeIncomingValue(i);
401
402 NewPN->addIncoming(Incoming, &OldPH);
403 }
404
405 // Now replace the old PHI with the new one and wire the old one in as an
406 // input to the new one.
407 PN.replaceAllUsesWith(NewPN);
408 NewPN->addIncoming(&PN, &ExitBB);
409 }
410}
411
412/// Hoist the current loop up to the innermost loop containing a remaining exit.
413///
414/// Because we've removed an exit from the loop, we may have changed the set of
415/// loops reachable and need to move the current loop up the loop nest or even
416/// to an entirely separate nest.
417static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
418 DominatorTree &DT, LoopInfo &LI,
419 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
420 // If the loop is already at the top level, we can't hoist it anywhere.
421 Loop *OldParentL = L.getParentLoop();
422 if (!OldParentL)
423 return;
424
426 L.getExitBlocks(Exits);
427 Loop *NewParentL = nullptr;
428 for (auto *ExitBB : Exits)
429 if (Loop *ExitL = LI.getLoopFor(ExitBB))
430 if (!NewParentL || NewParentL->contains(ExitL))
431 NewParentL = ExitL;
432
433 if (NewParentL == OldParentL)
434 return;
435
436 // The new parent loop (if different) should always contain the old one.
437 if (NewParentL)
438 assert(NewParentL->contains(OldParentL) &&
439 "Can only hoist this loop up the nest!");
440
441 // The preheader will need to move with the body of this loop. However,
442 // because it isn't in this loop we also need to update the primary loop map.
443 assert(OldParentL == LI.getLoopFor(&Preheader) &&
444 "Parent loop of this loop should contain this loop's preheader!");
445 LI.changeLoopFor(&Preheader, NewParentL);
446
447 // Remove this loop from its old parent.
448 OldParentL->removeChildLoop(&L);
449
450 // Add the loop either to the new parent or as a top-level loop.
451 if (NewParentL)
452 NewParentL->addChildLoop(&L);
453 else
454 LI.addTopLevelLoop(&L);
455
456 // Remove this loops blocks from the old parent and every other loop up the
457 // nest until reaching the new parent. Also update all of these
458 // no-longer-containing loops to reflect the nesting change.
459 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
460 OldContainingL = OldContainingL->getParentLoop()) {
461 llvm::erase_if(OldContainingL->getBlocksVector(),
462 [&](const BasicBlock *BB) {
463 return BB == &Preheader || L.contains(BB);
464 });
465
466 OldContainingL->getBlocksSet().erase(&Preheader);
467 for (BasicBlock *BB : L.blocks())
468 OldContainingL->getBlocksSet().erase(BB);
469
470 // Because we just hoisted a loop out of this one, we have essentially
471 // created new exit paths from it. That means we need to form LCSSA PHI
472 // nodes for values used in the no-longer-nested loop.
473 formLCSSA(*OldContainingL, DT, &LI, SE);
474
475 // We shouldn't need to form dedicated exits because the exit introduced
476 // here is the (just split by unswitching) preheader. However, after trivial
477 // unswitching it is possible to get new non-dedicated exits out of parent
478 // loop so let's conservatively form dedicated exit blocks and figure out
479 // if we can optimize later.
480 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
481 /*PreserveLCSSA*/ true);
482 }
483}
484
485// Return the top-most loop containing ExitBB and having ExitBB as exiting block
486// or the loop containing ExitBB, if there is no parent loop containing ExitBB
487// as exiting block.
489 const LoopInfo &LI) {
490 Loop *TopMost = LI.getLoopFor(ExitBB);
491 Loop *Current = TopMost;
492 while (Current) {
493 if (Current->isLoopExiting(ExitBB))
494 TopMost = Current;
495 Current = Current->getParentLoop();
496 }
497 return TopMost;
498}
499
500/// Unswitch a trivial branch if the condition is loop invariant.
501///
502/// This routine should only be called when loop code leading to the branch has
503/// been validated as trivial (no side effects). This routine checks if the
504/// condition is invariant and one of the successors is a loop exit. This
505/// allows us to unswitch without duplicating the loop, making it trivial.
506///
507/// If this routine fails to unswitch the branch it returns false.
508///
509/// If the branch can be unswitched, this routine splits the preheader and
510/// hoists the branch above that split. Preserves loop simplified form
511/// (splitting the exit block as necessary). It simplifies the branch within
512/// the loop to an unconditional branch but doesn't remove it entirely. Further
513/// cleanup can be done with some simplifycfg like pass.
514///
515/// If `SE` is not null, it will be updated based on the potential loop SCEVs
516/// invalidated by this.
518 LoopInfo &LI, ScalarEvolution *SE,
519 MemorySSAUpdater *MSSAU) {
520 assert(BI.isConditional() && "Can only unswitch a conditional branch!");
521 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
522
523 // The loop invariant values that we want to unswitch.
524 TinyPtrVector<Value *> Invariants;
525
526 // When true, we're fully unswitching the branch rather than just unswitching
527 // some input conditions to the branch.
528 bool FullUnswitch = false;
529
531 if (L.isLoopInvariant(Cond)) {
532 Invariants.push_back(Cond);
533 FullUnswitch = true;
534 } else {
535 if (auto *CondInst = dyn_cast<Instruction>(Cond))
536 Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
537 if (Invariants.empty()) {
538 LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
539 return false;
540 }
541 }
542
543 // Check that one of the branch's successors exits, and which one.
544 bool ExitDirection = true;
545 int LoopExitSuccIdx = 0;
546 auto *LoopExitBB = BI.getSuccessor(0);
547 if (L.contains(LoopExitBB)) {
548 ExitDirection = false;
549 LoopExitSuccIdx = 1;
550 LoopExitBB = BI.getSuccessor(1);
551 if (L.contains(LoopExitBB)) {
552 LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
553 return false;
554 }
555 }
556 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
557 auto *ParentBB = BI.getParent();
558 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
559 LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
560 return false;
561 }
562
563 // When unswitching only part of the branch's condition, we need the exit
564 // block to be reached directly from the partially unswitched input. This can
565 // be done when the exit block is along the true edge and the branch condition
566 // is a graph of `or` operations, or the exit block is along the false edge
567 // and the condition is a graph of `and` operations.
568 if (!FullUnswitch) {
569 if (ExitDirection ? !match(Cond, m_LogicalOr())
570 : !match(Cond, m_LogicalAnd())) {
571 LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
572 "non-full unswitch!\n");
573 return false;
574 }
575 }
576
577 LLVM_DEBUG({
578 dbgs() << " unswitching trivial invariant conditions for: " << BI
579 << "\n";
580 for (Value *Invariant : Invariants) {
581 dbgs() << " " << *Invariant << " == true";
582 if (Invariant != Invariants.back())
583 dbgs() << " ||";
584 dbgs() << "\n";
585 }
586 });
587
588 // If we have scalar evolutions, we need to invalidate them including this
589 // loop, the loop containing the exit block and the topmost parent loop
590 // exiting via LoopExitBB.
591 if (SE) {
592 if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
593 SE->forgetLoop(ExitL);
594 else
595 // Forget the entire nest as this exits the entire nest.
596 SE->forgetTopmostLoop(&L);
598 }
599
600 if (MSSAU && VerifyMemorySSA)
601 MSSAU->getMemorySSA()->verifyMemorySSA();
602
603 // Split the preheader, so that we know that there is a safe place to insert
604 // the conditional branch. We will change the preheader to have a conditional
605 // branch on LoopCond.
606 BasicBlock *OldPH = L.getLoopPreheader();
607 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
608
609 // Now that we have a place to insert the conditional branch, create a place
610 // to branch to: this is the exit block out of the loop that we are
611 // unswitching. We need to split this if there are other loop predecessors.
612 // Because the loop is in simplified form, *any* other predecessor is enough.
613 BasicBlock *UnswitchedBB;
614 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
615 assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
616 "A branch's parent isn't a predecessor!");
617 UnswitchedBB = LoopExitBB;
618 } else {
619 UnswitchedBB =
620 SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU, "", false);
621 }
622
623 if (MSSAU && VerifyMemorySSA)
624 MSSAU->getMemorySSA()->verifyMemorySSA();
625
626 // Actually move the invariant uses into the unswitched position. If possible,
627 // we do this by moving the instructions, but when doing partial unswitching
628 // we do it by building a new merge of the values in the unswitched position.
629 OldPH->getTerminator()->eraseFromParent();
630 if (FullUnswitch) {
631 // If fully unswitching, we can use the existing branch instruction.
632 // Splice it into the old PH to gate reaching the new preheader and re-point
633 // its successors.
634 BI.moveBefore(*OldPH, OldPH->end());
635 BI.setCondition(Cond);
636 if (MSSAU) {
637 // Temporarily clone the terminator, to make MSSA update cheaper by
638 // separating "insert edge" updates from "remove edge" ones.
639 BI.clone()->insertInto(ParentBB, ParentBB->end());
640 } else {
641 // Create a new unconditional branch that will continue the loop as a new
642 // terminator.
643 Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
644 NewBI->setDebugLoc(BI.getDebugLoc());
645 }
646 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
647 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
648 } else {
649 // Only unswitching a subset of inputs to the condition, so we will need to
650 // build a new branch that merges the invariant inputs.
651 if (ExitDirection)
653 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
654 "condition!");
655 else
657 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
658 " condition!");
660 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
661 FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
662 }
663
664 // Update the dominator tree with the added edge.
665 DT.insertEdge(OldPH, UnswitchedBB);
666
667 // After the dominator tree was updated with the added edge, update MemorySSA
668 // if available.
669 if (MSSAU) {
671 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
672 MSSAU->applyInsertUpdates(Updates, DT);
673 }
674
675 // Finish updating dominator tree and memory ssa for full unswitch.
676 if (FullUnswitch) {
677 if (MSSAU) {
678 Instruction *Term = ParentBB->getTerminator();
679 // Remove the cloned branch instruction and create unconditional branch
680 // now.
681 Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
682 NewBI->setDebugLoc(Term->getDebugLoc());
683 Term->eraseFromParent();
684 MSSAU->removeEdge(ParentBB, LoopExitBB);
685 }
686 DT.deleteEdge(ParentBB, LoopExitBB);
687 }
688
689 if (MSSAU && VerifyMemorySSA)
690 MSSAU->getMemorySSA()->verifyMemorySSA();
691
692 // Rewrite the relevant PHI nodes.
693 if (UnswitchedBB == LoopExitBB)
694 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
695 else
696 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
697 *ParentBB, *OldPH, FullUnswitch);
698
699 // The constant we can replace all of our invariants with inside the loop
700 // body. If any of the invariants have a value other than this the loop won't
701 // be entered.
702 ConstantInt *Replacement = ExitDirection
705
706 // Since this is an i1 condition we can also trivially replace uses of it
707 // within the loop with a constant.
708 for (Value *Invariant : Invariants)
709 replaceLoopInvariantUses(L, Invariant, *Replacement);
710
711 // If this was full unswitching, we may have changed the nesting relationship
712 // for this loop so hoist it to its correct parent if needed.
713 if (FullUnswitch)
714 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
715
716 if (MSSAU && VerifyMemorySSA)
717 MSSAU->getMemorySSA()->verifyMemorySSA();
718
719 LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
720 ++NumTrivial;
721 ++NumBranches;
722 return true;
723}
724
725/// Unswitch a trivial switch if the condition is loop invariant.
726///
727/// This routine should only be called when loop code leading to the switch has
728/// been validated as trivial (no side effects). This routine checks if the
729/// condition is invariant and that at least one of the successors is a loop
730/// exit. This allows us to unswitch without duplicating the loop, making it
731/// trivial.
732///
733/// If this routine fails to unswitch the switch it returns false.
734///
735/// If the switch can be unswitched, this routine splits the preheader and
736/// copies the switch above that split. If the default case is one of the
737/// exiting cases, it copies the non-exiting cases and points them at the new
738/// preheader. If the default case is not exiting, it copies the exiting cases
739/// and points the default at the preheader. It preserves loop simplified form
740/// (splitting the exit blocks as necessary). It simplifies the switch within
741/// the loop by removing now-dead cases. If the default case is one of those
742/// unswitched, it replaces its destination with a new basic block containing
743/// only unreachable. Such basic blocks, while technically loop exits, are not
744/// considered for unswitching so this is a stable transform and the same
745/// switch will not be revisited. If after unswitching there is only a single
746/// in-loop successor, the switch is further simplified to an unconditional
747/// branch. Still more cleanup can be done with some simplifycfg like pass.
748///
749/// If `SE` is not null, it will be updated based on the potential loop SCEVs
750/// invalidated by this.
752 LoopInfo &LI, ScalarEvolution *SE,
753 MemorySSAUpdater *MSSAU) {
754 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
755 Value *LoopCond = SI.getCondition();
756
757 // If this isn't switching on an invariant condition, we can't unswitch it.
758 if (!L.isLoopInvariant(LoopCond))
759 return false;
760
761 auto *ParentBB = SI.getParent();
762
763 // The same check must be used both for the default and the exit cases. We
764 // should never leave edges from the switch instruction to a basic block that
765 // we are unswitching, hence the condition used to determine the default case
766 // needs to also be used to populate ExitCaseIndices, which is then used to
767 // remove cases from the switch.
768 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
769 // BBToCheck is not an exit block if it is inside loop L.
770 if (L.contains(&BBToCheck))
771 return false;
772 // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
773 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
774 return false;
775 // We do not unswitch a block that only has an unreachable statement, as
776 // it's possible this is a previously unswitched block. Only unswitch if
777 // either the terminator is not unreachable, or, if it is, it's not the only
778 // instruction in the block.
779 auto *TI = BBToCheck.getTerminator();
780 bool isUnreachable = isa<UnreachableInst>(TI);
781 return !isUnreachable || &*BBToCheck.getFirstNonPHIOrDbg() != TI;
782 };
783
784 SmallVector<int, 4> ExitCaseIndices;
785 for (auto Case : SI.cases())
786 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
787 ExitCaseIndices.push_back(Case.getCaseIndex());
788 BasicBlock *DefaultExitBB = nullptr;
791 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
792 DefaultExitBB = SI.getDefaultDest();
793 } else if (ExitCaseIndices.empty())
794 return false;
795
796 LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
797
798 if (MSSAU && VerifyMemorySSA)
799 MSSAU->getMemorySSA()->verifyMemorySSA();
800
801 // We may need to invalidate SCEVs for the outermost loop reached by any of
802 // the exits.
803 Loop *OuterL = &L;
804
805 if (DefaultExitBB) {
806 // Check the loop containing this exit.
807 Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI);
808 if (!ExitL || ExitL->contains(OuterL))
809 OuterL = ExitL;
810 }
811 for (unsigned Index : ExitCaseIndices) {
812 auto CaseI = SI.case_begin() + Index;
813 // Compute the outer loop from this exit.
814 Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
815 if (!ExitL || ExitL->contains(OuterL))
816 OuterL = ExitL;
817 }
818
819 if (SE) {
820 if (OuterL)
821 SE->forgetLoop(OuterL);
822 else
823 SE->forgetTopmostLoop(&L);
824 }
825
826 if (DefaultExitBB) {
827 // Clear out the default destination temporarily to allow accurate
828 // predecessor lists to be examined below.
829 SI.setDefaultDest(nullptr);
830 }
831
832 // Store the exit cases into a separate data structure and remove them from
833 // the switch.
834 SmallVector<std::tuple<ConstantInt *, BasicBlock *,
836 4> ExitCases;
837 ExitCases.reserve(ExitCaseIndices.size());
839 // We walk the case indices backwards so that we remove the last case first
840 // and don't disrupt the earlier indices.
841 for (unsigned Index : reverse(ExitCaseIndices)) {
842 auto CaseI = SI.case_begin() + Index;
843 // Save the value of this case.
844 auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
845 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
846 // Delete the unswitched cases.
847 SIW.removeCase(CaseI);
848 }
849
850 // Check if after this all of the remaining cases point at the same
851 // successor.
852 BasicBlock *CommonSuccBB = nullptr;
853 if (SI.getNumCases() > 0 &&
854 all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
855 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
856 }))
857 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
858 if (!DefaultExitBB) {
859 // If we're not unswitching the default, we need it to match any cases to
860 // have a common successor or if we have no cases it is the common
861 // successor.
862 if (SI.getNumCases() == 0)
863 CommonSuccBB = SI.getDefaultDest();
864 else if (SI.getDefaultDest() != CommonSuccBB)
865 CommonSuccBB = nullptr;
866 }
867
868 // Split the preheader, so that we know that there is a safe place to insert
869 // the switch.
870 BasicBlock *OldPH = L.getLoopPreheader();
871 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
872 OldPH->getTerminator()->eraseFromParent();
873
874 // Now add the unswitched switch. This new switch instruction inherits the
875 // debug location of the old switch, because it semantically replace the old
876 // one.
877 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
878 NewSI->setDebugLoc(SIW->getDebugLoc());
879 SwitchInstProfUpdateWrapper NewSIW(*NewSI);
880
881 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
882 // First, we split any exit blocks with remaining in-loop predecessors. Then
883 // we update the PHIs in one of two ways depending on if there was a split.
884 // We walk in reverse so that we split in the same order as the cases
885 // appeared. This is purely for convenience of reading the resulting IR, but
886 // it doesn't cost anything really.
887 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
889 // Handle the default exit if necessary.
890 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
891 // ranges aren't quite powerful enough yet.
892 if (DefaultExitBB) {
893 if (pred_empty(DefaultExitBB)) {
894 UnswitchedExitBBs.insert(DefaultExitBB);
895 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
896 } else {
897 auto *SplitBB =
898 SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);
899 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
900 *ParentBB, *OldPH,
901 /*FullUnswitch*/ true);
902 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
903 }
904 }
905 // Note that we must use a reference in the for loop so that we update the
906 // container.
907 for (auto &ExitCase : reverse(ExitCases)) {
908 // Grab a reference to the exit block in the pair so that we can update it.
909 BasicBlock *ExitBB = std::get<1>(ExitCase);
910
911 // If this case is the last edge into the exit block, we can simply reuse it
912 // as it will no longer be a loop exit. No mapping necessary.
913 if (pred_empty(ExitBB)) {
914 // Only rewrite once.
915 if (UnswitchedExitBBs.insert(ExitBB).second)
916 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
917 continue;
918 }
919
920 // Otherwise we need to split the exit block so that we retain an exit
921 // block from the loop and a target for the unswitched condition.
922 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
923 if (!SplitExitBB) {
924 // If this is the first time we see this, do the split and remember it.
925 SplitExitBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
926 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
927 *ParentBB, *OldPH,
928 /*FullUnswitch*/ true);
929 }
930 // Update the case pair to point to the split block.
931 std::get<1>(ExitCase) = SplitExitBB;
932 }
933
934 // Now add the unswitched cases. We do this in reverse order as we built them
935 // in reverse order.
936 for (auto &ExitCase : reverse(ExitCases)) {
937 ConstantInt *CaseVal = std::get<0>(ExitCase);
938 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
939
940 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
941 }
942
943 // If the default was unswitched, re-point it and add explicit cases for
944 // entering the loop.
945 if (DefaultExitBB) {
946 NewSIW->setDefaultDest(DefaultExitBB);
947 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
948
949 // We removed all the exit cases, so we just copy the cases to the
950 // unswitched switch.
951 for (const auto &Case : SI.cases())
952 NewSIW.addCase(Case.getCaseValue(), NewPH,
954 } else if (DefaultCaseWeight) {
955 // We have to set branch weight of the default case.
956 uint64_t SW = *DefaultCaseWeight;
957 for (const auto &Case : SI.cases()) {
958 auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
959 assert(W &&
960 "case weight must be defined as default case weight is defined");
961 SW += *W;
962 }
963 NewSIW.setSuccessorWeight(0, SW);
964 }
965
966 // If we ended up with a common successor for every path through the switch
967 // after unswitching, rewrite it to an unconditional branch to make it easy
968 // to recognize. Otherwise we potentially have to recognize the default case
969 // pointing at unreachable and other complexity.
970 if (CommonSuccBB) {
971 BasicBlock *BB = SI.getParent();
972 // We may have had multiple edges to this common successor block, so remove
973 // them as predecessors. We skip the first one, either the default or the
974 // actual first case.
975 bool SkippedFirst = DefaultExitBB == nullptr;
976 for (auto Case : SI.cases()) {
977 assert(Case.getCaseSuccessor() == CommonSuccBB &&
978 "Non-common successor!");
979 (void)Case;
980 if (!SkippedFirst) {
981 SkippedFirst = true;
982 continue;
983 }
984 CommonSuccBB->removePredecessor(BB,
985 /*KeepOneInputPHIs*/ true);
986 }
987 // Now nuke the switch and replace it with a direct branch.
988 Instruction *NewBI = BranchInst::Create(CommonSuccBB, BB);
989 NewBI->setDebugLoc(SIW->getDebugLoc());
990 SIW.eraseFromParent();
991 } else if (DefaultExitBB) {
992 assert(SI.getNumCases() > 0 &&
993 "If we had no cases we'd have a common successor!");
994 // Move the last case to the default successor. This is valid as if the
995 // default got unswitched it cannot be reached. This has the advantage of
996 // being simple and keeping the number of edges from this switch to
997 // successors the same, and avoiding any PHI update complexity.
998 auto LastCaseI = std::prev(SI.case_end());
999
1000 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
1002 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
1003 SIW.removeCase(LastCaseI);
1004 }
1005
1006 // Walk the unswitched exit blocks and the unswitched split blocks and update
1007 // the dominator tree based on the CFG edits. While we are walking unordered
1008 // containers here, the API for applyUpdates takes an unordered list of
1009 // updates and requires them to not contain duplicates.
1011 for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
1012 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
1013 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
1014 }
1015 for (auto SplitUnswitchedPair : SplitExitBBMap) {
1016 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
1017 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
1018 }
1019
1020 if (MSSAU) {
1021 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
1022 if (VerifyMemorySSA)
1023 MSSAU->getMemorySSA()->verifyMemorySSA();
1024 } else {
1025 DT.applyUpdates(DTUpdates);
1026 }
1027
1028 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1029
1030 // We may have changed the nesting relationship for this loop so hoist it to
1031 // its correct parent if needed.
1032 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
1033
1034 if (MSSAU && VerifyMemorySSA)
1035 MSSAU->getMemorySSA()->verifyMemorySSA();
1036
1037 ++NumTrivial;
1038 ++NumSwitches;
1039 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
1040 return true;
1041}
1042
1043/// This routine scans the loop to find a branch or switch which occurs before
1044/// any side effects occur. These can potentially be unswitched without
1045/// duplicating the loop. If a branch or switch is successfully unswitched the
1046/// scanning continues to see if subsequent branches or switches have become
1047/// trivial. Once all trivial candidates have been unswitched, this routine
1048/// returns.
1049///
1050/// The return value indicates whether anything was unswitched (and therefore
1051/// changed).
1052///
1053/// If `SE` is not null, it will be updated based on the potential loop SCEVs
1054/// invalidated by this.
// NOTE(review): the opening line of this function's signature (original line
// 1055, carrying the static return type and the function name) was lost in
// extraction; as shown, the definition begins mid-parameter-list. Confirm
// against upstream SimpleLoopUnswitch.cpp.
1056 LoopInfo &LI, ScalarEvolution *SE,
1057 MemorySSAUpdater *MSSAU) {
1058 bool Changed = false;
1059
1060 // If loop header has only one reachable successor we should keep looking for
1061 // trivial condition candidates in the successor as well. An alternative is
1062 // to constant fold conditions and merge successors into loop header (then we
1063 // only need to check header's terminator). The reason for not doing this in
1064 // LoopUnswitch pass is that it could potentially break LoopPassManager's
1065 // invariants. Folding dead branches could either eliminate the current loop
1066 // or make other loops unreachable. LCSSA form might also not be preserved
1067 // after deleting branches. The following code keeps traversing loop header's
1068 // successors until it finds the trivial condition candidate (condition that
1069 // is not a constant). Since unswitching generates branches with constant
1070 // conditions, this scenario could be very common in practice.
1071 BasicBlock *CurrentBB = L.getHeader();
// NOTE(review): the declaration of `Visited` (original line 1072) is missing
// from this extraction; judging from its uses below it is a small set of
// BasicBlock pointers used to stop the walk on revisits -- confirm upstream.
1073 Visited.insert(CurrentBB);
1074 do {
1075 // Check if there are any side-effecting instructions (e.g. stores, calls,
1076 // volatile loads) in the part of the loop that the code *would* execute
1077 // without unswitching.
// With MemorySSA available we can bail out early: a block whose def list is
// anything other than a single MemoryPhi entry contains a memory-writing
// instruction, so scanning past it is not safe.
1078 if (MSSAU) // Possible early exit with MSSA
1079 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1080 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1081 return Changed;
1082 if (llvm::any_of(*CurrentBB,
1083 [](Instruction &I) { return I.mayHaveSideEffects(); }))
1084 return Changed;
1085
1086 Instruction *CurrentTerm = CurrentBB->getTerminator();
1087
1088 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1089 // Don't bother trying to unswitch past a switch with a constant
1090 // condition. This should be removed prior to running this pass by
1091 // simplifycfg.
1092 if (isa<Constant>(SI->getCondition()))
1093 return Changed;
1094
1095 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1096 // Couldn't unswitch this one so we're done.
1097 return Changed;
1098
1099 // Mark that we managed to unswitch something.
1100 Changed = true;
1101
1102 // If unswitching turned the terminator into an unconditional branch then
1103 // we can continue. The unswitching logic specifically works to fold any
1104 // cases it can into an unconditional branch to make it easier to
1105 // recognize here.
1106 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1107 if (!BI || BI->isConditional())
1108 return Changed;
1109
1110 CurrentBB = BI->getSuccessor(0);
1111 continue;
1112 }
1113
1114 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1115 if (!BI)
1116 // We do not understand other terminator instructions.
1117 return Changed;
1118
1119 // Don't bother trying to unswitch past an unconditional branch or a branch
1120 // with a constant value. These should be removed by simplifycfg prior to
1121 // running this pass.
1122 if (!BI->isConditional() ||
1123 isa<Constant>(skipTrivialSelect(BI->getCondition())))
1124 return Changed;
1125
1126 // Found a trivial condition candidate: non-foldable conditional branch. If
1127 // we fail to unswitch this, we can't do anything else that is trivial.
1128 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1129 return Changed;
1130
1131 // Mark that we managed to unswitch something.
1132 Changed = true;
1133
1134 // If we only unswitched some of the conditions feeding the branch, we won't
1135 // have collapsed it to a single successor.
1136 BI = cast<BranchInst>(CurrentBB->getTerminator());
1137 if (BI->isConditional())
1138 return Changed;
1139
1140 // Follow the newly unconditional branch into its successor.
1141 CurrentBB = BI->getSuccessor(0);
1142
1143 // When continuing, if we exit the loop or reach a previous visited block,
1144 // then we can not reach any trivial condition candidates (unfoldable
1145 // branch instructions or switch instructions) and no unswitch can happen.
1146 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1147
1148 return Changed;
1149}
1150
1151/// Build the cloned blocks for an unswitched copy of the given loop.
1152///
1153/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1154/// after the split block (`SplitBB`) that will be used to select between the
1155/// cloned and original loop.
1156///
1157/// This routine handles cloning all of the necessary loop blocks and exit
1158/// blocks including rewriting their instructions and the relevant PHI nodes.
1159/// Any loop blocks or exit blocks which are dominated by a different successor
1160/// than the one for this clone of the loop blocks can be trivially skipped. We
1161/// use the `DominatingSucc` map to determine whether a block satisfies that
1162/// property with a simple map lookup.
1163///
1164/// It also correctly creates the unconditional branch in the cloned
1165/// unswitched parent block to only point at the unswitched successor.
1166///
1167/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1168/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1169/// the cloned blocks (and their loops) are left without full `LoopInfo`
1170/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1171/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1172/// instead the caller must recompute an accurate DT. It *does* correctly
1173/// update the `AssumptionCache` provided in `AC`.
// NOTE(review): several lines of this function were lost in extraction: the
// signature's opening line (original 1174, the static return type and name),
// two parameter lines (originals 1178 and 1180 -- evidently the
// `DominatingSucc` map and the `DTUpdates`/`AC` parameters referenced below),
// and the declaration of the `NewBlocks` vector (original 1183). Confirm
// against upstream SimpleLoopUnswitch.cpp before relying on this text.
1175 Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1176 ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1177 BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1179 ValueToValueMapTy &VMap,
1181 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
1182 ScalarEvolution *SE) {
1184 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1185
1186 // We will need to clone a bunch of blocks, wrap up the clone operation in
1187 // a helper.
1188 auto CloneBlock = [&](BasicBlock *OldBB) {
1189 // Clone the basic block and insert it before the new preheader.
1190 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1191 NewBB->moveBefore(LoopPH);
1192
1193 // Record this block and the mapping.
1194 NewBlocks.push_back(NewBB);
1195 VMap[OldBB] = NewBB;
1196
1197 return NewBB;
1198 };
1199
1200 // We skip cloning blocks when they have a dominating succ that is not the
1201 // succ we are cloning for.
1202 auto SkipBlock = [&](BasicBlock *BB) {
1203 auto It = DominatingSucc.find(BB);
1204 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1205 };
1206
1207 // First, clone the preheader.
1208 auto *ClonedPH = CloneBlock(LoopPH);
1209
1210 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1211 for (auto *LoopBB : L.blocks())
1212 if (!SkipBlock(LoopBB))
1213 CloneBlock(LoopBB);
1214
1215 // Split all the loop exit edges so that when we clone the exit blocks, if
1216 // any of the exit blocks are *also* a preheader for some other loop, we
1217 // don't create multiple predecessors entering the loop header.
1218 for (auto *ExitBB : ExitBlocks) {
1219 if (SkipBlock(ExitBB))
1220 continue;
1221
1222 // When we are going to clone an exit, we don't need to clone all the
1223 // instructions in the exit block and we want to ensure we have an easy
1224 // place to merge the CFG, so split the exit first. This is always safe to
1225 // do because there cannot be any non-loop predecessors of a loop exit in
1226 // loop simplified form.
1227 auto *MergeBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1228
1229 // Rearrange the names to make it easier to write test cases by having the
1230 // exit block carry the suffix rather than the merge block carrying the
1231 // suffix.
1232 MergeBB->takeName(ExitBB);
1233 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1234
1235 // Now clone the original exit block.
1236 auto *ClonedExitBB = CloneBlock(ExitBB);
1237 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1238 "Exit block should have been split to have one successor!");
1239 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1240 "Cloned exit block has the wrong successor!");
1241
1242 // Remap any cloned instructions and create a merge phi node for them.
1243 for (auto ZippedInsts : llvm::zip_first(
1244 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1245 llvm::make_range(ClonedExitBB->begin(),
1246 std::prev(ClonedExitBB->end())))) {
1247 Instruction &I = std::get<0>(ZippedInsts);
1248 Instruction &ClonedI = std::get<1>(ZippedInsts);
1249
1250 // The only instructions in the exit block should be PHI nodes and
1251 // potentially a landing pad.
// NOTE(review): the asserted predicate (original line 1253) is missing from
// this extraction; only the message string below survives.
1252 assert(
1254 "Bad instruction in exit block!");
1255 // We should have a value map between the instruction and its clone.
1256 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1257
1258 // Forget SCEVs based on exit phis in case SCEV looked through the phi.
1259 if (SE)
1260 if (auto *PN = dyn_cast<PHINode>(&I))
// NOTE(review): the body of this `if` (original line 1261, the call that
// forgets SCEVs derived from the exit phi) is missing from this extraction.
1262
1263 BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt();
1264
1265 auto *MergePN =
1266 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi");
1267 MergePN->insertBefore(InsertPt);
1268 MergePN->setDebugLoc(InsertPt->getDebugLoc());
1269 I.replaceAllUsesWith(MergePN);
1270 MergePN->addIncoming(&I, ExitBB);
1271 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1272 }
1273 }
1274
1275 // Rewrite the instructions in the cloned blocks to refer to the instructions
1276 // in the cloned blocks. We have to do this as a second pass so that we have
1277 // everything available. Also, we have inserted new instructions which may
1278 // include assume intrinsics, so we update the assumption cache while
1279 // processing this.
1280 Module *M = ClonedPH->getParent()->getParent();
1281 for (auto *ClonedBB : NewBlocks)
1282 for (Instruction &I : *ClonedBB) {
// NOTE(review): the remap-flag argument lines (originals 1284 and 1286) and
// the assumption-cache registration statement (original 1288) are missing
// from this extraction; the calls below are truncated as shown.
1283 RemapDbgRecordRange(M, I.getDbgRecordRange(), VMap,
1285 RemapInstruction(&I, VMap,
1287 if (auto *II = dyn_cast<AssumeInst>(&I))
1289 }
1290
1291 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1292 // have spurious incoming values.
1293 for (auto *LoopBB : L.blocks())
1294 if (SkipBlock(LoopBB))
1295 for (auto *SuccBB : successors(LoopBB))
1296 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1297 for (PHINode &PN : ClonedSuccBB->phis())
1298 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1299
1300 // Remove the cloned parent as a predecessor of any successor we ended up
1301 // cloning other than the unswitched one.
1302 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1303 for (auto *SuccBB : successors(ParentBB)) {
1304 if (SuccBB == UnswitchedSuccBB)
1305 continue;
1306
1307 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1308 if (!ClonedSuccBB)
1309 continue;
1310
// Keep one PHI input so the PHIs stay structurally valid even if this was
// the last predecessor edge.
1311 ClonedSuccBB->removePredecessor(ClonedParentBB,
1312 /*KeepOneInputPHIs*/ true);
1313 }
1314
1315 // Replace the cloned branch with an unconditional branch to the cloned
1316 // unswitched successor.
1317 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1318 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1319 // Trivial Simplification. If Terminator is a conditional branch and
1320 // condition becomes dead - erase it.
1321 Value *ClonedConditionToErase = nullptr;
1322 if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1323 ClonedConditionToErase = BI->getCondition();
1324 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1325 ClonedConditionToErase = SI->getCondition();
1326
1327 Instruction *BI = BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1328 BI->setDebugLoc(ClonedTerminator->getDebugLoc());
1329 ClonedTerminator->eraseFromParent();
1330
1331 if (ClonedConditionToErase)
1332 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1333 MSSAU);
1334
1335 // If there are duplicate entries in the PHI nodes because of multiple edges
1336 // to the unswitched successor, we need to nuke all but one as we replaced it
1337 // with a direct branch.
1338 for (PHINode &PN : ClonedSuccBB->phis()) {
1339 bool Found = false;
1340 // Loop over the incoming operands backwards so we can easily delete as we
1341 // go without invalidating the index.
1342 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1343 if (PN.getIncomingBlock(i) != ClonedParentBB)
1344 continue;
1345 if (!Found) {
1346 Found = true;
1347 continue;
1348 }
1349 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1350 }
1351 }
1352
1353 // Record the domtree updates for the new blocks.
// NOTE(review): the declaration of `SuccSet` (original line 1354) is missing
// from this extraction; from its uses it is a small set used to de-duplicate
// successor edges per cloned block, cleared between iterations.
1355 for (auto *ClonedBB : NewBlocks) {
1356 for (auto *SuccBB : successors(ClonedBB))
1357 if (SuccSet.insert(SuccBB).second)
1358 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1359 SuccSet.clear();
1360 }
1361
1362 return ClonedPH;
1363}
1364
1365/// Recursively clone the specified loop and all of its children.
1366///
1367/// The target parent loop for the clone should be provided, or can be null if
1368/// the clone is a top-level loop. While cloning, all the blocks are mapped
1369/// with the provided value map. The entire original loop must be present in
1370/// the value map. The cloned loop is returned.
1371static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1372 const ValueToValueMapTy &VMap, LoopInfo &LI) {
1373 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1374 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1375 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1376 for (auto *BB : OrigL.blocks()) {
1377 auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1378 ClonedL.addBlockEntry(ClonedBB);
1379 if (LI.getLoopFor(BB) == &OrigL)
1380 LI.changeLoopFor(ClonedBB, &ClonedL);
1381 }
1382 };
1383
1384 // We specially handle the first loop because it may get cloned into
1385 // a different parent and because we most commonly are cloning leaf loops.
1386 Loop *ClonedRootL = LI.AllocateLoop();
1387 if (RootParentL)
1388 RootParentL->addChildLoop(ClonedRootL);
1389 else
1390 LI.addTopLevelLoop(ClonedRootL);
1391 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1392
1393 if (OrigRootL.isInnermost())
1394 return ClonedRootL;
1395
1396 // If we have a nest, we can quickly clone the entire loop nest using an
1397 // iterative approach because it is a tree. We keep the cloned parent in the
1398 // data structure to avoid repeatedly querying through a map to find it.
1399 SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1400 // Build up the loops to clone in reverse order as we'll clone them from the
1401 // back.
1402 for (Loop *ChildL : llvm::reverse(OrigRootL))
1403 LoopsToClone.push_back({ClonedRootL, ChildL});
1404 do {
1405 Loop *ClonedParentL, *L;
1406 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1407 Loop *ClonedL = LI.AllocateLoop();
1408 ClonedParentL->addChildLoop(ClonedL);
1409 AddClonedBlocksToLoop(*L, *ClonedL);
1410 for (Loop *ChildL : llvm::reverse(*L))
1411 LoopsToClone.push_back({ClonedL, ChildL});
1412 } while (!LoopsToClone.empty());
1413
1414 return ClonedRootL;
1415}
1416
1417/// Build the cloned loops of an original loop from unswitching.
1418///
1419/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1420/// operation. We need to re-verify that there even is a loop (as the backedge
1421/// may not have been cloned), and even if there are remaining backedges the
1422/// backedge set may be different. However, we know that each child loop is
1423/// undisturbed, we only need to find where to place each child loop within
1424/// either any parent loop or within a cloned version of the original loop.
1425///
1426/// Because child loops may end up cloned outside of any cloned version of the
1427/// original loop, multiple cloned sibling loops may be created. All of them
1428/// are returned so that the newly introduced loop nest roots can be
1429/// identified.
1430static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1431 const ValueToValueMapTy &VMap, LoopInfo &LI,
1432 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1433 Loop *ClonedL = nullptr;
1434
1435 auto *OrigPH = OrigL.getLoopPreheader();
1436 auto *OrigHeader = OrigL.getHeader();
1437
1438 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1439 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1440
1441 // We need to know the loops of the cloned exit blocks to even compute the
1442 // accurate parent loop. If we only clone exits to some parent of the
1443 // original parent, we want to clone into that outer loop. We also keep track
1444 // of the loops that our cloned exit blocks participate in.
1445 Loop *ParentL = nullptr;
1446 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1448 ClonedExitsInLoops.reserve(ExitBlocks.size());
1449 for (auto *ExitBB : ExitBlocks)
1450 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1451 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1452 ExitLoopMap[ClonedExitBB] = ExitL;
1453 ClonedExitsInLoops.push_back(ClonedExitBB);
1454 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1455 ParentL = ExitL;
1456 }
1457 assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1458 ParentL->contains(OrigL.getParentLoop())) &&
1459 "The computed parent loop should always contain (or be) the parent of "
1460 "the original loop.");
1461
1462 // We build the set of blocks dominated by the cloned header from the set of
1463 // cloned blocks out of the original loop. While not all of these will
1464 // necessarily be in the cloned loop, it is enough to establish that they
1465 // aren't in unreachable cycles, etc.
1466 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1467 for (auto *BB : OrigL.blocks())
1468 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1469 ClonedLoopBlocks.insert(ClonedBB);
1470
1471 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1472 // skipped cloning some region of this loop which can in turn skip some of
1473 // the backedges so we have to rebuild the blocks in the loop based on the
1474 // backedges that remain after cloning.
1476 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1477 for (auto *Pred : predecessors(ClonedHeader)) {
1478 // The only possible non-loop header predecessor is the preheader because
1479 // we know we cloned the loop in simplified form.
1480 if (Pred == ClonedPH)
1481 continue;
1482
1483 // Because the loop was in simplified form, the only non-loop predecessor
1484 // should be the preheader.
1485 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1486 "header other than the preheader "
1487 "that is not part of the loop!");
1488
1489 // Insert this block into the loop set and on the first visit (and if it
1490 // isn't the header we're currently walking) put it into the worklist to
1491 // recurse through.
1492 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1493 Worklist.push_back(Pred);
1494 }
1495
1496 // If we had any backedges then there *is* a cloned loop. Put the header into
1497 // the loop set and then walk the worklist backwards to find all the blocks
1498 // that remain within the loop after cloning.
1499 if (!BlocksInClonedLoop.empty()) {
1500 BlocksInClonedLoop.insert(ClonedHeader);
1501
1502 while (!Worklist.empty()) {
1503 BasicBlock *BB = Worklist.pop_back_val();
1504 assert(BlocksInClonedLoop.count(BB) &&
1505 "Didn't put block into the loop set!");
1506
1507 // Insert any predecessors that are in the possible set into the cloned
1508 // set, and if the insert is successful, add them to the worklist. Note
1509 // that we filter on the blocks that are definitely reachable via the
1510 // backedge to the loop header so we may prune out dead code within the
1511 // cloned loop.
1512 for (auto *Pred : predecessors(BB))
1513 if (ClonedLoopBlocks.count(Pred) &&
1514 BlocksInClonedLoop.insert(Pred).second)
1515 Worklist.push_back(Pred);
1516 }
1517
1518 ClonedL = LI.AllocateLoop();
1519 if (ParentL) {
1520 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1521 ParentL->addChildLoop(ClonedL);
1522 } else {
1523 LI.addTopLevelLoop(ClonedL);
1524 }
1525 NonChildClonedLoops.push_back(ClonedL);
1526
1527 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1528 // We don't want to just add the cloned loop blocks based on how we
1529 // discovered them. The original order of blocks was carefully built in
1530 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1531 // that logic, we just re-walk the original blocks (and those of the child
1532 // loops) and filter them as we add them into the cloned loop.
1533 for (auto *BB : OrigL.blocks()) {
1534 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1535 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1536 continue;
1537
1538 // Directly add the blocks that are only in this loop.
1539 if (LI.getLoopFor(BB) == &OrigL) {
1540 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1541 continue;
1542 }
1543
1544 // We want to manually add it to this loop and parents.
1545 // Registering it with LoopInfo will happen when we clone the top
1546 // loop for this block.
1547 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1548 PL->addBlockEntry(ClonedBB);
1549 }
1550
1551 // Now add each child loop whose header remains within the cloned loop. All
1552 // of the blocks within the loop must satisfy the same constraints as the
1553 // header so once we pass the header checks we can just clone the entire
1554 // child loop nest.
1555 for (Loop *ChildL : OrigL) {
1556 auto *ClonedChildHeader =
1557 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1558 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1559 continue;
1560
1561#ifndef NDEBUG
1562 // We should never have a cloned child loop header but fail to have
1563 // all of the blocks for that child loop.
1564 for (auto *ChildLoopBB : ChildL->blocks())
1565 assert(BlocksInClonedLoop.count(
1566 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1567 "Child cloned loop has a header within the cloned outer "
1568 "loop but not all of its blocks!");
1569#endif
1570
1571 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1572 }
1573 }
1574
1575 // Now that we've handled all the components of the original loop that were
1576 // cloned into a new loop, we still need to handle anything from the original
1577 // loop that wasn't in a cloned loop.
1578
1579 // Figure out what blocks are left to place within any loop nest containing
1580 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1581 // them.
1582 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1583 if (BlocksInClonedLoop.empty())
1584 UnloopedBlockSet.insert(ClonedPH);
1585 for (auto *ClonedBB : ClonedLoopBlocks)
1586 if (!BlocksInClonedLoop.count(ClonedBB))
1587 UnloopedBlockSet.insert(ClonedBB);
1588
1589 // Copy the cloned exits and sort them in ascending loop depth, we'll work
1590 // backwards across these to process them inside out. The order shouldn't
1591 // matter as we're just trying to build up the map from inside-out; we use
1592 // the map in a more stably ordered way below.
1593 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1594 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1595 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1596 ExitLoopMap.lookup(RHS)->getLoopDepth();
1597 });
1598
1599 // Populate the existing ExitLoopMap with everything reachable from each
1600 // exit, starting from the inner most exit.
1601 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1602 assert(Worklist.empty() && "Didn't clear worklist!");
1603
1604 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1605 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1606
1607 // Walk the CFG back until we hit the cloned PH adding everything reachable
1608 // and in the unlooped set to this exit block's loop.
1609 Worklist.push_back(ExitBB);
1610 do {
1611 BasicBlock *BB = Worklist.pop_back_val();
1612 // We can stop recursing at the cloned preheader (if we get there).
1613 if (BB == ClonedPH)
1614 continue;
1615
1616 for (BasicBlock *PredBB : predecessors(BB)) {
1617 // If this pred has already been moved to our set or is part of some
1618 // (inner) loop, no update needed.
1619 if (!UnloopedBlockSet.erase(PredBB)) {
1620 assert(
1621 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1622 "Predecessor not mapped to a loop!");
1623 continue;
1624 }
1625
1626 // We just insert into the loop set here. We'll add these blocks to the
1627 // exit loop after we build up the set in an order that doesn't rely on
1628 // predecessor order (which in turn relies on use list order).
1629 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1630 (void)Inserted;
1631 assert(Inserted && "Should only visit an unlooped block once!");
1632
1633 // And recurse through to its predecessors.
1634 Worklist.push_back(PredBB);
1635 }
1636 } while (!Worklist.empty());
1637 }
1638
1639 // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1640 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1641 // in their original order adding them to the correct loop.
1642
1643 // We need a stable insertion order. We use the order of the original loop
1644 // order and map into the correct parent loop.
1645 for (auto *BB : llvm::concat<BasicBlock *const>(
1646 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1647 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1648 OuterL->addBasicBlockToLoop(BB, LI);
1649
1650#ifndef NDEBUG
1651 for (auto &BBAndL : ExitLoopMap) {
1652 auto *BB = BBAndL.first;
1653 auto *OuterL = BBAndL.second;
1654 assert(LI.getLoopFor(BB) == OuterL &&
1655 "Failed to put all blocks into outer loops!");
1656 }
1657#endif
1658
1659 // Now that all the blocks are placed into the correct containing loop in the
1660 // absence of child loops, find all the potentially cloned child loops and
1661 // clone them into whatever outer loop we placed their header into.
1662 for (Loop *ChildL : OrigL) {
1663 auto *ClonedChildHeader =
1664 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1665 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1666 continue;
1667
1668#ifndef NDEBUG
1669 for (auto *ChildLoopBB : ChildL->blocks())
1670 assert(VMap.count(ChildLoopBB) &&
1671 "Cloned a child loop header but not all of that loops blocks!");
1672#endif
1673
1674 NonChildClonedLoops.push_back(cloneLoopNest(
1675 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1676 }
1677}
1678
/// Remove from the IR any cloned loop/exit blocks that ended up unreachable
/// after unswitching.
///
/// For every original loop block and exit block, each per-successor value map
/// in VMaps is consulted for a clone; clones the dominator tree reports as
/// unreachable from the function entry are unhooked from their successors'
/// PHI nodes, stripped from MemorySSA (when MSSAU is non-null), have their
/// references dropped (dead blocks can reference each other cyclically), and
/// are finally erased from the function.
///
/// NOTE(review): this listing was extracted from a rendered source view and
/// two lines are missing here: the signature line carrying the function name
/// and leading parameters (upstream line 1680) and, presumably, the
/// declaration of the DeadBlocks vector populated below (upstream line 1684).
/// Confirm against the upstream file before treating this text as compilable.
1679static void
1681 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1682 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1683 // Find all the dead clones, and remove them from their successors.
1685 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1686 for (const auto &VMap : VMaps)
1687 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1688 if (!DT.isReachableFromEntry(ClonedBB)) {
1689 for (BasicBlock *SuccBB : successors(ClonedBB))
1690 SuccBB->removePredecessor(ClonedBB);
1691 DeadBlocks.push_back(ClonedBB);
1692 }
1693
1694 // Remove all MemorySSA in the dead blocks
1695 if (MSSAU) {
1696 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1697 DeadBlocks.end());
1698 MSSAU->removeBlocks(DeadBlockSet);
1699 }
1700
1701 // Drop any remaining references to break cycles.
1702 for (BasicBlock *BB : DeadBlocks)
1703 BB->dropAllReferences();
1704 // Erase them from the IR.
1705 for (BasicBlock *BB : DeadBlocks)
1706 BB->eraseFromParent();
1707}
1708
1711 DominatorTree &DT, LoopInfo &LI,
1712 MemorySSAUpdater *MSSAU,
1713 ScalarEvolution *SE,
1714 LPMUpdater &LoopUpdater) {
 // Deletes every block of this loop nest that has become unreachable,
 // updating LoopInfo, MemorySSA, and the pass-manager updater as it goes.
 // The dominator tree is expected to have been updated already (asserted
 // below via !DT.getNode(BB)).
 //
 // NOTE(review): this extracted listing is missing the opening signature
 // lines (upstream 1709-1710, carrying the function name and the leading
 // parameters — presumably `Loop &L` and the mutable ExitBlocks list
 // filtered below) and the declaration of DeadBlockSet (upstream 1717).
 // Confirm against the upstream file.
1715 // Find all the dead blocks tied to this loop, and remove them from their
1716 // successors.
1718
1719 // Start with loop/exit blocks and get a transitive closure of reachable dead
1720 // blocks.
1721 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1722 ExitBlocks.end());
1723 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1724 while (!DeathCandidates.empty()) {
1725 auto *BB = DeathCandidates.pop_back_val();
1726 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1727 for (BasicBlock *SuccBB : successors(BB)) {
1728 SuccBB->removePredecessor(BB);
1729 DeathCandidates.push_back(SuccBB);
1730 }
1731 DeadBlockSet.insert(BB);
1732 }
1733 }
1734
1735 // Remove all MemorySSA in the dead blocks
1736 if (MSSAU)
1737 MSSAU->removeBlocks(DeadBlockSet);
1738
1739 // Filter out the dead blocks from the exit blocks list so that it can be
1740 // used in the caller.
1741 llvm::erase_if(ExitBlocks,
1742 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1743
1744 // Walk from this loop up through its parents removing all of the dead blocks.
1745 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1746 for (auto *BB : DeadBlockSet)
1747 ParentL->getBlocksSet().erase(BB);
1748 llvm::erase_if(ParentL->getBlocksVector(),
1749 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1750 }
1751
1752 // Now delete the dead child loops. This raw delete will clear them
1753 // recursively.
1754 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1755 if (!DeadBlockSet.count(ChildL->getHeader()))
1756 return false;
1757
1758 assert(llvm::all_of(ChildL->blocks(),
1759 [&](BasicBlock *ChildBB) {
1760 return DeadBlockSet.count(ChildBB);
1761 }) &&
1762 "If the child loop header is dead all blocks in the child loop must "
1763 "be dead as well!");
1764 LoopUpdater.markLoopAsDeleted(*ChildL, ChildL->getName());
1765 if (SE)
 // NOTE(review): the statement guarded by `if (SE)` (upstream line 1766,
 // presumably a ScalarEvolution invalidation call) is missing from this
 // listing; as rendered, the guard would wrongly apply to LI.destroy
 // below. Restore from upstream before treating this text as compilable.
1767 LI.destroy(ChildL);
1768 return true;
1769 });
1770
1771 // Remove the loop mappings for the dead blocks and drop all the references
1772 // from these blocks to others to handle cyclic references as we start
1773 // deleting the blocks themselves.
1774 for (auto *BB : DeadBlockSet) {
1775 // Check that the dominator tree has already been updated.
1776 assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1777 LI.changeLoopFor(BB, nullptr);
1778 // Drop all uses of the instructions to make sure we won't have dangling
1779 // uses in other blocks.
1780 for (auto &I : *BB)
1781 if (!I.use_empty())
1782 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1783 BB->dropAllReferences();
1784 }
1785
1786 // Actually delete the blocks now that they've been fully unhooked from the
1787 // IR.
1788 for (auto *BB : DeadBlockSet)
1789 BB->eraseFromParent();
1790}
1791
1792/// Recompute the set of blocks in a loop after unswitching.
1793///
1794/// This walks from the original headers predecessors to rebuild the loop. We
1795/// take advantage of the fact that new blocks can't have been added, and so we
1796/// filter by the original loop's blocks. This also handles potentially
1797/// unreachable code that we don't want to explore but might be found examining
1798/// the predecessors of the header.
1799///
1800/// If the original loop is no longer a loop, this will return an empty set. If
1801/// it remains a loop, all the blocks within it will be added to the set
1802/// (including those blocks in inner loops).
1804 LoopInfo &LI) {
 // NOTE(review): three lines are missing from this extracted listing: the
 // signature line carrying the function name and the `Loop &L` parameter
 // (upstream 1803), the declaration of the LoopBlockSet result set (upstream
 // 1805), and the declaration of the Worklist vector (upstream 1811).
 // Confirm against the upstream file before treating this text as
 // compilable.
1806
1807 auto *PH = L.getLoopPreheader();
1808 auto *Header = L.getHeader();
1809
1810 // A worklist to use while walking backwards from the header.
1812
1813 // First walk the predecessors of the header to find the backedges. This will
1814 // form the basis of our walk.
1815 for (auto *Pred : predecessors(Header)) {
1816 // Skip the preheader.
1817 if (Pred == PH)
1818 continue;
1819
1820 // Because the loop was in simplified form, the only non-loop predecessor
1821 // is the preheader.
1822 assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1823 "than the preheader that is not part of the "
1824 "loop!");
1825
1826 // Insert this block into the loop set and on the first visit and, if it
1827 // isn't the header we're currently walking, put it into the worklist to
1828 // recurse through.
1829 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1830 Worklist.push_back(Pred);
1831 }
1832
1833 // If no backedges were found, we're done.
1834 if (LoopBlockSet.empty())
1835 return LoopBlockSet;
1836
1837 // We found backedges, recurse through them to identify the loop blocks.
1838 while (!Worklist.empty()) {
1839 BasicBlock *BB = Worklist.pop_back_val();
1840 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1841
1842 // No need to walk past the header.
1843 if (BB == Header)
1844 continue;
1845
1846 // Because we know the inner loop structure remains valid we can use the
1847 // loop structure to jump immediately across the entire nested loop.
1848 // Further, because it is in loop simplified form, we can directly jump
1849 // to its preheader afterward.
1850 if (Loop *InnerL = LI.getLoopFor(BB))
1851 if (InnerL != &L) {
1852 assert(L.contains(InnerL) &&
1853 "Should not reach a loop *outside* this loop!");
1854 // The preheader is the only possible predecessor of the loop so
1855 // insert it into the set and check whether it was already handled.
1856 auto *InnerPH = InnerL->getLoopPreheader();
1857 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1858 "but not contain the inner loop "
1859 "preheader!");
1860 if (!LoopBlockSet.insert(InnerPH).second)
1861 // The only way to reach the preheader is through the loop body
1862 // itself so if it has been visited the loop is already handled.
1863 continue;
1864
1865 // Insert all of the blocks (other than those already present) into
1866 // the loop set. We expect at least the block that led us to find the
1867 // inner loop to be in the block set, but we may also have other loop
1868 // blocks if they were already enqueued as predecessors of some other
1869 // outer loop block.
1870 for (auto *InnerBB : InnerL->blocks()) {
1871 if (InnerBB == BB) {
1872 assert(LoopBlockSet.count(InnerBB) &&
1873 "Block should already be in the set!");
1874 continue;
1875 }
1876
1877 LoopBlockSet.insert(InnerBB);
1878 }
1879
1880 // Add the preheader to the worklist so we will continue past the
1881 // loop body.
1882 Worklist.push_back(InnerPH);
1883 continue;
1884 }
1885
1886 // Insert any predecessors that were in the original loop into the new
1887 // set, and if the insert is successful, add them to the worklist.
1888 for (auto *Pred : predecessors(BB))
1889 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1890 Worklist.push_back(Pred);
1891 }
1892
1893 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1894
1895 // We've found all the blocks participating in the loop, return our completed
1896 // set.
1897 return LoopBlockSet;
1898}
1899
1900/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1901///
1902/// The removal may have removed some child loops entirely but cannot have
1903/// disturbed any remaining child loops. However, they may need to be hoisted
1904/// to the parent loop (or to be top-level loops). The original loop may be
1905/// completely removed.
1906///
1907/// The sibling loops resulting from this update are returned. If the original
1908/// loop remains a valid loop, it will be the first entry in this list with all
1909/// of the newly sibling loops following it.
1910///
1911/// Returns true if the loop remains a loop after unswitching, and false if it
1912/// is no longer a loop after unswitching (and should not continue to be
1913/// referenced).
1915 LoopInfo &LI,
1916 SmallVectorImpl<Loop *> &HoistedLoops,
1917 ScalarEvolution *SE) {
 // NOTE(review): this extracted listing is missing the signature line with
 // the function name and leading parameters (upstream 1914 — presumably
 // `Loop &L` and the ExitBlocks list iterated below), the declaration of the
 // Worklist vector used in the main loop (upstream 2000), and the statement
 // guarded by `if (SE)` near the end (upstream 2111). Confirm against the
 // upstream file before treating this text as compilable.
1918 auto *PH = L.getLoopPreheader();
1919
1920 // Compute the actual parent loop from the exit blocks. Because we may have
1921 // pruned some exits the loop may be different from the original parent.
1922 Loop *ParentL = nullptr;
1923 SmallVector<Loop *, 4> ExitLoops;
1924 SmallVector<BasicBlock *, 4> ExitsInLoops;
1925 ExitsInLoops.reserve(ExitBlocks.size());
1926 for (auto *ExitBB : ExitBlocks)
1927 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1928 ExitLoops.push_back(ExitL);
1929 ExitsInLoops.push_back(ExitBB);
1930 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1931 ParentL = ExitL;
1932 }
1933
1934 // Recompute the blocks participating in this loop. This may be empty if it
1935 // is no longer a loop.
1936 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1937
1938 // If we still have a loop, we need to re-set the loop's parent as the exit
1939 // block set changing may have moved it within the loop nest. Note that this
1940 // can only happen when this loop has a parent as it can only hoist the loop
1941 // *up* the nest.
1942 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1943 // Remove this loop's (original) blocks from all of the intervening loops.
1944 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1945 IL = IL->getParentLoop()) {
1946 IL->getBlocksSet().erase(PH);
1947 for (auto *BB : L.blocks())
1948 IL->getBlocksSet().erase(BB);
1949 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1950 return BB == PH || L.contains(BB);
1951 });
1952 }
1953
1954 LI.changeLoopFor(PH, ParentL);
1955 L.getParentLoop()->removeChildLoop(&L);
1956 if (ParentL)
1957 ParentL->addChildLoop(&L);
1958 else
1959 LI.addTopLevelLoop(&L);
1960 }
1961
1962 // Now we update all the blocks which are no longer within the loop.
1963 auto &Blocks = L.getBlocksVector();
1964 auto BlocksSplitI =
1965 LoopBlockSet.empty()
1966 ? Blocks.begin()
1967 : std::stable_partition(
1968 Blocks.begin(), Blocks.end(),
1969 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1970
1971 // Before we erase the list of unlooped blocks, build a set of them.
1972 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1973 if (LoopBlockSet.empty())
1974 UnloopedBlocks.insert(PH);
1975
1976 // Now erase these blocks from the loop.
1977 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1978 L.getBlocksSet().erase(BB);
1979 Blocks.erase(BlocksSplitI, Blocks.end());
1980
1981 // Sort the exits in ascending loop depth, we'll work backwards across these
1982 // to process them inside out.
1983 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1984 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1985 });
1986
1987 // We'll build up a set for each exit loop.
1988 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1989 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1990
1991 auto RemoveUnloopedBlocksFromLoop =
1992 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1993 for (auto *BB : UnloopedBlocks)
1994 L.getBlocksSet().erase(BB);
1995 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1996 return UnloopedBlocks.count(BB);
1997 });
1998 };
1999
 // NOTE(review): upstream line 2000 (the Worklist declaration used below) is
 // missing from this listing.
2001 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
2002 assert(Worklist.empty() && "Didn't clear worklist!");
2003 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
2004
2005 // Grab the next exit block, in decreasing loop depth order.
2006 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
2007 Loop &ExitL = *LI.getLoopFor(ExitBB);
2008 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
2009
2010 // Erase all of the unlooped blocks from the loops between the previous
2011 // exit loop and this exit loop. This works because the ExitInLoops list is
2012 // sorted in increasing order of loop depth and thus we visit loops in
2013 // decreasing order of loop depth.
2014 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
2015 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2016
2017 // Walk the CFG back until we hit the cloned PH adding everything reachable
2018 // and in the unlooped set to this exit block's loop.
2019 Worklist.push_back(ExitBB);
2020 do {
2021 BasicBlock *BB = Worklist.pop_back_val();
2022 // We can stop recursing at the cloned preheader (if we get there).
2023 if (BB == PH)
2024 continue;
2025
2026 for (BasicBlock *PredBB : predecessors(BB)) {
2027 // If this pred has already been moved to our set or is part of some
2028 // (inner) loop, no update needed.
2029 if (!UnloopedBlocks.erase(PredBB)) {
2030 assert((NewExitLoopBlocks.count(PredBB) ||
2031 ExitL.contains(LI.getLoopFor(PredBB))) &&
2032 "Predecessor not in a nested loop (or already visited)!");
2033 continue;
2034 }
2035
2036 // We just insert into the loop set here. We'll add these blocks to the
2037 // exit loop after we build up the set in a deterministic order rather
2038 // than the predecessor-influenced visit order.
2039 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2040 (void)Inserted;
2041 assert(Inserted && "Should only visit an unlooped block once!");
2042
2043 // And recurse through to its predecessors.
2044 Worklist.push_back(PredBB);
2045 }
2046 } while (!Worklist.empty());
2047
2048 // If blocks in this exit loop were directly part of the original loop (as
2049 // opposed to a child loop) update the map to point to this exit loop. This
2050 // just updates a map and so the fact that the order is unstable is fine.
2051 for (auto *BB : NewExitLoopBlocks)
2052 if (Loop *BBL = LI.getLoopFor(BB))
2053 if (BBL == &L || !L.contains(BBL))
2054 LI.changeLoopFor(BB, &ExitL);
2055
2056 // We will remove the remaining unlooped blocks from this loop in the next
2057 // iteration or below.
2058 NewExitLoopBlocks.clear();
2059 }
2060
2061 // Any remaining unlooped blocks are no longer part of any loop unless they
2062 // are part of some child loop.
2063 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
2064 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2065 for (auto *BB : UnloopedBlocks)
2066 if (Loop *BBL = LI.getLoopFor(BB))
2067 if (BBL == &L || !L.contains(BBL))
2068 LI.changeLoopFor(BB, nullptr);
2069
2070 // Sink all the child loops whose headers are no longer in the loop set to
2071 // the parent (or to be top level loops). We reach into the loop and directly
2072 // update its subloop vector to make this batch update efficient.
2073 auto &SubLoops = L.getSubLoopsVector();
2074 auto SubLoopsSplitI =
2075 LoopBlockSet.empty()
2076 ? SubLoops.begin()
2077 : std::stable_partition(
2078 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2079 return LoopBlockSet.count(SubL->getHeader());
2080 });
2081 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2082 HoistedLoops.push_back(HoistedL);
2083 HoistedL->setParentLoop(nullptr);
2084
2085 // To compute the new parent of this hoisted loop we look at where we
2086 // placed the preheader above. We can't lookup the header itself because we
2087 // retained the mapping from the header to the hoisted loop. But the
2088 // preheader and header should have the exact same new parent computed
2089 // based on the set of exit blocks from the original loop as the preheader
2090 // is a predecessor of the header and so reached in the reverse walk. And
2091 // because the loops were all in simplified form the preheader of the
2092 // hoisted loop can't be part of some *other* loop.
2093 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2094 NewParentL->addChildLoop(HoistedL);
2095 else
2096 LI.addTopLevelLoop(HoistedL);
2097 }
2098 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2099
2100 // Actually delete the loop if nothing remained within it.
2101 if (Blocks.empty()) {
2102 assert(SubLoops.empty() &&
2103 "Failed to remove all subloops from the original loop!");
2104 if (Loop *ParentL = L.getParentLoop())
2105 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2106 else
2107 LI.removeLoop(llvm::find(LI, &L));
2108 // markLoopAsDeleted for L should be triggered by the caller (it is
2109 // typically done within postUnswitch).
2110 if (SE)
 // NOTE(review): the statement guarded by `if (SE)` (upstream line 2111,
 // presumably a ScalarEvolution invalidation call) is missing from this
 // listing; as rendered, the guard would wrongly apply to LI.destroy
 // below. Restore from upstream.
2112 LI.destroy(&L);
2113 return false;
2114 }
2115
2116 return true;
2117}
2118
2119/// Helper to visit a dominator subtree, invoking a callable on each node.
2120///
2121/// Returning false at any point will stop walking past that node of the tree.
///
/// The callable receives each visited node's BasicBlock; returning true
/// descends into that node's dominator-tree children, returning false prunes
/// that subtree. In !NDEBUG builds a Visited set asserts that no node is
/// reached twice (a tree walk must never revisit).
2122template <typename CallableT>
2123void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
 // NOTE(review): the declarations of the DomWorklist vector (upstream 2124)
 // and, under !NDEBUG, the Visited set (upstream 2127) are missing from this
 // extracted listing — confirm against the upstream file.
2125 DomWorklist.push_back(DT[BB]);
2126#ifndef NDEBUG
2128 Visited.insert(DT[BB]);
2129#endif
2130 do {
2131 DomTreeNode *N = DomWorklist.pop_back_val();
2132
2133 // Visit this node.
2134 if (!Callable(N->getBlock()))
2135 continue;
2136
2137 // Accumulate the child nodes.
2138 for (DomTreeNode *ChildN : *N) {
2139 assert(Visited.insert(ChildN).second &&
2140 "Cannot visit a node twice when walking a tree!");
2141 DomWorklist.push_back(ChildN);
2142 }
2143 } while (!DomWorklist.empty());
2144}
2145
2147 bool CurrentLoopValid, bool PartiallyInvariant,
2148 bool InjectedCondition, ArrayRef<Loop *> NewLoops) {
 // Post-processing after an unswitch: registers newly created sibling loops
 // with the pass-manager updater, then either re-queues the current loop for
 // further unswitching or marks it deleted. For partial-invariance or
 // injected-condition unswitches it instead tags the loop with
 // "llvm.loop.unswitch.partial.disable" /
 // "llvm.loop.unswitch.injection.disable" metadata to avoid unswitching on
 // the same condition again.
 //
 // NOTE(review): this extracted listing is missing the signature line with
 // the function name and leading parameters (upstream 2146 — presumably
 // `Loop &L`, the updater `U`, and the loop name) as well as the two
 // `MDNode *NewLoopID = ...` lines (upstream 2163 and 2173) receiving the
 // metadata built below. Confirm against the upstream file.
2149 // If we did a non-trivial unswitch, we have added new (cloned) loops.
2150 if (!NewLoops.empty())
2151 U.addSiblingLoops(NewLoops);
2152
2153 // If the current loop remains valid, we should revisit it to catch any
2154 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
2155 if (CurrentLoopValid) {
2156 if (PartiallyInvariant) {
2157 // Mark the new loop as partially unswitched, to avoid unswitching on
2158 // the same condition again.
2159 auto &Context = L.getHeader()->getContext();
2160 MDNode *DisableUnswitchMD = MDNode::get(
2161 Context,
2162 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
2164 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
2165 {DisableUnswitchMD});
2166 L.setLoopID(NewLoopID);
2167 } else if (InjectedCondition) {
2168 // Do the same for injection of invariant conditions.
2169 auto &Context = L.getHeader()->getContext();
2170 MDNode *DisableUnswitchMD = MDNode::get(
2171 Context,
2172 MDString::get(Context, "llvm.loop.unswitch.injection.disable"));
2174 Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
2175 {DisableUnswitchMD});
2176 L.setLoopID(NewLoopID);
2177 } else
2178 U.revisitCurrentLoop();
2179 } else
2180 U.markLoopAsDeleted(L, LoopName);
2181}
2182
2184 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2185 IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2187 LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition) {
2188 auto *ParentBB = TI.getParent();
2190 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2191
2192 // Save the current loop name in a variable so that we can report it even
2193 // after it has been deleted.
2194 std::string LoopName(L.getName());
2195
2196 // We can only unswitch switches, conditional branches with an invariant
2197 // condition, or combining invariant conditions with an instruction or
2198 // partially invariant instructions.
2199 assert((SI || (BI && BI->isConditional())) &&
2200 "Can only unswitch switches and conditional branch!");
2201 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2202 bool FullUnswitch =
2203 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2204 !PartiallyInvariant);
2205 if (FullUnswitch)
2206 assert(Invariants.size() == 1 &&
2207 "Cannot have other invariants with full unswitching!");
2208 else
2210 "Partial unswitching requires an instruction as the condition!");
2211
2212 if (MSSAU && VerifyMemorySSA)
2213 MSSAU->getMemorySSA()->verifyMemorySSA();
2214
2215 // Constant and BBs tracking the cloned and continuing successor. When we are
2216 // unswitching the entire condition, this can just be trivially chosen to
2217 // unswitch towards `true`. However, when we are unswitching a set of
2218 // invariants combined with `and` or `or` or partially invariant instructions,
2219 // the combining operation determines the best direction to unswitch: we want
2220 // to unswitch the direction that will collapse the branch.
2221 bool Direction = true;
2222 int ClonedSucc = 0;
2223 if (!FullUnswitch) {
2225 (void)Cond;
2227 PartiallyInvariant) &&
2228 "Only `or`, `and`, an `select`, partially invariant instructions "
2229 "can combine invariants being unswitched.");
2230 if (!match(Cond, m_LogicalOr())) {
2231 if (match(Cond, m_LogicalAnd()) ||
2232 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2233 Direction = false;
2234 ClonedSucc = 1;
2235 }
2236 }
2237 }
2238
2239 BasicBlock *RetainedSuccBB =
2240 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2241 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2242 if (BI)
2243 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2244 else
2245 for (auto Case : SI->cases())
2246 if (Case.getCaseSuccessor() != RetainedSuccBB)
2247 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2248
2249 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2250 "Should not unswitch the same successor we are retaining!");
2251
2252 // The branch should be in this exact loop. Any inner loop's invariant branch
2253 // should be handled by unswitching that inner loop. The caller of this
2254 // routine should filter out any candidates that remain (but were skipped for
2255 // whatever reason).
2256 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2257
2258 // Compute the parent loop now before we start hacking on things.
2259 Loop *ParentL = L.getParentLoop();
2260 // Get blocks in RPO order for MSSA update, before changing the CFG.
2261 LoopBlocksRPO LBRPO(&L);
2262 if (MSSAU)
2263 LBRPO.perform(&LI);
2264
2265 // Compute the outer-most loop containing one of our exit blocks. This is the
2266 // furthest up our loopnest which can be mutated, which we will use below to
2267 // update things.
2268 Loop *OuterExitL = &L;
2270 L.getUniqueExitBlocks(ExitBlocks);
2271 for (auto *ExitBB : ExitBlocks) {
2272 // ExitBB can be an exit block for several levels in the loop nest. Make
2273 // sure we find the top most.
2274 Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI);
2275 if (!NewOuterExitL) {
2276 // We exited the entire nest with this block, so we're done.
2277 OuterExitL = nullptr;
2278 break;
2279 }
2280 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2281 OuterExitL = NewOuterExitL;
2282 }
2283
2284 // At this point, we're definitely going to unswitch something so invalidate
2285 // any cached information in ScalarEvolution for the outer most loop
2286 // containing an exit block and all nested loops.
2287 if (SE) {
2288 if (OuterExitL)
2289 SE->forgetLoop(OuterExitL);
2290 else
2291 SE->forgetTopmostLoop(&L);
2293 }
2294
2295 // If the edge from this terminator to a successor dominates that successor,
2296 // store a map from each block in its dominator subtree to it. This lets us
2297 // tell when cloning for a particular successor if a block is dominated by
2298 // some *other* successor with a single data structure. We use this to
2299 // significantly reduce cloning.
2301 for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
2302 UnswitchedSuccBBs))
2303 if (SuccBB->getUniquePredecessor() ||
2304 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2305 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2306 }))
2307 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2308 DominatingSucc[BB] = SuccBB;
2309 return true;
2310 });
2311
2312 // Split the preheader, so that we know that there is a safe place to insert
2313 // the conditional branch. We will change the preheader to have a conditional
2314 // branch on LoopCond. The original preheader will become the split point
2315 // between the unswitched versions, and we will have a new preheader for the
2316 // original loop.
2317 BasicBlock *SplitBB = L.getLoopPreheader();
2318 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2319
2320 // Keep track of the dominator tree updates needed.
2322
2323 // Clone the loop for each unswitched successor.
2325 VMaps.reserve(UnswitchedSuccBBs.size());
2327 for (auto *SuccBB : UnswitchedSuccBBs) {
2328 VMaps.emplace_back(new ValueToValueMapTy());
2329 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2330 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2331 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2332 }
2333
2334 // Drop metadata if we may break its semantics by moving this instr into the
2335 // split block.
2336 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2338 // Do not spend time trying to understand if we can keep it, just drop it
2339 // to save compile time.
2340 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2341 else {
2342 // It is only legal to preserve make.implicit metadata if we are
2343 // guaranteed no reach implicit null check after following this branch.
2344 ICFLoopSafetyInfo SafetyInfo;
2345 SafetyInfo.computeLoopSafetyInfo(&L);
2346 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2347 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2348 }
2349 }
2350
2351 // The stitching of the branched code back together depends on whether we're
2352 // doing full unswitching or not with the exception that we always want to
2353 // nuke the initial terminator placed in the split block.
2354 SplitBB->getTerminator()->eraseFromParent();
2355 if (FullUnswitch) {
2356 // Keep a clone of the terminator for MSSA updates.
2357 Instruction *NewTI = TI.clone();
2358 NewTI->insertInto(ParentBB, ParentBB->end());
2359
2360 // Splice the terminator from the original loop and rewrite its
2361 // successors.
2362 TI.moveBefore(*SplitBB, SplitBB->end());
2363 TI.dropLocation();
2364
2365 // First wire up the moved terminator to the preheaders.
2366 if (BI) {
2367 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2368 BI->setSuccessor(ClonedSucc, ClonedPH);
2369 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2371 if (InsertFreeze) {
2372 // We don't give any debug location to the new freeze, because the
2373 // BI (`dyn_cast<BranchInst>(TI)`) is an in-loop instruction hoisted
2374 // out of the loop.
2375 Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI->getIterator());
2377 }
2378 BI->setCondition(Cond);
2379 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2380 } else {
2381 assert(SI && "Must either be a branch or switch!");
2382
2383 // Walk the cases and directly update their successors.
2384 assert(SI->getDefaultDest() == RetainedSuccBB &&
2385 "Not retaining default successor!");
2386 SI->setDefaultDest(LoopPH);
2387 for (const auto &Case : SI->cases())
2388 if (Case.getCaseSuccessor() == RetainedSuccBB)
2389 Case.setSuccessor(LoopPH);
2390 else
2391 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2392
2393 if (InsertFreeze)
2394 SI->setCondition(new FreezeInst(SI->getCondition(),
2395 SI->getCondition()->getName() + ".fr",
2396 SI->getIterator()));
2397
2398 // We need to use the set to populate domtree updates as even when there
2399 // are multiple cases pointing at the same successor we only want to
2400 // remove and insert one edge in the domtree.
2401 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2402 DTUpdates.push_back(
2403 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2404 }
2405
2406 if (MSSAU) {
2407 DT.applyUpdates(DTUpdates);
2408 DTUpdates.clear();
2409
2410 // Remove all but one edge to the retained block and all unswitched
2411 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2412 // when we know we only keep a single edge for each case.
2413 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2414 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2415 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2416
2417 for (auto &VMap : VMaps)
2418 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2419 /*IgnoreIncomingWithNoClones=*/true);
2420 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2421
2422 // Remove all edges to unswitched blocks.
2423 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2424 MSSAU->removeEdge(ParentBB, SuccBB);
2425 }
2426
2427 // Now unhook the successor relationship as we'll be replacing
2428 // the terminator with a direct branch. This is much simpler for branches
2429 // than switches so we handle those first.
2430 if (BI) {
2431 // Remove the parent as a predecessor of the unswitched successor.
2432 assert(UnswitchedSuccBBs.size() == 1 &&
2433 "Only one possible unswitched block for a branch!");
2434 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2435 UnswitchedSuccBB->removePredecessor(ParentBB,
2436 /*KeepOneInputPHIs*/ true);
2437 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2438 } else {
2439 // Note that we actually want to remove the parent block as a predecessor
2440 // of *every* case successor. The case successor is either unswitched,
2441 // completely eliminating an edge from the parent to that successor, or it
2442 // is a duplicate edge to the retained successor as the retained successor
2443 // is always the default successor and as we'll replace this with a direct
2444 // branch we no longer need the duplicate entries in the PHI nodes.
2445 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2446 assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2447 "Not retaining default successor!");
2448 for (const auto &Case : NewSI->cases())
2449 Case.getCaseSuccessor()->removePredecessor(
2450 ParentBB,
2451 /*KeepOneInputPHIs*/ true);
2452
2453 // We need to use the set to populate domtree updates as even when there
2454 // are multiple cases pointing at the same successor we only want to
2455 // remove and insert one edge in the domtree.
2456 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2457 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2458 }
2459
2460 // Create a new unconditional branch to the continuing block (as opposed to
2461 // the one cloned).
2462 Instruction *NewBI = BranchInst::Create(RetainedSuccBB, ParentBB);
2463 NewBI->setDebugLoc(NewTI->getDebugLoc());
2464
2465 // After MSSAU update, remove the cloned terminator instruction NewTI.
2466 NewTI->eraseFromParent();
2467 } else {
2468 assert(BI && "Only branches have partial unswitching.");
2469 assert(UnswitchedSuccBBs.size() == 1 &&
2470 "Only one possible unswitched block for a branch!");
2471 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2472 // When doing a partial unswitch, we have to do a bit more work to build up
2473 // the branch in the split block.
2474 if (PartiallyInvariant)
2476 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2477 else {
2479 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2480 FreezeLoopUnswitchCond, BI, &AC, DT);
2481 }
2482 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2483
2484 if (MSSAU) {
2485 DT.applyUpdates(DTUpdates);
2486 DTUpdates.clear();
2487
2488 // Perform MSSA cloning updates.
2489 for (auto &VMap : VMaps)
2490 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2491 /*IgnoreIncomingWithNoClones=*/true);
2492 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2493 }
2494 }
2495
2496 // Apply the updates accumulated above to get an up-to-date dominator tree.
2497 DT.applyUpdates(DTUpdates);
2498
2499 // Now that we have an accurate dominator tree, first delete the dead cloned
2500 // blocks so that we can accurately build any cloned loops. It is important to
2501 // not delete the blocks from the original loop yet because we still want to
2502 // reference the original loop to understand the cloned loop's structure.
2503 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2504
2505 // Build the cloned loop structure itself. This may be substantially
2506 // different from the original structure due to the simplified CFG. This also
2507 // handles inserting all the cloned blocks into the correct loops.
2508 SmallVector<Loop *, 4> NonChildClonedLoops;
2509 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2510 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2511
2512 // Now that our cloned loops have been built, we can update the original loop.
2513 // First we delete the dead blocks from it and then we rebuild the loop
2514 // structure taking these deletions into account.
2515 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, LoopUpdater);
2516
2517 if (MSSAU && VerifyMemorySSA)
2518 MSSAU->getMemorySSA()->verifyMemorySSA();
2519
2520 SmallVector<Loop *, 4> HoistedLoops;
2521 bool IsStillLoop =
2522 rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2523
2524 if (MSSAU && VerifyMemorySSA)
2525 MSSAU->getMemorySSA()->verifyMemorySSA();
2526
2527 // This transformation has a high risk of corrupting the dominator tree, and
2528 // the below steps to rebuild loop structures will result in hard to debug
2529 // errors in that case so verify that the dominator tree is sane first.
2530 // FIXME: Remove this when the bugs stop showing up and rely on existing
2531 // verification steps.
2532 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2533
2534 if (BI && !PartiallyInvariant) {
2535 // If we unswitched a branch which collapses the condition to a known
2536 // constant we want to replace all the uses of the invariants within both
2537 // the original and cloned blocks. We do this here so that we can use the
2538 // now updated dominator tree to identify which side the users are on.
2539 assert(UnswitchedSuccBBs.size() == 1 &&
2540 "Only one possible unswitched block for a branch!");
2541 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2542
2543 // When considering multiple partially-unswitched invariants
2544 // we cant just go replace them with constants in both branches.
2545 //
2546 // For 'AND' we infer that true branch ("continue") means true
2547 // for each invariant operand.
2548 // For 'OR' we can infer that false branch ("continue") means false
2549 // for each invariant operand.
2550 // So it happens that for multiple-partial case we dont replace
2551 // in the unswitched branch.
2552 bool ReplaceUnswitched =
2553 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2554
2555 ConstantInt *UnswitchedReplacement =
2558 ConstantInt *ContinueReplacement =
2561 for (Value *Invariant : Invariants) {
2562 assert(!isa<Constant>(Invariant) &&
2563 "Should not be replacing constant values!");
2564 // Use make_early_inc_range here as set invalidates the iterator.
2565 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2566 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2567 if (!UserI)
2568 continue;
2569
2570 // Replace it with the 'continue' side if in the main loop body, and the
2571 // unswitched if in the cloned blocks.
2572 if (DT.dominates(LoopPH, UserI->getParent()))
2573 U.set(ContinueReplacement);
2574 else if (ReplaceUnswitched &&
2575 DT.dominates(ClonedPH, UserI->getParent()))
2576 U.set(UnswitchedReplacement);
2577 }
2578 }
2579 }
2580
2581 // We can change which blocks are exit blocks of all the cloned sibling
2582 // loops, the current loop, and any parent loops which shared exit blocks
2583 // with the current loop. As a consequence, we need to re-form LCSSA for
2584 // them. But we shouldn't need to re-form LCSSA for any child loops.
2585 // FIXME: This could be made more efficient by tracking which exit blocks are
2586 // new, and focusing on them, but that isn't likely to be necessary.
2587 //
2588 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2589 // loop nest and update every loop that could have had its exits changed. We
2590 // also need to cover any intervening loops. We add all of these loops to
2591 // a list and sort them by loop depth to achieve this without updating
2592 // unnecessary loops.
2593 auto UpdateLoop = [&](Loop &UpdateL) {
2594#ifndef NDEBUG
2595 UpdateL.verifyLoop();
2596 for (Loop *ChildL : UpdateL) {
2597 ChildL->verifyLoop();
2598 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2599 "Perturbed a child loop's LCSSA form!");
2600 }
2601#endif
2602 // First build LCSSA for this loop so that we can preserve it when
2603 // forming dedicated exits. We don't want to perturb some other loop's
2604 // LCSSA while doing that CFG edit.
2605 formLCSSA(UpdateL, DT, &LI, SE);
2606
2607 // For loops reached by this loop's original exit blocks we may
2608 // introduced new, non-dedicated exits. At least try to re-form dedicated
2609 // exits for these loops. This may fail if they couldn't have dedicated
2610 // exits to start with.
2611 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2612 };
2613
2614 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2615 // and we can do it in any order as they don't nest relative to each other.
2616 //
2617 // Also check if any of the loops we have updated have become top-level loops
2618 // as that will necessitate widening the outer loop scope.
2619 for (Loop *UpdatedL :
2620 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2621 UpdateLoop(*UpdatedL);
2622 if (UpdatedL->isOutermost())
2623 OuterExitL = nullptr;
2624 }
2625 if (IsStillLoop) {
2626 UpdateLoop(L);
2627 if (L.isOutermost())
2628 OuterExitL = nullptr;
2629 }
2630
2631 // If the original loop had exit blocks, walk up through the outer most loop
2632 // of those exit blocks to update LCSSA and form updated dedicated exits.
2633 if (OuterExitL != &L)
2634 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2635 OuterL = OuterL->getParentLoop())
2636 UpdateLoop(*OuterL);
2637
2638#ifndef NDEBUG
2639 // Verify the entire loop structure to catch any incorrect updates before we
2640 // progress in the pass pipeline.
2641 LI.verify(DT);
2642#endif
2643
2644 // Now that we've unswitched something, make callbacks to report the changes.
2645 // For that we need to merge together the updated loops and the cloned loops
2646 // and check whether the original loop survived.
2647 SmallVector<Loop *, 4> SibLoops;
2648 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2649 if (UpdatedL->getParentLoop() == ParentL)
2650 SibLoops.push_back(UpdatedL);
2651 postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2652 InjectedCondition, SibLoops);
2653
2654 if (MSSAU && VerifyMemorySSA)
2655 MSSAU->getMemorySSA()->verifyMemorySSA();
2656
2657 if (BI)
2658 ++NumBranches;
2659 else
2660 ++NumSwitches;
2661}
2662
2663/// Recursively compute the cost of a dominator subtree based on the per-block
2664/// cost map provided.
2665///
2666/// The recursive computation is memoized into the provided DT-indexed cost map
2667/// to allow querying it for most nodes in the domtree without it becoming
2668/// quadratic.
// NOTE(review): the rendered source dropped original lines 2669 and 2671-2672
// here -- presumably the `static InstructionCost computeDomSubtreeCost(`
// opener and the two cost-map reference parameters (a per-BasicBlock cost map
// and a per-DomTreeNode memoization map, given their use below). Confirm
// against the repository.
2670 DomTreeNode &N,
2673 // Don't accumulate cost (or recurse through) blocks not in our block cost
2674 // map and thus not part of the duplication cost being considered.
2675 auto BBCostIt = BBCostMap.find(N.getBlock());
2676 if (BBCostIt == BBCostMap.end())
2677 return 0;
2678
2679 // Lookup this node to see if we already computed its cost.
2680 auto DTCostIt = DTCostMap.find(&N);
2681 if (DTCostIt != DTCostMap.end())
2682 return DTCostIt->second;
2683
2684 // If not, we have to compute it. We can't use insert above and update
2685 // because computing the cost may insert more things into the map.
  // Fold this block's own cost together with the recursively computed cost of
  // every child in the dominator tree.
2686 InstructionCost Cost = std::accumulate(
2687 N.begin(), N.end(), BBCostIt->second,
2688 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2689 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2690 });
2691 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2692 (void)Inserted;
2693 assert(Inserted && "Should not insert a node while visiting children!");
2694 return Cost;
2695}
2696
2697/// Turns a select instruction into implicit control flow branch,
2698/// making the following replacement:
2699///
2700/// head:
2701/// --code before select--
2702/// select %cond, %trueval, %falseval
2703/// --code after select--
2704///
2705/// into
2706///
2707/// head:
2708/// --code before select--
2709/// br i1 %cond, label %then, label %tail
2710///
2711/// then:
2712/// br %tail
2713///
2714/// tail:
2715/// phi [ %trueval, %then ], [ %falseval, %head]
2716/// unreachable
2717///
2718/// It also makes all relevant DT and LI updates, so that all structures are in
2719/// valid state after this transform.
// NOTE(review): the rendered source dropped original line 2720 here -- the
// function opener, presumably `static BranchInst *turnSelectIntoBranch(
// SelectInst *SI, DominatorTree &DT,`. Confirm against the repository.
2721 LoopInfo &LI, MemorySSAUpdater *MSSAU,
2722 AssumptionCache *AC) {
2723 LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
2724 BasicBlock *HeadBB = SI->getParent();
2725
2726 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
  // Split the block at the select. Forward the select's profile metadata so
  // the new conditional branch keeps the original branch-weight information.
2727 SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
2728 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2729 auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
2730 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2731 *TailBB = CondBr->getSuccessor(1);
  // Keep MemorySSA consistent with the instructions spliced into the tail.
2732 if (MSSAU)
2733 MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
2734
  // Replace the select's value with a phi merging the true/false operands
  // from the two incoming edges, then drop the select itself.
2735 PHINode *Phi =
2736 PHINode::Create(SI->getType(), 2, "unswitched.select", SI->getIterator());
2737 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2738 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2739 Phi->setDebugLoc(SI->getDebugLoc());
2740 SI->replaceAllUsesWith(Phi);
2741 SI->eraseFromParent();
2742
2743 if (MSSAU && VerifyMemorySSA)
2744 MSSAU->getMemorySSA()->verifyMemorySSA();
2745
2746 ++NumSelects;
  // Return the newly created conditional branch for the caller to unswitch.
2747 return CondBr;
2748}
2749
2750/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2751/// making the following replacement:
2752///
2753/// --code before guard--
2754/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2755/// --code after guard--
2756///
2757/// into
2758///
2759/// --code before guard--
2760/// br i1 %cond, label %guarded, label %deopt
2761///
2762/// guarded:
2763/// --code after guard--
2764///
2765/// deopt:
2766/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2767/// unreachable
2768///
2769/// It also makes all relevant DT and LI updates, so that all structures are in
2770/// valid state after this transform.
// NOTE(review): the rendered source dropped original line 2771 here -- the
// function opener, presumably `static BranchInst *turnGuardIntoBranch(
// IntrinsicInst *GI,`. Confirm against the repository.
2772 DominatorTree &DT, LoopInfo &LI,
2773 MemorySSAUpdater *MSSAU) {
2774 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2775 BasicBlock *CheckBB = GI->getParent();
2776
2777 if (MSSAU && VerifyMemorySSA)
2778 MSSAU->getMemorySSA()->verifyMemorySSA();
2779
2780 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2781 Instruction *DeoptBlockTerm =
  // NOTE(review): the rendered source dropped original line 2782 here -- the
  // SplitBlockAndInsertIfThen(...) call whose result initializes
  // DeoptBlockTerm. Confirm against the repository.
2783 GI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2784 BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2785 // SplitBlockAndInsertIfThen inserts control flow that branches to
2786 // DeoptBlockTerm if the condition is true. We want the opposite.
2787 CheckBI->swapSuccessors();
2788
2789 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2790 GuardedBlock->setName("guarded");
2791 CheckBI->getSuccessor(1)->setName("deopt");
2792 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2793
  // Keep MemorySSA consistent with everything spliced into the guarded block.
2794 if (MSSAU)
2795 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2796
  // Move the original guard call into the deopt block.
2797 GI->moveBefore(DeoptBlockTerm->getIterator());
  // NOTE(review): the rendered source dropped original line 2798 here --
  // presumably the statement that rewrites the moved guard's condition (the
  // `i1 false` shown in the doc comment above). Confirm against the
  // repository.
2799
2800 if (MSSAU) {
  // NOTE(review): the rendered source dropped original line 2801 here -- the
  // definition of `MD`, presumably the guard's MemorySSA access obtained from
  // MSSAU. Confirm against the repository.
2802 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2803 if (VerifyMemorySSA)
2804 MSSAU->getMemorySSA()->verifyMemorySSA();
2805 }
2806
2807 if (VerifyLoopInfo)
2808 LI.verify(DT);
2809 ++NumGuards;
  // Return the new conditional branch for the caller to unswitch.
2810 return CheckBI;
2811}
2812
2813/// Cost multiplier is a way to limit potentially exponential behavior
2814/// of loop-unswitch. Cost is multiplied in proportion of 2^number of unswitch
2815/// candidates available. Also consider the number of "sibling" loops with
2816/// the idea of accounting for previous unswitches that already happened on this
2817/// cluster of loops. There was an attempt to keep this formula simple,
2818/// just enough to limit the worst case behavior. Even if it is not that simple
2819/// now it is still not an attempt to provide a detailed heuristic size
2820/// prediction.
2821///
2822/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2823/// unswitch candidates, making adequate predictions instead of wild guesses.
2824/// That requires knowing not just the number of "remaining" candidates but
2825/// also costs of unswitching for each of these candidates.
// NOTE(review): the rendered source dropped original line 2826 here -- the
// function opener, presumably `static int CalculateUnswitchCostMultiplier(`.
// Confirm against the repository.
2827 const Instruction &TI, const Loop &L, const LoopInfo &LI,
2828 const DominatorTree &DT,
2829 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2830
2831 // Guards and other exiting conditions do not contribute to exponential
2832 // explosion as soon as they dominate the latch (otherwise there might be
2833 // another path to the latch remaining that does not allow to eliminate the
2834 // loop copy on unswitch).
2835 const BasicBlock *Latch = L.getLoopLatch();
2836 const BasicBlock *CondBlock = TI.getParent();
2837 if (DT.dominates(CondBlock, Latch) &&
2838 (isGuard(&TI) ||
2839 (TI.isTerminator() &&
2840 llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2841 return L.contains(SuccBB);
2842 }) <= 1))) {
2843 NumCostMultiplierSkipped++;
2844 return 1;
2845 }
2846
2847 // Each invariant non-trivial condition, after being unswitched, is supposed
2848 // to have its own specialized sibling loop (the invariant condition has been
2849 // hoisted out of the child loop into a newly-cloned loop). When unswitching
2850 // conditions in nested loops, the basic block size of the outer loop should
2851 // not be altered. If such a size significantly increases across unswitching
2852 // invocations, something may be wrong; so adjust the final cost taking this
2853 // into account.
2854 auto *ParentL = L.getParentLoop();
2855 int ParentLoopSizeMultiplier = 1;
2856 if (ParentL)
2857 ParentLoopSizeMultiplier =
2858 std::max<int>(ParentL->getNumBlocks() / UnswitchParentBlocksDiv, 1);
2859
2860 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2861 : std::distance(LI.begin(), LI.end()));
2862 // Count amount of clones that all the candidates might cause during
2863 // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2864 // cases.
2865 int UnswitchedClones = 0;
2866 for (const auto &Candidate : UnswitchCandidates) {
2867 const Instruction *CI = Candidate.TI;
  // Note: this loop-local `CondBlock` shadows the outer one above; it refers
  // to the candidate's block rather than TI's.
2868 const BasicBlock *CondBlock = CI->getParent();
2869 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2870 if (isa<SelectInst>(CI)) {
2871 UnswitchedClones++;
2872 continue;
2873 }
2874 if (isGuard(CI)) {
2875 if (!SkipExitingSuccessors)
2876 UnswitchedClones++;
2877 continue;
2878 }
2879 int NonExitingSuccessors =
2880 llvm::count_if(successors(CondBlock),
2881 [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2882 return !SkipExitingSuccessors || L.contains(SuccBB);
2883 });
2884 UnswitchedClones += Log2_32(NonExitingSuccessors);
2885 }
2886
2887 // Ignore up to the "unscaled candidates" number of unswitch candidates
2888 // when calculating the power-of-two scaling of the cost. The main idea
2889 // with this control is to allow a small number of unswitches to happen
2890 // and rely more on siblings multiplier (see below) when the number
2891 // of candidates is small.
2892 unsigned ClonesPower =
2893 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2894
2895 // Allowing top-level loops to spread a bit more than nested ones.
2896 int SiblingsMultiplier =
2897 std::max((ParentL ? SiblingsCount
2898 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2899 1);
2900 // Compute the cost multiplier in a way that won't overflow by saturating
2901 // at an upper bound.
2902 int CostMultiplier;
2903 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2904 SiblingsMultiplier > UnswitchThreshold ||
2905 ParentLoopSizeMultiplier > UnswitchThreshold)
2906 CostMultiplier = UnswitchThreshold;
2907 else
2908 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2909 (int)UnswitchThreshold);
2910
2911 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2912 << " (siblings " << SiblingsMultiplier << " * parent size "
2913 << ParentLoopSizeMultiplier << " * clones "
2914 << (1 << ClonesPower) << ")"
2915 << " for unswitch candidate: " << TI << "\n");
2916 return CostMultiplier;
2917}
2918
// Collect all non-trivial unswitch candidates in the loop: selects with
// i1 conditions, guards (optionally), invariant switch conditions, and
// conditional branches with (partially) invariant conditions. Returns true if
// any candidate was found.
// NOTE(review): the rendered source dropped original lines 2919-2920 here --
// the function opener, presumably `static bool collectUnswitchCandidates(
// SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,`.
// Confirm against the repository.
2921 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
2922 const Loop &L, const LoopInfo &LI, AAResults &AA,
2923 const MemorySSAUpdater *MSSAU) {
2924 assert(UnswitchCandidates.empty() && "Should be!");
2925
2926 auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) {
  // NOTE(review): the rendered source dropped original line 2927 here --
  // presumably a normalization of `Cond` (it is reassigned before the checks
  // below). Confirm against the repository.
2928 if (isa<Constant>(Cond))
2929 return;
2930 if (L.isLoopInvariant(Cond)) {
2931 UnswitchCandidates.push_back({I, {Cond}});
2932 return;
2933 }
  // NOTE(review): the rendered source dropped original lines 2934 and 2936
  // here -- presumably the guard condition opening this scope (closed by the
  // `}` at original line 2940) and the callee whose arguments appear on line
  // 2937. Confirm against the repository.
2935 TinyPtrVector<Value *> Invariants =
2937 L, *static_cast<Instruction *>(Cond), LI);
2938 if (!Invariants.empty())
2939 UnswitchCandidates.push_back({I, std::move(Invariants)});
2940 }
2941 };
2942
2943 // Whether or not we should also collect guards in the loop.
2944 bool CollectGuards = false;
2945 if (UnswitchGuards) {
2946 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
2947 L.getHeader()->getParent()->getParent(), Intrinsic::experimental_guard);
2948 if (GuardDecl && !GuardDecl->use_empty())
2949 CollectGuards = true;
2950 }
2951
2952 for (auto *BB : L.blocks()) {
  // Only consider blocks belonging directly to this loop, not inner loops.
2953 if (LI.getLoopFor(BB) != &L)
2954 continue;
2955
2956 for (auto &I : *BB) {
2957 if (auto *SI = dyn_cast<SelectInst>(&I)) {
2958 auto *Cond = SI->getCondition();
2959 // Do not unswitch vector selects and logical and/or selects
2960 if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
2961 AddUnswitchCandidatesForInst(SI, Cond);
2962 } else if (CollectGuards && isGuard(&I)) {
2963 auto *Cond =
2964 skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
2965 // TODO: Support AND, OR conditions and partial unswitching.
2966 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2967 UnswitchCandidates.push_back({&I, {Cond}});
2968 }
2969 }
2970
2971 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2972 // We can only consider fully loop-invariant switch conditions as we need
2973 // to completely eliminate the switch after unswitching.
2974 if (!isa<Constant>(SI->getCondition()) &&
2975 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2976 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2977 continue;
2978 }
2979
2980 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2981 if (!BI || !BI->isConditional() ||
2982 BI->getSuccessor(0) == BI->getSuccessor(1))
2983 continue;
2984
2985 AddUnswitchCandidatesForInst(BI, BI->getCondition());
2986 }
2987
  // Additionally look for a partially-invariant condition on the header
  // terminator, unless partial unswitching is disabled by loop metadata or
  // the header terminator is already a candidate.
2988 if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
2989 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2990 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2991 })) {
2992 MemorySSA *MSSA = MSSAU->getMemorySSA();
2993 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2994 LLVM_DEBUG(
2995 dbgs() << "simple-loop-unswitch: Found partially invariant condition "
2996 << *Info->InstToDuplicate[0] << "\n");
2997 PartialIVInfo = *Info;
2998 PartialIVCondBranch = L.getHeader()->getTerminator();
2999 TinyPtrVector<Value *> ValsToDuplicate;
3000 llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
3001 UnswitchCandidates.push_back(
3002 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
3003 }
3004 }
3005 return !UnswitchCandidates.empty();
3006}
3007
3008/// Tries to canonicalize condition described by:
3009///
3010/// br (LHS pred RHS), label IfTrue, label IfFalse
3011///
3012/// into its equivalent where `Pred` is something that we support for injected
3013/// invariants (so far it is limited to ult), LHS in canonicalized form is
3014/// non-invariant and RHS is an invariant.
// NOTE(review): the rendered source dropped original line 3015 here -- the
// function opener, presumably `static void
// canonicalizeForInvariantConditionInjection(ICmpInst::Predicate &Pred,`.
// Confirm against the repository.
3016 Value *&LHS, Value *&RHS,
3017 BasicBlock *&IfTrue,
3018 BasicBlock *&IfFalse,
3019 const Loop &L) {
  // Canonicalize so that the in-loop successor is IfTrue: invert the
  // predicate and swap successors when needed.
3020 if (!L.contains(IfTrue)) {
3021 Pred = ICmpInst::getInversePredicate(Pred);
3022 std::swap(IfTrue, IfFalse);
3023 }
3024
3025 // Move loop-invariant argument to RHS position.
3026 if (L.isLoopInvariant(LHS)) {
3027 Pred = ICmpInst::getSwappedPredicate(Pred);
3028 std::swap(LHS, RHS);
3029 }
3030
3031 if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
3032 // Turn "x >=s 0" into "x <u SIGNED_MIN", i.e. an unsigned compare
  // against the signed minimum value reinterpreted as unsigned.
3033 Pred = ICmpInst::ICMP_ULT;
3034 RHS = ConstantInt::get(
3035 RHS->getContext(),
3036 APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));
3037 }
3038}
3039
3040/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
3041/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
3042/// injecting a loop-invariant condition.
// NOTE(review): the rendered source dropped original line 3043 here -- the
// function opener, presumably `static bool shouldTryInjectInvariantCondition(`.
// Confirm against the repository.
3044 const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
3045 const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
  // Require a loop-variant LHS compared against a loop-invariant RHS (the
  // canonical form produced by canonicalizeForInvariantConditionInjection).
3046 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
3047 return false;
3048 // TODO: Support other predicates.
3049 if (Pred != ICmpInst::ICMP_ULT)
3050 return false;
3051 // TODO: Support non-loop-exiting branches?
3052 if (!L.contains(IfTrue) || L.contains(IfFalse))
3053 return false;
3054 // FIXME: For some reason this causes problems with MSSA updates, need to
3055 // investigate why. So far, just don't unswitch latch.
3056 if (L.getHeader() == IfTrue)
3057 return false;
3058 return true;
3059}
3060
3061/// Returns true, if metadata on \p BI allows us to optimize branching into \p
3062/// TakenSucc via injection of invariant conditions. The branch should be not
3063/// enough and not previously unswitched, the information about this comes from
3064/// the metadata.
// NOTE(review): the rendered source dropped original line 3065 here -- the
// function opener, presumably `static bool shouldTryInjectBasingOnMetadata(
// BranchInst *BI,`. Confirm against the repository.
3066 const BasicBlock *TakenSucc) {
3067 SmallVector<uint32_t> Weights;
3068 if (!extractBranchWeights(*BI, Weights))
3069 return false;
  // NOTE(review): the rendered source dropped original line 3070 here -- the
  // definition of `T`, presumably the hotness-threshold option used to form
  // the (T-1)/T "likely taken" probability below. Confirm against the
  // repository.
3071 BranchProbability LikelyTaken(T - 1, T);
3072
3073 assert(Weights.size() == 2 && "Unexpected profile data!");
3074 size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
3075 auto Num = Weights[Idx];
3076 auto Denom = Weights[0] + Weights[1];
3077 // Degenerate or overflowed metadata.
3078 if (Denom == 0 || Num > Denom)
3079 return false;
3080 BranchProbability ActualTaken(Num, Denom);
  // Only inject when the profile says TakenSucc is taken at least as often
  // as the likelihood threshold.
3081 if (LikelyTaken > ActualTaken)
3082 return false;
3083 return true;
3084}
3085
3086/// Materialize pending invariant condition of the given candidate into IR. The
3087/// injected loop-invariant condition implies the original loop-variant branch
3088/// condition, so the materialization turns
3089///
3090/// loop_block:
3091/// ...
3092/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3093///
3094/// into
3095///
3096/// preheader:
3097/// %invariant_cond = LHS pred RHS
3098/// ...
3099/// loop_block:
3100/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
3101/// OriginalCheck:
3102/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3103/// ...
3104static NonTrivialUnswitchCandidate
3105injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
3106 DominatorTree &DT, LoopInfo &LI,
3107 AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
3108 assert(Candidate.hasPendingInjection() && "Nothing to inject!");
3109 BasicBlock *Preheader = L.getLoopPreheader();
3110 assert(Preheader && "Loop is not in simplified form?");
3111 assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
3112 "Unswitching branch of inner loop!");
3113
3114 auto Pred = Candidate.PendingInjection->Pred;
3115 auto *LHS = Candidate.PendingInjection->LHS;
3116 auto *RHS = Candidate.PendingInjection->RHS;
3117 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3118 auto *TI = cast<BranchInst>(Candidate.TI);
3119 auto *BB = Candidate.TI->getParent();
3120 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3121 : TI->getSuccessor(0);
3122 // FIXME: Remove this once limitation on successors is lifted.
3123 assert(L.contains(InLoopSucc) && "Not supported yet!");
3124 assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
3125 auto &Ctx = BB->getContext();
3126
3127 IRBuilder<> Builder(Preheader->getTerminator());
3128 assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
  // If the operand widths differ, zero-extend the narrower side so the
  // injected unsigned compare is well-typed.
3129 if (LHS->getType() != RHS->getType()) {
3130 if (LHS->getType()->getIntegerBitWidth() <
3131 RHS->getType()->getIntegerBitWidth())
3132 LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
3133 else
3134 RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
3135 }
3136 // Do not use builder here: CreateICmp may simplify this into a constant and
3137 // unswitching will break. Better optimize it away later.
3138 auto *InjectedCond =
3139 ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
3140 Preheader->getTerminator()->getIterator());
3141
  // Build the new CFG: BB branches on the injected invariant condition, and
  // the original variant branch is re-created in a fresh check block.
3142 BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
3143 BB->getParent(), InLoopSucc);
3144 Builder.SetInsertPoint(TI);
3145 auto *InvariantBr =
3146 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3147
3148 Builder.SetInsertPoint(CheckBlock);
3149 Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
3150 TI->getSuccessor(1));
3151 TI->eraseFromParent();
3152
3153 // Fixup phis.
3154 for (auto &I : *InLoopSucc) {
3155 auto *PN = dyn_cast<PHINode>(&I);
3156 if (!PN)
3157 break;
3158 auto *Inc = PN->getIncomingValueForBlock(BB);
3159 PN->addIncoming(Inc, CheckBlock);
3160 }
3161 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3162
  // NOTE(review): the rendered source dropped original line 3163 here -- the
  // declaration opening this initializer list, presumably a SmallVector of
  // DominatorTree updates named `DTUpdates` (it is applied below). Confirm
  // against the repository.
3164 { DominatorTree::Insert, BB, CheckBlock },
3165 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3166 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3167 { DominatorTree::Delete, BB, OutOfLoopSucc }
3168 };
3169
3170 DT.applyUpdates(DTUpdates);
3171 if (MSSAU)
3172 MSSAU->applyUpdates(DTUpdates, DT);
3173 L.addBasicBlockToLoop(CheckBlock, LI);
3174
3175#ifndef NDEBUG
3176 DT.verify();
3177 LI.verify(DT);
3178 if (MSSAU && VerifyMemorySSA)
3179 MSSAU->getMemorySSA()->verifyMemorySSA();
3180#endif
3181
3182 // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3183 // higher because we have just inserted a new block. Need to think how to
3184 // adjust the cost of injected candidates when it was first computed.
3185 LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
3186 << " and considering it for unswitching.");
3187 ++NumInvariantConditionsInjected;
  // Return a fresh candidate for the newly injected invariant branch,
  // carrying over the original candidate's cost.
3188 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3189 Candidate.Cost);
3190}
3191
3192/// Given chain of loop branch conditions looking like:
3193/// br (Variant < Invariant1)
3194/// br (Variant < Invariant2)
3195/// br (Variant < Invariant3)
3196/// ...
3197/// collect set of invariant conditions on which we want to unswitch, which
3198/// look like:
3199/// Invariant1 <= Invariant2
3200/// Invariant2 <= Invariant3
3201/// ...
3202/// Though they might not immediately exist in the IR, we can still inject them.
// NOTE(review): the rendered source dropped original lines 3203 and 3205
// here -- the function opener (presumably `static bool
// insertCandidatesWithPendingInjections(`) and a parameter line supplying
// `Pred` and `Compares`, both of which are used below. Confirm against the
// repository.
3204 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
3206 const DominatorTree &DT) {
3207
  // NOTE(review): the rendered source dropped original lines 3208-3209 and
  // 3212 here -- presumably sanity assertions on `Pred` and the definition of
  // `NonStrictPred` (derived from `Pred`), which is used in the loop below.
  // Confirm against the repository.
3210 if (Compares.size() < 2)
3211 return false;
  // For each adjacent pair of compares in dominance order, queue a pending
  // injection of "Next->Invariant (non-strict pred) Prev->Invariant".
3213 for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
3214 Next != Compares.end(); ++Prev, ++Next) {
3215 Value *LHS = Next->Invariant;
3216 Value *RHS = Prev->Invariant;
3217 BasicBlock *InLoopSucc = Prev->InLoopSucc;
3218 InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
3219 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3220 std::nullopt, std::move(ToInject));
3221 UnswitchCandidates.push_back(std::move(Candidate));
3222 }
3223 return true;
3224}
3225
3226/// Collect unswitch candidates by invariant conditions that are not immediately
3227/// present in the loop. However, they can be injected into the code if we
3228/// decide it's profitable.
3229/// An example of such conditions is following:
3230///
3231/// for (...) {
3232/// x = load ...
3233/// if (! x <u C1) break;
3234/// if (! x <u C2) break;
3235/// <do something>
3236/// }
3237///
3238/// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
3239/// C2" automatically implies "x <u C2", so we can get rid of one of
3240/// loop-variant checks in unswitched loop version.
// NOTE(review): the rendered source dropped original lines 3241-3242 here --
// the function opener, presumably `static bool
// collectUnswitchCandidatesWithInjections(
// SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,`.
// Confirm against the repository.
3243 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
3244 const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
3245 const MemorySSAUpdater *MSSAU) {
  // NOTE(review): the rendered source dropped original line 3246 here -- the
  // condition guarding this early return (presumably an off-switch for
  // condition injection). Confirm against the repository.
3247 return false;
3248
3249 if (!DT.isReachableFromEntry(L.getHeader()))
3250 return false;
3251 auto *Latch = L.getLoopLatch();
3252 // Need to have a single latch and a preheader.
3253 if (!Latch)
3254 return false;
3255 assert(L.getLoopPreheader() && "Must have a preheader!");
3256
  // NOTE(review): the rendered source dropped original line 3257 here -- the
  // declaration of `CandidatesULT`, presumably a map from the variant LHS to
  // its collected CompareDescs (it is populated and iterated below). Confirm
  // against the repository.
3258 // Traverse the conditions that dominate latch (and therefore dominate each
3259 // other).
3260 for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
3261 DTN = DTN->getIDom()) {
3262 CmpPredicate Pred;
3263 Value *LHS = nullptr, *RHS = nullptr;
3264 BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
3265 auto *BB = DTN->getBlock();
3266 // Ignore inner loops.
3267 if (LI.getLoopFor(BB) != &L)
3268 continue;
3269 auto *Term = BB->getTerminator();
3270 if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
3271 m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
3272 continue;
3273 if (!LHS->getType()->isIntegerTy())
3274 continue;
3275 canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
3276 L);
3277 if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
3278 continue;
  // NOTE(review): the rendered source dropped original line 3279 here -- the
  // condition guarding this `continue`, presumably a profile-metadata check
  // (see shouldTryInjectBasingOnMetadata above). Confirm against the
  // repository.
3280 continue;
3281 // Strip ZEXT for unsigned predicate.
3282 // TODO: once signed predicates are supported, also strip SEXT.
3283 CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
3284 while (auto *Zext = dyn_cast<ZExtInst>(LHS))
3285 LHS = Zext->getOperand(0);
3286 CandidatesULT[LHS].push_back(Desc);
3287 }
3288
3289 bool Found = false;
3290 for (auto &It : CandidatesULT)
  // NOTE(review): the rendered source dropped original line 3291 here -- the
  // call opener whose argument list appears on line 3292, presumably
  // accumulating into `Found`. Confirm against the repository.
3292 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3293 return Found;
3294}
3295
// NOTE(review): original line 3296 (the signature of
// isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)) is missing from this
// extraction. This predicate performs the legality checks for non-trivial
// unswitching, which duplicates the loop body.
3297 if (!L.isSafeToClone())
3298 return false;
// Reject loops containing instructions that cannot be duplicated: token
// values used across blocks, and convergent calls.
3299 for (auto *BB : L.blocks())
3300 for (auto &I : *BB) {
3301 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
3302 return false;
3303 if (auto *CB = dyn_cast<CallBase>(&I)) {
3304 assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
3305 if (CB->isConvergent())
3306 return false;
3307 }
3308 }
3309
3310 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
3311 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
3312 // irreducible control flow into reducible control flow and introduce new
3313 // loops "out of thin air". If we ever discover important use cases for doing
3314 // this, we can add support to loop unswitch, but it is a lot of complexity
3315 // for what seems little or no real world benefit.
3316 LoopBlocksRPO RPOT(&L);
3317 RPOT.perform(&LI);
// NOTE(review): original line 3318 is missing -- presumably the
// `if (containsIrreducibleCFG<...>(RPOT, LI))` check that this `return false`
// belongs to; confirm against upstream.
3319 return false;
3320
// NOTE(review): original line 3321 (declaration of the ExitBlocks vector
// filled below) is missing here.
3322 L.getUniqueExitBlocks(ExitBlocks);
3323 // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3324 // instruction as we don't know how to split those exit blocks.
3325 // FIXME: We should teach SplitBlock to handle this and remove this
3326 // restriction.
3327 for (auto *ExitBB : ExitBlocks) {
3328 auto It = ExitBB->getFirstNonPHIIt();
// NOTE(review): original line 3329 (the isa<CleanupPadInst>/<CatchSwitchInst>
// test on *It that opens this block) is missing from this extraction.
3330 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
3331 "in exit block\n");
3332 return false;
3333 }
3334 }
3335
3336 return true;
3337}
3338
/// Rank all candidates by estimated post-unswitch cost and return the cheapest.
/// NOTE(review): several original lines are missing from this extraction
/// (3348/3350 declarations of EphValues and BBCostMap, 3357/3359-3360 the
/// CostKind selection ternary, 3390 DTCostMap, 3400 Visited, 3458 the
/// dyn_cast to BranchInst, 3466 the EnableUnswitchCostMultiplier guard).
/// Verify against upstream before modifying.
3339static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3340 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
3341 const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
3342 const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
3343 // Given that unswitching these terminators will require duplicating parts of
3344 // the loop, so we need to be able to model that cost. Compute the ephemeral
3345 // values and set up a data structure to hold per-BB costs. We cache each
3346 // block's cost so that we don't recompute this when considering different
3347 // subsets of the loop for duplication during unswitching.
3349 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
3351
3352 // Compute the cost of each block, as well as the total loop cost. Also, bail
3353 // out if we see instructions which are incompatible with loop unswitching
3354 // (convergent, noduplicate, or cross-basic-block tokens).
3355 // FIXME: We might be able to safely handle some of these in non-duplicated
3356 // regions.
// minsize functions are costed by code size rather than throughput (the
// ternary selecting CostKind around line 3357-3360 is missing here).
3358 L.getHeader()->getParent()->hasMinSize()
3361 InstructionCost LoopCost = 0;
// Sum per-instruction costs per block, skipping ephemeral values (values
// only feeding assumes), and cache each block's total in BBCostMap.
3362 for (auto *BB : L.blocks()) {
3363 InstructionCost Cost = 0;
3364 for (auto &I : *BB) {
3365 if (EphValues.count(&I))
3366 continue;
3367 Cost += TTI.getInstructionCost(&I, CostKind);
3368 }
3369 assert(Cost >= 0 && "Must not have negative costs!");
3370 LoopCost += Cost;
3371 assert(LoopCost >= 0 && "Must not have negative loop costs!");
3372 BBCostMap[BB] = Cost;
3373 }
3374 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
3375
3376 // Now we find the best candidate by searching for the one with the following
3377 // properties in order:
3378 //
3379 // 1) An unswitching cost below the threshold
3380 // 2) The smallest number of duplicated unswitch candidates (to avoid
3381 // creating redundant subsequent unswitching)
3382 // 3) The smallest cost after unswitching.
3383 //
3384 // We prioritize reducing fanout of unswitch candidates provided the cost
3385 // remains below the threshold because this has a multiplicative effect.
3386 //
3387 // This requires memoizing each dominator subtree to avoid redundant work.
3388 //
3389 // FIXME: Need to actually do the number of candidates part above.
3391 // Given a terminator which might be unswitched, computes the non-duplicated
3392 // cost for that terminator.
3393 auto ComputeUnswitchedCost = [&](Instruction &TI,
3394 bool FullUnswitch) -> InstructionCost {
3395 // Unswitching selects unswitches the entire loop.
3396 if (isa<SelectInst>(TI))
3397 return LoopCost;
3398
3399 BasicBlock &BB = *TI.getParent();
3401
3402 InstructionCost Cost = 0;
3403 for (BasicBlock *SuccBB : successors(&BB)) {
3404 // Don't count successors more than once.
3405 if (!Visited.insert(SuccBB).second)
3406 continue;
3407
3408 // If this is a partial unswitch candidate, then it must be a conditional
3409 // branch with a condition of either `or`, `and`, their corresponding
3410 // select forms or partially invariant instructions. In that case, one of
3411 // the successors is necessarily duplicated, so don't even try to remove
3412 // its cost.
3413 if (!FullUnswitch) {
3414 auto &BI = cast<BranchInst>(TI);
3415 Value *Cond = skipTrivialSelect(BI.getCondition());
3416 if (match(Cond, m_LogicalAnd())) {
3417 if (SuccBB == BI.getSuccessor(1))
3418 continue;
3419 } else if (match(Cond, m_LogicalOr())) {
3420 if (SuccBB == BI.getSuccessor(0))
3421 continue;
3422 } else if ((PartialIVInfo.KnownValue->isOneValue() &&
3423 SuccBB == BI.getSuccessor(0)) ||
3424 (!PartialIVInfo.KnownValue->isOneValue() &&
3425 SuccBB == BI.getSuccessor(1)))
3426 continue;
3427 }
3428
3429 // This successor's domtree will not need to be duplicated after
3430 // unswitching if the edge to the successor dominates it (and thus the
3431 // entire tree). This essentially means there is no other path into this
3432 // subtree and so it will end up live in only one clone of the loop.
3433 if (SuccBB->getUniquePredecessor() ||
3434 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
3435 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
3436 })) {
3437 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
3438 assert(Cost <= LoopCost &&
3439 "Non-duplicated cost should never exceed total loop cost!");
3440 }
3441 }
3442
3443 // Now scale the cost by the number of unique successors minus one. We
3444 // subtract one because there is already at least one copy of the entire
3445 // loop. This is computing the new cost of unswitching a condition.
3446 // Note that guards always have 2 unique successors that are implicit and
3447 // will be materialized if we decide to unswitch it.
3448 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
3449 assert(SuccessorsCount > 1 &&
3450 "Cannot unswitch a condition without multiple distinct successors!");
3451 return (LoopCost - Cost) * (SuccessorsCount - 1);
3452 };
3453
// Select the minimum-cost candidate across all collected candidates.
3454 std::optional<NonTrivialUnswitchCandidate> Best;
3455 for (auto &Candidate : UnswitchCandidates) {
3456 Instruction &TI = *Candidate.TI;
3457 ArrayRef<Value *> Invariants = Candidate.Invariants;
// Full unswitch: non-branch terminators, pending injections, or a branch
// whose entire condition is the single invariant.
3459 bool FullUnswitch =
3460 !BI || Candidate.hasPendingInjection() ||
3461 (Invariants.size() == 1 &&
3462 Invariants[0] == skipTrivialSelect(BI->getCondition()));
3463 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3464 // Calculate cost multiplier which is a tool to limit potentially
3465 // exponential behavior of loop-unswitch.
3467 int CostMultiplier =
3468 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3469 assert(
3470 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
3471 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3472 CandidateCost *= CostMultiplier;
3473 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3474 << " (multiplier: " << CostMultiplier << ")"
3475 << " for unswitch candidate: " << TI << "\n");
3476 } else {
3477 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3478 << " for unswitch candidate: " << TI << "\n");
3479 }
3480
3481 if (!Best || CandidateCost < Best->Cost) {
3482 Best = Candidate;
3483 Best->Cost = CandidateCost;
3484 }
3485 }
3486 assert(Best && "Must be!");
3487 return *Best;
3488}
3489
3490// Insert a freeze on an unswitched branch if all is true:
3491// 1. freeze-loop-unswitch-cond option is true
3492// 2. The branch may not execute in the loop pre-transformation. If a branch may
3493// not execute and could cause UB, it would always cause UB if it is hoisted outside
3494// of the loop. Insert a freeze to prevent this case.
3495// 3. The branch condition may be poison or undef
// NOTE(review): original line 3496 (the signature opening of
// shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, ...)) and
// lines 3498-3499 (presumably the FreezeLoopUnswitchCond option guard for the
// `return false` below) are missing from this extraction.
3497 AssumptionCache &AC) {
3500 return false;
3501
// If the terminator is guaranteed to execute on every iteration, hoisting it
// cannot introduce new UB, so no freeze is needed.
3502 ICFLoopSafetyInfo SafetyInfo;
3503 SafetyInfo.computeLoopSafetyInfo(&L);
3504 if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
3505 return false;
3506
3507 Value *Cond;
3508 if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
3509 Cond = skipTrivialSelect(BI->getCondition());
3510 else
// NOTE(review): original lines 3511-3512 are missing -- presumably the
// SwitchInst condition extraction and the `return !isGuaranteedNotToBeUndefOrPoison(`
// opening that the argument list below belongs to; confirm against upstream.
3513 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3514}
3515
// NOTE(review): original lines 3516-3518 (the signature opening of
// unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, ...)) and
// line 3523 (the declaration of the UnswitchCandidates vector filled below)
// are missing from this extraction.
3519 MemorySSAUpdater *MSSAU,
3520 LPMUpdater &LoopUpdater) {
3521 // Collect all invariant conditions within this loop (as opposed to an inner
3522 // loop which would be handled when visiting that inner loop).
3524 IVConditionInfo PartialIVInfo;
3525 Instruction *PartialIVCondBranch = nullptr;
3526 collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3527 PartialIVCondBranch, L, LI, AA, MSSAU);
// Injection-based candidates are gathered unless disabled via loop metadata.
3528 if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable"))
3529 collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
3530 PartialIVCondBranch, L, DT, LI, AA,
3531 MSSAU);
3532 // If we didn't find any candidates, we're done.
3533 if (UnswitchCandidates.empty())
3534 return false;
3535
3536 LLVM_DEBUG(
3537 dbgs() << "Considering " << UnswitchCandidates.size()
3538 << " non-trivial loop invariant conditions for unswitching.\n");
3539
3540 NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
3541 UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3542
3543 assert(Best.TI && "Failed to find loop unswitch candidate");
3544 assert(Best.Cost && "Failed to compute cost");
3545
// Even the cheapest candidate must beat the threshold, or we give up.
3546 if (*Best.Cost >= UnswitchThreshold) {
3547 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
3548 << "\n");
3549 return false;
3550 }
3551
// Materialize any pending injected invariant condition into the IR before
// performing the actual transform.
3552 bool InjectedCondition = false;
3553 if (Best.hasPendingInjection()) {
3554 Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3555 InjectedCondition = true;
3556 }
3557 assert(!Best.hasPendingInjection() &&
3558 "All injections should have been done by now!");
3559
3560 if (Best.TI != PartialIVCondBranch)
3561 PartialIVInfo.InstToDuplicate.clear();
3562
3563 bool InsertFreeze;
3564 if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3565 // If the best candidate is a select, turn it into a branch. Select
3566 // instructions with a poison conditional do not propagate poison, but
3567 // branching on poison causes UB. Insert a freeze on the select
3568 // conditional to prevent UB after turning the select into a branch.
3569 InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
3570 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3571 Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
3572 } else {
3573 // If the best candidate is a guard, turn it into a branch.
3574 if (isGuard(Best.TI))
3575 Best.TI =
3576 turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3577 InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
3578 }
3579
3580 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
3581 << ") terminator: " << *Best.TI << "\n");
3582 unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3583 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3584 InjectedCondition);
3585 return true;
3586}
3587
3588/// Unswitch control flow predicated on loop invariant conditions.
3589///
3590/// This first hoists all branches or switches which are trivial (IE, do not
3591/// require duplicating any part of the loop) out of the loop body. It then
3592/// looks at other loop invariant control flows and tries to unswitch those as
3593/// well by cloning the loop if the result is small enough.
3594///
3595/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3596/// also updated based on the unswitch. The `MSSA` analysis is also updated if
3597/// valid (i.e. its use is enabled).
3598///
3599/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3600/// true, we will attempt to do non-trivial unswitching as well as trivial
3601/// unswitching.
3602///
3603/// The `postUnswitch` function will be run after unswitching is complete
3604/// with information on whether or not the provided loop remains a loop and
3605/// a list of new sibling loops created.
3606///
3607/// If `SE` is non-null, we will update that analysis based on the unswitching
3608/// done.
// NOTE(review): original line 3610 (the `AssumptionCache &AC, AAResults &AA,`
// parameter line of this signature) is missing from this extraction.
3609static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
3611 TargetTransformInfo &TTI, bool Trivial,
3612 bool NonTrivial, ScalarEvolution *SE,
3613 MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater) {
3614 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3615 "Loops must be in LCSSA form before unswitching.");
3616
3617 // Must be in loop simplified form: we need a preheader and dedicated exits.
3618 if (!L.isLoopSimplifyForm())
3619 return false;
3620
3621 // Try trivial unswitch first before loop over other basic blocks in the loop.
3622 if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3623 // If we unswitched successfully we will want to clean up the loop before
3624 // processing it further so just mark it as unswitched and return.
3625 postUnswitch(L, LoopUpdater, L.getName(),
3626 /*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false,
3627 /*InjectedCondition*/ false, {});
3628 return true;
3629 }
3630
3631 const Function *F = L.getHeader()->getParent();
3632
3633 // Check whether we should continue with non-trivial conditions.
3634 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3635 // unswitching for testing and debugging.
3636 // NonTrivial: Parameter that enables non-trivial unswitching for this
3637 // invocation of the transform. But this should be allowed only
3638 // for targets without branch divergence.
3639 //
3640 // FIXME: If divergence analysis becomes available to a loop
3641 // transform, we should allow unswitching for non-trivial uniform
3642 // branches even on targets that have divergence.
3643 // https://bugs.llvm.org/show_bug.cgi?id=48819
3644 bool ContinueWithNonTrivial =
3645 EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F));
3646 if (!ContinueWithNonTrivial)
3647 return false;
3648
3649 // Skip non-trivial unswitching for optsize functions.
3650 if (F->hasOptSize())
3651 return false;
3652
3653 // Perform legality checks.
// NOTE(review): original line 3654 is missing -- presumably the
// `if (!isSafeForNoNTrivialUnswitching(L, LI))` call that this `return false`
// belongs to; confirm against upstream.
3655 return false;
3656
3657 // For non-trivial unswitching, because it often creates new loops, we rely on
3658 // the pass manager to iterate on the loops rather than trying to immediately
3659 // reach a fixed point. There is no substantial advantage to iterating
3660 // internally, and if any of the new loops are simplified enough to contain
3661 // trivial unswitching we want to prefer those.
3662
3663 // Try to unswitch the best invariant condition. We prefer this full unswitch to
3664 // a partial unswitch when possible below the threshold.
3665 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, SE, MSSAU, LoopUpdater))
3666 return true;
3667
3668 // No other opportunities to unswitch.
3669 return false;
3670}
3671
// NOTE(review): original lines 3672-3673 (the signature opening of
// SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
// LoopStandardAnalysisResults &AR, ...)) are missing from this extraction.
// This is the new-PM pass entry point: it wires the standard loop analyses
// into unswitchLoop() and reports preserved analyses.
3674 LPMUpdater &U) {
3675 Function &F = *L.getHeader()->getParent();
3676 (void)F;
3677 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3678 << "\n");
3679
// MemorySSA is optional; when present it is verified (in verify builds)
// before and after the transform.
3680 std::optional<MemorySSAUpdater> MSSAU;
3681 if (AR.MSSA) {
3682 MSSAU = MemorySSAUpdater(AR.MSSA);
3683 if (VerifyMemorySSA)
3684 AR.MSSA->verifyMemorySSA();
3685 }
3686 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3687 &AR.SE, MSSAU ? &*MSSAU : nullptr, U))
3688 return PreservedAnalyses::all();
3689
3690 if (AR.MSSA && VerifyMemorySSA)
3691 AR.MSSA->verifyMemorySSA();
3692
3693 // Historically this pass has had issues with the dominator tree so verify it
3694 // in asserts builds.
3695 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
3696
3697 auto PA = getLoopPassPreservedAnalyses();
3698 if (AR.MSSA)
3699 PA.preserve<MemorySSAAnalysis>();
3700 return PA;
3701}
3702
// NOTE(review): original lines 3703 and 3705 (the printPipeline signature
// opening and the base-class printPipeline call this argument list belongs
// to) are missing from this extraction. Prints the pass's textual pipeline
// form, e.g. "simple-loop-unswitch<nontrivial;trivial>".
3704 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3706 OS, MapClassName2PassName);
3707
3708 OS << '<';
3709 OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3710 OS << (Trivial ? "" : "no-") << "trivial";
3711 OS << '>';
3712}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
static Value * getCondition(Instruction *I)
Module.h This file contains the declarations for the Module class.
This defines the Use class.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:231
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
#define T
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
uint64_t IntrinsicInst * II
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU)
Copy a set of loop invariant values, and conditionally branch on them.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT)
Copy a set of loop invariant values ToDuplicate and insert them at the end of BB and conditionally br...
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
static void canonicalizeForInvariantConditionInjection(CmpPredicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
static cl::opt< int > UnswitchParentBlocksDiv("unswitch-parent-blocks-div", cl::init(8), cl::Hidden, cl::desc("Outer loop size divisor for cost multiplier."))
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
Unswitch control flow predicated on loop invariant conditions.
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
iterator begin() const
Definition ArrayRef.h:135
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
size_t size() const
Definition BasicBlock.h:480
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition BasicBlock.h:386
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:871
static LLVM_ABI bool isStrictPredicate(Predicate predicate)
This is a static version that you can use without an instruction available.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
bool isUnsigned() const
Definition InstrTypes.h:936
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getCompilerGenerated()
Definition DebugLoc.h:163
static DebugLoc getDropped()
Definition DebugLoc.h:164
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator begin()
Definition DenseMap.h:78
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:233
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2645
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1551
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void dropLocation()
Drop the instruction's debug location.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator end() const
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
StringRef getName() const
Definition LoopInfo.h:389
Metadata node.
Definition Metadata.h:1078
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:608
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition MemorySSA.h:371
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:936
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
LLVM_ABI void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
LLVM_ABI void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
LLVM_ABI void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
LLVM_ABI void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
LLVM_ABI void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
LLVM_ABI MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point, bool CreationMustSucceed=true)
Create a MemoryAccess in MemorySSA at a specified point in a block.
LLVM_ABI void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
LLVM_ABI void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
LLVM_ABI void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
LLVM_ABI void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition MemorySSA.h:768
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
The main scalar evolution driver.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:261
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition SetVector.h:105
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI Instruction::InstListType::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Multiway switch.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
void push_back(EltTy NewVal)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition ValueMap.h:167
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition ValueMap.h:156
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ?
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2058
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
auto cast_or_null(const Y &Val)
Definition Casting.h:714
LLVM_ABI MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Op::Description Desc
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:95
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI)
Return true if the control flow in RPOTraversal is irreducible.
Definition CFG.h:149
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition STLExtras.h:852
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:51
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
TargetTransformInfo TTI
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition LoopUtils.cpp:58
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
LLVM_ABI llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
auto predecessors(const MachineBasicBlock *BB)
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
LLVM_ABI std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
LLVM_ABI bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition LCSSA.cpp:427
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Struct to hold information about a partially invariant condition.
Definition LoopUtils.h:588
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition LoopUtils.h:591
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition LoopUtils.h:594
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70