LLVM 23.0.0git
SimpleLoopUnswitch.cpp
Go to the documentation of this file.
1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
20#include "llvm/Analysis/CFG.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/Dominators.h"
37#include "llvm/IR/Function.h"
38#include "llvm/IR/IRBuilder.h"
39#include "llvm/IR/InstrTypes.h"
40#include "llvm/IR/Instruction.h"
43#include "llvm/IR/MDBuilder.h"
44#include "llvm/IR/Module.h"
47#include "llvm/IR/Use.h"
48#include "llvm/IR/Value.h"
51#include "llvm/Support/Debug.h"
62#include <algorithm>
63#include <cassert>
64#include <iterator>
65#include <numeric>
66#include <optional>
67#include <utility>
68
69#define DEBUG_TYPE "simple-loop-unswitch"
70
71using namespace llvm;
72using namespace llvm::PatternMatch;
73
74STATISTIC(NumBranches, "Number of branches unswitched");
75STATISTIC(NumSwitches, "Number of switches unswitched");
76STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
77STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
78STATISTIC(NumTrivial, "Number of unswitches that are trivial");
80 NumCostMultiplierSkipped,
81 "Number of unswitch candidates that had their cost multiplier skipped");
82STATISTIC(NumInvariantConditionsInjected,
83 "Number of invariant conditions injected and unswitched");
84
85namespace llvm {
87 "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
88 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
89 "following the configuration passed into the pass."));
90
91static cl::opt<int>
92 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
93 cl::desc("The cost threshold for unswitching a loop."));
94
96 "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
97 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
98 "explosion in nontrivial unswitch."));
100 "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
101 cl::desc("Toplevel siblings divisor for cost multiplier."));
103 "unswitch-parent-blocks-div", cl::init(8), cl::Hidden,
104 cl::desc("Outer loop size divisor for cost multiplier."));
106 "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
107 cl::desc("Number of unswitch candidates that are ignored when calculating "
108 "cost multiplier."));
110 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
111 cl::desc("If enabled, simple loop unswitching will also consider "
112 "llvm.experimental.guard intrinsics as unswitch candidates."));
114 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
115 cl::init(false), cl::Hidden,
116 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
117 "null checks to save time analyzing if we can keep it."));
119 MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
120 cl::desc("Max number of memory uses to explore during "
121 "partial unswitching analysis"),
122 cl::init(100), cl::Hidden);
124 "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
125 cl::desc("If enabled, the freeze instruction will be added to condition "
126 "of loop unswitch to prevent miscompilation."));
127
129 "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
130 cl::desc("Whether we should inject new invariants and unswitch them to "
131 "eliminate some existing (non-invariant) conditions."),
132 cl::init(true));
133
135 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
137 cl::desc("Only try to inject loop invariant conditions and "
138 "unswitch on them to eliminate branches that are "
139 "not-taken 1/<this option> times or less."),
140 cl::init(16));
141
142static cl::opt<bool> EstimateProfile("simple-loop-unswitch-estimate-profile",
143 cl::Hidden, cl::init(true));
145} // namespace llvm
146
148namespace {
149struct CompareDesc {
150 CondBrInst *Term;
151 Value *Invariant;
152 BasicBlock *InLoopSucc;
153
154 CompareDesc(CondBrInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
155 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
156};
157
158struct InjectedInvariant {
159 ICmpInst::Predicate Pred;
160 Value *LHS;
161 Value *RHS;
162 BasicBlock *InLoopSucc;
163
164 InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
165 BasicBlock *InLoopSucc)
166 : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
167};
168
169struct NonTrivialUnswitchCandidate {
170 Instruction *TI = nullptr;
171 TinyPtrVector<Value *> Invariants;
172 std::optional<InstructionCost> Cost;
173 std::optional<InjectedInvariant> PendingInjection;
174 NonTrivialUnswitchCandidate(
175 Instruction *TI, ArrayRef<Value *> Invariants,
176 std::optional<InstructionCost> Cost = std::nullopt,
177 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
178 : TI(TI), Invariants(Invariants), Cost(Cost),
179 PendingInjection(PendingInjection) {};
180
181 bool hasPendingInjection() const { return PendingInjection.has_value(); }
182};
183} // end anonymous namespace.
184
185// Helper to skip (select x, true, false), which matches both a logical AND and
186// OR and can confuse code that tries to determine if \p Cond is either a
187// logical AND or OR but not both.
189 Value *CondNext;
190 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
191 Cond = CondNext;
192 return Cond;
193}
194
195/// Collect all of the loop invariant input values transitively used by the
196/// homogeneous instruction graph from a given root.
197///
198/// This essentially walks from a root recursively through loop variant operands
199/// which have perform the same logical operation (AND or OR) and finds all
200/// inputs which are loop invariant. For some operations these can be
201/// re-associated and unswitched out of the loop entirely.
204 const LoopInfo &LI) {
205 assert(!L.isLoopInvariant(&Root) &&
206 "Only need to walk the graph if root itself is not invariant.");
207 TinyPtrVector<Value *> Invariants;
208
209 bool IsRootAnd = match(&Root, m_LogicalAnd());
210 bool IsRootOr = match(&Root, m_LogicalOr());
211
212 // Build a worklist and recurse through operators collecting invariants.
215 Worklist.push_back(&Root);
216 Visited.insert(&Root);
217 do {
218 Instruction &I = *Worklist.pop_back_val();
219 for (Value *OpV : I.operand_values()) {
220 // Skip constants as unswitching isn't interesting for them.
221 if (isa<Constant>(OpV))
222 continue;
223
224 // Add it to our result if loop invariant.
225 if (L.isLoopInvariant(OpV)) {
226 Invariants.push_back(OpV);
227 continue;
228 }
229
230 // If not an instruction with the same opcode, nothing we can do.
232
233 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
234 (IsRootOr && match(OpI, m_LogicalOr())))) {
235 // Visit this operand.
236 if (Visited.insert(OpI).second)
237 Worklist.push_back(OpI);
238 }
239 }
240 } while (!Worklist.empty());
241
242 return Invariants;
243}
244
245static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
246 Constant &Replacement) {
247 assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
248
249 // Replace uses of LIC in the loop with the given constant.
250 // We use make_early_inc_range as set invalidates the iterator.
251 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
252 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
253
254 // Replace this use within the loop body.
255 if (UserI && L.contains(UserI))
256 U.set(&Replacement);
257 }
258}
259
260/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
261/// incoming values along this edge.
263 const BasicBlock &ExitingBB,
264 const BasicBlock &ExitBB) {
265 for (const Instruction &I : ExitBB) {
266 auto *PN = dyn_cast<PHINode>(&I);
267 if (!PN)
268 // No more PHIs to check.
269 return true;
270
271 // If the incoming value for this edge isn't loop invariant the unswitch
272 // won't be trivial.
273 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
274 return false;
275 }
276 llvm_unreachable("Basic blocks should never be empty!");
277}
278
279/// Copy a set of loop invariant values \p Invariants and insert them at the
280/// end of \p BB and conditionally branch on the copied condition. We only
281/// branch on a single value.
282/// We attempt to estimate the profile of the resulting conditional branch from
283/// \p ComputeProfFrom, which is the original conditional branch we're
284/// unswitching.
285/// When \p Direction is true, the \p Invariants form a disjunction, and the
286/// branch conditioned on it exits the loop on the "true" case. When \p
287/// Direction is false, the \p Invariants form a conjunction and the branch
288/// exits on the "false" case.
290 BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
291 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
292 const Instruction *I, AssumptionCache *AC, const DominatorTree &DT,
293 const CondBrInst &ComputeProfFrom) {
294
295 SmallVector<uint32_t> BranchWeights;
296 bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes &&
297 extractBranchWeights(ComputeProfFrom, BranchWeights);
298 // If Direction is true, that means we had a disjunction and that the "true"
299 // case exits. The probability of the disjunction of the subset of terms is at
300 // most as high as the original one. So, if the probability is higher than the
301 // one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5,
302 // but if it's lower, we will use the original probability.
303 // Conversely, if Direction is false, that means we had a conjunction, and the
304 // probability of exiting is captured in the second branch weight. That
305 // probability is a disjunction (of the negation of the original terms). The
306 // same reasoning applies as above.
307 // Issue #165649: should we expect BFI to conserve, and use that to calculate
308 // the branch weights?
309 if (HasBranchWeights &&
310 static_cast<double>(BranchWeights[Direction ? 0 : 1]) /
311 static_cast<double>(sum_of(BranchWeights)) >
312 0.5)
313 HasBranchWeights = false;
314
315 IRBuilder<> IRB(&BB);
317
318 SmallVector<Value *> FrozenInvariants;
319 for (Value *Inv : Invariants) {
320 if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
321 Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
322 FrozenInvariants.push_back(Inv);
323 }
324
325 Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
326 : IRB.CreateAnd(FrozenInvariants);
327 auto *BR = IRB.CreateCondBr(
328 Cond, Direction ? &UnswitchedSucc : &NormalSucc,
329 Direction ? &NormalSucc : &UnswitchedSucc,
330 HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof)
331 : nullptr);
332 if (!HasBranchWeights)
334}
335
336/// Copy a set of loop invariant values, and conditionally branch on them.
338 BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
339 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
340 MemorySSAUpdater *MSSAU, const CondBrInst &OriginalBranch) {
342 for (auto *Val : reverse(ToDuplicate)) {
343 Instruction *Inst = cast<Instruction>(Val);
344 Instruction *NewInst = Inst->clone();
345
346 if (const DebugLoc &DL = Inst->getDebugLoc())
347 mapAtomInstance(DL, VMap);
348
349 NewInst->insertInto(&BB, BB.end());
350 RemapInstruction(NewInst, VMap,
352 VMap[Val] = NewInst;
353
354 if (!MSSAU)
355 continue;
356
357 MemorySSA *MSSA = MSSAU->getMemorySSA();
358 if (auto *MemUse =
360 auto *DefiningAccess = MemUse->getDefiningAccess();
361 // Get the first defining access before the loop.
362 while (L.contains(DefiningAccess->getBlock())) {
363 // If the defining access is a MemoryPhi, get the incoming
364 // value for the pre-header as defining access.
365 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
366 DefiningAccess =
367 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
368 else
369 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
370 }
371 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
372 NewInst->getParent(),
374 }
375 }
376
377 IRBuilder<> IRB(&BB);
379 Value *Cond = VMap[ToDuplicate[0]];
380 // The expectation is that ToDuplicate[0] is the condition used by the
381 // OriginalBranch, case in which we can clone the profile metadata from there.
382 auto *ProfData =
384 ToDuplicate[0] == skipTrivialSelect(OriginalBranch.getCondition())
385 ? OriginalBranch.getMetadata(LLVMContext::MD_prof)
386 : nullptr;
387 auto *BR =
388 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
389 Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
390 if (!ProfData)
392}
393
394/// Rewrite the PHI nodes in an unswitched loop exit basic block.
395///
396/// Requires that the loop exit and unswitched basic block are the same, and
397/// that the exiting block was a unique predecessor of that block. Rewrites the
398/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
399/// PHI nodes from the old preheader that now contains the unswitched
400/// terminator.
402 BasicBlock &OldExitingBB,
403 BasicBlock &OldPH) {
404 for (PHINode &PN : UnswitchedBB.phis()) {
405 // When the loop exit is directly unswitched we just need to update the
406 // incoming basic block. We loop to handle weird cases with repeated
407 // incoming blocks, but expect to typically only have one operand here.
408 for (auto i : seq<int>(0, PN.getNumOperands())) {
409 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
410 "Found incoming block different from unique predecessor!");
411 PN.setIncomingBlock(i, &OldPH);
412 }
413 }
414}
415
416/// Rewrite the PHI nodes in the loop exit basic block and the split off
417/// unswitched block.
418///
419/// Because the exit block remains an exit from the loop, this rewrites the
420/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
421/// nodes into the unswitched basic block to select between the value in the
422/// old preheader and the loop exit.
424 BasicBlock &UnswitchedBB,
425 BasicBlock &OldExitingBB,
426 BasicBlock &OldPH,
427 bool FullUnswitch) {
428 assert(&ExitBB != &UnswitchedBB &&
429 "Must have different loop exit and unswitched blocks!");
430 BasicBlock::iterator InsertPt = UnswitchedBB.begin();
431 for (PHINode &PN : ExitBB.phis()) {
432 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
433 PN.getName() + ".split");
434 NewPN->insertBefore(InsertPt);
435
436 // Walk backwards over the old PHI node's inputs to minimize the cost of
437 // removing each one. We have to do this weird loop manually so that we
438 // create the same number of new incoming edges in the new PHI as we expect
439 // each case-based edge to be included in the unswitched switch in some
440 // cases.
441 // FIXME: This is really, really gross. It would be much cleaner if LLVM
442 // allowed us to create a single entry for a predecessor block without
443 // having separate entries for each "edge" even though these edges are
444 // required to produce identical results.
445 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
446 if (PN.getIncomingBlock(i) != &OldExitingBB)
447 continue;
448
449 Value *Incoming = PN.getIncomingValue(i);
450 if (FullUnswitch)
451 // No more edge from the old exiting block to the exit block.
452 PN.removeIncomingValue(i);
453
454 NewPN->addIncoming(Incoming, &OldPH);
455 }
456
457 // Now replace the old PHI with the new one and wire the old one in as an
458 // input to the new one.
459 PN.replaceAllUsesWith(NewPN);
460 NewPN->addIncoming(&PN, &ExitBB);
461 }
462}
463
464/// Hoist the current loop up to the innermost loop containing a remaining exit.
465///
466/// Because we've removed an exit from the loop, we may have changed the set of
467/// loops reachable and need to move the current loop up the loop nest or even
468/// to an entirely separate nest.
469static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
470 DominatorTree &DT, LoopInfo &LI,
471 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
472 // If the loop is already at the top level, we can't hoist it anywhere.
473 Loop *OldParentL = L.getParentLoop();
474 if (!OldParentL)
475 return;
476
478 L.getExitBlocks(Exits);
479 Loop *NewParentL = nullptr;
480 for (auto *ExitBB : Exits)
481 if (Loop *ExitL = LI.getLoopFor(ExitBB))
482 if (!NewParentL || NewParentL->contains(ExitL))
483 NewParentL = ExitL;
484
485 if (NewParentL == OldParentL)
486 return;
487
488 // The new parent loop (if different) should always contain the old one.
489 if (NewParentL)
490 assert(NewParentL->contains(OldParentL) &&
491 "Can only hoist this loop up the nest!");
492
493 // The preheader will need to move with the body of this loop. However,
494 // because it isn't in this loop we also need to update the primary loop map.
495 assert(OldParentL == LI.getLoopFor(&Preheader) &&
496 "Parent loop of this loop should contain this loop's preheader!");
497 LI.changeLoopFor(&Preheader, NewParentL);
498
499 // Remove this loop from its old parent.
500 OldParentL->removeChildLoop(&L);
501
502 // Add the loop either to the new parent or as a top-level loop.
503 if (NewParentL)
504 NewParentL->addChildLoop(&L);
505 else
506 LI.addTopLevelLoop(&L);
507
508 // Remove this loops blocks from the old parent and every other loop up the
509 // nest until reaching the new parent. Also update all of these
510 // no-longer-containing loops to reflect the nesting change.
511 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
512 OldContainingL = OldContainingL->getParentLoop()) {
513 llvm::erase_if(OldContainingL->getBlocksVector(),
514 [&](const BasicBlock *BB) {
515 return BB == &Preheader || L.contains(BB);
516 });
517
518 OldContainingL->getBlocksSet().erase(&Preheader);
519 for (BasicBlock *BB : L.blocks())
520 OldContainingL->getBlocksSet().erase(BB);
521
522 // Because we just hoisted a loop out of this one, we have essentially
523 // created new exit paths from it. That means we need to form LCSSA PHI
524 // nodes for values used in the no-longer-nested loop.
525 formLCSSA(*OldContainingL, DT, &LI, SE);
526
527 // We shouldn't need to form dedicated exits because the exit introduced
528 // here is the (just split by unswitching) preheader. However, after trivial
529 // unswitching it is possible to get new non-dedicated exits out of parent
530 // loop so let's conservatively form dedicated exit blocks and figure out
531 // if we can optimize later.
532 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
533 /*PreserveLCSSA*/ true);
534 }
535}
536
537// Return the top-most loop containing ExitBB and having ExitBB as exiting block
538// or the loop containing ExitBB, if there is no parent loop containing ExitBB
539// as exiting block.
541 const LoopInfo &LI) {
542 Loop *TopMost = LI.getLoopFor(ExitBB);
543 Loop *Current = TopMost;
544 while (Current) {
545 if (Current->isLoopExiting(ExitBB))
546 TopMost = Current;
547 Current = Current->getParentLoop();
548 }
549 return TopMost;
550}
551
552/// Unswitch a trivial branch if the condition is loop invariant.
553///
554/// This routine should only be called when loop code leading to the branch has
555/// been validated as trivial (no side effects). This routine checks if the
556/// condition is invariant and one of the successors is a loop exit. This
557/// allows us to unswitch without duplicating the loop, making it trivial.
558///
559/// If this routine fails to unswitch the branch it returns false.
560///
561/// If the branch can be unswitched, this routine splits the preheader and
562/// hoists the branch above that split. Preserves loop simplified form
563/// (splitting the exit block as necessary). It simplifies the branch within
564/// the loop to an unconditional branch but doesn't remove it entirely. Further
565/// cleanup can be done with some simplifycfg like pass.
566///
567/// If `SE` is not null, it will be updated based on the potential loop SCEVs
568/// invalidated by this.
570 LoopInfo &LI, ScalarEvolution *SE,
571 MemorySSAUpdater *MSSAU) {
572 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
573
574 // The loop invariant values that we want to unswitch.
575 TinyPtrVector<Value *> Invariants;
576
577 // When true, we're fully unswitching the branch rather than just unswitching
578 // some input conditions to the branch.
579 bool FullUnswitch = false;
580
582 if (L.isLoopInvariant(Cond)) {
583 Invariants.push_back(Cond);
584 FullUnswitch = true;
585 } else {
586 if (auto *CondInst = dyn_cast<Instruction>(Cond))
587 Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
588 if (Invariants.empty()) {
589 LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
590 return false;
591 }
592 }
593
594 // Check that one of the branch's successors exits, and which one.
595 bool ExitDirection = true;
596 int LoopExitSuccIdx = 0;
597 auto *LoopExitBB = BI.getSuccessor(0);
598 if (L.contains(LoopExitBB)) {
599 ExitDirection = false;
600 LoopExitSuccIdx = 1;
601 LoopExitBB = BI.getSuccessor(1);
602 if (L.contains(LoopExitBB)) {
603 LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
604 return false;
605 }
606 }
607 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
608 auto *ParentBB = BI.getParent();
609 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
610 LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
611 return false;
612 }
613
614 // When unswitching only part of the branch's condition, we need the exit
615 // block to be reached directly from the partially unswitched input. This can
616 // be done when the exit block is along the true edge and the branch condition
617 // is a graph of `or` operations, or the exit block is along the false edge
618 // and the condition is a graph of `and` operations.
619 if (!FullUnswitch) {
620 if (ExitDirection ? !match(Cond, m_LogicalOr())
621 : !match(Cond, m_LogicalAnd())) {
622 LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
623 "non-full unswitch!\n");
624 return false;
625 }
626 }
627
628 LLVM_DEBUG({
629 dbgs() << " unswitching trivial invariant conditions for: " << BI
630 << "\n";
631 for (Value *Invariant : Invariants) {
632 dbgs() << " " << *Invariant << " == true";
633 if (Invariant != Invariants.back())
634 dbgs() << " ||";
635 dbgs() << "\n";
636 }
637 });
638
639 // If we have scalar evolutions, we need to invalidate them including this
640 // loop, the loop containing the exit block and the topmost parent loop
641 // exiting via LoopExitBB.
642 if (SE) {
643 if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
644 SE->forgetLoop(ExitL);
645 else
646 // Forget the entire nest as this exits the entire nest.
647 SE->forgetTopmostLoop(&L);
649 }
650
651 if (MSSAU && VerifyMemorySSA)
652 MSSAU->getMemorySSA()->verifyMemorySSA();
653
654 // Split the preheader, so that we know that there is a safe place to insert
655 // the conditional branch. We will change the preheader to have a conditional
656 // branch on LoopCond.
657 BasicBlock *OldPH = L.getLoopPreheader();
658 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
659
660 // Now that we have a place to insert the conditional branch, create a place
661 // to branch to: this is the exit block out of the loop that we are
662 // unswitching. We need to split this if there are other loop predecessors.
663 // Because the loop is in simplified form, *any* other predecessor is enough.
664 BasicBlock *UnswitchedBB;
665 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
666 assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
667 "A branch's parent isn't a predecessor!");
668 UnswitchedBB = LoopExitBB;
669 } else {
670 UnswitchedBB =
671 SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU, "");
672 }
673
674 if (MSSAU && VerifyMemorySSA)
675 MSSAU->getMemorySSA()->verifyMemorySSA();
676
677 // Actually move the invariant uses into the unswitched position. If possible,
678 // we do this by moving the instructions, but when doing partial unswitching
679 // we do it by building a new merge of the values in the unswitched position.
680 OldPH->getTerminator()->eraseFromParent();
681 if (FullUnswitch) {
682 // If fully unswitching, we can use the existing branch instruction.
683 // Splice it into the old PH to gate reaching the new preheader and re-point
684 // its successors.
685 BI.moveBefore(*OldPH, OldPH->end());
686 BI.setCondition(Cond);
687 if (MSSAU) {
688 // Temporarily clone the terminator, to make MSSA update cheaper by
689 // separating "insert edge" updates from "remove edge" ones.
690 BI.clone()->insertInto(ParentBB, ParentBB->end());
691 } else {
692 // Create a new unconditional branch that will continue the loop as a new
693 // terminator.
694 Instruction *NewBI = UncondBrInst::Create(ContinueBB, ParentBB);
695 NewBI->setDebugLoc(BI.getDebugLoc());
696 }
697 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
698 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
699 } else {
700 // Only unswitching a subset of inputs to the condition, so we will need to
701 // build a new branch that merges the invariant inputs.
702 if (ExitDirection)
704 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
705 "condition!");
706 else
708 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
709 " condition!");
711 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
712 FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT, BI);
713 }
714
715 // Update the dominator tree with the added edge.
716 DT.insertEdge(OldPH, UnswitchedBB);
717
718 // After the dominator tree was updated with the added edge, update MemorySSA
719 // if available.
720 if (MSSAU) {
722 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
723 MSSAU->applyInsertUpdates(Updates, DT);
724 }
725
726 // Finish updating dominator tree and memory ssa for full unswitch.
727 if (FullUnswitch) {
728 if (MSSAU) {
729 Instruction *Term = ParentBB->getTerminator();
730 // Remove the cloned branch instruction and create unconditional branch
731 // now.
732 Instruction *NewBI = UncondBrInst::Create(ContinueBB, ParentBB);
733 NewBI->setDebugLoc(Term->getDebugLoc());
734 Term->eraseFromParent();
735 MSSAU->removeEdge(ParentBB, LoopExitBB);
736 }
737 DT.deleteEdge(ParentBB, LoopExitBB);
738 }
739
740 if (MSSAU && VerifyMemorySSA)
741 MSSAU->getMemorySSA()->verifyMemorySSA();
742
743 // Rewrite the relevant PHI nodes.
744 if (UnswitchedBB == LoopExitBB)
745 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
746 else
747 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
748 *ParentBB, *OldPH, FullUnswitch);
749
750 // The constant we can replace all of our invariants with inside the loop
751 // body. If any of the invariants have a value other than this the loop won't
752 // be entered.
753 ConstantInt *Replacement = ExitDirection
754 ? ConstantInt::getFalse(BI.getContext())
755 : ConstantInt::getTrue(BI.getContext());
756
757 // Since this is an i1 condition we can also trivially replace uses of it
758 // within the loop with a constant.
759 for (Value *Invariant : Invariants)
760 replaceLoopInvariantUses(L, Invariant, *Replacement);
761
762 // If this was full unswitching, we may have changed the nesting relationship
763 // for this loop so hoist it to its correct parent if needed.
764 if (FullUnswitch)
765 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
766
767 if (MSSAU && VerifyMemorySSA)
768 MSSAU->getMemorySSA()->verifyMemorySSA();
769
770 LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
771 ++NumTrivial;
772 ++NumBranches;
773 return true;
774}
775
776/// Unswitch a trivial switch if the condition is loop invariant.
777///
778/// This routine should only be called when loop code leading to the switch has
779/// been validated as trivial (no side effects). This routine checks if the
780/// condition is invariant and that at least one of the successors is a loop
781/// exit. This allows us to unswitch without duplicating the loop, making it
782/// trivial.
783///
784/// If this routine fails to unswitch the switch it returns false.
785///
786/// If the switch can be unswitched, this routine splits the preheader and
787/// copies the switch above that split. If the default case is one of the
788/// exiting cases, it copies the non-exiting cases and points them at the new
789/// preheader. If the default case is not exiting, it copies the exiting cases
790/// and points the default at the preheader. It preserves loop simplified form
791/// (splitting the exit blocks as necessary). It simplifies the switch within
792/// the loop by removing now-dead cases. If the default case is one of those
793/// unswitched, it replaces its destination with a new basic block containing
794/// only unreachable. Such basic blocks, while technically loop exits, are not
795/// considered for unswitching so this is a stable transform and the same
796/// switch will not be revisited. If after unswitching there is only a single
797/// in-loop successor, the switch is further simplified to an unconditional
798/// branch. Still more cleanup can be done with some simplifycfg like pass.
799///
800/// If `SE` is not null, it will be updated based on the potential loop SCEVs
801/// invalidated by this.
803 LoopInfo &LI, ScalarEvolution *SE,
804 MemorySSAUpdater *MSSAU) {
805 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
806 Value *LoopCond = SI.getCondition();
807
808 // If this isn't switching on an invariant condition, we can't unswitch it.
809 if (!L.isLoopInvariant(LoopCond))
810 return false;
811
812 auto *ParentBB = SI.getParent();
813
814 // The same check must be used both for the default and the exit cases. We
815 // should never leave edges from the switch instruction to a basic block that
816 // we are unswitching, hence the condition used to determine the default case
817 // needs to also be used to populate ExitCaseIndices, which is then used to
818 // remove cases from the switch.
819 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
820 // BBToCheck is not an exit block if it is inside loop L.
821 if (L.contains(&BBToCheck))
822 return false;
823 // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
824 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
825 return false;
826 // We do not unswitch a block that only has an unreachable statement, as
827 // it's possible this is a previously unswitched block. Only unswitch if
828 // either the terminator is not unreachable, or, if it is, it's not the only
829 // instruction in the block.
830 auto *TI = BBToCheck.getTerminator();
831 bool isUnreachable = isa<UnreachableInst>(TI);
832 return !isUnreachable || &*BBToCheck.getFirstNonPHIOrDbg() != TI;
833 };
834
835 SmallVector<int, 4> ExitCaseIndices;
836 for (auto Case : SI.cases())
837 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
838 ExitCaseIndices.push_back(Case.getCaseIndex());
839 BasicBlock *DefaultExitBB = nullptr;
842 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
843 DefaultExitBB = SI.getDefaultDest();
844 } else if (ExitCaseIndices.empty())
845 return false;
846
847 LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
848
849 if (MSSAU && VerifyMemorySSA)
850 MSSAU->getMemorySSA()->verifyMemorySSA();
851
852 // We may need to invalidate SCEVs for the outermost loop reached by any of
853 // the exits.
854 Loop *OuterL = &L;
855
856 if (DefaultExitBB) {
857 // Check the loop containing this exit.
858 Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI);
859 if (!ExitL || ExitL->contains(OuterL))
860 OuterL = ExitL;
861 }
862 for (unsigned Index : ExitCaseIndices) {
863 auto CaseI = SI.case_begin() + Index;
864 // Compute the outer loop from this exit.
865 Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
866 if (!ExitL || ExitL->contains(OuterL))
867 OuterL = ExitL;
868 }
869
870 if (SE) {
871 if (OuterL)
872 SE->forgetLoop(OuterL);
873 else
874 SE->forgetTopmostLoop(&L);
875 }
876
877 if (DefaultExitBB) {
878 // Clear out the default destination temporarily to allow accurate
879 // predecessor lists to be examined below.
880 SI.setDefaultDest(nullptr);
881 }
882
883 // Store the exit cases into a separate data structure and remove them from
884 // the switch.
885 SmallVector<std::tuple<ConstantInt *, BasicBlock *,
887 4> ExitCases;
888 ExitCases.reserve(ExitCaseIndices.size());
890 // We walk the case indices backwards so that we remove the last case first
891 // and don't disrupt the earlier indices.
892 for (unsigned Index : reverse(ExitCaseIndices)) {
893 auto CaseI = SI.case_begin() + Index;
894 // Save the value of this case.
895 auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
896 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
897 // Delete the unswitched cases.
898 SIW.removeCase(CaseI);
899 }
900
901 // Check if after this all of the remaining cases point at the same
902 // successor.
903 BasicBlock *CommonSuccBB = nullptr;
904 if (SI.getNumCases() > 0 &&
905 all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
906 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
907 }))
908 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
909 if (!DefaultExitBB) {
910 // If we're not unswitching the default, we need it to match any cases to
911 // have a common successor or if we have no cases it is the common
912 // successor.
913 if (SI.getNumCases() == 0)
914 CommonSuccBB = SI.getDefaultDest();
915 else if (SI.getDefaultDest() != CommonSuccBB)
916 CommonSuccBB = nullptr;
917 }
918
919 // Split the preheader, so that we know that there is a safe place to insert
920 // the switch.
921 BasicBlock *OldPH = L.getLoopPreheader();
922 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
923 OldPH->getTerminator()->eraseFromParent();
924
925 // Now add the unswitched switch. This new switch instruction inherits the
926 // debug location of the old switch, because it semantically replace the old
927 // one.
928 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
929 NewSI->setDebugLoc(SIW->getDebugLoc());
930 SwitchInstProfUpdateWrapper NewSIW(*NewSI);
931
932 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
933 // First, we split any exit blocks with remaining in-loop predecessors. Then
934 // we update the PHIs in one of two ways depending on if there was a split.
935 // We walk in reverse so that we split in the same order as the cases
936 // appeared. This is purely for convenience of reading the resulting IR, but
937 // it doesn't cost anything really.
938 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
940 // Handle the default exit if necessary.
941 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
942 // ranges aren't quite powerful enough yet.
943 if (DefaultExitBB) {
944 if (pred_empty(DefaultExitBB)) {
945 UnswitchedExitBBs.insert(DefaultExitBB);
946 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
947 } else {
948 auto *SplitBB =
949 SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);
950 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
951 *ParentBB, *OldPH,
952 /*FullUnswitch*/ true);
953 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
954 }
955 }
956 // Note that we must use a reference in the for loop so that we update the
957 // container.
958 for (auto &ExitCase : reverse(ExitCases)) {
959 // Grab a reference to the exit block in the pair so that we can update it.
960 BasicBlock *ExitBB = std::get<1>(ExitCase);
961
962 // If this case is the last edge into the exit block, we can simply reuse it
963 // as it will no longer be a loop exit. No mapping necessary.
964 if (pred_empty(ExitBB)) {
965 // Only rewrite once.
966 if (UnswitchedExitBBs.insert(ExitBB).second)
967 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
968 continue;
969 }
970
971 // Otherwise we need to split the exit block so that we retain an exit
972 // block from the loop and a target for the unswitched condition.
973 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
974 if (!SplitExitBB) {
975 // If this is the first time we see this, do the split and remember it.
976 SplitExitBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
977 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
978 *ParentBB, *OldPH,
979 /*FullUnswitch*/ true);
980 }
981 // Update the case pair to point to the split block.
982 std::get<1>(ExitCase) = SplitExitBB;
983 }
984
985 // Now add the unswitched cases. We do this in reverse order as we built them
986 // in reverse order.
987 for (auto &ExitCase : reverse(ExitCases)) {
988 ConstantInt *CaseVal = std::get<0>(ExitCase);
989 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
990
991 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
992 }
993
994 // If the default was unswitched, re-point it and add explicit cases for
995 // entering the loop.
996 if (DefaultExitBB) {
997 NewSIW->setDefaultDest(DefaultExitBB);
998 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
999
1000 // We removed all the exit cases, so we just copy the cases to the
1001 // unswitched switch.
1002 for (const auto &Case : SI.cases())
1003 NewSIW.addCase(Case.getCaseValue(), NewPH,
1005 } else if (DefaultCaseWeight) {
1006 // We have to set branch weight of the default case.
1007 uint64_t SW = *DefaultCaseWeight;
1008 for (const auto &Case : SI.cases()) {
1009 auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
1010 assert(W &&
1011 "case weight must be defined as default case weight is defined");
1012 SW += *W;
1013 }
1014 NewSIW.setSuccessorWeight(0, SW);
1015 }
1016
1017 // If we ended up with a common successor for every path through the switch
1018 // after unswitching, rewrite it to an unconditional branch to make it easy
1019 // to recognize. Otherwise we potentially have to recognize the default case
1020 // pointing at unreachable and other complexity.
1021 if (CommonSuccBB) {
1022 BasicBlock *BB = SI.getParent();
1023 // We may have had multiple edges to this common successor block, so remove
1024 // them as predecessors. We skip the first one, either the default or the
1025 // actual first case.
1026 bool SkippedFirst = DefaultExitBB == nullptr;
1027 for (auto Case : SI.cases()) {
1028 assert(Case.getCaseSuccessor() == CommonSuccBB &&
1029 "Non-common successor!");
1030 (void)Case;
1031 if (!SkippedFirst) {
1032 SkippedFirst = true;
1033 continue;
1034 }
1035 CommonSuccBB->removePredecessor(BB,
1036 /*KeepOneInputPHIs*/ true);
1037 }
1038 // Now nuke the switch and replace it with a direct branch.
1039 Instruction *NewBI = UncondBrInst::Create(CommonSuccBB, BB);
1040 NewBI->setDebugLoc(SIW->getDebugLoc());
1041 SIW.eraseFromParent();
1042 } else if (DefaultExitBB) {
1043 assert(SI.getNumCases() > 0 &&
1044 "If we had no cases we'd have a common successor!");
1045 // Move the last case to the default successor. This is valid as if the
1046 // default got unswitched it cannot be reached. This has the advantage of
1047 // being simple and keeping the number of edges from this switch to
1048 // successors the same, and avoiding any PHI update complexity.
1049 auto LastCaseI = std::prev(SI.case_end());
1050
1051 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
1053 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
1054 SIW.removeCase(LastCaseI);
1055 }
1056
1057 // Walk the unswitched exit blocks and the unswitched split blocks and update
1058 // the dominator tree based on the CFG edits. While we are walking unordered
1059 // containers here, the API for applyUpdates takes an unordered list of
1060 // updates and requires them to not contain duplicates.
1062 for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
1063 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
1064 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
1065 }
1066 for (auto SplitUnswitchedPair : SplitExitBBMap) {
1067 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
1068 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
1069 }
1070
1071 if (MSSAU) {
1072 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
1073 if (VerifyMemorySSA)
1074 MSSAU->getMemorySSA()->verifyMemorySSA();
1075 } else {
1076 DT.applyUpdates(DTUpdates);
1077 }
1078
1079 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1080
1081 // We may have changed the nesting relationship for this loop so hoist it to
1082 // its correct parent if needed.
1083 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
1084
1085 if (MSSAU && VerifyMemorySSA)
1086 MSSAU->getMemorySSA()->verifyMemorySSA();
1087
1088 ++NumTrivial;
1089 ++NumSwitches;
1090 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
1091 return true;
1092}
1093
1094/// This routine scans the loop to find a branch or switch which occurs before
1095/// any side effects occur. These can potentially be unswitched without
1096/// duplicating the loop. If a branch or switch is successfully unswitched the
1097/// scanning continues to see if subsequent branches or switches have become
1098/// trivial. Once all trivial candidates have been unswitched, this routine
1099/// returns.
1100///
1101/// The return value indicates whether anything was unswitched (and therefore
1102/// changed).
1103///
1104/// If `SE` is not null, it will be updated based on the potential loop SCEVs
1105/// invalidated by this.
1107 LoopInfo &LI, ScalarEvolution *SE,
1108 MemorySSAUpdater *MSSAU) {
1109 bool Changed = false;
1110
1111 // If loop header has only one reachable successor we should keep looking for
1112 // trivial condition candidates in the successor as well. An alternative is
1113 // to constant fold conditions and merge successors into loop header (then we
1114 // only need to check header's terminator). The reason for not doing this in
1115 // LoopUnswitch pass is that it could potentially break LoopPassManager's
1116 // invariants. Folding dead branches could either eliminate the current loop
1117 // or make other loops unreachable. LCSSA form might also not be preserved
1118 // after deleting branches. The following code keeps traversing loop header's
1119 // successors until it finds the trivial condition candidate (condition that
1120 // is not a constant). Since unswitching generates branches with constant
1121 // conditions, this scenario could be very common in practice.
1122 BasicBlock *CurrentBB = L.getHeader();
1124 Visited.insert(CurrentBB);
1125 do {
1126 // Check if there are any side-effecting instructions (e.g. stores, calls,
1127 // volatile loads) in the part of the loop that the code *would* execute
1128 // without unswitching.
1129 if (MSSAU) // Possible early exit with MSSA
1130 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1131 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1132 return Changed;
1133 if (llvm::any_of(*CurrentBB,
1134 [](Instruction &I) { return I.mayHaveSideEffects(); }))
1135 return Changed;
1136
1137 Instruction *CurrentTerm = CurrentBB->getTerminator();
1138
1139 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1140 // Don't bother trying to unswitch past a switch with a constant
1141 // condition. This should be removed prior to running this pass by
1142 // simplifycfg.
1143 if (isa<Constant>(SI->getCondition()))
1144 return Changed;
1145
1146 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1147 // Couldn't unswitch this one so we're done.
1148 return Changed;
1149
1150 // Mark that we managed to unswitch something.
1151 Changed = true;
1152
1153 // If unswitching turned the terminator into an unconditional branch then
1154 // we can continue. The unswitching logic specifically works to fold any
1155 // cases it can into an unconditional branch to make it easier to
1156 // recognize here.
1157 auto *BI = dyn_cast<UncondBrInst>(CurrentBB->getTerminator());
1158 if (!BI)
1159 return Changed;
1160
1161 CurrentBB = BI->getSuccessor();
1162 continue;
1163 }
1164
1165 auto *BI = dyn_cast<CondBrInst>(CurrentTerm);
1166 if (!BI)
1167 // We do not understand other terminator instructions.
1168 return Changed;
1169
1170 // Don't bother trying to unswitch past an unconditional branch or a branch
1171 // with a constant value. These should be removed by simplifycfg prior to
1172 // running this pass.
1173 if (isa<Constant>(skipTrivialSelect(BI->getCondition())))
1174 return Changed;
1175
1176 // Found a trivial condition candidate: non-foldable conditional branch. If
1177 // we fail to unswitch this, we can't do anything else that is trivial.
1178 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1179 return Changed;
1180
1181 // Mark that we managed to unswitch something.
1182 Changed = true;
1183
1184 // If we only unswitched some of the conditions feeding the branch, we won't
1185 // have collapsed it to a single successor.
1186 if (isa<CondBrInst>(CurrentBB->getTerminator()))
1187 return Changed;
1188
1189 // Follow the newly unconditional branch into its successor.
1190 CurrentBB = cast<UncondBrInst>(CurrentBB->getTerminator())->getSuccessor();
1191
1192 // When continuing, if we exit the loop or reach a previous visited block,
1193 // then we can not reach any trivial condition candidates (unfoldable
1194 // branch instructions or switch instructions) and no unswitch can happen.
1195 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1196
1197 return Changed;
1198}
1199
1200/// Build the cloned blocks for an unswitched copy of the given loop.
1201///
1202/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1203/// after the split block (`SplitBB`) that will be used to select between the
1204/// cloned and original loop.
1205///
1206/// This routine handles cloning all of the necessary loop blocks and exit
1207/// blocks including rewriting their instructions and the relevant PHI nodes.
1208/// Any loop blocks or exit blocks which are dominated by a different successor
1209/// than the one for this clone of the loop blocks can be trivially skipped. We
1210/// use the `DominatingSucc` map to determine whether a block satisfies that
1211/// property with a simple map lookup.
1212///
1213/// It also correctly creates the unconditional branch in the cloned
1214/// unswitched parent block to only point at the unswitched successor.
1215///
1216/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1217/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1218/// the cloned blocks (and their loops) are left without full `LoopInfo`
1219/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1220/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1221/// instead the caller must recompute an accurate DT. It *does* correctly
1222/// update the `AssumptionCache` provided in `AC`.
1224 Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1225 ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1226 BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1228 ValueToValueMapTy &VMap,
1230 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
1231 ScalarEvolution *SE) {
1233 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1234
1235 // We will need to clone a bunch of blocks, wrap up the clone operation in
1236 // a helper.
1237 auto CloneBlock = [&](BasicBlock *OldBB) {
1238 // Clone the basic block and insert it before the new preheader.
1239 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1240 NewBB->moveBefore(LoopPH);
1241
1242 // Record this block and the mapping.
1243 NewBlocks.push_back(NewBB);
1244 VMap[OldBB] = NewBB;
1245
1246 return NewBB;
1247 };
1248
1249 // We skip cloning blocks when they have a dominating succ that is not the
1250 // succ we are cloning for.
1251 auto SkipBlock = [&](BasicBlock *BB) {
1252 auto It = DominatingSucc.find(BB);
1253 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1254 };
1255
1256 // First, clone the preheader.
1257 auto *ClonedPH = CloneBlock(LoopPH);
1258
1259 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1260 for (auto *LoopBB : L.blocks())
1261 if (!SkipBlock(LoopBB))
1262 CloneBlock(LoopBB);
1263
1264 // Split all the loop exit edges so that when we clone the exit blocks, if
1265 // any of the exit blocks are *also* a preheader for some other loop, we
1266 // don't create multiple predecessors entering the loop header.
1267 for (auto *ExitBB : ExitBlocks) {
1268 if (SkipBlock(ExitBB))
1269 continue;
1270
1271 // When we are going to clone an exit, we don't need to clone all the
1272 // instructions in the exit block and we want to ensure we have an easy
1273 // place to merge the CFG, so split the exit first. This is always safe to
1274 // do because there cannot be any non-loop predecessors of a loop exit in
1275 // loop simplified form.
1276 auto *MergeBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1277
1278 // Rearrange the names to make it easier to write test cases by having the
1279 // exit block carry the suffix rather than the merge block carrying the
1280 // suffix.
1281 MergeBB->takeName(ExitBB);
1282 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1283
1284 // Now clone the original exit block.
1285 auto *ClonedExitBB = CloneBlock(ExitBB);
1286 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1287 "Exit block should have been split to have one successor!");
1288 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1289 "Cloned exit block has the wrong successor!");
1290
1291 // Remap any cloned instructions and create a merge phi node for them.
1292 for (auto ZippedInsts : llvm::zip_first(
1293 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1294 llvm::make_range(ClonedExitBB->begin(),
1295 std::prev(ClonedExitBB->end())))) {
1296 Instruction &I = std::get<0>(ZippedInsts);
1297 Instruction &ClonedI = std::get<1>(ZippedInsts);
1298
1299 // The only instructions in the exit block should be PHI nodes and
1300 // potentially a landing pad.
1301 assert(
1303 "Bad instruction in exit block!");
1304 // We should have a value map between the instruction and its clone.
1305 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1306
1307 // Forget SCEVs based on exit phis in case SCEV looked through the phi.
1308 if (SE)
1309 if (auto *PN = dyn_cast<PHINode>(&I))
1311
1312 BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt();
1313
1314 auto *MergePN =
1315 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi");
1316 MergePN->insertBefore(InsertPt);
1317 MergePN->setDebugLoc(InsertPt->getDebugLoc());
1318 I.replaceAllUsesWith(MergePN);
1319 MergePN->addIncoming(&I, ExitBB);
1320 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1321 }
1322 }
1323
1324 // Rewrite the instructions in the cloned blocks to refer to the instructions
1325 // in the cloned blocks. We have to do this as a second pass so that we have
1326 // everything available. Also, we have inserted new instructions which may
1327 // include assume intrinsics, so we update the assumption cache while
1328 // processing this.
1329 Module *M = ClonedPH->getParent()->getParent();
1330 for (auto *ClonedBB : NewBlocks)
1331 for (Instruction &I : *ClonedBB) {
1332 RemapDbgRecordRange(M, I.getDbgRecordRange(), VMap,
1334 RemapInstruction(&I, VMap,
1336 if (auto *II = dyn_cast<AssumeInst>(&I))
1338 }
1339
1340 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1341 // have spurious incoming values.
1342 for (auto *LoopBB : L.blocks())
1343 if (SkipBlock(LoopBB))
1344 for (auto *SuccBB : successors(LoopBB))
1345 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1346 for (PHINode &PN : ClonedSuccBB->phis())
1347 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1348
1349 // Remove the cloned parent as a predecessor of any successor we ended up
1350 // cloning other than the unswitched one.
1351 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1352 for (auto *SuccBB : successors(ParentBB)) {
1353 if (SuccBB == UnswitchedSuccBB)
1354 continue;
1355
1356 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1357 if (!ClonedSuccBB)
1358 continue;
1359
1360 ClonedSuccBB->removePredecessor(ClonedParentBB,
1361 /*KeepOneInputPHIs*/ true);
1362 }
1363
1364 // Replace the cloned branch with an unconditional branch to the cloned
1365 // unswitched successor.
1366 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1367 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1368 // Trivial Simplification. If Terminator is a conditional branch and
1369 // condition becomes dead - erase it.
1370 Value *ClonedConditionToErase = nullptr;
1371 if (auto *BI = dyn_cast<CondBrInst>(ClonedTerminator))
1372 ClonedConditionToErase = BI->getCondition();
1373 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1374 ClonedConditionToErase = SI->getCondition();
1375
1376 Instruction *BI = UncondBrInst::Create(ClonedSuccBB, ClonedParentBB);
1377 BI->setDebugLoc(ClonedTerminator->getDebugLoc());
1378 ClonedTerminator->eraseFromParent();
1379
1380 if (ClonedConditionToErase)
1381 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1382 MSSAU);
1383
1384 // If there are duplicate entries in the PHI nodes because of multiple edges
1385 // to the unswitched successor, we need to nuke all but one as we replaced it
1386 // with a direct branch.
1387 for (PHINode &PN : ClonedSuccBB->phis()) {
1388 bool Found = false;
1389 // Loop over the incoming operands backwards so we can easily delete as we
1390 // go without invalidating the index.
1391 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1392 if (PN.getIncomingBlock(i) != ClonedParentBB)
1393 continue;
1394 if (!Found) {
1395 Found = true;
1396 continue;
1397 }
1398 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1399 }
1400 }
1401
1402 // Record the domtree updates for the new blocks.
1404 for (auto *ClonedBB : NewBlocks) {
1405 for (auto *SuccBB : successors(ClonedBB))
1406 if (SuccSet.insert(SuccBB).second)
1407 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1408 SuccSet.clear();
1409 }
1410
1411 return ClonedPH;
1412}
1413
1414/// Recursively clone the specified loop and all of its children.
1415///
1416/// The target parent loop for the clone should be provided, or can be null if
1417/// the clone is a top-level loop. While cloning, all the blocks are mapped
1418/// with the provided value map. The entire original loop must be present in
1419/// the value map. The cloned loop is returned.
1420static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1421 const ValueToValueMapTy &VMap, LoopInfo &LI) {
1422 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1423 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1424 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1425 for (auto *BB : OrigL.blocks()) {
1426 auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1427 ClonedL.addBlockEntry(ClonedBB);
1428 if (LI.getLoopFor(BB) == &OrigL)
1429 LI.changeLoopFor(ClonedBB, &ClonedL);
1430 }
1431 };
1432
1433 // We specially handle the first loop because it may get cloned into
1434 // a different parent and because we most commonly are cloning leaf loops.
1435 Loop *ClonedRootL = LI.AllocateLoop();
1436 if (RootParentL)
1437 RootParentL->addChildLoop(ClonedRootL);
1438 else
1439 LI.addTopLevelLoop(ClonedRootL);
1440 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1441
1442 if (OrigRootL.isInnermost())
1443 return ClonedRootL;
1444
1445 // If we have a nest, we can quickly clone the entire loop nest using an
1446 // iterative approach because it is a tree. We keep the cloned parent in the
1447 // data structure to avoid repeatedly querying through a map to find it.
1448 SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1449 // Build up the loops to clone in reverse order as we'll clone them from the
1450 // back.
1451 for (Loop *ChildL : llvm::reverse(OrigRootL))
1452 LoopsToClone.push_back({ClonedRootL, ChildL});
1453 do {
1454 Loop *ClonedParentL, *L;
1455 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1456 Loop *ClonedL = LI.AllocateLoop();
1457 ClonedParentL->addChildLoop(ClonedL);
1458 AddClonedBlocksToLoop(*L, *ClonedL);
1459 for (Loop *ChildL : llvm::reverse(*L))
1460 LoopsToClone.push_back({ClonedL, ChildL});
1461 } while (!LoopsToClone.empty());
1462
1463 return ClonedRootL;
1464}
1465
1466/// Build the cloned loops of an original loop from unswitching.
1467///
1468/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1469/// operation. We need to re-verify that there even is a loop (as the backedge
1470/// may not have been cloned), and even if there are remaining backedges the
1471/// backedge set may be different. However, we know that each child loop is
1472/// undisturbed, we only need to find where to place each child loop within
1473/// either any parent loop or within a cloned version of the original loop.
1474///
1475/// Because child loops may end up cloned outside of any cloned version of the
1476/// original loop, multiple cloned sibling loops may be created. All of them
1477/// are returned so that the newly introduced loop nest roots can be
1478/// identified.
1479static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1480 const ValueToValueMapTy &VMap, LoopInfo &LI,
1481 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1482 Loop *ClonedL = nullptr;
1483
1484 auto *OrigPH = OrigL.getLoopPreheader();
1485 auto *OrigHeader = OrigL.getHeader();
1486
1487 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1488 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1489
1490 // We need to know the loops of the cloned exit blocks to even compute the
1491 // accurate parent loop. If we only clone exits to some parent of the
1492 // original parent, we want to clone into that outer loop. We also keep track
1493 // of the loops that our cloned exit blocks participate in.
1494 Loop *ParentL = nullptr;
1495 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1497 ClonedExitsInLoops.reserve(ExitBlocks.size());
1498 for (auto *ExitBB : ExitBlocks)
1499 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1500 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1501 ExitLoopMap[ClonedExitBB] = ExitL;
1502 ClonedExitsInLoops.push_back(ClonedExitBB);
1503 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1504 ParentL = ExitL;
1505 }
1506 assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1507 ParentL->contains(OrigL.getParentLoop())) &&
1508 "The computed parent loop should always contain (or be) the parent of "
1509 "the original loop.");
1510
1511 // We build the set of blocks dominated by the cloned header from the set of
1512 // cloned blocks out of the original loop. While not all of these will
1513 // necessarily be in the cloned loop, it is enough to establish that they
1514 // aren't in unreachable cycles, etc.
1515 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1516 for (auto *BB : OrigL.blocks())
1517 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1518 ClonedLoopBlocks.insert(ClonedBB);
1519
1520 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1521 // skipped cloning some region of this loop which can in turn skip some of
1522 // the backedges so we have to rebuild the blocks in the loop based on the
1523 // backedges that remain after cloning.
1525 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1526 for (auto *Pred : predecessors(ClonedHeader)) {
1527 // The only possible non-loop header predecessor is the preheader because
1528 // we know we cloned the loop in simplified form.
1529 if (Pred == ClonedPH)
1530 continue;
1531
1532 // Because the loop was in simplified form, the only non-loop predecessor
1533 // should be the preheader.
1534 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1535 "header other than the preheader "
1536 "that is not part of the loop!");
1537
1538 // Insert this block into the loop set and on the first visit (and if it
1539 // isn't the header we're currently walking) put it into the worklist to
1540 // recurse through.
1541 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1542 Worklist.push_back(Pred);
1543 }
1544
1545 // If we had any backedges then there *is* a cloned loop. Put the header into
1546 // the loop set and then walk the worklist backwards to find all the blocks
1547 // that remain within the loop after cloning.
1548 if (!BlocksInClonedLoop.empty()) {
1549 BlocksInClonedLoop.insert(ClonedHeader);
1550
1551 while (!Worklist.empty()) {
1552 BasicBlock *BB = Worklist.pop_back_val();
1553 assert(BlocksInClonedLoop.count(BB) &&
1554 "Didn't put block into the loop set!");
1555
1556 // Insert any predecessors that are in the possible set into the cloned
1557 // set, and if the insert is successful, add them to the worklist. Note
1558 // that we filter on the blocks that are definitely reachable via the
1559 // backedge to the loop header so we may prune out dead code within the
1560 // cloned loop.
1561 for (auto *Pred : predecessors(BB))
1562 if (ClonedLoopBlocks.count(Pred) &&
1563 BlocksInClonedLoop.insert(Pred).second)
1564 Worklist.push_back(Pred);
1565 }
1566
1567 ClonedL = LI.AllocateLoop();
1568 if (ParentL) {
1569 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1570 ParentL->addChildLoop(ClonedL);
1571 } else {
1572 LI.addTopLevelLoop(ClonedL);
1573 }
1574 NonChildClonedLoops.push_back(ClonedL);
1575
1576 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1577 // We don't want to just add the cloned loop blocks based on how we
1578 // discovered them. The original order of blocks was carefully built in
1579 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1580 // that logic, we just re-walk the original blocks (and those of the child
1581 // loops) and filter them as we add them into the cloned loop.
1582 for (auto *BB : OrigL.blocks()) {
1583 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1584 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1585 continue;
1586
1587 // Directly add the blocks that are only in this loop.
1588 if (LI.getLoopFor(BB) == &OrigL) {
1589 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1590 continue;
1591 }
1592
1593 // We want to manually add it to this loop and parents.
1594 // Registering it with LoopInfo will happen when we clone the top
1595 // loop for this block.
1596 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1597 PL->addBlockEntry(ClonedBB);
1598 }
1599
1600 // Now add each child loop whose header remains within the cloned loop. All
1601 // of the blocks within the loop must satisfy the same constraints as the
1602 // header so once we pass the header checks we can just clone the entire
1603 // child loop nest.
1604 for (Loop *ChildL : OrigL) {
1605 auto *ClonedChildHeader =
1606 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1607 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1608 continue;
1609
1610#ifndef NDEBUG
1611 // We should never have a cloned child loop header but fail to have
1612 // all of the blocks for that child loop.
1613 for (auto *ChildLoopBB : ChildL->blocks())
1614 assert(BlocksInClonedLoop.count(
1615 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1616 "Child cloned loop has a header within the cloned outer "
1617 "loop but not all of its blocks!");
1618#endif
1619
1620 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1621 }
1622 }
1623
1624 // Now that we've handled all the components of the original loop that were
1625 // cloned into a new loop, we still need to handle anything from the original
1626 // loop that wasn't in a cloned loop.
1627
1628 // Figure out what blocks are left to place within any loop nest containing
1629 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1630 // them.
1631 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1632 if (BlocksInClonedLoop.empty())
1633 UnloopedBlockSet.insert(ClonedPH);
1634 for (auto *ClonedBB : ClonedLoopBlocks)
1635 if (!BlocksInClonedLoop.count(ClonedBB))
1636 UnloopedBlockSet.insert(ClonedBB);
1637
1638 // Copy the cloned exits and sort them in ascending loop depth, we'll work
1639 // backwards across these to process them inside out. The order shouldn't
1640 // matter as we're just trying to build up the map from inside-out; we use
1641 // the map in a more stably ordered way below.
1642 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1643 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1644 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1645 ExitLoopMap.lookup(RHS)->getLoopDepth();
1646 });
1647
1648 // Populate the existing ExitLoopMap with everything reachable from each
1649 // exit, starting from the inner most exit.
1650 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1651 assert(Worklist.empty() && "Didn't clear worklist!");
1652
1653 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1654 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1655
1656 // Walk the CFG back until we hit the cloned PH adding everything reachable
1657 // and in the unlooped set to this exit block's loop.
1658 Worklist.push_back(ExitBB);
1659 do {
1660 BasicBlock *BB = Worklist.pop_back_val();
1661 // We can stop recursing at the cloned preheader (if we get there).
1662 if (BB == ClonedPH)
1663 continue;
1664
1665 for (BasicBlock *PredBB : predecessors(BB)) {
1666 // If this pred has already been moved to our set or is part of some
1667 // (inner) loop, no update needed.
1668 if (!UnloopedBlockSet.erase(PredBB)) {
1669 assert(
1670 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1671 "Predecessor not mapped to a loop!");
1672 continue;
1673 }
1674
1675 // We just insert into the loop set here. We'll add these blocks to the
1676 // exit loop after we build up the set in an order that doesn't rely on
1677 // predecessor order (which in turn relies on use list order).
1678 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1679 (void)Inserted;
1680 assert(Inserted && "Should only visit an unlooped block once!");
1681
1682 // And recurse through to its predecessors.
1683 Worklist.push_back(PredBB);
1684 }
1685 } while (!Worklist.empty());
1686 }
1687
1688 // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1689 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1690 // in their original order adding them to the correct loop.
1691
1692 // We need a stable insertion order. We use the order of the original loop
1693 // order and map into the correct parent loop.
1694 for (auto *BB : llvm::concat<BasicBlock *const>(
1695 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1696 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1697 OuterL->addBasicBlockToLoop(BB, LI);
1698
1699#ifndef NDEBUG
1700 for (auto &BBAndL : ExitLoopMap) {
1701 auto *BB = BBAndL.first;
1702 auto *OuterL = BBAndL.second;
1703 assert(LI.getLoopFor(BB) == OuterL &&
1704 "Failed to put all blocks into outer loops!");
1705 }
1706#endif
1707
1708 // Now that all the blocks are placed into the correct containing loop in the
1709 // absence of child loops, find all the potentially cloned child loops and
1710 // clone them into whatever outer loop we placed their header into.
1711 for (Loop *ChildL : OrigL) {
1712 auto *ClonedChildHeader =
1713 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1714 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1715 continue;
1716
1717#ifndef NDEBUG
1718 for (auto *ChildLoopBB : ChildL->blocks())
1719 assert(VMap.count(ChildLoopBB) &&
1720 "Cloned a child loop header but not all of that loops blocks!");
1721#endif
1722
1723 NonChildClonedLoops.push_back(cloneLoopNest(
1724 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1725 }
1726}
1727
1728static void
1730 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1731 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1732 // Find all the dead clones, and remove them from their successors.
1734 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1735 for (const auto &VMap : VMaps)
1736 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1737 if (!DT.isReachableFromEntry(ClonedBB)) {
1738 for (BasicBlock *SuccBB : successors(ClonedBB))
1739 SuccBB->removePredecessor(ClonedBB);
1740 DeadBlocks.push_back(ClonedBB);
1741 }
1742
1743 // Remove all MemorySSA in the dead blocks
1744 if (MSSAU) {
1745 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1746 DeadBlocks.end());
1747 MSSAU->removeBlocks(DeadBlockSet);
1748 }
1749
1750 // Drop any remaining references to break cycles.
1751 for (BasicBlock *BB : DeadBlocks)
1752 BB->dropAllReferences();
1753 // Erase them from the IR.
1754 for (BasicBlock *BB : DeadBlocks)
1755 BB->eraseFromParent();
1756}
1757
1760 DominatorTree &DT, LoopInfo &LI,
1761 MemorySSAUpdater *MSSAU,
1762 ScalarEvolution *SE,
1763 LPMUpdater &LoopUpdater) {
1764 // Find all the dead blocks tied to this loop, and remove them from their
1765 // successors.
1767
1768 // Start with loop/exit blocks and get a transitive closure of reachable dead
1769 // blocks.
1770 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1771 ExitBlocks.end());
1772 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1773 while (!DeathCandidates.empty()) {
1774 auto *BB = DeathCandidates.pop_back_val();
1775 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1776 for (BasicBlock *SuccBB : successors(BB)) {
1777 SuccBB->removePredecessor(BB);
1778 DeathCandidates.push_back(SuccBB);
1779 }
1780 DeadBlockSet.insert(BB);
1781 }
1782 }
1783
1784 // Remove all MemorySSA in the dead blocks
1785 if (MSSAU)
1786 MSSAU->removeBlocks(DeadBlockSet);
1787
1788 // Filter out the dead blocks from the exit blocks list so that it can be
1789 // used in the caller.
1790 llvm::erase_if(ExitBlocks,
1791 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1792
1793 // Walk from this loop up through its parents removing all of the dead blocks.
1794 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1795 for (auto *BB : DeadBlockSet)
1796 ParentL->getBlocksSet().erase(BB);
1797 llvm::erase_if(ParentL->getBlocksVector(),
1798 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1799 }
1800
1801 // Now delete the dead child loops. This raw delete will clear them
1802 // recursively.
1803 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1804 if (!DeadBlockSet.count(ChildL->getHeader()))
1805 return false;
1806
1807 assert(llvm::all_of(ChildL->blocks(),
1808 [&](BasicBlock *ChildBB) {
1809 return DeadBlockSet.count(ChildBB);
1810 }) &&
1811 "If the child loop header is dead all blocks in the child loop must "
1812 "be dead as well!");
1813 LoopUpdater.markLoopAsDeleted(*ChildL, ChildL->getName());
1814 if (SE)
1816 LI.destroy(ChildL);
1817 return true;
1818 });
1819
1820 // Remove the loop mappings for the dead blocks and drop all the references
1821 // from these blocks to others to handle cyclic references as we start
1822 // deleting the blocks themselves.
1823 for (auto *BB : DeadBlockSet) {
1824 // Check that the dominator tree has already been updated.
1825 assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1826 LI.changeLoopFor(BB, nullptr);
1827 // Drop all uses of the instructions to make sure we won't have dangling
1828 // uses in other blocks.
1829 for (auto &I : *BB)
1830 if (!I.use_empty())
1831 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1832 BB->dropAllReferences();
1833 }
1834
1835 // Actually delete the blocks now that they've been fully unhooked from the
1836 // IR.
1837 for (auto *BB : DeadBlockSet)
1838 BB->eraseFromParent();
1839}
1840
1841/// Recompute the set of blocks in a loop after unswitching.
1842///
1843/// This walks from the original headers predecessors to rebuild the loop. We
1844/// take advantage of the fact that new blocks can't have been added, and so we
1845/// filter by the original loop's blocks. This also handles potentially
1846/// unreachable code that we don't want to explore but might be found examining
1847/// the predecessors of the header.
1848///
1849/// If the original loop is no longer a loop, this will return an empty set. If
1850/// it remains a loop, all the blocks within it will be added to the set
1851/// (including those blocks in inner loops).
1853 LoopInfo &LI) {
1855
1856 auto *PH = L.getLoopPreheader();
1857 auto *Header = L.getHeader();
1858
1859 // A worklist to use while walking backwards from the header.
1861
1862 // First walk the predecessors of the header to find the backedges. This will
1863 // form the basis of our walk.
1864 for (auto *Pred : predecessors(Header)) {
1865 // Skip the preheader.
1866 if (Pred == PH)
1867 continue;
1868
1869 // Because the loop was in simplified form, the only non-loop predecessor
1870 // is the preheader.
1871 assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1872 "than the preheader that is not part of the "
1873 "loop!");
1874
1875 // Insert this block into the loop set and on the first visit and, if it
1876 // isn't the header we're currently walking, put it into the worklist to
1877 // recurse through.
1878 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1879 Worklist.push_back(Pred);
1880 }
1881
1882 // If no backedges were found, we're done.
1883 if (LoopBlockSet.empty())
1884 return LoopBlockSet;
1885
1886 // We found backedges, recurse through them to identify the loop blocks.
1887 while (!Worklist.empty()) {
1888 BasicBlock *BB = Worklist.pop_back_val();
1889 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1890
1891 // No need to walk past the header.
1892 if (BB == Header)
1893 continue;
1894
1895 // Because we know the inner loop structure remains valid we can use the
1896 // loop structure to jump immediately across the entire nested loop.
1897 // Further, because it is in loop simplified form, we can directly jump
1898 // to its preheader afterward.
1899 if (Loop *InnerL = LI.getLoopFor(BB))
1900 if (InnerL != &L) {
1901 assert(L.contains(InnerL) &&
1902 "Should not reach a loop *outside* this loop!");
1903 // The preheader is the only possible predecessor of the loop so
1904 // insert it into the set and check whether it was already handled.
1905 auto *InnerPH = InnerL->getLoopPreheader();
1906 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1907 "but not contain the inner loop "
1908 "preheader!");
1909 if (!LoopBlockSet.insert(InnerPH).second)
1910 // The only way to reach the preheader is through the loop body
1911 // itself so if it has been visited the loop is already handled.
1912 continue;
1913
1914 // Insert all of the blocks (other than those already present) into
1915 // the loop set. We expect at least the block that led us to find the
1916 // inner loop to be in the block set, but we may also have other loop
1917 // blocks if they were already enqueued as predecessors of some other
1918 // outer loop block.
1919 for (auto *InnerBB : InnerL->blocks()) {
1920 if (InnerBB == BB) {
1921 assert(LoopBlockSet.count(InnerBB) &&
1922 "Block should already be in the set!");
1923 continue;
1924 }
1925
1926 LoopBlockSet.insert(InnerBB);
1927 }
1928
1929 // Add the preheader to the worklist so we will continue past the
1930 // loop body.
1931 Worklist.push_back(InnerPH);
1932 continue;
1933 }
1934
1935 // Insert any predecessors that were in the original loop into the new
1936 // set, and if the insert is successful, add them to the worklist.
1937 for (auto *Pred : predecessors(BB))
1938 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1939 Worklist.push_back(Pred);
1940 }
1941
1942 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1943
1944 // We've found all the blocks participating in the loop, return our completed
1945 // set.
1946 return LoopBlockSet;
1947}
1948
1949/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1950///
1951/// The removal may have removed some child loops entirely but cannot have
1952/// disturbed any remaining child loops. However, they may need to be hoisted
1953/// to the parent loop (or to be top-level loops). The original loop may be
1954/// completely removed.
1955///
1956/// The sibling loops resulting from this update are returned. If the original
1957/// loop remains a valid loop, it will be the first entry in this list with all
1958/// of the newly sibling loops following it.
1959///
1960/// Returns true if the loop remains a loop after unswitching, and false if it
1961/// is no longer a loop after unswitching (and should not continue to be
1962/// referenced).
1964 LoopInfo &LI,
1965 SmallVectorImpl<Loop *> &HoistedLoops,
1966 ScalarEvolution *SE) {
1967 auto *PH = L.getLoopPreheader();
1968
1969 // Compute the actual parent loop from the exit blocks. Because we may have
1970 // pruned some exits the loop may be different from the original parent.
1971 Loop *ParentL = nullptr;
1972 SmallVector<Loop *, 4> ExitLoops;
1973 SmallVector<BasicBlock *, 4> ExitsInLoops;
1974 ExitsInLoops.reserve(ExitBlocks.size());
1975 for (auto *ExitBB : ExitBlocks)
1976 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1977 ExitLoops.push_back(ExitL);
1978 ExitsInLoops.push_back(ExitBB);
1979 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1980 ParentL = ExitL;
1981 }
1982
1983 // Recompute the blocks participating in this loop. This may be empty if it
1984 // is no longer a loop.
1985 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1986
1987 // If we still have a loop, we need to re-set the loop's parent as the exit
1988 // block set changing may have moved it within the loop nest. Note that this
1989 // can only happen when this loop has a parent as it can only hoist the loop
1990 // *up* the nest.
1991 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1992 // Remove this loop's (original) blocks from all of the intervening loops.
1993 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1994 IL = IL->getParentLoop()) {
1995 IL->getBlocksSet().erase(PH);
1996 for (auto *BB : L.blocks())
1997 IL->getBlocksSet().erase(BB);
1998 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1999 return BB == PH || L.contains(BB);
2000 });
2001 }
2002
2003 LI.changeLoopFor(PH, ParentL);
2004 L.getParentLoop()->removeChildLoop(&L);
2005 if (ParentL)
2006 ParentL->addChildLoop(&L);
2007 else
2008 LI.addTopLevelLoop(&L);
2009 }
2010
2011 // Now we update all the blocks which are no longer within the loop.
2012 auto &Blocks = L.getBlocksVector();
2013 auto BlocksSplitI =
2014 LoopBlockSet.empty()
2015 ? Blocks.begin()
2016 : std::stable_partition(
2017 Blocks.begin(), Blocks.end(),
2018 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
2019
2020 // Before we erase the list of unlooped blocks, build a set of them.
2021 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
2022 if (LoopBlockSet.empty())
2023 UnloopedBlocks.insert(PH);
2024
2025 // Now erase these blocks from the loop.
2026 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
2027 L.getBlocksSet().erase(BB);
2028 Blocks.erase(BlocksSplitI, Blocks.end());
2029
2030 // Sort the exits in ascending loop depth, we'll work backwards across these
2031 // to process them inside out.
2032 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
2033 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
2034 });
2035
2036 // We'll build up a set for each exit loop.
2037 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
2038 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
2039
2040 auto RemoveUnloopedBlocksFromLoop =
2041 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
2042 for (auto *BB : UnloopedBlocks)
2043 L.getBlocksSet().erase(BB);
2044 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
2045 return UnloopedBlocks.count(BB);
2046 });
2047 };
2048
2050 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
2051 assert(Worklist.empty() && "Didn't clear worklist!");
2052 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
2053
2054 // Grab the next exit block, in decreasing loop depth order.
2055 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
2056 Loop &ExitL = *LI.getLoopFor(ExitBB);
2057 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
2058
2059 // Erase all of the unlooped blocks from the loops between the previous
2060 // exit loop and this exit loop. This works because the ExitInLoops list is
2061 // sorted in increasing order of loop depth and thus we visit loops in
2062 // decreasing order of loop depth.
2063 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
2064 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2065
2066 // Walk the CFG back until we hit the cloned PH adding everything reachable
2067 // and in the unlooped set to this exit block's loop.
2068 Worklist.push_back(ExitBB);
2069 do {
2070 BasicBlock *BB = Worklist.pop_back_val();
2071 // We can stop recursing at the cloned preheader (if we get there).
2072 if (BB == PH)
2073 continue;
2074
2075 for (BasicBlock *PredBB : predecessors(BB)) {
2076 // If this pred has already been moved to our set or is part of some
2077 // (inner) loop, no update needed.
2078 if (!UnloopedBlocks.erase(PredBB)) {
2079 assert((NewExitLoopBlocks.count(PredBB) ||
2080 ExitL.contains(LI.getLoopFor(PredBB))) &&
2081 "Predecessor not in a nested loop (or already visited)!");
2082 continue;
2083 }
2084
2085 // We just insert into the loop set here. We'll add these blocks to the
2086 // exit loop after we build up the set in a deterministic order rather
2087 // than the predecessor-influenced visit order.
2088 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2089 (void)Inserted;
2090 assert(Inserted && "Should only visit an unlooped block once!");
2091
2092 // And recurse through to its predecessors.
2093 Worklist.push_back(PredBB);
2094 }
2095 } while (!Worklist.empty());
2096
2097 // If blocks in this exit loop were directly part of the original loop (as
2098 // opposed to a child loop) update the map to point to this exit loop. This
2099 // just updates a map and so the fact that the order is unstable is fine.
2100 for (auto *BB : NewExitLoopBlocks)
2101 if (Loop *BBL = LI.getLoopFor(BB))
2102 if (BBL == &L || !L.contains(BBL))
2103 LI.changeLoopFor(BB, &ExitL);
2104
2105 // We will remove the remaining unlooped blocks from this loop in the next
2106 // iteration or below.
2107 NewExitLoopBlocks.clear();
2108 }
2109
2110 // Any remaining unlooped blocks are no longer part of any loop unless they
2111 // are part of some child loop.
2112 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
2113 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2114 for (auto *BB : UnloopedBlocks)
2115 if (Loop *BBL = LI.getLoopFor(BB))
2116 if (BBL == &L || !L.contains(BBL))
2117 LI.changeLoopFor(BB, nullptr);
2118
2119 // Sink all the child loops whose headers are no longer in the loop set to
2120 // the parent (or to be top level loops). We reach into the loop and directly
2121 // update its subloop vector to make this batch update efficient.
2122 auto &SubLoops = L.getSubLoopsVector();
2123 auto SubLoopsSplitI =
2124 LoopBlockSet.empty()
2125 ? SubLoops.begin()
2126 : std::stable_partition(
2127 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2128 return LoopBlockSet.count(SubL->getHeader());
2129 });
2130 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2131 HoistedLoops.push_back(HoistedL);
2132 HoistedL->setParentLoop(nullptr);
2133
2134 // To compute the new parent of this hoisted loop we look at where we
2135 // placed the preheader above. We can't lookup the header itself because we
2136 // retained the mapping from the header to the hoisted loop. But the
2137 // preheader and header should have the exact same new parent computed
2138 // based on the set of exit blocks from the original loop as the preheader
2139 // is a predecessor of the header and so reached in the reverse walk. And
2140 // because the loops were all in simplified form the preheader of the
2141 // hoisted loop can't be part of some *other* loop.
2142 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2143 NewParentL->addChildLoop(HoistedL);
2144 else
2145 LI.addTopLevelLoop(HoistedL);
2146 }
2147 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2148
2149 // Actually delete the loop if nothing remained within it.
2150 if (Blocks.empty()) {
2151 assert(SubLoops.empty() &&
2152 "Failed to remove all subloops from the original loop!");
2153 if (Loop *ParentL = L.getParentLoop())
2154 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2155 else
2156 LI.removeLoop(llvm::find(LI, &L));
2157 // markLoopAsDeleted for L should be triggered by the caller (it is
2158 // typically done within postUnswitch).
2159 if (SE)
2161 LI.destroy(&L);
2162 return false;
2163 }
2164
2165 return true;
2166}
2167
2168/// Helper to visit a dominator subtree, invoking a callable on each node.
2169///
2170/// Returning false at any point will stop walking past that node of the tree.
2171template <typename CallableT>
2172void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2174 DomWorklist.push_back(DT[BB]);
2175#ifndef NDEBUG
2177 Visited.insert(DT[BB]);
2178#endif
2179 do {
2180 DomTreeNode *N = DomWorklist.pop_back_val();
2181
2182 // Visit this node.
2183 if (!Callable(N->getBlock()))
2184 continue;
2185
2186 // Accumulate the child nodes.
2187 for (DomTreeNode *ChildN : *N) {
2188 assert(Visited.insert(ChildN).second &&
2189 "Cannot visit a node twice when walking a tree!");
2190 DomWorklist.push_back(ChildN);
2191 }
2192 } while (!DomWorklist.empty());
2193}
2194
2196 bool CurrentLoopValid, bool PartiallyInvariant,
2197 bool InjectedCondition, ArrayRef<Loop *> NewLoops) {
2198 // If we did a non-trivial unswitch, we have added new (cloned) loops.
2199 if (!NewLoops.empty())
2200 U.addSiblingLoops(NewLoops);
2201
2202 // If the current loop remains valid, we should revisit it to catch any
2203 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
2204 if (CurrentLoopValid) {
2205 if (PartiallyInvariant) {
2206 // Mark the new loop as partially unswitched, to avoid unswitching on
2207 // the same condition again.
2208 auto &Context = L.getHeader()->getContext();
2209 MDNode *DisableUnswitchMD = MDNode::get(
2210 Context,
2211 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
2213 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
2214 {DisableUnswitchMD});
2215 L.setLoopID(NewLoopID);
2216 } else if (InjectedCondition) {
2217 // Do the same for injection of invariant conditions.
2218 auto &Context = L.getHeader()->getContext();
2219 MDNode *DisableUnswitchMD = MDNode::get(
2220 Context,
2221 MDString::get(Context, "llvm.loop.unswitch.injection.disable"));
2223 Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
2224 {DisableUnswitchMD});
2225 L.setLoopID(NewLoopID);
2226 } else
2227 U.revisitCurrentLoop();
2228 } else
2229 U.markLoopAsDeleted(L, LoopName);
2230}
2231
2233 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2234 IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2236 LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition) {
2237 auto *ParentBB = TI.getParent();
2239 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2240
2241 // Save the current loop name in a variable so that we can report it even
2242 // after it has been deleted.
2243 std::string LoopName(L.getName());
2244
2245 // We can only unswitch switches, conditional branches with an invariant
2246 // condition, or combining invariant conditions with an instruction or
2247 // partially invariant instructions.
2248 assert((SI || BI) && "Can only unswitch switches and conditional branch!");
2249 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2250 bool FullUnswitch =
2251 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2252 !PartiallyInvariant);
2253 if (FullUnswitch)
2254 assert(Invariants.size() == 1 &&
2255 "Cannot have other invariants with full unswitching!");
2256 else
2258 "Partial unswitching requires an instruction as the condition!");
2259
2260 if (MSSAU && VerifyMemorySSA)
2261 MSSAU->getMemorySSA()->verifyMemorySSA();
2262
2263 // Constant and BBs tracking the cloned and continuing successor. When we are
2264 // unswitching the entire condition, this can just be trivially chosen to
2265 // unswitch towards `true`. However, when we are unswitching a set of
2266 // invariants combined with `and` or `or` or partially invariant instructions,
2267 // the combining operation determines the best direction to unswitch: we want
2268 // to unswitch the direction that will collapse the branch.
2269 bool Direction = true;
2270 int ClonedSucc = 0;
2271 if (!FullUnswitch) {
2273 (void)Cond;
2275 PartiallyInvariant) &&
2276 "Only `or`, `and`, an `select`, partially invariant instructions "
2277 "can combine invariants being unswitched.");
2278 if (!match(Cond, m_LogicalOr())) {
2279 if (match(Cond, m_LogicalAnd()) ||
2280 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2281 Direction = false;
2282 ClonedSucc = 1;
2283 }
2284 }
2285 }
2286
2287 BasicBlock *RetainedSuccBB =
2288 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2289 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2290 if (BI)
2291 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2292 else
2293 for (auto Case : SI->cases())
2294 if (Case.getCaseSuccessor() != RetainedSuccBB)
2295 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2296
2297 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2298 "Should not unswitch the same successor we are retaining!");
2299
2300 // The branch should be in this exact loop. Any inner loop's invariant branch
2301 // should be handled by unswitching that inner loop. The caller of this
2302 // routine should filter out any candidates that remain (but were skipped for
2303 // whatever reason).
2304 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2305
2306 // Compute the parent loop now before we start hacking on things.
2307 Loop *ParentL = L.getParentLoop();
2308 // Get blocks in RPO order for MSSA update, before changing the CFG.
2309 LoopBlocksRPO LBRPO(&L);
2310 if (MSSAU)
2311 LBRPO.perform(&LI);
2312
2313 // Compute the outer-most loop containing one of our exit blocks. This is the
2314 // furthest up our loopnest which can be mutated, which we will use below to
2315 // update things.
2316 Loop *OuterExitL = &L;
2318 L.getUniqueExitBlocks(ExitBlocks);
2319 for (auto *ExitBB : ExitBlocks) {
2320 // ExitBB can be an exit block for several levels in the loop nest. Make
2321 // sure we find the top most.
2322 Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI);
2323 if (!NewOuterExitL) {
2324 // We exited the entire nest with this block, so we're done.
2325 OuterExitL = nullptr;
2326 break;
2327 }
2328 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2329 OuterExitL = NewOuterExitL;
2330 }
2331
2332 // At this point, we're definitely going to unswitch something so invalidate
2333 // any cached information in ScalarEvolution for the outer most loop
2334 // containing an exit block and all nested loops.
2335 if (SE) {
2336 if (OuterExitL)
2337 SE->forgetLoop(OuterExitL);
2338 else
2339 SE->forgetTopmostLoop(&L);
2341 }
2342
2343 // If the edge from this terminator to a successor dominates that successor,
2344 // store a map from each block in its dominator subtree to it. This lets us
2345 // tell when cloning for a particular successor if a block is dominated by
2346 // some *other* successor with a single data structure. We use this to
2347 // significantly reduce cloning.
2349 for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
2350 UnswitchedSuccBBs))
2351 if (SuccBB->getUniquePredecessor() ||
2352 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2353 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2354 }))
2355 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2356 DominatingSucc[BB] = SuccBB;
2357 return true;
2358 });
2359
2360 // Split the preheader, so that we know that there is a safe place to insert
2361 // the conditional branch. We will change the preheader to have a conditional
2362 // branch on LoopCond. The original preheader will become the split point
2363 // between the unswitched versions, and we will have a new preheader for the
2364 // original loop.
2365 BasicBlock *SplitBB = L.getLoopPreheader();
2366 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2367
2368 // Keep track of the dominator tree updates needed.
2370
2371 // Clone the loop for each unswitched successor.
2373 VMaps.reserve(UnswitchedSuccBBs.size());
2375 for (auto *SuccBB : UnswitchedSuccBBs) {
2376 VMaps.emplace_back(new ValueToValueMapTy());
2377 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2378 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2379 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2380 }
2381
2382 // Drop metadata if we may break its semantics by moving this instr into the
2383 // split block.
2384 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2386 // Do not spend time trying to understand if we can keep it, just drop it
2387 // to save compile time.
2388 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2389 else {
2390 // It is only legal to preserve make.implicit metadata if we are
2391 // guaranteed no reach implicit null check after following this branch.
2392 ICFLoopSafetyInfo SafetyInfo;
2393 SafetyInfo.computeLoopSafetyInfo(&L);
2394 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2395 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2396 }
2397 }
2398
2399 // The stitching of the branched code back together depends on whether we're
2400 // doing full unswitching or not with the exception that we always want to
2401 // nuke the initial terminator placed in the split block.
2402 SplitBB->getTerminator()->eraseFromParent();
2403 if (FullUnswitch) {
2404 // Keep a clone of the terminator for MSSA updates.
2405 Instruction *NewTI = TI.clone();
2406 NewTI->insertInto(ParentBB, ParentBB->end());
2407
2408 // Splice the terminator from the original loop and rewrite its
2409 // successors.
2410 TI.moveBefore(*SplitBB, SplitBB->end());
2411 TI.dropLocation();
2412
2413 // First wire up the moved terminator to the preheaders.
2414 if (BI) {
2415 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2416 BI->setSuccessor(ClonedSucc, ClonedPH);
2417 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2419 if (InsertFreeze) {
2420 // We don't give any debug location to the new freeze, because the
2421 // BI (`dyn_cast<CondBrInst>(TI)`) is an in-loop instruction hoisted
2422 // out of the loop.
2423 Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI->getIterator());
2425 }
2426 BI->setCondition(Cond);
2427 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2428 } else {
2429 assert(SI && "Must either be a branch or switch!");
2430
2431 // Walk the cases and directly update their successors.
2432 assert(SI->getDefaultDest() == RetainedSuccBB &&
2433 "Not retaining default successor!");
2434 SI->setDefaultDest(LoopPH);
2435 for (const auto &Case : SI->cases())
2436 if (Case.getCaseSuccessor() == RetainedSuccBB)
2437 Case.setSuccessor(LoopPH);
2438 else
2439 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2440
2441 if (InsertFreeze)
2442 SI->setCondition(new FreezeInst(SI->getCondition(),
2443 SI->getCondition()->getName() + ".fr",
2444 SI->getIterator()));
2445
2446 // We need to use the set to populate domtree updates as even when there
2447 // are multiple cases pointing at the same successor we only want to
2448 // remove and insert one edge in the domtree.
2449 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2450 DTUpdates.push_back(
2451 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2452 }
2453
2454 if (MSSAU) {
2455 DT.applyUpdates(DTUpdates);
2456 DTUpdates.clear();
2457
2458 // Remove all but one edge to the retained block and all unswitched
2459 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2460 // when we know we only keep a single edge for each case.
2461 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2462 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2463 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2464
2465 for (auto &VMap : VMaps)
2466 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2467 /*IgnoreIncomingWithNoClones=*/true);
2468 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2469
2470 // Remove all edges to unswitched blocks.
2471 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2472 MSSAU->removeEdge(ParentBB, SuccBB);
2473 }
2474
2475 // Now unhook the successor relationship as we'll be replacing
2476 // the terminator with a direct branch. This is much simpler for branches
2477 // than switches so we handle those first.
2478 if (BI) {
2479 // Remove the parent as a predecessor of the unswitched successor.
2480 assert(UnswitchedSuccBBs.size() == 1 &&
2481 "Only one possible unswitched block for a branch!");
2482 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2483 UnswitchedSuccBB->removePredecessor(ParentBB,
2484 /*KeepOneInputPHIs*/ true);
2485 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2486 } else {
2487 // Note that we actually want to remove the parent block as a predecessor
2488 // of *every* case successor. The case successor is either unswitched,
2489 // completely eliminating an edge from the parent to that successor, or it
2490 // is a duplicate edge to the retained successor as the retained successor
2491 // is always the default successor and as we'll replace this with a direct
2492 // branch we no longer need the duplicate entries in the PHI nodes.
2493 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2494 assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2495 "Not retaining default successor!");
2496 for (const auto &Case : NewSI->cases())
2497 Case.getCaseSuccessor()->removePredecessor(
2498 ParentBB,
2499 /*KeepOneInputPHIs*/ true);
2500
2501 // We need to use the set to populate domtree updates as even when there
2502 // are multiple cases pointing at the same successor we only want to
2503 // remove and insert one edge in the domtree.
2504 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2505 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2506 }
2507
2508 // Create a new unconditional branch to the continuing block (as opposed to
2509 // the one cloned).
2510 Instruction *NewBI = UncondBrInst::Create(RetainedSuccBB, ParentBB);
2511 NewBI->setDebugLoc(NewTI->getDebugLoc());
2512
2513 // After MSSAU update, remove the cloned terminator instruction NewTI.
2514 NewTI->eraseFromParent();
2515 } else {
2516 assert(BI && "Only branches have partial unswitching.");
2517 assert(UnswitchedSuccBBs.size() == 1 &&
2518 "Only one possible unswitched block for a branch!");
2519 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2520 // When doing a partial unswitch, we have to do a bit more work to build up
2521 // the branch in the split block.
2522 if (PartiallyInvariant)
2524 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI);
2525 else {
2527 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2528 FreezeLoopUnswitchCond, BI, &AC, DT, *BI);
2529 }
2530 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2531
2532 if (MSSAU) {
2533 DT.applyUpdates(DTUpdates);
2534 DTUpdates.clear();
2535
2536 // Perform MSSA cloning updates.
2537 for (auto &VMap : VMaps)
2538 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2539 /*IgnoreIncomingWithNoClones=*/true);
2540 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2541 }
2542 }
2543
2544 // Apply the updates accumulated above to get an up-to-date dominator tree.
2545 DT.applyUpdates(DTUpdates);
2546
2547 // Now that we have an accurate dominator tree, first delete the dead cloned
2548 // blocks so that we can accurately build any cloned loops. It is important to
2549 // not delete the blocks from the original loop yet because we still want to
2550 // reference the original loop to understand the cloned loop's structure.
2551 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2552
2553 // Build the cloned loop structure itself. This may be substantially
2554 // different from the original structure due to the simplified CFG. This also
2555 // handles inserting all the cloned blocks into the correct loops.
2556 SmallVector<Loop *, 4> NonChildClonedLoops;
2557 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2558 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2559
2560 // Now that our cloned loops have been built, we can update the original loop.
2561 // First we delete the dead blocks from it and then we rebuild the loop
2562 // structure taking these deletions into account.
2563 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, LoopUpdater);
2564
2565 if (MSSAU && VerifyMemorySSA)
2566 MSSAU->getMemorySSA()->verifyMemorySSA();
2567
2568 SmallVector<Loop *, 4> HoistedLoops;
2569 bool IsStillLoop =
2570 rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2571
2572 if (MSSAU && VerifyMemorySSA)
2573 MSSAU->getMemorySSA()->verifyMemorySSA();
2574
2575#ifdef EXPENSIVE_CHECKS
2576 // This transformation has a high risk of corrupting the dominator tree, and
2577 // the below steps to rebuild loop structures will result in hard to debug
2578 // errors in that case so verify that the dominator tree is sane first.
2579 // FIXME: Remove this when the bugs stop showing up and rely on existing
2580 // verification steps.
2581 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2582#endif
2583
2584 if (BI && !PartiallyInvariant) {
2585 // If we unswitched a branch which collapses the condition to a known
2586 // constant we want to replace all the uses of the invariants within both
2587 // the original and cloned blocks. We do this here so that we can use the
2588 // now updated dominator tree to identify which side the users are on.
2589 assert(UnswitchedSuccBBs.size() == 1 &&
2590 "Only one possible unswitched block for a branch!");
2591 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2592
2593 // When considering multiple partially-unswitched invariants
2594 // we cant just go replace them with constants in both branches.
2595 //
2596 // For 'AND' we infer that true branch ("continue") means true
2597 // for each invariant operand.
2598 // For 'OR' we can infer that false branch ("continue") means false
2599 // for each invariant operand.
2600 // So it happens that for multiple-partial case we dont replace
2601 // in the unswitched branch.
2602 bool ReplaceUnswitched =
2603 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2604
2605 ConstantInt *UnswitchedReplacement =
2606 Direction ? ConstantInt::getTrue(BI->getContext())
2607 : ConstantInt::getFalse(BI->getContext());
2608 ConstantInt *ContinueReplacement =
2609 Direction ? ConstantInt::getFalse(BI->getContext())
2610 : ConstantInt::getTrue(BI->getContext());
2611 for (Value *Invariant : Invariants) {
2612 assert(!isa<Constant>(Invariant) &&
2613 "Should not be replacing constant values!");
2614 // Use make_early_inc_range here as set invalidates the iterator.
2615 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2616 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2617 if (!UserI)
2618 continue;
2619
2620 // Replace it with the 'continue' side if in the main loop body, and the
2621 // unswitched if in the cloned blocks.
2622 if (DT.dominates(LoopPH, UserI->getParent()))
2623 U.set(ContinueReplacement);
2624 else if (ReplaceUnswitched &&
2625 DT.dominates(ClonedPH, UserI->getParent()))
2626 U.set(UnswitchedReplacement);
2627 }
2628 }
2629 }
2630
2631 // We can change which blocks are exit blocks of all the cloned sibling
2632 // loops, the current loop, and any parent loops which shared exit blocks
2633 // with the current loop. As a consequence, we need to re-form LCSSA for
2634 // them. But we shouldn't need to re-form LCSSA for any child loops.
2635 // FIXME: This could be made more efficient by tracking which exit blocks are
2636 // new, and focusing on them, but that isn't likely to be necessary.
2637 //
2638 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2639 // loop nest and update every loop that could have had its exits changed. We
2640 // also need to cover any intervening loops. We add all of these loops to
2641 // a list and sort them by loop depth to achieve this without updating
2642 // unnecessary loops.
2643 auto UpdateLoop = [&](Loop &UpdateL) {
2644#ifndef NDEBUG
2645 UpdateL.verifyLoop();
2646 for (Loop *ChildL : UpdateL) {
2647 ChildL->verifyLoop();
2648 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2649 "Perturbed a child loop's LCSSA form!");
2650 }
2651#endif
2652 // First build LCSSA for this loop so that we can preserve it when
2653 // forming dedicated exits. We don't want to perturb some other loop's
2654 // LCSSA while doing that CFG edit.
2655 formLCSSA(UpdateL, DT, &LI, SE);
2656
2657 // For loops reached by this loop's original exit blocks we may
2658 // introduced new, non-dedicated exits. At least try to re-form dedicated
2659 // exits for these loops. This may fail if they couldn't have dedicated
2660 // exits to start with.
2661 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2662 };
2663
2664 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2665 // and we can do it in any order as they don't nest relative to each other.
2666 //
2667 // Also check if any of the loops we have updated have become top-level loops
2668 // as that will necessitate widening the outer loop scope.
2669 for (Loop *UpdatedL :
2670 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2671 UpdateLoop(*UpdatedL);
2672 if (UpdatedL->isOutermost())
2673 OuterExitL = nullptr;
2674 }
2675 if (IsStillLoop) {
2676 UpdateLoop(L);
2677 if (L.isOutermost())
2678 OuterExitL = nullptr;
2679 }
2680
2681 // If the original loop had exit blocks, walk up through the outer most loop
2682 // of those exit blocks to update LCSSA and form updated dedicated exits.
2683 if (OuterExitL != &L)
2684 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2685 OuterL = OuterL->getParentLoop())
2686 UpdateLoop(*OuterL);
2687
2688#ifdef EXPENSIVE_CHECKS
2689 // Verify the entire loop structure to catch any incorrect updates before we
2690 // progress in the pass pipeline.
2691 LI.verify(DT);
2692#endif
2693
2694 // Now that we've unswitched something, make callbacks to report the changes.
2695 // For that we need to merge together the updated loops and the cloned loops
2696 // and check whether the original loop survived.
2697 SmallVector<Loop *, 4> SibLoops;
2698 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2699 if (UpdatedL->getParentLoop() == ParentL)
2700 SibLoops.push_back(UpdatedL);
2701 postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2702 InjectedCondition, SibLoops);
2703
2704 if (MSSAU && VerifyMemorySSA)
2705 MSSAU->getMemorySSA()->verifyMemorySSA();
2706
2707 if (BI)
2708 ++NumBranches;
2709 else
2710 ++NumSwitches;
2711}
2712
2713/// Recursively compute the cost of a dominator subtree based on the per-block
2714/// cost map provided.
2715///
2716/// The recursive computation is memozied into the provided DT-indexed cost map
2717/// to allow querying it for most nodes in the domtree without it becoming
2718/// quadratic.
2720 DomTreeNode &N,
2723 // Don't accumulate cost (or recurse through) blocks not in our block cost
2724 // map and thus not part of the duplication cost being considered.
2725 auto BBCostIt = BBCostMap.find(N.getBlock());
2726 if (BBCostIt == BBCostMap.end())
2727 return 0;
2728
2729 // Lookup this node to see if we already computed its cost.
2730 auto DTCostIt = DTCostMap.find(&N);
2731 if (DTCostIt != DTCostMap.end())
2732 return DTCostIt->second;
2733
2734 // If not, we have to compute it. We can't use insert above and update
2735 // because computing the cost may insert more things into the map.
2736 InstructionCost Cost = std::accumulate(
2737 N.begin(), N.end(), BBCostIt->second,
2738 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2739 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2740 });
2741 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2742 (void)Inserted;
2743 assert(Inserted && "Should not insert a node while visiting children!");
2744 return Cost;
2745}
2746
2747/// Turns a select instruction into implicit control flow branch,
2748/// making the following replacement:
2749///
2750/// head:
2751/// --code before select--
2752/// select %cond, %trueval, %falseval
2753/// --code after select--
2754///
2755/// into
2756///
2757/// head:
2758/// --code before select--
2759/// br i1 %cond, label %then, label %tail
2760///
2761/// then:
2762/// br %tail
2763///
2764/// tail:
2765/// phi [ %trueval, %then ], [ %falseval, %head]
2766/// unreachable
2767///
2768/// It also makes all relevant DT and LI updates, so that all structures are in
2769/// valid state after this transform.
2771 LoopInfo &LI, MemorySSAUpdater *MSSAU,
2772 AssumptionCache *AC) {
2773 LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
2774 BasicBlock *HeadBB = SI->getParent();
2775
2776 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2777 SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
2778 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2779 auto *CondBr = cast<CondBrInst>(HeadBB->getTerminator());
2780 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2781 *TailBB = CondBr->getSuccessor(1);
2782 if (MSSAU)
2783 MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
2784
2785 PHINode *Phi =
2786 PHINode::Create(SI->getType(), 2, "unswitched.select", SI->getIterator());
2787 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2788 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2789 Phi->setDebugLoc(SI->getDebugLoc());
2790 SI->replaceAllUsesWith(Phi);
2791 SI->eraseFromParent();
2792
2793 if (MSSAU && VerifyMemorySSA)
2794 MSSAU->getMemorySSA()->verifyMemorySSA();
2795
2796 ++NumSelects;
2797 return CondBr;
2798}
2799
2800/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2801/// making the following replacement:
2802///
2803/// --code before guard--
2804/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2805/// --code after guard--
2806///
2807/// into
2808///
2809/// --code before guard--
2810/// br i1 %cond, label %guarded, label %deopt
2811///
2812/// guarded:
2813/// --code after guard--
2814///
2815/// deopt:
2816/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2817/// unreachable
2818///
2819/// It also makes all relevant DT and LI updates, so that all structures are in
2820/// valid state after this transform.
2822 DominatorTree &DT, LoopInfo &LI,
2823 MemorySSAUpdater *MSSAU) {
2824 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2825 BasicBlock *CheckBB = GI->getParent();
2826
2827 if (MSSAU && VerifyMemorySSA)
2828 MSSAU->getMemorySSA()->verifyMemorySSA();
2829
2830 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2831 // llvm.experimental.guard doesn't have branch weights. We can assume,
2832 // however, that the deopt path is unlikely.
2833 Instruction *DeoptBlockTerm = SplitBlockAndInsertIfThen(
2834 GI->getArgOperand(0), GI, true,
2837 : nullptr,
2838 &DTU, &LI);
2839 CondBrInst *CheckBI = cast<CondBrInst>(CheckBB->getTerminator());
2840 // SplitBlockAndInsertIfThen inserts control flow that branches to
2841 // DeoptBlockTerm if the condition is true. We want the opposite.
2842 CheckBI->swapSuccessors();
2843
2844 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2845 GuardedBlock->setName("guarded");
2846 CheckBI->getSuccessor(1)->setName("deopt");
2847 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2848
2849 if (MSSAU)
2850 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2851
2852 GI->moveBefore(DeoptBlockTerm->getIterator());
2854
2855 if (MSSAU) {
2857 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2858 if (VerifyMemorySSA)
2859 MSSAU->getMemorySSA()->verifyMemorySSA();
2860 }
2861
2862 if (VerifyLoopInfo)
2863 LI.verify(DT);
2864 ++NumGuards;
2865 return CheckBI;
2866}
2867
2868/// Cost multiplier is a way to limit potentially exponential behavior
2869/// of loop-unswitch. Cost is multiplied in proportion of 2^number of unswitch
2870/// candidates available. Also consider the number of "sibling" loops with
2871/// the idea of accounting for previous unswitches that already happened on this
2872/// cluster of loops. There was an attempt to keep this formula simple,
2873/// just enough to limit the worst case behavior. Even if it is not that simple
2874/// now it is still not an attempt to provide a detailed heuristic size
2875/// prediction.
2876///
2877/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2878/// unswitch candidates, making adequate predictions instead of wild guesses.
2879/// That requires knowing not just the number of "remaining" candidates but
2880/// also costs of unswitching for each of these candidates.
2882 const Instruction &TI, const Loop &L, const LoopInfo &LI,
2883 const DominatorTree &DT,
2884 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2885
2886 // Guards and other exiting conditions do not contribute to exponential
2887 // explosion as soon as they dominate the latch (otherwise there might be
2888 // another path to the latch remaining that does not allow to eliminate the
2889 // loop copy on unswitch).
2890 const BasicBlock *Latch = L.getLoopLatch();
2891 const BasicBlock *CondBlock = TI.getParent();
2892 if (DT.dominates(CondBlock, Latch) &&
2893 (isGuard(&TI) ||
2894 (TI.isTerminator() &&
2895 llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2896 return L.contains(SuccBB);
2897 }) <= 1))) {
2898 NumCostMultiplierSkipped++;
2899 return 1;
2900 }
2901
2902 // Each invariant non-trivial condition, after being unswitched, is supposed
2903 // to have its own specialized sibling loop (the invariant condition has been
2904 // hoisted out of the child loop into a newly-cloned loop). When unswitching
2905 // conditions in nested loops, the basic block size of the outer loop should
2906 // not be altered. If such a size significantly increases across unswitching
2907 // invocations, something may be wrong; so adjust the final cost taking this
2908 // into account.
2909 auto *ParentL = L.getParentLoop();
2910 int ParentLoopSizeMultiplier = 1;
2911 if (ParentL)
2912 ParentLoopSizeMultiplier =
2913 std::max<int>(ParentL->getNumBlocks() / UnswitchParentBlocksDiv, 1);
2914
2915 int SiblingsCount =
2916 (ParentL ? ParentL->getSubLoopsVector().size() : llvm::size(LI));
2917 // Count amount of clones that all the candidates might cause during
2918 // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2919 // cases.
2920 int UnswitchedClones = 0;
2921 for (const auto &Candidate : UnswitchCandidates) {
2922 const Instruction *CI = Candidate.TI;
2923 const BasicBlock *CondBlock = CI->getParent();
2924 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2925 if (isa<SelectInst>(CI)) {
2926 UnswitchedClones++;
2927 continue;
2928 }
2929 if (isGuard(CI)) {
2930 if (!SkipExitingSuccessors)
2931 UnswitchedClones++;
2932 continue;
2933 }
2934 int NonExitingSuccessors =
2935 llvm::count_if(successors(CondBlock),
2936 [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2937 return !SkipExitingSuccessors || L.contains(SuccBB);
2938 });
2939 UnswitchedClones += Log2_32(NonExitingSuccessors);
2940 }
2941
2942 // Ignore up to the "unscaled candidates" number of unswitch candidates
2943 // when calculating the power-of-two scaling of the cost. The main idea
2944 // with this control is to allow a small number of unswitches to happen
2945 // and rely more on siblings multiplier (see below) when the number
2946 // of candidates is small.
2947 unsigned ClonesPower =
2948 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2949
2950 // Allowing top-level loops to spread a bit more than nested ones.
2951 int SiblingsMultiplier =
2952 std::max((ParentL ? SiblingsCount
2953 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2954 1);
2955 // Compute the cost multiplier in a way that won't overflow by saturating
2956 // at an upper bound.
2957 int CostMultiplier;
2958 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2959 SiblingsMultiplier > UnswitchThreshold ||
2960 ParentLoopSizeMultiplier > UnswitchThreshold)
2961 CostMultiplier = UnswitchThreshold;
2962 else
2963 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2964 (int)UnswitchThreshold);
2965
2966 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2967 << " (siblings " << SiblingsMultiplier << " * parent size "
2968 << ParentLoopSizeMultiplier << " * clones "
2969 << (1 << ClonesPower) << ")"
2970 << " for unswitch candidate: " << TI << "\n");
2971 return CostMultiplier;
2972}
2973
2976 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
2977 const Loop &L, const LoopInfo &LI, AAResults &AA,
2978 const MemorySSAUpdater *MSSAU) {
2979 assert(UnswitchCandidates.empty() && "Should be!");
2980
2981 auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) {
2983 if (isa<Constant>(Cond))
2984 return;
2985 if (L.isLoopInvariant(Cond)) {
2986 UnswitchCandidates.push_back({I, {Cond}});
2987 return;
2988 }
2990 TinyPtrVector<Value *> Invariants =
2992 L, *static_cast<Instruction *>(Cond), LI);
2993 if (!Invariants.empty())
2994 UnswitchCandidates.push_back({I, std::move(Invariants)});
2995 }
2996 };
2997
2998 // Whether or not we should also collect guards in the loop.
2999 bool CollectGuards = false;
3000 if (UnswitchGuards) {
3001 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
3002 L.getHeader()->getParent()->getParent(), Intrinsic::experimental_guard);
3003 if (GuardDecl && !GuardDecl->use_empty())
3004 CollectGuards = true;
3005 }
3006
3007 for (auto *BB : L.blocks()) {
3008 if (LI.getLoopFor(BB) != &L)
3009 continue;
3010
3011 for (auto &I : *BB) {
3012 if (auto *SI = dyn_cast<SelectInst>(&I)) {
3013 auto *Cond = SI->getCondition();
3014 // Do not unswitch vector selects and logical and/or selects
3015 if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
3016 AddUnswitchCandidatesForInst(SI, Cond);
3017 } else if (CollectGuards && isGuard(&I)) {
3018 auto *Cond =
3019 skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
3020 // TODO: Support AND, OR conditions and partial unswitching.
3021 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
3022 UnswitchCandidates.push_back({&I, {Cond}});
3023 }
3024 }
3025
3026 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
3027 // We can only consider fully loop-invariant switch conditions as we need
3028 // to completely eliminate the switch after unswitching.
3029 if (!isa<Constant>(SI->getCondition()) &&
3030 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
3031 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
3032 continue;
3033 }
3034
3035 auto *BI = dyn_cast<CondBrInst>(BB->getTerminator());
3036 if (!BI || BI->getSuccessor(0) == BI->getSuccessor(1))
3037 continue;
3038
3039 AddUnswitchCandidatesForInst(BI, BI->getCondition());
3040 }
3041
3042 if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
3043 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
3044 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
3045 })) {
3046 MemorySSA *MSSA = MSSAU->getMemorySSA();
3047 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
3048 LLVM_DEBUG(
3049 dbgs() << "simple-loop-unswitch: Found partially invariant condition "
3050 << *Info->InstToDuplicate[0] << "\n");
3051 PartialIVInfo = *Info;
3052 PartialIVCondBranch = L.getHeader()->getTerminator();
3053 TinyPtrVector<Value *> ValsToDuplicate;
3054 llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
3055 UnswitchCandidates.push_back(
3056 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
3057 }
3058 }
3059 return !UnswitchCandidates.empty();
3060}
3061
3062/// Tries to canonicalize condition described by:
3063///
3064/// br (LHS pred RHS), label IfTrue, label IfFalse
3065///
3066/// into its equivalent where `Pred` is something that we support for injected
3067/// invariants (so far it is limited to ult), LHS in canonicalized form is
3068/// non-invariant and RHS is an invariant.
3070 Value *&LHS, Value *&RHS,
3071 BasicBlock *&IfTrue,
3072 BasicBlock *&IfFalse,
3073 const Loop &L) {
3074 if (!L.contains(IfTrue)) {
3075 Pred = ICmpInst::getInversePredicate(Pred);
3076 std::swap(IfTrue, IfFalse);
3077 }
3078
3079 // Move loop-invariant argument to RHS position.
3080 if (L.isLoopInvariant(LHS)) {
3081 Pred = ICmpInst::getSwappedPredicate(Pred);
3082 std::swap(LHS, RHS);
3083 }
3084
3085 if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
3086 // Turn "x >=s 0" into "x <u UMIN_INT"
3087 Pred = ICmpInst::ICMP_ULT;
3088 RHS = ConstantInt::get(
3089 RHS->getContext(),
3090 APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));
3091 }
3092}
3093
3094/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
3095/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
3096/// injecting a loop-invariant condition.
3098 const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
3099 const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
3100 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
3101 return false;
3102 // TODO: Support other predicates.
3103 if (Pred != ICmpInst::ICMP_ULT)
3104 return false;
3105 // TODO: Support non-loop-exiting branches?
3106 if (!L.contains(IfTrue) || L.contains(IfFalse))
3107 return false;
3108 // FIXME: For some reason this causes problems with MSSA updates, need to
3109 // investigate why. So far, just don't unswitch latch.
3110 if (L.getHeader() == IfTrue)
3111 return false;
3112 return true;
3113}
3114
3115/// Returns true, if metadata on \p BI allows us to optimize branching into \p
3116/// TakenSucc via injection of invariant conditions. The branch should be not
3117/// enough and not previously unswitched, the information about this comes from
3118/// the metadata.
3120 const BasicBlock *TakenSucc) {
3121 SmallVector<uint32_t> Weights;
3122 if (!extractBranchWeights(*BI, Weights))
3123 return false;
3125 BranchProbability LikelyTaken(T - 1, T);
3126
3127 assert(Weights.size() == 2 && "Unexpected profile data!");
3128 size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
3129 auto Num = Weights[Idx];
3130 auto Denom = Weights[0] + Weights[1];
3131 // Degenerate or overflowed metadata.
3132 if (Denom == 0 || Num > Denom)
3133 return false;
3134 BranchProbability ActualTaken(Num, Denom);
3135 if (LikelyTaken > ActualTaken)
3136 return false;
3137 return true;
3138}
3139
3140/// Materialize pending invariant condition of the given candidate into IR. The
3141/// injected loop-invariant condition implies the original loop-variant branch
3142/// condition, so the materialization turns
3143///
3144/// loop_block:
3145/// ...
3146/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3147///
3148/// into
3149///
3150/// preheader:
3151/// %invariant_cond = LHS pred RHS
3152/// ...
3153/// loop_block:
3154/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
3155/// OriginalCheck:
3156/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3157/// ...
3158static NonTrivialUnswitchCandidate
3159injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
3160 DominatorTree &DT, LoopInfo &LI,
3161 AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
3162 assert(Candidate.hasPendingInjection() && "Nothing to inject!");
3163 BasicBlock *Preheader = L.getLoopPreheader();
3164 assert(Preheader && "Loop is not in simplified form?");
3165 assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
3166 "Unswitching branch of inner loop!");
3167
3168 auto Pred = Candidate.PendingInjection->Pred;
3169 auto *LHS = Candidate.PendingInjection->LHS;
3170 auto *RHS = Candidate.PendingInjection->RHS;
3171 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3172 auto *TI = cast<CondBrInst>(Candidate.TI);
3173 auto *BB = Candidate.TI->getParent();
3174 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3175 : TI->getSuccessor(0);
3176 // FIXME: Remove this once limitation on successors is lifted.
3177 assert(L.contains(InLoopSucc) && "Not supported yet!");
3178 assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
3179 auto &Ctx = BB->getContext();
3180
3181 IRBuilder<> Builder(Preheader->getTerminator());
3182 assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
3183 if (LHS->getType() != RHS->getType()) {
3184 if (LHS->getType()->getIntegerBitWidth() <
3185 RHS->getType()->getIntegerBitWidth())
3186 LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
3187 else
3188 RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
3189 }
3190 // Do not use builder here: CreateICmp may simplify this into a constant and
3191 // unswitching will break. Better optimize it away later.
3192 auto *InjectedCond =
3193 ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
3194 Preheader->getTerminator()->getIterator());
3195
3196 BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
3197 BB->getParent(), InLoopSucc);
3198 Builder.SetInsertPoint(TI);
3199 auto *InvariantBr =
3200 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3201 // We don't know anything about the relation between the limits.
3203
3204 Builder.SetInsertPoint(CheckBlock);
3205 Builder.CreateCondBr(
3206 TI->getCondition(), TI->getSuccessor(0), TI->getSuccessor(1),
3207 !ProfcheckDisableMetadataFixes ? TI->getMetadata(LLVMContext::MD_prof)
3208 : nullptr);
3209 TI->eraseFromParent();
3210
3211 // Fixup phis.
3212 for (auto &I : *InLoopSucc) {
3213 auto *PN = dyn_cast<PHINode>(&I);
3214 if (!PN)
3215 break;
3216 auto *Inc = PN->getIncomingValueForBlock(BB);
3217 PN->addIncoming(Inc, CheckBlock);
3218 }
3219 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3220
3222 { DominatorTree::Insert, BB, CheckBlock },
3223 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3224 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3225 { DominatorTree::Delete, BB, OutOfLoopSucc }
3226 };
3227
3228 DT.applyUpdates(DTUpdates);
3229 if (MSSAU)
3230 MSSAU->applyUpdates(DTUpdates, DT);
3231 L.addBasicBlockToLoop(CheckBlock, LI);
3232
3233#ifndef NDEBUG
3234 DT.verify();
3235 LI.verify(DT);
3236 if (MSSAU && VerifyMemorySSA)
3237 MSSAU->getMemorySSA()->verifyMemorySSA();
3238#endif
3239
3240 // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3241 // higher because we have just inserted a new block. Need to think how to
3242 // adjust the cost of injected candidates when it was first computed.
3243 LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
3244 << " and considering it for unswitching.");
3245 ++NumInvariantConditionsInjected;
3246 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3247 Candidate.Cost);
3248}
3249
3250/// Given chain of loop branch conditions looking like:
3251/// br (Variant < Invariant1)
3252/// br (Variant < Invariant2)
3253/// br (Variant < Invariant3)
3254/// ...
3255/// collect set of invariant conditions on which we want to unswitch, which
3256/// look like:
3257/// Invariant1 <= Invariant2
3258/// Invariant2 <= Invariant3
3259/// ...
3260/// Though they might not immediately exist in the IR, we can still inject them.
3262 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
3264 const DominatorTree &DT) {
3265
3268 if (Compares.size() < 2)
3269 return false;
3271 for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
3272 Next != Compares.end(); ++Prev, ++Next) {
3273 Value *LHS = Next->Invariant;
3274 Value *RHS = Prev->Invariant;
3275 BasicBlock *InLoopSucc = Prev->InLoopSucc;
3276 InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
3277 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3278 std::nullopt, std::move(ToInject));
3279 UnswitchCandidates.push_back(std::move(Candidate));
3280 }
3281 return true;
3282}
3283
3284/// Collect unswitch candidates by invariant conditions that are not immediately
3285/// present in the loop. However, they can be injected into the code if we
3286/// decide it's profitable.
3287/// An example of such conditions is following:
3288///
3289/// for (...) {
3290/// x = load ...
3291/// if (! x <u C1) break;
3292/// if (! x <u C2) break;
3293/// <do something>
3294/// }
3295///
3296/// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
3297/// C2" automatically implies "x <u C2", so we can get rid of one of
3298/// loop-variant checks in unswitched loop version.
3301 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
3302 const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
3303 const MemorySSAUpdater *MSSAU) {
3305 return false;
3306
3307 if (!DT.isReachableFromEntry(L.getHeader()))
3308 return false;
3309 auto *Latch = L.getLoopLatch();
3310 // Need to have a single latch and a preheader.
3311 if (!Latch)
3312 return false;
3313 assert(L.getLoopPreheader() && "Must have a preheader!");
3314
3316 // Traverse the conditions that dominate latch (and therefore dominate each
3317 // other).
3318 for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
3319 DTN = DTN->getIDom()) {
3320 CmpPredicate Pred;
3321 Value *LHS = nullptr, *RHS = nullptr;
3322 BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
3323 auto *BB = DTN->getBlock();
3324 // Ignore inner loops.
3325 if (LI.getLoopFor(BB) != &L)
3326 continue;
3327 auto *Term = BB->getTerminator();
3328 if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
3329 m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
3330 continue;
3331 if (!LHS->getType()->isIntegerTy())
3332 continue;
3333 canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
3334 L);
3335 if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
3336 continue;
3338 continue;
3339 // Strip ZEXT for unsigned predicate.
3340 // TODO: once signed predicates are supported, also strip SEXT.
3341 CompareDesc Desc(cast<CondBrInst>(Term), RHS, IfTrue);
3342 while (auto *Zext = dyn_cast<ZExtInst>(LHS))
3343 LHS = Zext->getOperand(0);
3344 CandidatesULT[LHS].push_back(Desc);
3345 }
3346
3347 bool Found = false;
3348 for (auto &It : CandidatesULT)
3350 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3351 return Found;
3352}
3353
3355 if (!L.isSafeToClone())
3356 return false;
3357 for (auto *BB : L.blocks())
3358 for (auto &I : *BB) {
3359 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
3360 return false;
3361 if (auto *CB = dyn_cast<CallBase>(&I)) {
3362 assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
3363 if (CB->isConvergent())
3364 return false;
3365 }
3366 }
3367
3368 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
3369 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
3370 // irreducible control flow into reducible control flow and introduce new
3371 // loops "out of thin air". If we ever discover important use cases for doing
3372 // this, we can add support to loop unswitch, but it is a lot of complexity
3373 // for what seems little or no real world benefit.
3374 LoopBlocksRPO RPOT(&L);
3375 RPOT.perform(&LI);
3377 return false;
3378
3380 L.getUniqueExitBlocks(ExitBlocks);
3381 // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3382 // instruction as we don't know how to split those exit blocks.
3383 // FIXME: We should teach SplitBlock to handle this and remove this
3384 // restriction.
3385 for (auto *ExitBB : ExitBlocks) {
3386 auto It = ExitBB->getFirstNonPHIIt();
3388 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
3389 "in exit block\n");
3390 return false;
3391 }
3392 }
3393
3394 return true;
3395}
3396
3397static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3398 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
3399 const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
3400 const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
3401 // Given that unswitching these terminators will require duplicating parts of
3402 // the loop, so we need to be able to model that cost. Compute the ephemeral
3403 // values and set up a data structure to hold per-BB costs. We cache each
3404 // block's cost so that we don't recompute this when considering different
3405 // subsets of the loop for duplication during unswitching.
3407 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
3409
3410 // Compute the cost of each block, as well as the total loop cost. Also, bail
3411 // out if we see instructions which are incompatible with loop unswitching
3412 // (convergent, noduplicate, or cross-basic-block tokens).
3413 // FIXME: We might be able to safely handle some of these in non-duplicated
3414 // regions.
3416 L.getHeader()->getParent()->hasMinSize()
3419 InstructionCost LoopCost = 0;
3420 for (auto *BB : L.blocks()) {
3421 InstructionCost Cost = 0;
3422 for (auto &I : *BB) {
3423 if (EphValues.count(&I))
3424 continue;
3425 Cost += TTI.getInstructionCost(&I, CostKind);
3426 }
3427 assert(Cost >= 0 && "Must not have negative costs!");
3428 LoopCost += Cost;
3429 assert(LoopCost >= 0 && "Must not have negative loop costs!");
3430 BBCostMap[BB] = Cost;
3431 }
3432 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
3433
3434 // Now we find the best candidate by searching for the one with the following
3435 // properties in order:
3436 //
3437 // 1) An unswitching cost below the threshold
3438 // 2) The smallest number of duplicated unswitch candidates (to avoid
3439 // creating redundant subsequent unswitching)
3440 // 3) The smallest cost after unswitching.
3441 //
3442 // We prioritize reducing fanout of unswitch candidates provided the cost
3443 // remains below the threshold because this has a multiplicative effect.
3444 //
3445 // This requires memoizing each dominator subtree to avoid redundant work.
3446 //
3447 // FIXME: Need to actually do the number of candidates part above.
3449 // Given a terminator which might be unswitched, computes the non-duplicated
3450 // cost for that terminator.
3451 auto ComputeUnswitchedCost = [&](Instruction &TI,
3452 bool FullUnswitch) -> InstructionCost {
3453 // Unswitching selects unswitches the entire loop.
3454 if (isa<SelectInst>(TI))
3455 return LoopCost;
3456
3457 BasicBlock &BB = *TI.getParent();
3459
3460 InstructionCost Cost = 0;
3461 for (BasicBlock *SuccBB : successors(&BB)) {
3462 // Don't count successors more than once.
3463 if (!Visited.insert(SuccBB).second)
3464 continue;
3465
3466 // If this is a partial unswitch candidate, then it must be a conditional
3467 // branch with a condition of either `or`, `and`, their corresponding
3468 // select forms or partially invariant instructions. In that case, one of
3469 // the successors is necessarily duplicated, so don't even try to remove
3470 // its cost.
3471 if (!FullUnswitch) {
3472 auto &BI = cast<CondBrInst>(TI);
3473 Value *Cond = skipTrivialSelect(BI.getCondition());
3474 if (match(Cond, m_LogicalAnd())) {
3475 if (SuccBB == BI.getSuccessor(1))
3476 continue;
3477 } else if (match(Cond, m_LogicalOr())) {
3478 if (SuccBB == BI.getSuccessor(0))
3479 continue;
3480 } else if ((PartialIVInfo.KnownValue->isOneValue() &&
3481 SuccBB == BI.getSuccessor(0)) ||
3482 (!PartialIVInfo.KnownValue->isOneValue() &&
3483 SuccBB == BI.getSuccessor(1)))
3484 continue;
3485 }
3486
3487 // This successor's domtree will not need to be duplicated after
3488 // unswitching if the edge to the successor dominates it (and thus the
3489 // entire tree). This essentially means there is no other path into this
3490 // subtree and so it will end up live in only one clone of the loop.
3491 if (SuccBB->getUniquePredecessor() ||
3492 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
3493 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
3494 })) {
3495 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
3496 assert(Cost <= LoopCost &&
3497 "Non-duplicated cost should never exceed total loop cost!");
3498 }
3499 }
3500
3501 // Now scale the cost by the number of unique successors minus one. We
3502 // subtract one because there is already at least one copy of the entire
3503 // loop. This is computing the new cost of unswitching a condition.
3504 // Note that guards always have 2 unique successors that are implicit and
3505 // will be materialized if we decide to unswitch it.
3506 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
3507 assert(SuccessorsCount > 1 &&
3508 "Cannot unswitch a condition without multiple distinct successors!");
3509 return (LoopCost - Cost) * (SuccessorsCount - 1);
3510 };
3511
3512 std::optional<NonTrivialUnswitchCandidate> Best;
3513 for (auto &Candidate : UnswitchCandidates) {
3514 Instruction &TI = *Candidate.TI;
3515 ArrayRef<Value *> Invariants = Candidate.Invariants;
3517 bool FullUnswitch =
3518 !BI || Candidate.hasPendingInjection() ||
3519 (Invariants.size() == 1 &&
3520 Invariants[0] == skipTrivialSelect(BI->getCondition()));
3521 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3522 // Calculate cost multiplier which is a tool to limit potentially
3523 // exponential behavior of loop-unswitch.
3525 int CostMultiplier =
3526 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3527 assert(
3528 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
3529 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3530 CandidateCost *= CostMultiplier;
3531 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3532 << " (multiplier: " << CostMultiplier << ")"
3533 << " for unswitch candidate: " << TI << "\n");
3534 } else {
3535 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3536 << " for unswitch candidate: " << TI << "\n");
3537 }
3538
3539 if (!Best || CandidateCost < Best->Cost) {
3540 Best = Candidate;
3541 Best->Cost = CandidateCost;
3542 }
3543 }
3544 assert(Best && "Must be!");
3545 return *Best;
3546}
3547
3548// Insert a freeze on an unswitched branch if all is true:
3549// 1. freeze-loop-unswitch-cond option is true
3550// 2. The branch may not execute in the loop pre-transformation. If a branch may
3551// not execute and could cause UB, it would always cause UB if it is hoisted outside
3552// of the loop. Insert a freeze to prevent this case.
3553// 3. The branch condition may be poison or undef
3555 AssumptionCache &AC) {
3558 return false;
3559
3560 ICFLoopSafetyInfo SafetyInfo;
3561 SafetyInfo.computeLoopSafetyInfo(&L);
3562 if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
3563 return false;
3564
3565 Value *Cond;
3566 if (CondBrInst *BI = dyn_cast<CondBrInst>(&TI))
3567 Cond = skipTrivialSelect(BI->getCondition());
3568 else
3571 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3572}
3573
3577 MemorySSAUpdater *MSSAU,
3578 LPMUpdater &LoopUpdater) {
3579 // Collect all invariant conditions within this loop (as opposed to an inner
3580 // loop which would be handled when visiting that inner loop).
3582 IVConditionInfo PartialIVInfo;
3583 Instruction *PartialIVCondBranch = nullptr;
3584 collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3585 PartialIVCondBranch, L, LI, AA, MSSAU);
3586 if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable"))
3587 collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
3588 PartialIVCondBranch, L, DT, LI, AA,
3589 MSSAU);
3590 // If we didn't find any candidates, we're done.
3591 if (UnswitchCandidates.empty())
3592 return false;
3593
3594 LLVM_DEBUG(
3595 dbgs() << "Considering " << UnswitchCandidates.size()
3596 << " non-trivial loop invariant conditions for unswitching.\n");
3597
3598 NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
3599 UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3600
3601 assert(Best.TI && "Failed to find loop unswitch candidate");
3602 assert(Best.Cost && "Failed to compute cost");
3603
3604 if (*Best.Cost >= UnswitchThreshold) {
3605 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
3606 << "\n");
3607 return false;
3608 }
3609
3610 bool InjectedCondition = false;
3611 if (Best.hasPendingInjection()) {
3612 Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3613 InjectedCondition = true;
3614 }
3615 assert(!Best.hasPendingInjection() &&
3616 "All injections should have been done by now!");
3617
3618 if (Best.TI != PartialIVCondBranch)
3619 PartialIVInfo.InstToDuplicate.clear();
3620
3621 bool InsertFreeze;
3622 if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3623 // If the best candidate is a select, turn it into a branch. Select
3624 // instructions with a poison conditional do not propagate poison, but
3625 // branching on poison causes UB. Insert a freeze on the select
3626 // conditional to prevent UB after turning the select into a branch.
3627 InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
3628 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3629 Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
3630 } else {
3631 // If the best candidate is a guard, turn it into a branch.
3632 if (isGuard(Best.TI))
3633 Best.TI =
3634 turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3635 InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
3636 }
3637
3638 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
3639 << ") terminator: " << *Best.TI << "\n");
3640 unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3641 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3642 InjectedCondition);
3643 return true;
3644}
3645
3646/// Unswitch control flow predicated on loop invariant conditions.
3647///
3648/// This first hoists all branches or switches which are trivial (IE, do not
3649/// require duplicating any part of the loop) out of the loop body. It then
3650/// looks at other loop invariant control flows and tries to unswitch those as
3651/// well by cloning the loop if the result is small enough.
3652///
3653/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3654/// also updated based on the unswitch. The `MSSA` analysis is also updated if
3655/// valid (i.e. its use is enabled).
3656///
3657/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3658/// true, we will attempt to do non-trivial unswitching as well as trivial
3659/// unswitching.
3660///
3661/// The `postUnswitch` function will be run after unswitching is complete
3662/// with information on whether or not the provided loop remains a loop and
3663/// a list of new sibling loops created.
3664///
3665/// If `SE` is non-null, we will update that analysis based on the unswitching
3666/// done.
3667static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
3669 TargetTransformInfo &TTI, bool Trivial,
3670 bool NonTrivial, ScalarEvolution *SE,
3671 MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater) {
3672 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3673 "Loops must be in LCSSA form before unswitching.");
3674
3675 // Must be in loop simplified form: we need a preheader and dedicated exits.
3676 if (!L.isLoopSimplifyForm())
3677 return false;
3678
3679 // Try trivial unswitch first before loop over other basic blocks in the loop.
3680 if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3681 // If we unswitched successfully we will want to clean up the loop before
3682 // processing it further so just mark it as unswitched and return.
3683 postUnswitch(L, LoopUpdater, L.getName(),
3684 /*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false,
3685 /*InjectedCondition*/ false, {});
3686 return true;
3687 }
3688
3689 const Function *F = L.getHeader()->getParent();
3690
3691 // Check whether we should continue with non-trivial conditions.
3692 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3693 // unswitching for testing and debugging.
3694 // NonTrivial: Parameter that enables non-trivial unswitching for this
3695 // invocation of the transform. But this should be allowed only
3696 // for targets without branch divergence.
3697 //
3698 // FIXME: If divergence analysis becomes available to a loop
3699 // transform, we should allow unswitching for non-trivial uniform
3700 // branches even on targets that have divergence.
3701 // https://bugs.llvm.org/show_bug.cgi?id=48819
3702 bool ContinueWithNonTrivial =
3703 EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F));
3704 if (!ContinueWithNonTrivial)
3705 return false;
3706
3707 // Skip non-trivial unswitching for optsize functions.
3708 if (F->hasOptSize())
3709 return false;
3710
3711 // Perform legality checks.
3713 return false;
3714
3715 // For non-trivial unswitching, because it often creates new loops, we rely on
3716 // the pass manager to iterate on the loops rather than trying to immediately
3717 // reach a fixed point. There is no substantial advantage to iterating
3718 // internally, and if any of the new loops are simplified enough to contain
3719 // trivial unswitching we want to prefer those.
3720
3721 // Try to unswitch the best invariant condition. We prefer this full unswitch to
3722 // a partial unswitch when possible below the threshold.
3723 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, SE, MSSAU, LoopUpdater))
3724 return true;
3725
3726 // No other opportunities to unswitch.
3727 return false;
3728}
3729
3732 LPMUpdater &U) {
3733 Function &F = *L.getHeader()->getParent();
3734 (void)F;
3735 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3736 << "\n");
3737
3738 std::optional<MemorySSAUpdater> MSSAU;
3739 if (AR.MSSA) {
3740 MSSAU = MemorySSAUpdater(AR.MSSA);
3741 if (VerifyMemorySSA)
3742 AR.MSSA->verifyMemorySSA();
3743 }
3744 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3745 &AR.SE, MSSAU ? &*MSSAU : nullptr, U))
3746 return PreservedAnalyses::all();
3747
3748 if (AR.MSSA && VerifyMemorySSA)
3749 AR.MSSA->verifyMemorySSA();
3750
3751#ifdef EXPENSIVE_CHECKS
3752 // Historically this pass has had issues with the dominator tree so verify it
3753 // in asserts builds.
3754 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
3755#endif
3756
3757 auto PA = getLoopPassPreservedAnalyses();
3758 if (AR.MSSA)
3759 PA.preserve<MemorySSAAnalysis>();
3760 return PA;
3761}
3762
3764 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3766 OS, MapClassName2PassName);
3767
3768 OS << '<';
3769 OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3770 OS << (Trivial ? "" : "no-") << "trivial";
3771 OS << '>';
3772}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
static Value * getCondition(Instruction *I)
Module.h This file contains the declarations for the Module class.
This defines the Use class.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:253
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
#define T
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
uint64_t IntrinsicInst * II
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU, const CondBrInst &OriginalBranch)
Copy a set of loop invariant values, and conditionally branch on them.
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static CondBrInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
bool shouldTryInjectBasingOnMetadata(const CondBrInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static void canonicalizeForInvariantConditionInjection(CmpPredicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static CondBrInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
Unswitch control flow predicated on loop invariant conditions.
static bool unswitchTrivialBranch(Loop &L, CondBrInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT, const CondBrInst &ComputeProfFrom)
Copy a set of loop invariant values Invariants and insert them at the end of BB and conditionally bra...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:462
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:449
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:518
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
size_t size() const
Definition BasicBlock.h:470
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition BasicBlock.h:376
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:871
static LLVM_ABI bool isStrictPredicate(Predicate predicate)
This is a static version that you can use without an instruction available.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
bool isUnsigned() const
Definition InstrTypes.h:936
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void setCondition(Value *V)
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getCompilerGenerated()
Definition DebugLoc.h:162
static DebugLoc getDropped()
Definition DebugLoc.h:163
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator begin()
Definition DenseMap.h:78
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1223
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2650
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1577
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1599
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2811
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void dropLocation()
Drop the instruction's debug location.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
void changeLoopFor(const BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
StringRef getName() const
Definition LoopInfo.h:389
LLVM_ABI MDNode * createUnlikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards false destination.
Definition MDBuilder.cpp:48
Metadata node.
Definition Metadata.h:1080
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition MemorySSA.h:371
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:922
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
LLVM_ABI void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
LLVM_ABI void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
LLVM_ABI void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
LLVM_ABI void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
LLVM_ABI void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
LLVM_ABI MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point, bool CreationMustSucceed=true)
Create a MemoryAccess in MemorySSA at a specified point in a block.
LLVM_ABI void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
LLVM_ABI void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
LLVM_ABI void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
LLVM_ABI void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition MemorySSA.h:765
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
The main scalar evolution driver.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition SetVector.h:106
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI Instruction::InstListType::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Multiway switch.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
void push_back(EltTy NewVal)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition ValueMap.h:167
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition ValueMap.h:156
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:397
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ?
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2116
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition Metadata.cpp:64
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:535
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
auto successors(const MachineBasicBlock *BB)
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
auto cast_or_null(const Y &Val)
Definition Casting.h:714
LLVM_ABI MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Op::Description Desc
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1152
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:94
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI)
Return true if the control flow in RPOTraversal is irreducible.
Definition CFG.h:154
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition STLExtras.h:854
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:53
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
TargetTransformInfo TTI
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition LoopUtils.cpp:61
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1717
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
static cl::opt< bool > EstimateProfile("simple-loop-unswitch-estimate-profile", cl::Hidden, cl::init(true))
LLVM_ABI llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
auto predecessors(const MachineBasicBlock *BB)
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
LLVM_ABI std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
LLVM_ABI bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition LCSSA.cpp:427
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
static cl::opt< int > UnswitchParentBlocksDiv("unswitch-parent-blocks-div", cl::init(8), cl::Hidden, cl::desc("Outer loop size divisor for cost multiplier."))
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Struct to hold information about a partially invariant condition.
Definition LoopUtils.h:637
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition LoopUtils.h:640
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition LoopUtils.h:643
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70