Bug Summary

File: llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
Warning: line 2872, column 15
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SimpleLoopUnswitch.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/build-llvm/lib/Transforms/Scalar -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/build-llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem 
/usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/build-llvm/lib/Transforms/Scalar -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-06-13-111025-38230-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
14#include "llvm/ADT/SmallPtrSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
18#include "llvm/Analysis/AssumptionCache.h"
19#include "llvm/Analysis/CFG.h"
20#include "llvm/Analysis/CodeMetrics.h"
21#include "llvm/Analysis/GuardUtils.h"
22#include "llvm/Analysis/InstructionSimplify.h"
23#include "llvm/Analysis/LoopAnalysisManager.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/LoopIterator.h"
26#include "llvm/Analysis/LoopPass.h"
27#include "llvm/Analysis/MemorySSA.h"
28#include "llvm/Analysis/MemorySSAUpdater.h"
29#include "llvm/Analysis/MustExecute.h"
30#include "llvm/Analysis/ScalarEvolution.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/Constant.h"
33#include "llvm/IR/Constants.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/IntrinsicInst.h"
41#include "llvm/IR/PatternMatch.h"
42#include "llvm/IR/Use.h"
43#include "llvm/IR/Value.h"
44#include "llvm/InitializePasses.h"
45#include "llvm/Pass.h"
46#include "llvm/Support/Casting.h"
47#include "llvm/Support/CommandLine.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/ErrorHandling.h"
50#include "llvm/Support/GenericDomTree.h"
51#include "llvm/Support/raw_ostream.h"
52#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
53#include "llvm/Transforms/Utils/BasicBlockUtils.h"
54#include "llvm/Transforms/Utils/Cloning.h"
55#include "llvm/Transforms/Utils/Local.h"
56#include "llvm/Transforms/Utils/LoopUtils.h"
57#include "llvm/Transforms/Utils/ValueMapper.h"
58#include <algorithm>
59#include <cassert>
60#include <iterator>
61#include <numeric>
62#include <utility>
63
64#define DEBUG_TYPE"simple-loop-unswitch" "simple-loop-unswitch"
65
66using namespace llvm;
67using namespace llvm::PatternMatch;
68
69STATISTIC(NumBranches, "Number of branches unswitched")static llvm::Statistic NumBranches = {"simple-loop-unswitch",
"NumBranches", "Number of branches unswitched"}
;
70STATISTIC(NumSwitches, "Number of switches unswitched")static llvm::Statistic NumSwitches = {"simple-loop-unswitch",
"NumSwitches", "Number of switches unswitched"}
;
71STATISTIC(NumGuards, "Number of guards turned into branches for unswitching")static llvm::Statistic NumGuards = {"simple-loop-unswitch", "NumGuards"
, "Number of guards turned into branches for unswitching"}
;
72STATISTIC(NumTrivial, "Number of unswitches that are trivial")static llvm::Statistic NumTrivial = {"simple-loop-unswitch", "NumTrivial"
, "Number of unswitches that are trivial"}
;
73STATISTIC(static llvm::Statistic NumCostMultiplierSkipped = {"simple-loop-unswitch"
, "NumCostMultiplierSkipped", "Number of unswitch candidates that had their cost multiplier skipped"
}
74 NumCostMultiplierSkipped,static llvm::Statistic NumCostMultiplierSkipped = {"simple-loop-unswitch"
, "NumCostMultiplierSkipped", "Number of unswitch candidates that had their cost multiplier skipped"
}
75 "Number of unswitch candidates that had their cost multiplier skipped")static llvm::Statistic NumCostMultiplierSkipped = {"simple-loop-unswitch"
, "NumCostMultiplierSkipped", "Number of unswitch candidates that had their cost multiplier skipped"
}
;
76
77static cl::opt<bool> EnableNonTrivialUnswitch(
78 "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
79 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
80 "following the configuration passed into the pass."));
81
82static cl::opt<int>
83 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
84 cl::desc("The cost threshold for unswitching a loop."));
85
86static cl::opt<bool> EnableUnswitchCostMultiplier(
87 "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
88 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
89 "explosion in nontrivial unswitch."));
90static cl::opt<int> UnswitchSiblingsToplevelDiv(
91 "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
92 cl::desc("Toplevel siblings divisor for cost multiplier."));
93static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
94 "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
95 cl::desc("Number of unswitch candidates that are ignored when calculating "
96 "cost multiplier."));
97static cl::opt<bool> UnswitchGuards(
98 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
99 cl::desc("If enabled, simple loop unswitching will also consider "
100 "llvm.experimental.guard intrinsics as unswitch candidates."));
101static cl::opt<bool> DropNonTrivialImplicitNullChecks(
102 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
103 cl::init(false), cl::Hidden,
104 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
105 "null checks to save time analyzing if we can keep it."));
106static cl::opt<unsigned>
107 MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
108 cl::desc("Max number of memory uses to explore during "
109 "partial unswitching analysis"),
110 cl::init(100), cl::Hidden);
111
112/// Collect all of the loop invariant input values transitively used by the
113/// homogeneous instruction graph from a given root.
114///
115/// This essentially walks from a root recursively through loop variant operands
116/// which have the exact same opcode and finds all inputs which are loop
117/// invariant. For some operations these can be re-associated and unswitched out
118/// of the loop entirely.
119static TinyPtrVector<Value *>
120collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
121 LoopInfo &LI) {
122 assert(!L.isLoopInvariant(&Root) &&(static_cast <bool> (!L.isLoopInvariant(&Root) &&
"Only need to walk the graph if root itself is not invariant."
) ? void (0) : __assert_fail ("!L.isLoopInvariant(&Root) && \"Only need to walk the graph if root itself is not invariant.\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 123, __extension__ __PRETTY_FUNCTION__))
123 "Only need to walk the graph if root itself is not invariant.")(static_cast <bool> (!L.isLoopInvariant(&Root) &&
"Only need to walk the graph if root itself is not invariant."
) ? void (0) : __assert_fail ("!L.isLoopInvariant(&Root) && \"Only need to walk the graph if root itself is not invariant.\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 123, __extension__ __PRETTY_FUNCTION__))
;
124 TinyPtrVector<Value *> Invariants;
125
126 bool IsRootAnd = match(&Root, m_LogicalAnd());
127 bool IsRootOr = match(&Root, m_LogicalOr());
128
129 // Build a worklist and recurse through operators collecting invariants.
130 SmallVector<Instruction *, 4> Worklist;
131 SmallPtrSet<Instruction *, 8> Visited;
132 Worklist.push_back(&Root);
133 Visited.insert(&Root);
134 do {
135 Instruction &I = *Worklist.pop_back_val();
136 for (Value *OpV : I.operand_values()) {
137 // Skip constants as unswitching isn't interesting for them.
138 if (isa<Constant>(OpV))
139 continue;
140
141 // Add it to our result if loop invariant.
142 if (L.isLoopInvariant(OpV)) {
143 Invariants.push_back(OpV);
144 continue;
145 }
146
147 // If not an instruction with the same opcode, nothing we can do.
148 Instruction *OpI = dyn_cast<Instruction>(OpV);
149
150 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
151 (IsRootOr && match(OpI, m_LogicalOr())))) {
152 // Visit this operand.
153 if (Visited.insert(OpI).second)
154 Worklist.push_back(OpI);
155 }
156 }
157 } while (!Worklist.empty());
158
159 return Invariants;
160}
161
162static void replaceLoopInvariantUses(Loop &L, Value *Invariant,
163 Constant &Replacement) {
164 assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?")(static_cast <bool> (!isa<Constant>(Invariant) &&
"Why are we unswitching on a constant?") ? void (0) : __assert_fail
("!isa<Constant>(Invariant) && \"Why are we unswitching on a constant?\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 164, __extension__ __PRETTY_FUNCTION__))
;
165
166 // Replace uses of LIC in the loop with the given constant.
167 // We use make_early_inc_range as set invalidates the iterator.
168 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
169 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
170
171 // Replace this use within the loop body.
172 if (UserI && L.contains(UserI))
173 U.set(&Replacement);
174 }
175}
176
177/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
178/// incoming values along this edge.
179static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
180 BasicBlock &ExitBB) {
181 for (Instruction &I : ExitBB) {
182 auto *PN = dyn_cast<PHINode>(&I);
183 if (!PN)
184 // No more PHIs to check.
185 return true;
186
187 // If the incoming value for this edge isn't loop invariant the unswitch
188 // won't be trivial.
189 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
190 return false;
191 }
192 llvm_unreachable("Basic blocks should never be empty!")::llvm::llvm_unreachable_internal("Basic blocks should never be empty!"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 192)
;
193}
194
195/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
196/// end of \p BB and conditionally branch on the copied condition. We only
197/// branch on a single value.
198static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
199 ArrayRef<Value *> Invariants,
200 bool Direction,
201 BasicBlock &UnswitchedSucc,
202 BasicBlock &NormalSucc) {
203 IRBuilder<> IRB(&BB);
204
205 Value *Cond = Direction ? IRB.CreateOr(Invariants) :
206 IRB.CreateAnd(Invariants);
207 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
208 Direction ? &NormalSucc : &UnswitchedSucc);
209}
210
211/// Copy a set of loop invariant values, and conditionally branch on them.
212static void buildPartialInvariantUnswitchConditionalBranch(
213 BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
214 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
215 MemorySSAUpdater *MSSAU) {
216 ValueToValueMapTy VMap;
217 for (auto *Val : reverse(ToDuplicate)) {
218 Instruction *Inst = cast<Instruction>(Val);
219 Instruction *NewInst = Inst->clone();
220 BB.getInstList().insert(BB.end(), NewInst);
221 RemapInstruction(NewInst, VMap,
222 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
223 VMap[Val] = NewInst;
224
225 if (!MSSAU)
226 continue;
227
228 MemorySSA *MSSA = MSSAU->getMemorySSA();
229 if (auto *MemUse =
230 dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
231 auto *DefiningAccess = MemUse->getDefiningAccess();
232 // Get the first defining access before the loop.
233 while (L.contains(DefiningAccess->getBlock())) {
234 // If the defining access is a MemoryPhi, get the incoming
235 // value for the pre-header as defining access.
236 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
237 DefiningAccess =
238 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
239 else
240 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
241 }
242 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
243 NewInst->getParent(),
244 MemorySSA::BeforeTerminator);
245 }
246 }
247
248 IRBuilder<> IRB(&BB);
249 Value *Cond = VMap[ToDuplicate[0]];
250 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
251 Direction ? &NormalSucc : &UnswitchedSucc);
252}
253
254/// Rewrite the PHI nodes in an unswitched loop exit basic block.
255///
256/// Requires that the loop exit and unswitched basic block are the same, and
257/// that the exiting block was a unique predecessor of that block. Rewrites the
258/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
259/// PHI nodes from the old preheader that now contains the unswitched
260/// terminator.
261static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
262 BasicBlock &OldExitingBB,
263 BasicBlock &OldPH) {
264 for (PHINode &PN : UnswitchedBB.phis()) {
265 // When the loop exit is directly unswitched we just need to update the
266 // incoming basic block. We loop to handle weird cases with repeated
267 // incoming blocks, but expect to typically only have one operand here.
268 for (auto i : seq<int>(0, PN.getNumOperands())) {
269 assert(PN.getIncomingBlock(i) == &OldExitingBB &&(static_cast <bool> (PN.getIncomingBlock(i) == &OldExitingBB
&& "Found incoming block different from unique predecessor!"
) ? void (0) : __assert_fail ("PN.getIncomingBlock(i) == &OldExitingBB && \"Found incoming block different from unique predecessor!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 270, __extension__ __PRETTY_FUNCTION__))
270 "Found incoming block different from unique predecessor!")(static_cast <bool> (PN.getIncomingBlock(i) == &OldExitingBB
&& "Found incoming block different from unique predecessor!"
) ? void (0) : __assert_fail ("PN.getIncomingBlock(i) == &OldExitingBB && \"Found incoming block different from unique predecessor!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 270, __extension__ __PRETTY_FUNCTION__))
;
271 PN.setIncomingBlock(i, &OldPH);
272 }
273 }
274}
275
276/// Rewrite the PHI nodes in the loop exit basic block and the split off
277/// unswitched block.
278///
279/// Because the exit block remains an exit from the loop, this rewrites the
280/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
281/// nodes into the unswitched basic block to select between the value in the
282/// old preheader and the loop exit.
283static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
284 BasicBlock &UnswitchedBB,
285 BasicBlock &OldExitingBB,
286 BasicBlock &OldPH,
287 bool FullUnswitch) {
288 assert(&ExitBB != &UnswitchedBB &&(static_cast <bool> (&ExitBB != &UnswitchedBB &&
"Must have different loop exit and unswitched blocks!") ? void
(0) : __assert_fail ("&ExitBB != &UnswitchedBB && \"Must have different loop exit and unswitched blocks!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 289, __extension__ __PRETTY_FUNCTION__))
289 "Must have different loop exit and unswitched blocks!")(static_cast <bool> (&ExitBB != &UnswitchedBB &&
"Must have different loop exit and unswitched blocks!") ? void
(0) : __assert_fail ("&ExitBB != &UnswitchedBB && \"Must have different loop exit and unswitched blocks!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 289, __extension__ __PRETTY_FUNCTION__))
;
290 Instruction *InsertPt = &*UnswitchedBB.begin();
291 for (PHINode &PN : ExitBB.phis()) {
292 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
293 PN.getName() + ".split", InsertPt);
294
295 // Walk backwards over the old PHI node's inputs to minimize the cost of
296 // removing each one. We have to do this weird loop manually so that we
297 // create the same number of new incoming edges in the new PHI as we expect
298 // each case-based edge to be included in the unswitched switch in some
299 // cases.
300 // FIXME: This is really, really gross. It would be much cleaner if LLVM
301 // allowed us to create a single entry for a predecessor block without
302 // having separate entries for each "edge" even though these edges are
303 // required to produce identical results.
304 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
305 if (PN.getIncomingBlock(i) != &OldExitingBB)
306 continue;
307
308 Value *Incoming = PN.getIncomingValue(i);
309 if (FullUnswitch)
310 // No more edge from the old exiting block to the exit block.
311 PN.removeIncomingValue(i);
312
313 NewPN->addIncoming(Incoming, &OldPH);
314 }
315
316 // Now replace the old PHI with the new one and wire the old one in as an
317 // input to the new one.
318 PN.replaceAllUsesWith(NewPN);
319 NewPN->addIncoming(&PN, &ExitBB);
320 }
321}
322
323/// Hoist the current loop up to the innermost loop containing a remaining exit.
324///
325/// Because we've removed an exit from the loop, we may have changed the set of
326/// loops reachable and need to move the current loop up the loop nest or even
327/// to an entirely separate nest.
328static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
329 DominatorTree &DT, LoopInfo &LI,
330 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
331 // If the loop is already at the top level, we can't hoist it anywhere.
332 Loop *OldParentL = L.getParentLoop();
333 if (!OldParentL)
334 return;
335
336 SmallVector<BasicBlock *, 4> Exits;
337 L.getExitBlocks(Exits);
338 Loop *NewParentL = nullptr;
339 for (auto *ExitBB : Exits)
340 if (Loop *ExitL = LI.getLoopFor(ExitBB))
341 if (!NewParentL || NewParentL->contains(ExitL))
342 NewParentL = ExitL;
343
344 if (NewParentL == OldParentL)
345 return;
346
347 // The new parent loop (if different) should always contain the old one.
348 if (NewParentL)
349 assert(NewParentL->contains(OldParentL) &&(static_cast <bool> (NewParentL->contains(OldParentL
) && "Can only hoist this loop up the nest!") ? void (
0) : __assert_fail ("NewParentL->contains(OldParentL) && \"Can only hoist this loop up the nest!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 350, __extension__ __PRETTY_FUNCTION__))
350 "Can only hoist this loop up the nest!")(static_cast <bool> (NewParentL->contains(OldParentL
) && "Can only hoist this loop up the nest!") ? void (
0) : __assert_fail ("NewParentL->contains(OldParentL) && \"Can only hoist this loop up the nest!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 350, __extension__ __PRETTY_FUNCTION__))
;
351
352 // The preheader will need to move with the body of this loop. However,
353 // because it isn't in this loop we also need to update the primary loop map.
354 assert(OldParentL == LI.getLoopFor(&Preheader) &&(static_cast <bool> (OldParentL == LI.getLoopFor(&Preheader
) && "Parent loop of this loop should contain this loop's preheader!"
) ? void (0) : __assert_fail ("OldParentL == LI.getLoopFor(&Preheader) && \"Parent loop of this loop should contain this loop's preheader!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 355, __extension__ __PRETTY_FUNCTION__))
355 "Parent loop of this loop should contain this loop's preheader!")(static_cast <bool> (OldParentL == LI.getLoopFor(&Preheader
) && "Parent loop of this loop should contain this loop's preheader!"
) ? void (0) : __assert_fail ("OldParentL == LI.getLoopFor(&Preheader) && \"Parent loop of this loop should contain this loop's preheader!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 355, __extension__ __PRETTY_FUNCTION__))
;
356 LI.changeLoopFor(&Preheader, NewParentL);
357
358 // Remove this loop from its old parent.
359 OldParentL->removeChildLoop(&L);
360
361 // Add the loop either to the new parent or as a top-level loop.
362 if (NewParentL)
363 NewParentL->addChildLoop(&L);
364 else
365 LI.addTopLevelLoop(&L);
366
367 // Remove this loops blocks from the old parent and every other loop up the
368 // nest until reaching the new parent. Also update all of these
369 // no-longer-containing loops to reflect the nesting change.
370 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
371 OldContainingL = OldContainingL->getParentLoop()) {
372 llvm::erase_if(OldContainingL->getBlocksVector(),
373 [&](const BasicBlock *BB) {
374 return BB == &Preheader || L.contains(BB);
375 });
376
377 OldContainingL->getBlocksSet().erase(&Preheader);
378 for (BasicBlock *BB : L.blocks())
379 OldContainingL->getBlocksSet().erase(BB);
380
381 // Because we just hoisted a loop out of this one, we have essentially
382 // created new exit paths from it. That means we need to form LCSSA PHI
383 // nodes for values used in the no-longer-nested loop.
384 formLCSSA(*OldContainingL, DT, &LI, SE);
385
386 // We shouldn't need to form dedicated exits because the exit introduced
387 // here is the (just split by unswitching) preheader. However, after trivial
388 // unswitching it is possible to get new non-dedicated exits out of parent
389 // loop so let's conservatively form dedicated exit blocks and figure out
390 // if we can optimize later.
391 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
392 /*PreserveLCSSA*/ true);
393 }
394}
395
396// Return the top-most loop containing ExitBB and having ExitBB as exiting block
397// or the loop containing ExitBB, if there is no parent loop containing ExitBB
398// as exiting block.
399static Loop *getTopMostExitingLoop(BasicBlock *ExitBB, LoopInfo &LI) {
400 Loop *TopMost = LI.getLoopFor(ExitBB);
401 Loop *Current = TopMost;
402 while (Current) {
403 if (Current->isLoopExiting(ExitBB))
404 TopMost = Current;
405 Current = Current->getParentLoop();
406 }
407 return TopMost;
408}
409
410/// Unswitch a trivial branch if the condition is loop invariant.
411///
412/// This routine should only be called when loop code leading to the branch has
413/// been validated as trivial (no side effects). This routine checks if the
414/// condition is invariant and one of the successors is a loop exit. This
415/// allows us to unswitch without duplicating the loop, making it trivial.
416///
417/// If this routine fails to unswitch the branch it returns false.
418///
419/// If the branch can be unswitched, this routine splits the preheader and
420/// hoists the branch above that split. Preserves loop simplified form
421/// (splitting the exit block as necessary). It simplifies the branch within
422/// the loop to an unconditional branch but doesn't remove it entirely. Further
423/// cleanup can be done with some simplify-cfg like pass.
424///
425/// If `SE` is not null, it will be updated based on the potential loop SCEVs
426/// invalidated by this.
427static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
428 LoopInfo &LI, ScalarEvolution *SE,
429 MemorySSAUpdater *MSSAU) {
430 assert(BI.isConditional() && "Can only unswitch a conditional branch!")(static_cast <bool> (BI.isConditional() && "Can only unswitch a conditional branch!"
) ? void (0) : __assert_fail ("BI.isConditional() && \"Can only unswitch a conditional branch!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 430, __extension__ __PRETTY_FUNCTION__))
;
431 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Trying to unswitch branch: "
<< BI << "\n"; } } while (false)
;
432
433 // The loop invariant values that we want to unswitch.
434 TinyPtrVector<Value *> Invariants;
435
436 // When true, we're fully unswitching the branch rather than just unswitching
437 // some input conditions to the branch.
438 bool FullUnswitch = false;
439
440 if (L.isLoopInvariant(BI.getCondition())) {
441 Invariants.push_back(BI.getCondition());
442 FullUnswitch = true;
443 } else {
444 if (auto *CondInst = dyn_cast<Instruction>(BI.getCondition()))
445 Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
446 if (Invariants.empty())
447 // Couldn't find invariant inputs!
448 return false;
449 }
450
451 // Check that one of the branch's successors exits, and which one.
452 bool ExitDirection = true;
453 int LoopExitSuccIdx = 0;
454 auto *LoopExitBB = BI.getSuccessor(0);
455 if (L.contains(LoopExitBB)) {
456 ExitDirection = false;
457 LoopExitSuccIdx = 1;
458 LoopExitBB = BI.getSuccessor(1);
459 if (L.contains(LoopExitBB))
460 return false;
461 }
462 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
463 auto *ParentBB = BI.getParent();
464 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB))
465 return false;
466
467 // When unswitching only part of the branch's condition, we need the exit
468 // block to be reached directly from the partially unswitched input. This can
469 // be done when the exit block is along the true edge and the branch condition
470 // is a graph of `or` operations, or the exit block is along the false edge
471 // and the condition is a graph of `and` operations.
472 if (!FullUnswitch) {
473 if (ExitDirection) {
474 if (!match(BI.getCondition(), m_LogicalOr()))
475 return false;
476 } else {
477 if (!match(BI.getCondition(), m_LogicalAnd()))
478 return false;
479 }
480 }
481
482 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
483 dbgs() << " unswitching trivial invariant conditions for: " << BIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
484 << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
485 for (Value *Invariant : Invariants) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
486 dbgs() << " " << *Invariant << " == true";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
487 if (Invariant != Invariants.back())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
488 dbgs() << " ||";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
489 dbgs() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
490 }do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
491 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
;
492
493 // If we have scalar evolutions, we need to invalidate them including this
494 // loop, the loop containing the exit block and the topmost parent loop
495 // exiting via LoopExitBB.
496 if (SE) {
497 if (Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
498 SE->forgetLoop(ExitL);
499 else
500 // Forget the entire nest as this exits the entire nest.
501 SE->forgetTopmostLoop(&L);
502 }
503
504 if (MSSAU && VerifyMemorySSA)
505 MSSAU->getMemorySSA()->verifyMemorySSA();
506
507 // Split the preheader, so that we know that there is a safe place to insert
508 // the conditional branch. We will change the preheader to have a conditional
509 // branch on LoopCond.
510 BasicBlock *OldPH = L.getLoopPreheader();
511 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
512
513 // Now that we have a place to insert the conditional branch, create a place
514 // to branch to: this is the exit block out of the loop that we are
515 // unswitching. We need to split this if there are other loop predecessors.
516 // Because the loop is in simplified form, *any* other predecessor is enough.
517 BasicBlock *UnswitchedBB;
518 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
519 assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&(static_cast <bool> (LoopExitBB->getUniquePredecessor
() == BI.getParent() && "A branch's parent isn't a predecessor!"
) ? void (0) : __assert_fail ("LoopExitBB->getUniquePredecessor() == BI.getParent() && \"A branch's parent isn't a predecessor!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 520, __extension__ __PRETTY_FUNCTION__))
520 "A branch's parent isn't a predecessor!")(static_cast <bool> (LoopExitBB->getUniquePredecessor
() == BI.getParent() && "A branch's parent isn't a predecessor!"
) ? void (0) : __assert_fail ("LoopExitBB->getUniquePredecessor() == BI.getParent() && \"A branch's parent isn't a predecessor!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 520, __extension__ __PRETTY_FUNCTION__))
;
521 UnswitchedBB = LoopExitBB;
522 } else {
523 UnswitchedBB =
524 SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
525 }
526
527 if (MSSAU && VerifyMemorySSA)
528 MSSAU->getMemorySSA()->verifyMemorySSA();
529
530 // Actually move the invariant uses into the unswitched position. If possible,
531 // we do this by moving the instructions, but when doing partial unswitching
532 // we do it by building a new merge of the values in the unswitched position.
533 OldPH->getTerminator()->eraseFromParent();
534 if (FullUnswitch) {
535 // If fully unswitching, we can use the existing branch instruction.
536 // Splice it into the old PH to gate reaching the new preheader and re-point
537 // its successors.
538 OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
539 BI);
540 if (MSSAU) {
541 // Temporarily clone the terminator, to make MSSA update cheaper by
542 // separating "insert edge" updates from "remove edge" ones.
543 ParentBB->getInstList().push_back(BI.clone());
544 } else {
545 // Create a new unconditional branch that will continue the loop as a new
546 // terminator.
547 BranchInst::Create(ContinueBB, ParentBB);
548 }
549 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
550 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
551 } else {
552 // Only unswitching a subset of inputs to the condition, so we will need to
553 // build a new branch that merges the invariant inputs.
554 if (ExitDirection)
555 assert(match(BI.getCondition(), m_LogicalOr()) &&(static_cast <bool> (match(BI.getCondition(), m_LogicalOr
()) && "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
"condition!") ? void (0) : __assert_fail ("match(BI.getCondition(), m_LogicalOr()) && \"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the \" \"condition!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 557, __extension__ __PRETTY_FUNCTION__))
556 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "(static_cast <bool> (match(BI.getCondition(), m_LogicalOr
()) && "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
"condition!") ? void (0) : __assert_fail ("match(BI.getCondition(), m_LogicalOr()) && \"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the \" \"condition!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 557, __extension__ __PRETTY_FUNCTION__))
557 "condition!")(static_cast <bool> (match(BI.getCondition(), m_LogicalOr
()) && "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
"condition!") ? void (0) : __assert_fail ("match(BI.getCondition(), m_LogicalOr()) && \"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the \" \"condition!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 557, __extension__ __PRETTY_FUNCTION__))
;
558 else
559 assert(match(BI.getCondition(), m_LogicalAnd()) &&(static_cast <bool> (match(BI.getCondition(), m_LogicalAnd
()) && "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
" condition!") ? void (0) : __assert_fail ("match(BI.getCondition(), m_LogicalAnd()) && \"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the\" \" condition!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 561, __extension__ __PRETTY_FUNCTION__))
560 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"(static_cast <bool> (match(BI.getCondition(), m_LogicalAnd
()) && "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
" condition!") ? void (0) : __assert_fail ("match(BI.getCondition(), m_LogicalAnd()) && \"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the\" \" condition!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 561, __extension__ __PRETTY_FUNCTION__))
561 " condition!")(static_cast <bool> (match(BI.getCondition(), m_LogicalAnd
()) && "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
" condition!") ? void (0) : __assert_fail ("match(BI.getCondition(), m_LogicalAnd()) && \"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the\" \" condition!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 561, __extension__ __PRETTY_FUNCTION__))
;
562 buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
563 *UnswitchedBB, *NewPH);
564 }
565
566 // Update the dominator tree with the added edge.
567 DT.insertEdge(OldPH, UnswitchedBB);
568
569 // After the dominator tree was updated with the added edge, update MemorySSA
570 // if available.
571 if (MSSAU) {
572 SmallVector<CFGUpdate, 1> Updates;
573 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
574 MSSAU->applyInsertUpdates(Updates, DT);
575 }
576
577 // Finish updating dominator tree and memory ssa for full unswitch.
578 if (FullUnswitch) {
579 if (MSSAU) {
580 // Remove the cloned branch instruction.
581 ParentBB->getTerminator()->eraseFromParent();
582 // Create unconditional branch now.
583 BranchInst::Create(ContinueBB, ParentBB);
584 MSSAU->removeEdge(ParentBB, LoopExitBB);
585 }
586 DT.deleteEdge(ParentBB, LoopExitBB);
587 }
588
589 if (MSSAU && VerifyMemorySSA)
590 MSSAU->getMemorySSA()->verifyMemorySSA();
591
592 // Rewrite the relevant PHI nodes.
593 if (UnswitchedBB == LoopExitBB)
594 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
595 else
596 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
597 *ParentBB, *OldPH, FullUnswitch);
598
599 // The constant we can replace all of our invariants with inside the loop
600 // body. If any of the invariants have a value other than this the loop won't
601 // be entered.
602 ConstantInt *Replacement = ExitDirection
603 ? ConstantInt::getFalse(BI.getContext())
604 : ConstantInt::getTrue(BI.getContext());
605
606 // Since this is an i1 condition we can also trivially replace uses of it
607 // within the loop with a constant.
608 for (Value *Invariant : Invariants)
609 replaceLoopInvariantUses(L, Invariant, *Replacement);
610
611 // If this was full unswitching, we may have changed the nesting relationship
612 // for this loop so hoist it to its correct parent if needed.
613 if (FullUnswitch)
614 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
615
616 if (MSSAU && VerifyMemorySSA)
617 MSSAU->getMemorySSA()->verifyMemorySSA();
618
619 LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " done: unswitching trivial branch...\n"
; } } while (false)
;
620 ++NumTrivial;
621 ++NumBranches;
622 return true;
623}
624
625/// Unswitch a trivial switch if the condition is loop invariant.
626///
627/// This routine should only be called when loop code leading to the switch has
628/// been validated as trivial (no side effects). This routine checks if the
629/// condition is invariant and that at least one of the successors is a loop
630/// exit. This allows us to unswitch without duplicating the loop, making it
631/// trivial.
632///
633/// If this routine fails to unswitch the switch it returns false.
634///
635/// If the switch can be unswitched, this routine splits the preheader and
636/// copies the switch above that split. If the default case is one of the
637/// exiting cases, it copies the non-exiting cases and points them at the new
638/// preheader. If the default case is not exiting, it copies the exiting cases
639/// and points the default at the preheader. It preserves loop simplified form
640/// (splitting the exit blocks as necessary). It simplifies the switch within
641/// the loop by removing now-dead cases. If the default case is one of those
642/// unswitched, it replaces its destination with a new basic block containing
643/// only unreachable. Such basic blocks, while technically loop exits, are not
644/// considered for unswitching so this is a stable transform and the same
645/// switch will not be revisited. If after unswitching there is only a single
646/// in-loop successor, the switch is further simplified to an unconditional
647/// branch. Still more cleanup can be done with some simplify-cfg like pass.
648///
649/// If `SE` is not null, it will be updated based on the potential loop SCEVs
650/// invalidated by this.
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
                                  LoopInfo &LI, ScalarEvolution *SE,
                                  MemorySSAUpdater *MSSAU) {
  LLVM_DEBUG(dbgs() << "  Trying to unswitch switch: " << SI << "\n");
  Value *LoopCond = SI.getCondition();

  // If this isn't switching on an invariant condition, we can't unswitch it.
  if (!L.isLoopInvariant(LoopCond))
    return false;

  auto *ParentBB = SI.getParent();

  // The same check must be used both for the default and the exit cases. We
  // should never leave edges from the switch instruction to a basic block that
  // we are unswitching, hence the condition used to determine the default case
  // needs to also be used to populate ExitCaseIndices, which is then used to
  // remove cases from the switch.
  auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
    // BBToCheck is not an exit block if it is inside loop L.
    if (L.contains(&BBToCheck))
      return false;
    // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
    if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
      return false;
    // We do not unswitch a block that only has an unreachable statement, as
    // it's possible this is a previously unswitched block. Only unswitch if
    // either the terminator is not unreachable, or, if it is, it's not the only
    // instruction in the block.
    auto *TI = BBToCheck.getTerminator();
    bool isUnreachable = isa<UnreachableInst>(TI);
    return !isUnreachable ||
           (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
  };

  SmallVector<int, 4> ExitCaseIndices;
  for (auto Case : SI.cases())
    if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
      ExitCaseIndices.push_back(Case.getCaseIndex());
  BasicBlock *DefaultExitBB = nullptr;
  // Successor index 0 of a switch is its default destination; capture its
  // branch weight now, before any cases are removed below.
  SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
      SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
  if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
    DefaultExitBB = SI.getDefaultDest();
  } else if (ExitCaseIndices.empty())
    return false;

  LLVM_DEBUG(dbgs() << "    unswitching trivial switch...\n");

  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  // We may need to invalidate SCEVs for the outermost loop reached by any of
  // the exits.
  Loop *OuterL = &L;

  if (DefaultExitBB) {
    // Clear out the default destination temporarily to allow accurate
    // predecessor lists to be examined below.
    // NOTE: a non-null default is restored further down (either re-pointed at
    // the last case's successor or the whole switch is erased).
    SI.setDefaultDest(nullptr);
    // Check the loop containing this exit.
    Loop *ExitL = LI.getLoopFor(DefaultExitBB);
    if (!ExitL || ExitL->contains(OuterL))
      OuterL = ExitL;
  }

  // Store the exit cases into a separate data structure and remove them from
  // the switch.
  SmallVector<std::tuple<ConstantInt *, BasicBlock *,
                         SwitchInstProfUpdateWrapper::CaseWeightOpt>,
              4> ExitCases;
  ExitCases.reserve(ExitCaseIndices.size());
  SwitchInstProfUpdateWrapper SIW(SI);
  // We walk the case indices backwards so that we remove the last case first
  // and don't disrupt the earlier indices.
  for (unsigned Index : reverse(ExitCaseIndices)) {
    auto CaseI = SI.case_begin() + Index;
    // Compute the outer loop from this exit.
    Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
    if (!ExitL || ExitL->contains(OuterL))
      OuterL = ExitL;
    // Save the value of this case.
    auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
    ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
    // Delete the unswitched cases.
    SIW.removeCase(CaseI);
  }

  // Invalidate cached SCEV info: forget the outermost loop any unswitched exit
  // leaves; OuterL == nullptr means some exit leaves every loop, so forget the
  // whole nest containing L.
  if (SE) {
    if (OuterL)
      SE->forgetLoop(OuterL);
    else
      SE->forgetTopmostLoop(&L);
  }

  // Check if after this all of the remaining cases point at the same
  // successor.
  BasicBlock *CommonSuccBB = nullptr;
  if (SI.getNumCases() > 0 &&
      all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
        return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
      }))
    CommonSuccBB = SI.case_begin()->getCaseSuccessor();
  if (!DefaultExitBB) {
    // If we're not unswitching the default, we need it to match any cases to
    // have a common successor or if we have no cases it is the common
    // successor.
    if (SI.getNumCases() == 0)
      CommonSuccBB = SI.getDefaultDest();
    else if (SI.getDefaultDest() != CommonSuccBB)
      CommonSuccBB = nullptr;
  }

  // Split the preheader, so that we know that there is a safe place to insert
  // the switch.
  BasicBlock *OldPH = L.getLoopPreheader();
  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
  OldPH->getTerminator()->eraseFromParent();

  // Now add the unswitched switch.
  auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
  SwitchInstProfUpdateWrapper NewSIW(*NewSI);

  // Rewrite the IR for the unswitched basic blocks. This requires two steps.
  // First, we split any exit blocks with remaining in-loop predecessors. Then
  // we update the PHIs in one of two ways depending on if there was a split.
  // We walk in reverse so that we split in the same order as the cases
  // appeared. This is purely for convenience of reading the resulting IR, but
  // it doesn't cost anything really.
  SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
  SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
  // Handle the default exit if necessary.
  // FIXME: It'd be great if we could merge this with the loop below but LLVM's
  // ranges aren't quite powerful enough yet.
  if (DefaultExitBB) {
    // pred_empty() is accurate here because the default edge was cleared above.
    if (pred_empty(DefaultExitBB)) {
      UnswitchedExitBBs.insert(DefaultExitBB);
      rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
    } else {
      auto *SplitBB =
          SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
      rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
                                                *ParentBB, *OldPH,
                                                /*FullUnswitch*/ true);
      DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
    }
  }
  // Note that we must use a reference in the for loop so that we update the
  // container.
  for (auto &ExitCase : reverse(ExitCases)) {
    // Grab a reference to the exit block in the pair so that we can update it.
    BasicBlock *ExitBB = std::get<1>(ExitCase);

    // If this case is the last edge into the exit block, we can simply reuse it
    // as it will no longer be a loop exit. No mapping necessary.
    if (pred_empty(ExitBB)) {
      // Only rewrite once.
      if (UnswitchedExitBBs.insert(ExitBB).second)
        rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
      continue;
    }

    // Otherwise we need to split the exit block so that we retain an exit
    // block from the loop and a target for the unswitched condition.
    BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
    if (!SplitExitBB) {
      // If this is the first time we see this, do the split and remember it.
      SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
      rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
                                                *ParentBB, *OldPH,
                                                /*FullUnswitch*/ true);
    }
    // Update the case pair to point to the split block.
    std::get<1>(ExitCase) = SplitExitBB;
  }

  // Now add the unswitched cases. We do this in reverse order as we built them
  // in reverse order.
  for (auto &ExitCase : reverse(ExitCases)) {
    ConstantInt *CaseVal = std::get<0>(ExitCase);
    BasicBlock *UnswitchedBB = std::get<1>(ExitCase);

    NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
  }

  // If the default was unswitched, re-point it and add explicit cases for
  // entering the loop.
  if (DefaultExitBB) {
    // operator-> on the wrapper yields the underlying SwitchInst.
    NewSIW->setDefaultDest(DefaultExitBB);
    NewSIW.setSuccessorWeight(0, DefaultCaseWeight);

    // We removed all the exit cases, so we just copy the cases to the
    // unswitched switch.
    for (const auto &Case : SI.cases())
      NewSIW.addCase(Case.getCaseValue(), NewPH,
                     SIW.getSuccessorWeight(Case.getSuccessorIndex()));
  } else if (DefaultCaseWeight) {
    // We have to set branch weight of the default case.
    uint64_t SW = *DefaultCaseWeight;
    for (const auto &Case : SI.cases()) {
      auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
      assert(W &&
             "case weight must be defined as default case weight is defined");
      SW += *W;
    }
    NewSIW.setSuccessorWeight(0, SW);
  }

  // If we ended up with a common successor for every path through the switch
  // after unswitching, rewrite it to an unconditional branch to make it easy
  // to recognize. Otherwise we potentially have to recognize the default case
  // pointing at unreachable and other complexity.
  if (CommonSuccBB) {
    BasicBlock *BB = SI.getParent();
    // We may have had multiple edges to this common successor block, so remove
    // them as predecessors. We skip the first one, either the default or the
    // actual first case.
    bool SkippedFirst = DefaultExitBB == nullptr;
    for (auto Case : SI.cases()) {
      assert(Case.getCaseSuccessor() == CommonSuccBB &&
             "Non-common successor!");
      (void)Case;
      if (!SkippedFirst) {
        SkippedFirst = true;
        continue;
      }
      CommonSuccBB->removePredecessor(BB,
                                      /*KeepOneInputPHIs*/ true);
    }
    // Now nuke the switch and replace it with a direct branch.
    SIW.eraseFromParent();
    BranchInst::Create(CommonSuccBB, BB);
  } else if (DefaultExitBB) {
    assert(SI.getNumCases() > 0 &&
           "If we had no cases we'd have a common successor!");
    // Move the last case to the default successor. This is valid as if the
    // default got unswitched it cannot be reached. This has the advantage of
    // being simple and keeping the number of edges from this switch to
    // successors the same, and avoiding any PHI update complexity.
    auto LastCaseI = std::prev(SI.case_end());

    SI.setDefaultDest(LastCaseI->getCaseSuccessor());
    SIW.setSuccessorWeight(
        0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
    SIW.removeCase(LastCaseI);
  }

  // Walk the unswitched exit blocks and the unswitched split blocks and update
  // the dominator tree based on the CFG edits. While we are walking unordered
  // containers here, the API for applyUpdates takes an unordered list of
  // updates and requires them to not contain duplicates.
  SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
  for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
    DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
    DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
  }
  for (auto SplitUnswitchedPair : SplitExitBBMap) {
    DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
    DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
  }

  if (MSSAU) {
    MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
    if (VerifyMemorySSA)
      MSSAU->getMemorySSA()->verifyMemorySSA();
  } else {
    DT.applyUpdates(DTUpdates);
  }

  assert(DT.verify(DominatorTree::VerificationLevel::Fast));

  // We may have changed the nesting relationship for this loop so hoist it to
  // its correct parent if needed.
  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);

  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  ++NumTrivial;
  ++NumSwitches;
  LLVM_DEBUG(dbgs() << "    done: unswitching trivial switch...\n");
  return true;
}
933
934/// This routine scans the loop to find a branch or switch which occurs before
935/// any side effects occur. These can potentially be unswitched without
936/// duplicating the loop. If a branch or switch is successfully unswitched the
937/// scanning continues to see if subsequent branches or switches have become
938/// trivial. Once all trivial candidates have been unswitched, this routine
939/// returns.
940///
941/// The return value indicates whether anything was unswitched (and therefore
942/// changed).
943///
944/// If `SE` is not null, it will be updated based on the potential loop SCEVs
945/// invalidated by this.
946static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
947 LoopInfo &LI, ScalarEvolution *SE,
948 MemorySSAUpdater *MSSAU) {
949 bool Changed = false;
950
951 // If loop header has only one reachable successor we should keep looking for
952 // trivial condition candidates in the successor as well. An alternative is
953 // to constant fold conditions and merge successors into loop header (then we
954 // only need to check header's terminator). The reason for not doing this in
955 // LoopUnswitch pass is that it could potentially break LoopPassManager's
956 // invariants. Folding dead branches could either eliminate the current loop
957 // or make other loops unreachable. LCSSA form might also not be preserved
958 // after deleting branches. The following code keeps traversing loop header's
959 // successors until it finds the trivial condition candidate (condition that
960 // is not a constant). Since unswitching generates branches with constant
961 // conditions, this scenario could be very common in practice.
962 BasicBlock *CurrentBB = L.getHeader();
963 SmallPtrSet<BasicBlock *, 8> Visited;
964 Visited.insert(CurrentBB);
965 do {
966 // Check if there are any side-effecting instructions (e.g. stores, calls,
967 // volatile loads) in the part of the loop that the code *would* execute
968 // without unswitching.
969 if (MSSAU) // Possible early exit with MSSA
970 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
971 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
972 return Changed;
973 if (llvm::any_of(*CurrentBB,
974 [](Instruction &I) { return I.mayHaveSideEffects(); }))
975 return Changed;
976
977 Instruction *CurrentTerm = CurrentBB->getTerminator();
978
979 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
980 // Don't bother trying to unswitch past a switch with a constant
981 // condition. This should be removed prior to running this pass by
982 // simplify-cfg.
983 if (isa<Constant>(SI->getCondition()))
984 return Changed;
985
986 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
987 // Couldn't unswitch this one so we're done.
988 return Changed;
989
990 // Mark that we managed to unswitch something.
991 Changed = true;
992
993 // If unswitching turned the terminator into an unconditional branch then
994 // we can continue. The unswitching logic specifically works to fold any
995 // cases it can into an unconditional branch to make it easier to
996 // recognize here.
997 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
998 if (!BI || BI->isConditional())
999 return Changed;
1000
1001 CurrentBB = BI->getSuccessor(0);
1002 continue;
1003 }
1004
1005 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1006 if (!BI)
1007 // We do not understand other terminator instructions.
1008 return Changed;
1009
1010 // Don't bother trying to unswitch past an unconditional branch or a branch
1011 // with a constant value. These should be removed by simplify-cfg prior to
1012 // running this pass.
1013 if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
1014 return Changed;
1015
1016 // Found a trivial condition candidate: non-foldable conditional branch. If
1017 // we fail to unswitch this, we can't do anything else that is trivial.
1018 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1019 return Changed;
1020
1021 // Mark that we managed to unswitch something.
1022 Changed = true;
1023
1024 // If we only unswitched some of the conditions feeding the branch, we won't
1025 // have collapsed it to a single successor.
1026 BI = cast<BranchInst>(CurrentBB->getTerminator());
1027 if (BI->isConditional())
1028 return Changed;
1029
1030 // Follow the newly unconditional branch into its successor.
1031 CurrentBB = BI->getSuccessor(0);
1032
1033 // When continuing, if we exit the loop or reach a previous visited block,
1034 // then we can not reach any trivial condition candidates (unfoldable
1035 // branch instructions or switch instructions) and no unswitch can happen.
1036 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1037
1038 return Changed;
1039}
1040
1041/// Build the cloned blocks for an unswitched copy of the given loop.
1042///
1043/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1044/// after the split block (`SplitBB`) that will be used to select between the
1045/// cloned and original loop.
1046///
1047/// This routine handles cloning all of the necessary loop blocks and exit
1048/// blocks including rewriting their instructions and the relevant PHI nodes.
1049/// Any loop blocks or exit blocks which are dominated by a different successor
1050/// than the one for this clone of the loop blocks can be trivially skipped. We
1051/// use the `DominatingSucc` map to determine whether a block satisfies that
1052/// property with a simple map lookup.
1053///
1054/// It also correctly creates the unconditional branch in the cloned
1055/// unswitched parent block to only point at the unswitched successor.
1056///
1057/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1058/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1059/// the cloned blocks (and their loops) are left without full `LoopInfo`
1060/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1061/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1062/// instead the caller must recompute an accurate DT. It *does* correctly
1063/// update the `AssumptionCache` provided in `AC`.
1064 static BasicBlock *buildClonedLoopBlocks(
1065 Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1066 ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1067 BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1068 const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1069 ValueToValueMapTy &VMap,
1070 SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
1071 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
// NOTE(review): ContinueSuccBB is not referenced anywhere in this body; it
// appears to be unused by this routine — confirm whether it can be dropped
// from the signature or is kept for symmetry with callers.
1072 SmallVector<BasicBlock *, 4> NewBlocks;
1073 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1074
1075 // We will need to clone a bunch of blocks, wrap up the clone operation in
1076 // a helper.
1077 auto CloneBlock = [&](BasicBlock *OldBB) {
1078 // Clone the basic block and insert it before the new preheader.
1079 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1080 NewBB->moveBefore(LoopPH);
1081
1082 // Record this block and the mapping.
1083 NewBlocks.push_back(NewBB);
1084 VMap[OldBB] = NewBB;
1085
1086 return NewBB;
1087 };
1088
1089 // We skip cloning blocks when they have a dominating succ that is not the
1090 // succ we are cloning for.
1091 auto SkipBlock = [&](BasicBlock *BB) {
1092 auto It = DominatingSucc.find(BB);
1093 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1094 };
1095
1096 // First, clone the preheader.
1097 auto *ClonedPH = CloneBlock(LoopPH);
1098
1099 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1100 for (auto *LoopBB : L.blocks())
1101 if (!SkipBlock(LoopBB))
1102 CloneBlock(LoopBB);
1103
1104 // Split all the loop exit edges so that when we clone the exit blocks, if
1105 // any of the exit blocks are *also* a preheader for some other loop, we
1106 // don't create multiple predecessors entering the loop header.
1107 for (auto *ExitBB : ExitBlocks) {
1108 if (SkipBlock(ExitBB))
1109 continue;
1110
1111 // When we are going to clone an exit, we don't need to clone all the
1112 // instructions in the exit block and we want to ensure we have an easy
1113 // place to merge the CFG, so split the exit first. This is always safe to
1114 // do because there cannot be any non-loop predecessors of a loop exit in
1115 // loop simplified form.
1116 auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
1117
1118 // Rearrange the names to make it easier to write test cases by having the
1119 // exit block carry the suffix rather than the merge block carrying the
1120 // suffix.
1121 MergeBB->takeName(ExitBB);
1122 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1123
1124 // Now clone the original exit block.
1125 auto *ClonedExitBB = CloneBlock(ExitBB);
1126 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&(static_cast <bool> (ClonedExitBB->getTerminator()->
getNumSuccessors() == 1 && "Exit block should have been split to have one successor!"
) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getNumSuccessors() == 1 && \"Exit block should have been split to have one successor!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1127, __extension__ __PRETTY_FUNCTION__))
1127 "Exit block should have been split to have one successor!")(static_cast <bool> (ClonedExitBB->getTerminator()->
getNumSuccessors() == 1 && "Exit block should have been split to have one successor!"
) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getNumSuccessors() == 1 && \"Exit block should have been split to have one successor!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1127, __extension__ __PRETTY_FUNCTION__))
;
1128 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&(static_cast <bool> (ClonedExitBB->getTerminator()->
getSuccessor(0) == MergeBB && "Cloned exit block has the wrong successor!"
) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB && \"Cloned exit block has the wrong successor!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1129, __extension__ __PRETTY_FUNCTION__))
1129 "Cloned exit block has the wrong successor!")(static_cast <bool> (ClonedExitBB->getTerminator()->
getSuccessor(0) == MergeBB && "Cloned exit block has the wrong successor!"
) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB && \"Cloned exit block has the wrong successor!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1129, __extension__ __PRETTY_FUNCTION__))
;
1130
1131 // Remap any cloned instructions and create a merge phi node for them.
// zip_first stops at the shorter range; the terminators (the last
// instruction of each block) are deliberately excluded via std::prev.
1132 for (auto ZippedInsts : llvm::zip_first(
1133 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1134 llvm::make_range(ClonedExitBB->begin(),
1135 std::prev(ClonedExitBB->end())))) {
1136 Instruction &I = std::get<0>(ZippedInsts);
1137 Instruction &ClonedI = std::get<1>(ZippedInsts);
1138
1139 // The only instructions in the exit block should be PHI nodes and
1140 // potentially a landing pad.
1141 assert((static_cast <bool> ((isa<PHINode>(I) || isa<LandingPadInst
>(I) || isa<CatchPadInst>(I)) && "Bad instruction in exit block!"
) ? void (0) : __assert_fail ("(isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) && \"Bad instruction in exit block!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1143, __extension__ __PRETTY_FUNCTION__))
1142 (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&(static_cast <bool> ((isa<PHINode>(I) || isa<LandingPadInst
>(I) || isa<CatchPadInst>(I)) && "Bad instruction in exit block!"
) ? void (0) : __assert_fail ("(isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) && \"Bad instruction in exit block!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1143, __extension__ __PRETTY_FUNCTION__))
1143 "Bad instruction in exit block!")(static_cast <bool> ((isa<PHINode>(I) || isa<LandingPadInst
>(I) || isa<CatchPadInst>(I)) && "Bad instruction in exit block!"
) ? void (0) : __assert_fail ("(isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) && \"Bad instruction in exit block!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1143, __extension__ __PRETTY_FUNCTION__))
;
1144 // We should have a value map between the instruction and its clone.
1145 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!")(static_cast <bool> (VMap.lookup(&I) == &ClonedI
&& "Mismatch in the value map!") ? void (0) : __assert_fail
("VMap.lookup(&I) == &ClonedI && \"Mismatch in the value map!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1145, __extension__ __PRETTY_FUNCTION__))
;
1146
// The merge PHI selects between the original instruction (reaching via
// ExitBB) and its clone (reaching via ClonedExitBB); all former uses of
// the original are redirected to it.
1147 auto *MergePN =
1148 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
1149 &*MergeBB->getFirstInsertionPt());
1150 I.replaceAllUsesWith(MergePN);
1151 MergePN->addIncoming(&I, ExitBB);
1152 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1153 }
1154 }
1155
1156 // Rewrite the instructions in the cloned blocks to refer to the instructions
1157 // in the cloned blocks. We have to do this as a second pass so that we have
1158 // everything available. Also, we have inserted new instructions which may
1159 // include assume intrinsics, so we update the assumption cache while
1160 // processing this.
1161 for (auto *ClonedBB : NewBlocks)
1162 for (Instruction &I : *ClonedBB) {
1163 RemapInstruction(&I, VMap,
1164 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1165 if (auto *II = dyn_cast<AssumeInst>(&I))
1166 AC.registerAssumption(II);
1167 }
1168
1169 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1170 // have spurious incoming values.
// The cast_or_null handles successors that were themselves skipped and thus
// have no clone in VMap.
1171 for (auto *LoopBB : L.blocks())
1172 if (SkipBlock(LoopBB))
1173 for (auto *SuccBB : successors(LoopBB))
1174 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1175 for (PHINode &PN : ClonedSuccBB->phis())
1176 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1177
1178 // Remove the cloned parent as a predecessor of any successor we ended up
1179 // cloning other than the unswitched one.
1180 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1181 for (auto *SuccBB : successors(ParentBB)) {
1182 if (SuccBB == UnswitchedSuccBB)
1183 continue;
1184
1185 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1186 if (!ClonedSuccBB)
1187 continue;
1188
1189 ClonedSuccBB->removePredecessor(ClonedParentBB,
1190 /*KeepOneInputPHIs*/ true);
1191 }
1192
1193 // Replace the cloned branch with an unconditional branch to the cloned
1194 // unswitched successor.
// Plain cast (not cast_or_null): the unswitched successor's clone is
// required to exist in the map at this point.
1195 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1196 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1197 // Trivial Simplification. If Terminator is a conditional branch and
1198 // condition becomes dead - erase it.
// Capture the condition *before* erasing the terminator so it can be
// cleaned up afterwards if it became trivially dead.
1199 Value *ClonedConditionToErase = nullptr;
1200 if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1201 ClonedConditionToErase = BI->getCondition();
1202 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1203 ClonedConditionToErase = SI->getCondition();
1204
1205 ClonedTerminator->eraseFromParent();
1206 BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1207
1208 if (ClonedConditionToErase)
1209 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1210 MSSAU);
1211
1212 // If there are duplicate entries in the PHI nodes because of multiple edges
1213 // to the unswitched successor, we need to nuke all but one as we replaced it
1214 // with a direct branch.
1215 for (PHINode &PN : ClonedSuccBB->phis()) {
1216 bool Found = false;
1217 // Loop over the incoming operands backwards so we can easily delete as we
1218 // go without invalidating the index.
1219 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1220 if (PN.getIncomingBlock(i) != ClonedParentBB)
1221 continue;
1222 if (!Found) {
1223 Found = true;
1224 continue;
1225 }
1226 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1227 }
1228 }
1229
1230 // Record the domtree updates for the new blocks.
1231 SmallPtrSet<BasicBlock *, 4> SuccSet;
1232 for (auto *ClonedBB : NewBlocks) {
1233 for (auto *SuccBB : successors(ClonedBB))
1234 if (SuccSet.insert(SuccBB).second)
1235 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
// Dedup is only needed per block, so reuse one set and clear it each time.
1236 SuccSet.clear();
1237 }
1238
1239 return ClonedPH;
1240}
1241
1242/// Recursively clone the specified loop and all of its children.
1243///
1244/// The target parent loop for the clone should be provided, or can be null if
1245/// the clone is a top-level loop. While cloning, all the blocks are mapped
1246/// with the provided value map. The entire original loop must be present in
1247/// the value map. The cloned loop is returned.
1248static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1249 const ValueToValueMapTy &VMap, LoopInfo &LI) {
1250 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1251 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!")(static_cast <bool> (ClonedL.getBlocks().empty() &&
"Must start with an empty loop!") ? void (0) : __assert_fail
("ClonedL.getBlocks().empty() && \"Must start with an empty loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1251, __extension__ __PRETTY_FUNCTION__))
;
1252 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1253 for (auto *BB : OrigL.blocks()) {
1254 auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1255 ClonedL.addBlockEntry(ClonedBB);
1256 if (LI.getLoopFor(BB) == &OrigL)
1257 LI.changeLoopFor(ClonedBB, &ClonedL);
1258 }
1259 };
1260
1261 // We specially handle the first loop because it may get cloned into
1262 // a different parent and because we most commonly are cloning leaf loops.
1263 Loop *ClonedRootL = LI.AllocateLoop();
1264 if (RootParentL)
1265 RootParentL->addChildLoop(ClonedRootL);
1266 else
1267 LI.addTopLevelLoop(ClonedRootL);
1268 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1269
1270 if (OrigRootL.isInnermost())
1271 return ClonedRootL;
1272
1273 // If we have a nest, we can quickly clone the entire loop nest using an
1274 // iterative approach because it is a tree. We keep the cloned parent in the
1275 // data structure to avoid repeatedly querying through a map to find it.
1276 SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1277 // Build up the loops to clone in reverse order as we'll clone them from the
1278 // back.
1279 for (Loop *ChildL : llvm::reverse(OrigRootL))
1280 LoopsToClone.push_back({ClonedRootL, ChildL});
1281 do {
1282 Loop *ClonedParentL, *L;
1283 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1284 Loop *ClonedL = LI.AllocateLoop();
1285 ClonedParentL->addChildLoop(ClonedL);
1286 AddClonedBlocksToLoop(*L, *ClonedL);
1287 for (Loop *ChildL : llvm::reverse(*L))
1288 LoopsToClone.push_back({ClonedL, ChildL});
1289 } while (!LoopsToClone.empty());
1290
1291 return ClonedRootL;
1292}
1293
1294/// Build the cloned loops of an original loop from unswitching.
1295///
1296/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1297/// operation. We need to re-verify that there even is a loop (as the backedge
1298/// may not have been cloned), and even if there are remaining backedges the
1299/// backedge set may be different. However, we know that each child loop is
1300/// undisturbed, we only need to find where to place each child loop within
1301/// either any parent loop or within a cloned version of the original loop.
1302///
1303/// Because child loops may end up cloned outside of any cloned version of the
1304/// original loop, multiple cloned sibling loops may be created. All of them
1305/// are returned so that the newly introduced loop nest roots can be
1306/// identified.
1307static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1308 const ValueToValueMapTy &VMap, LoopInfo &LI,
1309 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1310 Loop *ClonedL = nullptr;
1311
1312 auto *OrigPH = OrigL.getLoopPreheader();
1313 auto *OrigHeader = OrigL.getHeader();
1314
1315 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1316 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1317
1318 // We need to know the loops of the cloned exit blocks to even compute the
1319 // accurate parent loop. If we only clone exits to some parent of the
1320 // original parent, we want to clone into that outer loop. We also keep track
1321 // of the loops that our cloned exit blocks participate in.
1322 Loop *ParentL = nullptr;
1323 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1324 SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
1325 ClonedExitsInLoops.reserve(ExitBlocks.size());
1326 for (auto *ExitBB : ExitBlocks)
1327 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1328 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1329 ExitLoopMap[ClonedExitBB] = ExitL;
1330 ClonedExitsInLoops.push_back(ClonedExitBB);
1331 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1332 ParentL = ExitL;
1333 }
1334 assert((!ParentL || ParentL == OrigL.getParentLoop() ||(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop
() || ParentL->contains(OrigL.getParentLoop())) &&
"The computed parent loop should always contain (or be) the parent of "
"the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1337, __extension__ __PRETTY_FUNCTION__))
1335 ParentL->contains(OrigL.getParentLoop())) &&(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop
() || ParentL->contains(OrigL.getParentLoop())) &&
"The computed parent loop should always contain (or be) the parent of "
"the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1337, __extension__ __PRETTY_FUNCTION__))
1336 "The computed parent loop should always contain (or be) the parent of "(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop
() || ParentL->contains(OrigL.getParentLoop())) &&
"The computed parent loop should always contain (or be) the parent of "
"the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1337, __extension__ __PRETTY_FUNCTION__))
1337 "the original loop.")(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop
() || ParentL->contains(OrigL.getParentLoop())) &&
"The computed parent loop should always contain (or be) the parent of "
"the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1337, __extension__ __PRETTY_FUNCTION__))
;
1338
1339 // We build the set of blocks dominated by the cloned header from the set of
1340 // cloned blocks out of the original loop. While not all of these will
1341 // necessarily be in the cloned loop, it is enough to establish that they
1342 // aren't in unreachable cycles, etc.
1343 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1344 for (auto *BB : OrigL.blocks())
1345 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1346 ClonedLoopBlocks.insert(ClonedBB);
1347
1348 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1349 // skipped cloning some region of this loop which can in turn skip some of
1350 // the backedges so we have to rebuild the blocks in the loop based on the
1351 // backedges that remain after cloning.
1352 SmallVector<BasicBlock *, 16> Worklist;
1353 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1354 for (auto *Pred : predecessors(ClonedHeader)) {
1355 // The only possible non-loop header predecessor is the preheader because
1356 // we know we cloned the loop in simplified form.
1357 if (Pred == ClonedPH)
1358 continue;
1359
1360 // Because the loop was in simplified form, the only non-loop predecessor
1361 // should be the preheader.
1362 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "(static_cast <bool> (ClonedLoopBlocks.count(Pred) &&
"Found a predecessor of the loop " "header other than the preheader "
"that is not part of the loop!") ? void (0) : __assert_fail (
"ClonedLoopBlocks.count(Pred) && \"Found a predecessor of the loop \" \"header other than the preheader \" \"that is not part of the loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1364, __extension__ __PRETTY_FUNCTION__))
1363 "header other than the preheader "(static_cast <bool> (ClonedLoopBlocks.count(Pred) &&
"Found a predecessor of the loop " "header other than the preheader "
"that is not part of the loop!") ? void (0) : __assert_fail (
"ClonedLoopBlocks.count(Pred) && \"Found a predecessor of the loop \" \"header other than the preheader \" \"that is not part of the loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1364, __extension__ __PRETTY_FUNCTION__))
1364 "that is not part of the loop!")(static_cast <bool> (ClonedLoopBlocks.count(Pred) &&
"Found a predecessor of the loop " "header other than the preheader "
"that is not part of the loop!") ? void (0) : __assert_fail (
"ClonedLoopBlocks.count(Pred) && \"Found a predecessor of the loop \" \"header other than the preheader \" \"that is not part of the loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1364, __extension__ __PRETTY_FUNCTION__))
;
1365
1366 // Insert this block into the loop set and on the first visit (and if it
1367 // isn't the header we're currently walking) put it into the worklist to
1368 // recurse through.
1369 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1370 Worklist.push_back(Pred);
1371 }
1372
1373 // If we had any backedges then there *is* a cloned loop. Put the header into
1374 // the loop set and then walk the worklist backwards to find all the blocks
1375 // that remain within the loop after cloning.
1376 if (!BlocksInClonedLoop.empty()) {
1377 BlocksInClonedLoop.insert(ClonedHeader);
1378
1379 while (!Worklist.empty()) {
1380 BasicBlock *BB = Worklist.pop_back_val();
1381 assert(BlocksInClonedLoop.count(BB) &&(static_cast <bool> (BlocksInClonedLoop.count(BB) &&
"Didn't put block into the loop set!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count(BB) && \"Didn't put block into the loop set!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1382, __extension__ __PRETTY_FUNCTION__))
1382 "Didn't put block into the loop set!")(static_cast <bool> (BlocksInClonedLoop.count(BB) &&
"Didn't put block into the loop set!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count(BB) && \"Didn't put block into the loop set!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1382, __extension__ __PRETTY_FUNCTION__))
;
1383
1384 // Insert any predecessors that are in the possible set into the cloned
1385 // set, and if the insert is successful, add them to the worklist. Note
1386 // that we filter on the blocks that are definitely reachable via the
1387 // backedge to the loop header so we may prune out dead code within the
1388 // cloned loop.
1389 for (auto *Pred : predecessors(BB))
1390 if (ClonedLoopBlocks.count(Pred) &&
1391 BlocksInClonedLoop.insert(Pred).second)
1392 Worklist.push_back(Pred);
1393 }
1394
1395 ClonedL = LI.AllocateLoop();
1396 if (ParentL) {
1397 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1398 ParentL->addChildLoop(ClonedL);
1399 } else {
1400 LI.addTopLevelLoop(ClonedL);
1401 }
1402 NonChildClonedLoops.push_back(ClonedL);
1403
1404 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1405 // We don't want to just add the cloned loop blocks based on how we
1406 // discovered them. The original order of blocks was carefully built in
1407 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1408 // that logic, we just re-walk the original blocks (and those of the child
1409 // loops) and filter them as we add them into the cloned loop.
1410 for (auto *BB : OrigL.blocks()) {
1411 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1412 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1413 continue;
1414
1415 // Directly add the blocks that are only in this loop.
1416 if (LI.getLoopFor(BB) == &OrigL) {
1417 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1418 continue;
1419 }
1420
1421 // We want to manually add it to this loop and parents.
1422 // Registering it with LoopInfo will happen when we clone the top
1423 // loop for this block.
1424 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1425 PL->addBlockEntry(ClonedBB);
1426 }
1427
1428 // Now add each child loop whose header remains within the cloned loop. All
1429 // of the blocks within the loop must satisfy the same constraints as the
1430 // header so once we pass the header checks we can just clone the entire
1431 // child loop nest.
1432 for (Loop *ChildL : OrigL) {
1433 auto *ClonedChildHeader =
1434 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1435 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1436 continue;
1437
1438#ifndef NDEBUG
1439 // We should never have a cloned child loop header but fail to have
1440 // all of the blocks for that child loop.
1441 for (auto *ChildLoopBB : ChildL->blocks())
1442 assert(BlocksInClonedLoop.count((static_cast <bool> (BlocksInClonedLoop.count( cast<
BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer "
"loop but not all of its blocks!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1445, __extension__ __PRETTY_FUNCTION__))
1443 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&(static_cast <bool> (BlocksInClonedLoop.count( cast<
BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer "
"loop but not all of its blocks!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1445, __extension__ __PRETTY_FUNCTION__))
1444 "Child cloned loop has a header within the cloned outer "(static_cast <bool> (BlocksInClonedLoop.count( cast<
BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer "
"loop but not all of its blocks!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1445, __extension__ __PRETTY_FUNCTION__))
1445 "loop but not all of its blocks!")(static_cast <bool> (BlocksInClonedLoop.count( cast<
BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer "
"loop but not all of its blocks!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1445, __extension__ __PRETTY_FUNCTION__))
;
1446#endif
1447
1448 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1449 }
1450 }
1451
1452 // Now that we've handled all the components of the original loop that were
1453 // cloned into a new loop, we still need to handle anything from the original
1454 // loop that wasn't in a cloned loop.
1455
1456 // Figure out what blocks are left to place within any loop nest containing
1457 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1458 // them.
1459 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1460 if (BlocksInClonedLoop.empty())
1461 UnloopedBlockSet.insert(ClonedPH);
1462 for (auto *ClonedBB : ClonedLoopBlocks)
1463 if (!BlocksInClonedLoop.count(ClonedBB))
1464 UnloopedBlockSet.insert(ClonedBB);
1465
1466 // Copy the cloned exits and sort them in ascending loop depth, we'll work
1467 // backwards across these to process them inside out. The order shouldn't
1468 // matter as we're just trying to build up the map from inside-out; we use
1469 // the map in a more stably ordered way below.
1470 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1471 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1472 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1473 ExitLoopMap.lookup(RHS)->getLoopDepth();
1474 });
1475
1476 // Populate the existing ExitLoopMap with everything reachable from each
1477 // exit, starting from the inner most exit.
1478 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1479 assert(Worklist.empty() && "Didn't clear worklist!")(static_cast <bool> (Worklist.empty() && "Didn't clear worklist!"
) ? void (0) : __assert_fail ("Worklist.empty() && \"Didn't clear worklist!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1479, __extension__ __PRETTY_FUNCTION__))
;
1480
1481 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1482 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1483
1484 // Walk the CFG back until we hit the cloned PH adding everything reachable
1485 // and in the unlooped set to this exit block's loop.
1486 Worklist.push_back(ExitBB);
1487 do {
1488 BasicBlock *BB = Worklist.pop_back_val();
1489 // We can stop recursing at the cloned preheader (if we get there).
1490 if (BB == ClonedPH)
1491 continue;
1492
1493 for (BasicBlock *PredBB : predecessors(BB)) {
1494 // If this pred has already been moved to our set or is part of some
1495 // (inner) loop, no update needed.
1496 if (!UnloopedBlockSet.erase(PredBB)) {
1497 assert((static_cast <bool> ((BlocksInClonedLoop.count(PredBB) ||
ExitLoopMap.count(PredBB)) && "Predecessor not mapped to a loop!"
) ? void (0) : __assert_fail ("(BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && \"Predecessor not mapped to a loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1499, __extension__ __PRETTY_FUNCTION__))
1498 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&(static_cast <bool> ((BlocksInClonedLoop.count(PredBB) ||
ExitLoopMap.count(PredBB)) && "Predecessor not mapped to a loop!"
) ? void (0) : __assert_fail ("(BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && \"Predecessor not mapped to a loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1499, __extension__ __PRETTY_FUNCTION__))
1499 "Predecessor not mapped to a loop!")(static_cast <bool> ((BlocksInClonedLoop.count(PredBB) ||
ExitLoopMap.count(PredBB)) && "Predecessor not mapped to a loop!"
) ? void (0) : __assert_fail ("(BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && \"Predecessor not mapped to a loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1499, __extension__ __PRETTY_FUNCTION__))
;
1500 continue;
1501 }
1502
1503 // We just insert into the loop set here. We'll add these blocks to the
1504 // exit loop after we build up the set in an order that doesn't rely on
1505 // predecessor order (which in turn relies on use list order).
1506 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1507 (void)Inserted;
1508 assert(Inserted && "Should only visit an unlooped block once!")(static_cast <bool> (Inserted && "Should only visit an unlooped block once!"
) ? void (0) : __assert_fail ("Inserted && \"Should only visit an unlooped block once!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1508, __extension__ __PRETTY_FUNCTION__))
;
1509
1510 // And recurse through to its predecessors.
1511 Worklist.push_back(PredBB);
1512 }
1513 } while (!Worklist.empty());
1514 }
1515
1516 // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1517 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1518 // in their original order adding them to the correct loop.
1519
1520 // We need a stable insertion order. We use the order of the original loop
1521 // order and map into the correct parent loop.
1522 for (auto *BB : llvm::concat<BasicBlock *const>(
1523 makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1524 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1525 OuterL->addBasicBlockToLoop(BB, LI);
1526
1527#ifndef NDEBUG
1528 for (auto &BBAndL : ExitLoopMap) {
1529 auto *BB = BBAndL.first;
1530 auto *OuterL = BBAndL.second;
1531 assert(LI.getLoopFor(BB) == OuterL &&(static_cast <bool> (LI.getLoopFor(BB) == OuterL &&
"Failed to put all blocks into outer loops!") ? void (0) : __assert_fail
("LI.getLoopFor(BB) == OuterL && \"Failed to put all blocks into outer loops!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1532, __extension__ __PRETTY_FUNCTION__))
1532 "Failed to put all blocks into outer loops!")(static_cast <bool> (LI.getLoopFor(BB) == OuterL &&
"Failed to put all blocks into outer loops!") ? void (0) : __assert_fail
("LI.getLoopFor(BB) == OuterL && \"Failed to put all blocks into outer loops!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1532, __extension__ __PRETTY_FUNCTION__))
;
1533 }
1534#endif
1535
1536 // Now that all the blocks are placed into the correct containing loop in the
1537 // absence of child loops, find all the potentially cloned child loops and
1538 // clone them into whatever outer loop we placed their header into.
1539 for (Loop *ChildL : OrigL) {
1540 auto *ClonedChildHeader =
1541 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1542 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1543 continue;
1544
1545#ifndef NDEBUG
1546 for (auto *ChildLoopBB : ChildL->blocks())
1547 assert(VMap.count(ChildLoopBB) &&(static_cast <bool> (VMap.count(ChildLoopBB) &&
"Cloned a child loop header but not all of that loops blocks!"
) ? void (0) : __assert_fail ("VMap.count(ChildLoopBB) && \"Cloned a child loop header but not all of that loops blocks!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1548, __extension__ __PRETTY_FUNCTION__))
1548 "Cloned a child loop header but not all of that loops blocks!")(static_cast <bool> (VMap.count(ChildLoopBB) &&
"Cloned a child loop header but not all of that loops blocks!"
) ? void (0) : __assert_fail ("VMap.count(ChildLoopBB) && \"Cloned a child loop header but not all of that loops blocks!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1548, __extension__ __PRETTY_FUNCTION__))
;
1549#endif
1550
1551 NonChildClonedLoops.push_back(cloneLoopNest(
1552 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1553 }
1554}
1555
1556static void
1557deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1558 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1559 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1560 // Find all the dead clones, and remove them from their successors.
1561 SmallVector<BasicBlock *, 16> DeadBlocks;
1562 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1563 for (auto &VMap : VMaps)
1564 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1565 if (!DT.isReachableFromEntry(ClonedBB)) {
1566 for (BasicBlock *SuccBB : successors(ClonedBB))
1567 SuccBB->removePredecessor(ClonedBB);
1568 DeadBlocks.push_back(ClonedBB);
1569 }
1570
1571 // Remove all MemorySSA in the dead blocks
1572 if (MSSAU) {
1573 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1574 DeadBlocks.end());
1575 MSSAU->removeBlocks(DeadBlockSet);
1576 }
1577
1578 // Drop any remaining references to break cycles.
1579 for (BasicBlock *BB : DeadBlocks)
1580 BB->dropAllReferences();
1581 // Erase them from the IR.
1582 for (BasicBlock *BB : DeadBlocks)
1583 BB->eraseFromParent();
1584}
1585
1586static void deleteDeadBlocksFromLoop(Loop &L,
1587 SmallVectorImpl<BasicBlock *> &ExitBlocks,
1588 DominatorTree &DT, LoopInfo &LI,
1589 MemorySSAUpdater *MSSAU) {
1590 // Find all the dead blocks tied to this loop, and remove them from their
1591 // successors.
1592 SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1593
1594 // Start with loop/exit blocks and get a transitive closure of reachable dead
1595 // blocks.
1596 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1597 ExitBlocks.end());
1598 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1599 while (!DeathCandidates.empty()) {
1600 auto *BB = DeathCandidates.pop_back_val();
1601 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1602 for (BasicBlock *SuccBB : successors(BB)) {
1603 SuccBB->removePredecessor(BB);
1604 DeathCandidates.push_back(SuccBB);
1605 }
1606 DeadBlockSet.insert(BB);
1607 }
1608 }
1609
1610 // Remove all MemorySSA in the dead blocks
1611 if (MSSAU)
1612 MSSAU->removeBlocks(DeadBlockSet);
1613
1614 // Filter out the dead blocks from the exit blocks list so that it can be
1615 // used in the caller.
1616 llvm::erase_if(ExitBlocks,
1617 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1618
1619 // Walk from this loop up through its parents removing all of the dead blocks.
1620 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1621 for (auto *BB : DeadBlockSet)
1622 ParentL->getBlocksSet().erase(BB);
1623 llvm::erase_if(ParentL->getBlocksVector(),
1624 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1625 }
1626
1627 // Now delete the dead child loops. This raw delete will clear them
1628 // recursively.
1629 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1630 if (!DeadBlockSet.count(ChildL->getHeader()))
1631 return false;
1632
1633 assert(llvm::all_of(ChildL->blocks(),(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1638, __extension__ __PRETTY_FUNCTION__))
1634 [&](BasicBlock *ChildBB) {(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1638, __extension__ __PRETTY_FUNCTION__))
1635 return DeadBlockSet.count(ChildBB);(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1638, __extension__ __PRETTY_FUNCTION__))
1636 }) &&(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1638, __extension__ __PRETTY_FUNCTION__))
1637 "If the child loop header is dead all blocks in the child loop must "(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1638, __extension__ __PRETTY_FUNCTION__))
1638 "be dead as well!")(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1638, __extension__ __PRETTY_FUNCTION__))
;
1639 LI.destroy(ChildL);
1640 return true;
1641 });
1642
1643 // Remove the loop mappings for the dead blocks and drop all the references
1644 // from these blocks to others to handle cyclic references as we start
1645 // deleting the blocks themselves.
1646 for (auto *BB : DeadBlockSet) {
1647 // Check that the dominator tree has already been updated.
1648 assert(!DT.getNode(BB) && "Should already have cleared domtree!")(static_cast <bool> (!DT.getNode(BB) && "Should already have cleared domtree!"
) ? void (0) : __assert_fail ("!DT.getNode(BB) && \"Should already have cleared domtree!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1648, __extension__ __PRETTY_FUNCTION__))
;
1649 LI.changeLoopFor(BB, nullptr);
1650 // Drop all uses of the instructions to make sure we won't have dangling
1651 // uses in other blocks.
1652 for (auto &I : *BB)
1653 if (!I.use_empty())
1654 I.replaceAllUsesWith(UndefValue::get(I.getType()));
1655 BB->dropAllReferences();
1656 }
1657
1658 // Actually delete the blocks now that they've been fully unhooked from the
1659 // IR.
1660 for (auto *BB : DeadBlockSet)
1661 BB->eraseFromParent();
1662}
1663
1664/// Recompute the set of blocks in a loop after unswitching.
1665///
1666/// This walks from the original headers predecessors to rebuild the loop. We
1667/// take advantage of the fact that new blocks can't have been added, and so we
1668/// filter by the original loop's blocks. This also handles potentially
1669/// unreachable code that we don't want to explore but might be found examining
1670/// the predecessors of the header.
1671///
1672/// If the original loop is no longer a loop, this will return an empty set. If
1673/// it remains a loop, all the blocks within it will be added to the set
1674/// (including those blocks in inner loops).
1675static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
1676 LoopInfo &LI) {
1677 SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
1678
1679 auto *PH = L.getLoopPreheader();
1680 auto *Header = L.getHeader();
1681
1682 // A worklist to use while walking backwards from the header.
1683 SmallVector<BasicBlock *, 16> Worklist;
1684
1685 // First walk the predecessors of the header to find the backedges. This will
1686 // form the basis of our walk.
1687 for (auto *Pred : predecessors(Header)) {
1688 // Skip the preheader.
1689 if (Pred == PH)
1690 continue;
1691
1692 // Because the loop was in simplified form, the only non-loop predecessor
1693 // is the preheader.
1694 assert(L.contains(Pred) && "Found a predecessor of the loop header other "(static_cast <bool> (L.contains(Pred) && "Found a predecessor of the loop header other "
"than the preheader that is not part of the " "loop!") ? void
(0) : __assert_fail ("L.contains(Pred) && \"Found a predecessor of the loop header other \" \"than the preheader that is not part of the \" \"loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1696, __extension__ __PRETTY_FUNCTION__))
1695 "than the preheader that is not part of the "(static_cast <bool> (L.contains(Pred) && "Found a predecessor of the loop header other "
"than the preheader that is not part of the " "loop!") ? void
(0) : __assert_fail ("L.contains(Pred) && \"Found a predecessor of the loop header other \" \"than the preheader that is not part of the \" \"loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1696, __extension__ __PRETTY_FUNCTION__))
1696 "loop!")(static_cast <bool> (L.contains(Pred) && "Found a predecessor of the loop header other "
"than the preheader that is not part of the " "loop!") ? void
(0) : __assert_fail ("L.contains(Pred) && \"Found a predecessor of the loop header other \" \"than the preheader that is not part of the \" \"loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1696, __extension__ __PRETTY_FUNCTION__))
;
1697
1698 // Insert this block into the loop set and on the first visit and, if it
1699 // isn't the header we're currently walking, put it into the worklist to
1700 // recurse through.
1701 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1702 Worklist.push_back(Pred);
1703 }
1704
1705 // If no backedges were found, we're done.
1706 if (LoopBlockSet.empty())
1707 return LoopBlockSet;
1708
1709 // We found backedges, recurse through them to identify the loop blocks.
1710 while (!Worklist.empty()) {
1711 BasicBlock *BB = Worklist.pop_back_val();
1712 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!")(static_cast <bool> (LoopBlockSet.count(BB) && "Didn't put block into the loop set!"
) ? void (0) : __assert_fail ("LoopBlockSet.count(BB) && \"Didn't put block into the loop set!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1712, __extension__ __PRETTY_FUNCTION__))
;
1713
1714 // No need to walk past the header.
1715 if (BB == Header)
1716 continue;
1717
1718 // Because we know the inner loop structure remains valid we can use the
1719 // loop structure to jump immediately across the entire nested loop.
1720 // Further, because it is in loop simplified form, we can directly jump
1721 // to its preheader afterward.
1722 if (Loop *InnerL = LI.getLoopFor(BB))
1723 if (InnerL != &L) {
1724 assert(L.contains(InnerL) &&(static_cast <bool> (L.contains(InnerL) && "Should not reach a loop *outside* this loop!"
) ? void (0) : __assert_fail ("L.contains(InnerL) && \"Should not reach a loop *outside* this loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1725, __extension__ __PRETTY_FUNCTION__))
1725 "Should not reach a loop *outside* this loop!")(static_cast <bool> (L.contains(InnerL) && "Should not reach a loop *outside* this loop!"
) ? void (0) : __assert_fail ("L.contains(InnerL) && \"Should not reach a loop *outside* this loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1725, __extension__ __PRETTY_FUNCTION__))
;
1726 // The preheader is the only possible predecessor of the loop so
1727 // insert it into the set and check whether it was already handled.
1728 auto *InnerPH = InnerL->getLoopPreheader();
1729 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "(static_cast <bool> (L.contains(InnerPH) && "Cannot contain an inner loop block "
"but not contain the inner loop " "preheader!") ? void (0) :
__assert_fail ("L.contains(InnerPH) && \"Cannot contain an inner loop block \" \"but not contain the inner loop \" \"preheader!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1731, __extension__ __PRETTY_FUNCTION__))
1730 "but not contain the inner loop "(static_cast <bool> (L.contains(InnerPH) && "Cannot contain an inner loop block "
"but not contain the inner loop " "preheader!") ? void (0) :
__assert_fail ("L.contains(InnerPH) && \"Cannot contain an inner loop block \" \"but not contain the inner loop \" \"preheader!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1731, __extension__ __PRETTY_FUNCTION__))
1731 "preheader!")(static_cast <bool> (L.contains(InnerPH) && "Cannot contain an inner loop block "
"but not contain the inner loop " "preheader!") ? void (0) :
__assert_fail ("L.contains(InnerPH) && \"Cannot contain an inner loop block \" \"but not contain the inner loop \" \"preheader!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1731, __extension__ __PRETTY_FUNCTION__))
;
1732 if (!LoopBlockSet.insert(InnerPH).second)
1733 // The only way to reach the preheader is through the loop body
1734 // itself so if it has been visited the loop is already handled.
1735 continue;
1736
1737 // Insert all of the blocks (other than those already present) into
1738 // the loop set. We expect at least the block that led us to find the
1739 // inner loop to be in the block set, but we may also have other loop
1740 // blocks if they were already enqueued as predecessors of some other
1741 // outer loop block.
1742 for (auto *InnerBB : InnerL->blocks()) {
1743 if (InnerBB == BB) {
1744 assert(LoopBlockSet.count(InnerBB) &&(static_cast <bool> (LoopBlockSet.count(InnerBB) &&
"Block should already be in the set!") ? void (0) : __assert_fail
("LoopBlockSet.count(InnerBB) && \"Block should already be in the set!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1745, __extension__ __PRETTY_FUNCTION__))
1745 "Block should already be in the set!")(static_cast <bool> (LoopBlockSet.count(InnerBB) &&
"Block should already be in the set!") ? void (0) : __assert_fail
("LoopBlockSet.count(InnerBB) && \"Block should already be in the set!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1745, __extension__ __PRETTY_FUNCTION__))
;
1746 continue;
1747 }
1748
1749 LoopBlockSet.insert(InnerBB);
1750 }
1751
1752 // Add the preheader to the worklist so we will continue past the
1753 // loop body.
1754 Worklist.push_back(InnerPH);
1755 continue;
1756 }
1757
1758 // Insert any predecessors that were in the original loop into the new
1759 // set, and if the insert is successful, add them to the worklist.
1760 for (auto *Pred : predecessors(BB))
1761 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1762 Worklist.push_back(Pred);
1763 }
1764
1765 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!")(static_cast <bool> (LoopBlockSet.count(Header) &&
"Cannot fail to add the header!") ? void (0) : __assert_fail
("LoopBlockSet.count(Header) && \"Cannot fail to add the header!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1765, __extension__ __PRETTY_FUNCTION__))
;
1766
1767 // We've found all the blocks participating in the loop, return our completed
1768 // set.
1769 return LoopBlockSet;
1770}
1771
1772/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1773///
1774/// The removal may have removed some child loops entirely but cannot have
1775/// disturbed any remaining child loops. However, they may need to be hoisted
1776/// to the parent loop (or to be top-level loops). The original loop may be
1777/// completely removed.
1778///
1779/// The sibling loops resulting from this update are returned. If the original
1780/// loop remains a valid loop, it will be the first entry in this list with all
1781/// of the newly sibling loops following it.
1782///
1783/// Returns true if the loop remains a loop after unswitching, and false if it
1784/// is no longer a loop after unswitching (and should not continue to be
1785/// referenced).
1786static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1787 LoopInfo &LI,
1788 SmallVectorImpl<Loop *> &HoistedLoops) {
1789 auto *PH = L.getLoopPreheader();
1790
1791 // Compute the actual parent loop from the exit blocks. Because we may have
1792 // pruned some exits the loop may be different from the original parent.
1793 Loop *ParentL = nullptr;
1794 SmallVector<Loop *, 4> ExitLoops;
1795 SmallVector<BasicBlock *, 4> ExitsInLoops;
1796 ExitsInLoops.reserve(ExitBlocks.size());
1797 for (auto *ExitBB : ExitBlocks)
1798 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1799 ExitLoops.push_back(ExitL);
1800 ExitsInLoops.push_back(ExitBB);
1801 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1802 ParentL = ExitL;
1803 }
1804
1805 // Recompute the blocks participating in this loop. This may be empty if it
1806 // is no longer a loop.
1807 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1808
1809 // If we still have a loop, we need to re-set the loop's parent as the exit
1810 // block set changing may have moved it within the loop nest. Note that this
1811 // can only happen when this loop has a parent as it can only hoist the loop
1812 // *up* the nest.
1813 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1814 // Remove this loop's (original) blocks from all of the intervening loops.
1815 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1816 IL = IL->getParentLoop()) {
1817 IL->getBlocksSet().erase(PH);
1818 for (auto *BB : L.blocks())
1819 IL->getBlocksSet().erase(BB);
1820 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1821 return BB == PH || L.contains(BB);
1822 });
1823 }
1824
1825 LI.changeLoopFor(PH, ParentL);
1826 L.getParentLoop()->removeChildLoop(&L);
1827 if (ParentL)
1828 ParentL->addChildLoop(&L);
1829 else
1830 LI.addTopLevelLoop(&L);
1831 }
1832
1833 // Now we update all the blocks which are no longer within the loop.
1834 auto &Blocks = L.getBlocksVector();
1835 auto BlocksSplitI =
1836 LoopBlockSet.empty()
1837 ? Blocks.begin()
1838 : std::stable_partition(
1839 Blocks.begin(), Blocks.end(),
1840 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1841
1842 // Before we erase the list of unlooped blocks, build a set of them.
1843 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1844 if (LoopBlockSet.empty())
1845 UnloopedBlocks.insert(PH);
1846
1847 // Now erase these blocks from the loop.
1848 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1849 L.getBlocksSet().erase(BB);
1850 Blocks.erase(BlocksSplitI, Blocks.end());
1851
1852 // Sort the exits in ascending loop depth, we'll work backwards across these
1853 // to process them inside out.
1854 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1855 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1856 });
1857
1858 // We'll build up a set for each exit loop.
1859 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1860 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1861
1862 auto RemoveUnloopedBlocksFromLoop =
1863 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1864 for (auto *BB : UnloopedBlocks)
1865 L.getBlocksSet().erase(BB);
1866 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1867 return UnloopedBlocks.count(BB);
1868 });
1869 };
1870
1871 SmallVector<BasicBlock *, 16> Worklist;
1872 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1873 assert(Worklist.empty() && "Didn't clear worklist!")(static_cast <bool> (Worklist.empty() && "Didn't clear worklist!"
) ? void (0) : __assert_fail ("Worklist.empty() && \"Didn't clear worklist!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1873, __extension__ __PRETTY_FUNCTION__))
;
1874 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!")(static_cast <bool> (NewExitLoopBlocks.empty() &&
"Didn't clear loop set!") ? void (0) : __assert_fail ("NewExitLoopBlocks.empty() && \"Didn't clear loop set!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1874, __extension__ __PRETTY_FUNCTION__))
;
1875
1876 // Grab the next exit block, in decreasing loop depth order.
1877 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1878 Loop &ExitL = *LI.getLoopFor(ExitBB);
1879 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!")(static_cast <bool> (ExitL.contains(&L) && "Exit loop must contain the inner loop!"
) ? void (0) : __assert_fail ("ExitL.contains(&L) && \"Exit loop must contain the inner loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1879, __extension__ __PRETTY_FUNCTION__))
;
1880
1881 // Erase all of the unlooped blocks from the loops between the previous
1882 // exit loop and this exit loop. This works because the ExitInLoops list is
1883 // sorted in increasing order of loop depth and thus we visit loops in
1884 // decreasing order of loop depth.
1885 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1886 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1887
1888 // Walk the CFG back until we hit the cloned PH adding everything reachable
1889 // and in the unlooped set to this exit block's loop.
1890 Worklist.push_back(ExitBB);
1891 do {
1892 BasicBlock *BB = Worklist.pop_back_val();
1893 // We can stop recursing at the cloned preheader (if we get there).
1894 if (BB == PH)
1895 continue;
1896
1897 for (BasicBlock *PredBB : predecessors(BB)) {
1898 // If this pred has already been moved to our set or is part of some
1899 // (inner) loop, no update needed.
1900 if (!UnloopedBlocks.erase(PredBB)) {
1901 assert((NewExitLoopBlocks.count(PredBB) ||(static_cast <bool> ((NewExitLoopBlocks.count(PredBB) ||
ExitL.contains(LI.getLoopFor(PredBB))) && "Predecessor not in a nested loop (or already visited)!"
) ? void (0) : __assert_fail ("(NewExitLoopBlocks.count(PredBB) || ExitL.contains(LI.getLoopFor(PredBB))) && \"Predecessor not in a nested loop (or already visited)!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1903, __extension__ __PRETTY_FUNCTION__))
1902 ExitL.contains(LI.getLoopFor(PredBB))) &&(static_cast <bool> ((NewExitLoopBlocks.count(PredBB) ||
ExitL.contains(LI.getLoopFor(PredBB))) && "Predecessor not in a nested loop (or already visited)!"
) ? void (0) : __assert_fail ("(NewExitLoopBlocks.count(PredBB) || ExitL.contains(LI.getLoopFor(PredBB))) && \"Predecessor not in a nested loop (or already visited)!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1903, __extension__ __PRETTY_FUNCTION__))
1903 "Predecessor not in a nested loop (or already visited)!")(static_cast <bool> ((NewExitLoopBlocks.count(PredBB) ||
ExitL.contains(LI.getLoopFor(PredBB))) && "Predecessor not in a nested loop (or already visited)!"
) ? void (0) : __assert_fail ("(NewExitLoopBlocks.count(PredBB) || ExitL.contains(LI.getLoopFor(PredBB))) && \"Predecessor not in a nested loop (or already visited)!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1903, __extension__ __PRETTY_FUNCTION__))
;
1904 continue;
1905 }
1906
1907 // We just insert into the loop set here. We'll add these blocks to the
1908 // exit loop after we build up the set in a deterministic order rather
1909 // than the predecessor-influenced visit order.
1910 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
1911 (void)Inserted;
1912 assert(Inserted && "Should only visit an unlooped block once!")(static_cast <bool> (Inserted && "Should only visit an unlooped block once!"
) ? void (0) : __assert_fail ("Inserted && \"Should only visit an unlooped block once!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1912, __extension__ __PRETTY_FUNCTION__))
;
1913
1914 // And recurse through to its predecessors.
1915 Worklist.push_back(PredBB);
1916 }
1917 } while (!Worklist.empty());
1918
1919 // If blocks in this exit loop were directly part of the original loop (as
1920 // opposed to a child loop) update the map to point to this exit loop. This
1921 // just updates a map and so the fact that the order is unstable is fine.
1922 for (auto *BB : NewExitLoopBlocks)
1923 if (Loop *BBL = LI.getLoopFor(BB))
1924 if (BBL == &L || !L.contains(BBL))
1925 LI.changeLoopFor(BB, &ExitL);
1926
1927 // We will remove the remaining unlooped blocks from this loop in the next
1928 // iteration or below.
1929 NewExitLoopBlocks.clear();
1930 }
1931
1932 // Any remaining unlooped blocks are no longer part of any loop unless they
1933 // are part of some child loop.
1934 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
1935 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1936 for (auto *BB : UnloopedBlocks)
1937 if (Loop *BBL = LI.getLoopFor(BB))
1938 if (BBL == &L || !L.contains(BBL))
1939 LI.changeLoopFor(BB, nullptr);
1940
1941 // Sink all the child loops whose headers are no longer in the loop set to
1942 // the parent (or to be top level loops). We reach into the loop and directly
1943 // update its subloop vector to make this batch update efficient.
1944 auto &SubLoops = L.getSubLoopsVector();
1945 auto SubLoopsSplitI =
1946 LoopBlockSet.empty()
1947 ? SubLoops.begin()
1948 : std::stable_partition(
1949 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
1950 return LoopBlockSet.count(SubL->getHeader());
1951 });
1952 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
1953 HoistedLoops.push_back(HoistedL);
1954 HoistedL->setParentLoop(nullptr);
1955
1956 // To compute the new parent of this hoisted loop we look at where we
1957 // placed the preheader above. We can't lookup the header itself because we
1958 // retained the mapping from the header to the hoisted loop. But the
1959 // preheader and header should have the exact same new parent computed
1960 // based on the set of exit blocks from the original loop as the preheader
1961 // is a predecessor of the header and so reached in the reverse walk. And
1962 // because the loops were all in simplified form the preheader of the
1963 // hoisted loop can't be part of some *other* loop.
1964 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
1965 NewParentL->addChildLoop(HoistedL);
1966 else
1967 LI.addTopLevelLoop(HoistedL);
1968 }
1969 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
1970
1971 // Actually delete the loop if nothing remained within it.
1972 if (Blocks.empty()) {
1973 assert(SubLoops.empty() &&(static_cast <bool> (SubLoops.empty() && "Failed to remove all subloops from the original loop!"
) ? void (0) : __assert_fail ("SubLoops.empty() && \"Failed to remove all subloops from the original loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1974, __extension__ __PRETTY_FUNCTION__))
1974 "Failed to remove all subloops from the original loop!")(static_cast <bool> (SubLoops.empty() && "Failed to remove all subloops from the original loop!"
) ? void (0) : __assert_fail ("SubLoops.empty() && \"Failed to remove all subloops from the original loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 1974, __extension__ __PRETTY_FUNCTION__))
;
1975 if (Loop *ParentL = L.getParentLoop())
1976 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
1977 else
1978 LI.removeLoop(llvm::find(LI, &L));
1979 LI.destroy(&L);
1980 return false;
1981 }
1982
1983 return true;
1984}
1985
1986/// Helper to visit a dominator subtree, invoking a callable on each node.
1987///
1988/// Returning false at any point will stop walking past that node of the tree.
1989template <typename CallableT>
1990void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
1991 SmallVector<DomTreeNode *, 4> DomWorklist;
1992 DomWorklist.push_back(DT[BB]);
1993#ifndef NDEBUG
1994 SmallPtrSet<DomTreeNode *, 4> Visited;
1995 Visited.insert(DT[BB]);
1996#endif
1997 do {
1998 DomTreeNode *N = DomWorklist.pop_back_val();
1999
2000 // Visit this node.
2001 if (!Callable(N->getBlock()))
2002 continue;
2003
2004 // Accumulate the child nodes.
2005 for (DomTreeNode *ChildN : *N) {
2006 assert(Visited.insert(ChildN).second &&(static_cast <bool> (Visited.insert(ChildN).second &&
"Cannot visit a node twice when walking a tree!") ? void (0)
: __assert_fail ("Visited.insert(ChildN).second && \"Cannot visit a node twice when walking a tree!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2007, __extension__ __PRETTY_FUNCTION__))
2007 "Cannot visit a node twice when walking a tree!")(static_cast <bool> (Visited.insert(ChildN).second &&
"Cannot visit a node twice when walking a tree!") ? void (0)
: __assert_fail ("Visited.insert(ChildN).second && \"Cannot visit a node twice when walking a tree!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2007, __extension__ __PRETTY_FUNCTION__))
;
2008 DomWorklist.push_back(ChildN);
2009 }
2010 } while (!DomWorklist.empty());
2011}
2012
2013static void unswitchNontrivialInvariants(
2014 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2015 SmallVectorImpl<BasicBlock *> &ExitBlocks, IVConditionInfo &PartialIVInfo,
2016 DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2017 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2018 ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
2019 auto *ParentBB = TI.getParent();
2020 BranchInst *BI = dyn_cast<BranchInst>(&TI);
2021 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2022
2023 // We can only unswitch switches, conditional branches with an invariant
2024 // condition, or combining invariant conditions with an instruction or
2025 // partially invariant instructions.
2026 assert((SI || (BI && BI->isConditional())) &&(static_cast <bool> ((SI || (BI && BI->isConditional
())) && "Can only unswitch switches and conditional branch!"
) ? void (0) : __assert_fail ("(SI || (BI && BI->isConditional())) && \"Can only unswitch switches and conditional branch!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2027, __extension__ __PRETTY_FUNCTION__))
2027 "Can only unswitch switches and conditional branch!")(static_cast <bool> ((SI || (BI && BI->isConditional
())) && "Can only unswitch switches and conditional branch!"
) ? void (0) : __assert_fail ("(SI || (BI && BI->isConditional())) && \"Can only unswitch switches and conditional branch!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2027, __extension__ __PRETTY_FUNCTION__))
;
2028 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2029 bool FullUnswitch =
2030 SI || (BI->getCondition() == Invariants[0] && !PartiallyInvariant);
2031 if (FullUnswitch)
2032 assert(Invariants.size() == 1 &&(static_cast <bool> (Invariants.size() == 1 && "Cannot have other invariants with full unswitching!"
) ? void (0) : __assert_fail ("Invariants.size() == 1 && \"Cannot have other invariants with full unswitching!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2033, __extension__ __PRETTY_FUNCTION__))
2033 "Cannot have other invariants with full unswitching!")(static_cast <bool> (Invariants.size() == 1 && "Cannot have other invariants with full unswitching!"
) ? void (0) : __assert_fail ("Invariants.size() == 1 && \"Cannot have other invariants with full unswitching!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2033, __extension__ __PRETTY_FUNCTION__))
;
2034 else
2035 assert(isa<Instruction>(BI->getCondition()) &&(static_cast <bool> (isa<Instruction>(BI->getCondition
()) && "Partial unswitching requires an instruction as the condition!"
) ? void (0) : __assert_fail ("isa<Instruction>(BI->getCondition()) && \"Partial unswitching requires an instruction as the condition!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2036, __extension__ __PRETTY_FUNCTION__))
2036 "Partial unswitching requires an instruction as the condition!")(static_cast <bool> (isa<Instruction>(BI->getCondition
()) && "Partial unswitching requires an instruction as the condition!"
) ? void (0) : __assert_fail ("isa<Instruction>(BI->getCondition()) && \"Partial unswitching requires an instruction as the condition!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2036, __extension__ __PRETTY_FUNCTION__))
;
2037
2038 if (MSSAU && VerifyMemorySSA)
2039 MSSAU->getMemorySSA()->verifyMemorySSA();
2040
2041 // Constant and BBs tracking the cloned and continuing successor. When we are
2042 // unswitching the entire condition, this can just be trivially chosen to
2043 // unswitch towards `true`. However, when we are unswitching a set of
2044 // invariants combined with `and` or `or` or partially invariant instructions,
2045 // the combining operation determines the best direction to unswitch: we want
2046 // to unswitch the direction that will collapse the branch.
2047 bool Direction = true;
2048 int ClonedSucc = 0;
2049 if (!FullUnswitch) {
2050 Value *Cond = BI->getCondition();
2051 (void)Cond;
2052 assert(((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) ||(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match
(Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions "
"can combine invariants being unswitched.") ? void (0) : __assert_fail
("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2055, __extension__ __PRETTY_FUNCTION__))
2053 PartiallyInvariant) &&(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match
(Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions "
"can combine invariants being unswitched.") ? void (0) : __assert_fail
("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2055, __extension__ __PRETTY_FUNCTION__))
2054 "Only `or`, `and`, an `select`, partially invariant instructions "(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match
(Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions "
"can combine invariants being unswitched.") ? void (0) : __assert_fail
("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2055, __extension__ __PRETTY_FUNCTION__))
2055 "can combine invariants being unswitched.")(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match
(Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions "
"can combine invariants being unswitched.") ? void (0) : __assert_fail
("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2055, __extension__ __PRETTY_FUNCTION__))
;
2056 if (!match(BI->getCondition(), m_LogicalOr())) {
2057 if (match(BI->getCondition(), m_LogicalAnd()) ||
2058 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2059 Direction = false;
2060 ClonedSucc = 1;
2061 }
2062 }
2063 }
2064
2065 BasicBlock *RetainedSuccBB =
2066 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2067 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2068 if (BI)
2069 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2070 else
2071 for (auto Case : SI->cases())
2072 if (Case.getCaseSuccessor() != RetainedSuccBB)
2073 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2074
2075 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&(static_cast <bool> (!UnswitchedSuccBBs.count(RetainedSuccBB
) && "Should not unswitch the same successor we are retaining!"
) ? void (0) : __assert_fail ("!UnswitchedSuccBBs.count(RetainedSuccBB) && \"Should not unswitch the same successor we are retaining!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2076, __extension__ __PRETTY_FUNCTION__))
2076 "Should not unswitch the same successor we are retaining!")(static_cast <bool> (!UnswitchedSuccBBs.count(RetainedSuccBB
) && "Should not unswitch the same successor we are retaining!"
) ? void (0) : __assert_fail ("!UnswitchedSuccBBs.count(RetainedSuccBB) && \"Should not unswitch the same successor we are retaining!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2076, __extension__ __PRETTY_FUNCTION__))
;
2077
2078 // The branch should be in this exact loop. Any inner loop's invariant branch
2079 // should be handled by unswitching that inner loop. The caller of this
2080 // routine should filter out any candidates that remain (but were skipped for
2081 // whatever reason).
2082 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!")(static_cast <bool> (LI.getLoopFor(ParentBB) == &L &&
"Branch in an inner loop!") ? void (0) : __assert_fail ("LI.getLoopFor(ParentBB) == &L && \"Branch in an inner loop!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2082, __extension__ __PRETTY_FUNCTION__))
;
2083
2084 // Compute the parent loop now before we start hacking on things.
2085 Loop *ParentL = L.getParentLoop();
2086 // Get blocks in RPO order for MSSA update, before changing the CFG.
2087 LoopBlocksRPO LBRPO(&L);
2088 if (MSSAU)
2089 LBRPO.perform(&LI);
2090
2091 // Compute the outer-most loop containing one of our exit blocks. This is the
2092 // furthest up our loopnest which can be mutated, which we will use below to
2093 // update things.
2094 Loop *OuterExitL = &L;
2095 for (auto *ExitBB : ExitBlocks) {
2096 Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
2097 if (!NewOuterExitL) {
2098 // We exited the entire nest with this block, so we're done.
2099 OuterExitL = nullptr;
2100 break;
2101 }
2102 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2103 OuterExitL = NewOuterExitL;
2104 }
2105
2106 // At this point, we're definitely going to unswitch something so invalidate
2107 // any cached information in ScalarEvolution for the outer most loop
2108 // containing an exit block and all nested loops.
2109 if (SE) {
2110 if (OuterExitL)
2111 SE->forgetLoop(OuterExitL);
2112 else
2113 SE->forgetTopmostLoop(&L);
2114 }
2115
2116 // If the edge from this terminator to a successor dominates that successor,
2117 // store a map from each block in its dominator subtree to it. This lets us
2118 // tell when cloning for a particular successor if a block is dominated by
2119 // some *other* successor with a single data structure. We use this to
2120 // significantly reduce cloning.
2121 SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
2122 for (auto *SuccBB : llvm::concat<BasicBlock *const>(
2123 makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs))
2124 if (SuccBB->getUniquePredecessor() ||
2125 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2126 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2127 }))
2128 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2129 DominatingSucc[BB] = SuccBB;
2130 return true;
2131 });
2132
2133 // Split the preheader, so that we know that there is a safe place to insert
2134 // the conditional branch. We will change the preheader to have a conditional
2135 // branch on LoopCond. The original preheader will become the split point
2136 // between the unswitched versions, and we will have a new preheader for the
2137 // original loop.
2138 BasicBlock *SplitBB = L.getLoopPreheader();
2139 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2140
2141 // Keep track of the dominator tree updates needed.
2142 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2143
2144 // Clone the loop for each unswitched successor.
2145 SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
2146 VMaps.reserve(UnswitchedSuccBBs.size());
2147 SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
2148 for (auto *SuccBB : UnswitchedSuccBBs) {
2149 VMaps.emplace_back(new ValueToValueMapTy());
2150 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2151 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2152 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
2153 }
2154
2155 // Drop metadata if we may break its semantics by moving this instr into the
2156 // split block.
2157 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2158 if (DropNonTrivialImplicitNullChecks)
2159 // Do not spend time trying to understand if we can keep it, just drop it
2160 // to save compile time.
2161 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2162 else {
2163 // It is only legal to preserve make.implicit metadata if we are
2164 // guaranteed no reach implicit null check after following this branch.
2165 ICFLoopSafetyInfo SafetyInfo;
2166 SafetyInfo.computeLoopSafetyInfo(&L);
2167 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2168 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2169 }
2170 }
2171
2172 // The stitching of the branched code back together depends on whether we're
2173 // doing full unswitching or not with the exception that we always want to
2174 // nuke the initial terminator placed in the split block.
2175 SplitBB->getTerminator()->eraseFromParent();
2176 if (FullUnswitch) {
2177 // Splice the terminator from the original loop and rewrite its
2178 // successors.
2179 SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
2180
2181 // Keep a clone of the terminator for MSSA updates.
2182 Instruction *NewTI = TI.clone();
2183 ParentBB->getInstList().push_back(NewTI);
2184
2185 // First wire up the moved terminator to the preheaders.
2186 if (BI) {
2187 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2188 BI->setSuccessor(ClonedSucc, ClonedPH);
2189 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2190 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2191 } else {
2192 assert(SI && "Must either be a branch or switch!")(static_cast <bool> (SI && "Must either be a branch or switch!"
) ? void (0) : __assert_fail ("SI && \"Must either be a branch or switch!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2192, __extension__ __PRETTY_FUNCTION__))
;
2193
2194 // Walk the cases and directly update their successors.
2195 assert(SI->getDefaultDest() == RetainedSuccBB &&(static_cast <bool> (SI->getDefaultDest() == RetainedSuccBB
&& "Not retaining default successor!") ? void (0) : __assert_fail
("SI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2196, __extension__ __PRETTY_FUNCTION__))
2196 "Not retaining default successor!")(static_cast <bool> (SI->getDefaultDest() == RetainedSuccBB
&& "Not retaining default successor!") ? void (0) : __assert_fail
("SI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2196, __extension__ __PRETTY_FUNCTION__))
;
2197 SI->setDefaultDest(LoopPH);
2198 for (auto &Case : SI->cases())
2199 if (Case.getCaseSuccessor() == RetainedSuccBB)
2200 Case.setSuccessor(LoopPH);
2201 else
2202 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2203
2204 // We need to use the set to populate domtree updates as even when there
2205 // are multiple cases pointing at the same successor we only want to
2206 // remove and insert one edge in the domtree.
2207 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2208 DTUpdates.push_back(
2209 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2210 }
2211
2212 if (MSSAU) {
2213 DT.applyUpdates(DTUpdates);
2214 DTUpdates.clear();
2215
2216 // Remove all but one edge to the retained block and all unswitched
2217 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2218 // when we know we only keep a single edge for each case.
2219 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2220 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2221 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2222
2223 for (auto &VMap : VMaps)
2224 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2225 /*IgnoreIncomingWithNoClones=*/true);
2226 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2227
2228 // Remove all edges to unswitched blocks.
2229 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2230 MSSAU->removeEdge(ParentBB, SuccBB);
2231 }
2232
2233 // Now unhook the successor relationship as we'll be replacing
2234 // the terminator with a direct branch. This is much simpler for branches
2235 // than switches so we handle those first.
2236 if (BI) {
2237 // Remove the parent as a predecessor of the unswitched successor.
2238 assert(UnswitchedSuccBBs.size() == 1 &&(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2239, __extension__ __PRETTY_FUNCTION__))
2239 "Only one possible unswitched block for a branch!")(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2239, __extension__ __PRETTY_FUNCTION__))
;
2240 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2241 UnswitchedSuccBB->removePredecessor(ParentBB,
2242 /*KeepOneInputPHIs*/ true);
2243 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2244 } else {
2245 // Note that we actually want to remove the parent block as a predecessor
2246 // of *every* case successor. The case successor is either unswitched,
2247 // completely eliminating an edge from the parent to that successor, or it
2248 // is a duplicate edge to the retained successor as the retained successor
2249 // is always the default successor and as we'll replace this with a direct
2250 // branch we no longer need the duplicate entries in the PHI nodes.
2251 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2252 assert(NewSI->getDefaultDest() == RetainedSuccBB &&(static_cast <bool> (NewSI->getDefaultDest() == RetainedSuccBB
&& "Not retaining default successor!") ? void (0) : __assert_fail
("NewSI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2253, __extension__ __PRETTY_FUNCTION__))
2253 "Not retaining default successor!")(static_cast <bool> (NewSI->getDefaultDest() == RetainedSuccBB
&& "Not retaining default successor!") ? void (0) : __assert_fail
("NewSI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2253, __extension__ __PRETTY_FUNCTION__))
;
2254 for (auto &Case : NewSI->cases())
2255 Case.getCaseSuccessor()->removePredecessor(
2256 ParentBB,
2257 /*KeepOneInputPHIs*/ true);
2258
2259 // We need to use the set to populate domtree updates as even when there
2260 // are multiple cases pointing at the same successor we only want to
2261 // remove and insert one edge in the domtree.
2262 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2263 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2264 }
2265
2266 // After MSSAU update, remove the cloned terminator instruction NewTI.
2267 ParentBB->getTerminator()->eraseFromParent();
2268
2269 // Create a new unconditional branch to the continuing block (as opposed to
2270 // the one cloned).
2271 BranchInst::Create(RetainedSuccBB, ParentBB);
2272 } else {
2273 assert(BI && "Only branches have partial unswitching.")(static_cast <bool> (BI && "Only branches have partial unswitching."
) ? void (0) : __assert_fail ("BI && \"Only branches have partial unswitching.\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2273, __extension__ __PRETTY_FUNCTION__))
;
2274 assert(UnswitchedSuccBBs.size() == 1 &&(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2275, __extension__ __PRETTY_FUNCTION__))
2275 "Only one possible unswitched block for a branch!")(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2275, __extension__ __PRETTY_FUNCTION__))
;
2276 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2277 // When doing a partial unswitch, we have to do a bit more work to build up
2278 // the branch in the split block.
2279 if (PartiallyInvariant)
2280 buildPartialInvariantUnswitchConditionalBranch(
2281 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2282 else
2283 buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
2284 *ClonedPH, *LoopPH);
2285 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2286
2287 if (MSSAU) {
2288 DT.applyUpdates(DTUpdates);
2289 DTUpdates.clear();
2290
2291 // Perform MSSA cloning updates.
2292 for (auto &VMap : VMaps)
2293 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2294 /*IgnoreIncomingWithNoClones=*/true);
2295 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2296 }
2297 }
2298
2299 // Apply the updates accumulated above to get an up-to-date dominator tree.
2300 DT.applyUpdates(DTUpdates);
2301
2302 // Now that we have an accurate dominator tree, first delete the dead cloned
2303 // blocks so that we can accurately build any cloned loops. It is important to
2304 // not delete the blocks from the original loop yet because we still want to
2305 // reference the original loop to understand the cloned loop's structure.
2306 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2307
2308 // Build the cloned loop structure itself. This may be substantially
2309 // different from the original structure due to the simplified CFG. This also
2310 // handles inserting all the cloned blocks into the correct loops.
2311 SmallVector<Loop *, 4> NonChildClonedLoops;
2312 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2313 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2314
2315 // Now that our cloned loops have been built, we can update the original loop.
2316 // First we delete the dead blocks from it and then we rebuild the loop
2317 // structure taking these deletions into account.
2318 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU);
2319
2320 if (MSSAU && VerifyMemorySSA)
2321 MSSAU->getMemorySSA()->verifyMemorySSA();
2322
2323 SmallVector<Loop *, 4> HoistedLoops;
2324 bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
2325
2326 if (MSSAU && VerifyMemorySSA)
2327 MSSAU->getMemorySSA()->verifyMemorySSA();
2328
2329 // This transformation has a high risk of corrupting the dominator tree, and
2330 // the below steps to rebuild loop structures will result in hard to debug
2331 // errors in that case so verify that the dominator tree is sane first.
2332 // FIXME: Remove this when the bugs stop showing up and rely on existing
2333 // verification steps.
2334 assert(DT.verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (DT.verify(DominatorTree::VerificationLevel
::Fast)) ? void (0) : __assert_fail ("DT.verify(DominatorTree::VerificationLevel::Fast)"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2334, __extension__ __PRETTY_FUNCTION__))
;
2335
2336 if (BI && !PartiallyInvariant) {
2337 // If we unswitched a branch which collapses the condition to a known
2338 // constant we want to replace all the uses of the invariants within both
2339 // the original and cloned blocks. We do this here so that we can use the
2340 // now updated dominator tree to identify which side the users are on.
2341 assert(UnswitchedSuccBBs.size() == 1 &&(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2342, __extension__ __PRETTY_FUNCTION__))
2342 "Only one possible unswitched block for a branch!")(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2342, __extension__ __PRETTY_FUNCTION__))
;
2343 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2344
2345 // When considering multiple partially-unswitched invariants
2346 // we cant just go replace them with constants in both branches.
2347 //
2348 // For 'AND' we infer that true branch ("continue") means true
2349 // for each invariant operand.
2350 // For 'OR' we can infer that false branch ("continue") means false
2351 // for each invariant operand.
2352 // So it happens that for multiple-partial case we dont replace
2353 // in the unswitched branch.
2354 bool ReplaceUnswitched =
2355 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2356
2357 ConstantInt *UnswitchedReplacement =
2358 Direction ? ConstantInt::getTrue(BI->getContext())
2359 : ConstantInt::getFalse(BI->getContext());
2360 ConstantInt *ContinueReplacement =
2361 Direction ? ConstantInt::getFalse(BI->getContext())
2362 : ConstantInt::getTrue(BI->getContext());
2363 for (Value *Invariant : Invariants)
2364 // Use make_early_inc_range here as set invalidates the iterator.
2365 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2366 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2367 if (!UserI)
2368 continue;
2369
2370 // Replace it with the 'continue' side if in the main loop body, and the
2371 // unswitched if in the cloned blocks.
2372 if (DT.dominates(LoopPH, UserI->getParent()))
2373 U.set(ContinueReplacement);
2374 else if (ReplaceUnswitched &&
2375 DT.dominates(ClonedPH, UserI->getParent()))
2376 U.set(UnswitchedReplacement);
2377 }
2378 }
2379
2380 // We can change which blocks are exit blocks of all the cloned sibling
2381 // loops, the current loop, and any parent loops which shared exit blocks
2382 // with the current loop. As a consequence, we need to re-form LCSSA for
2383 // them. But we shouldn't need to re-form LCSSA for any child loops.
2384 // FIXME: This could be made more efficient by tracking which exit blocks are
2385 // new, and focusing on them, but that isn't likely to be necessary.
2386 //
2387 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2388 // loop nest and update every loop that could have had its exits changed. We
2389 // also need to cover any intervening loops. We add all of these loops to
2390 // a list and sort them by loop depth to achieve this without updating
2391 // unnecessary loops.
2392 auto UpdateLoop = [&](Loop &UpdateL) {
2393#ifndef NDEBUG
2394 UpdateL.verifyLoop();
2395 for (Loop *ChildL : UpdateL) {
2396 ChildL->verifyLoop();
2397 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&(static_cast <bool> (ChildL->isRecursivelyLCSSAForm(
DT, LI) && "Perturbed a child loop's LCSSA form!") ? void
(0) : __assert_fail ("ChildL->isRecursivelyLCSSAForm(DT, LI) && \"Perturbed a child loop's LCSSA form!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2398, __extension__ __PRETTY_FUNCTION__))
2398 "Perturbed a child loop's LCSSA form!")(static_cast <bool> (ChildL->isRecursivelyLCSSAForm(
DT, LI) && "Perturbed a child loop's LCSSA form!") ? void
(0) : __assert_fail ("ChildL->isRecursivelyLCSSAForm(DT, LI) && \"Perturbed a child loop's LCSSA form!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2398, __extension__ __PRETTY_FUNCTION__))
;
2399 }
2400#endif
2401 // First build LCSSA for this loop so that we can preserve it when
2402 // forming dedicated exits. We don't want to perturb some other loop's
2403 // LCSSA while doing that CFG edit.
2404 formLCSSA(UpdateL, DT, &LI, SE);
2405
2406 // For loops reached by this loop's original exit blocks we may
2407 // introduced new, non-dedicated exits. At least try to re-form dedicated
2408 // exits for these loops. This may fail if they couldn't have dedicated
2409 // exits to start with.
2410 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2411 };
2412
2413 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2414 // and we can do it in any order as they don't nest relative to each other.
2415 //
2416 // Also check if any of the loops we have updated have become top-level loops
2417 // as that will necessitate widening the outer loop scope.
2418 for (Loop *UpdatedL :
2419 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2420 UpdateLoop(*UpdatedL);
2421 if (UpdatedL->isOutermost())
2422 OuterExitL = nullptr;
2423 }
2424 if (IsStillLoop) {
2425 UpdateLoop(L);
2426 if (L.isOutermost())
2427 OuterExitL = nullptr;
2428 }
2429
2430 // If the original loop had exit blocks, walk up through the outer most loop
2431 // of those exit blocks to update LCSSA and form updated dedicated exits.
2432 if (OuterExitL != &L)
2433 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2434 OuterL = OuterL->getParentLoop())
2435 UpdateLoop(*OuterL);
2436
2437#ifndef NDEBUG
2438 // Verify the entire loop structure to catch any incorrect updates before we
2439 // progress in the pass pipeline.
2440 LI.verify(DT);
2441#endif
2442
2443 // Now that we've unswitched something, make callbacks to report the changes.
2444 // For that we need to merge together the updated loops and the cloned loops
2445 // and check whether the original loop survived.
2446 SmallVector<Loop *, 4> SibLoops;
2447 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2448 if (UpdatedL->getParentLoop() == ParentL)
2449 SibLoops.push_back(UpdatedL);
2450 UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);
2451
2452 if (MSSAU && VerifyMemorySSA)
2453 MSSAU->getMemorySSA()->verifyMemorySSA();
2454
2455 if (BI)
2456 ++NumBranches;
2457 else
2458 ++NumSwitches;
2459}
2460
2461/// Recursively compute the cost of a dominator subtree based on the per-block
2462/// cost map provided.
2463///
2464/// The recursive computation is memozied into the provided DT-indexed cost map
2465/// to allow querying it for most nodes in the domtree without it becoming
2466/// quadratic.
2467static InstructionCost computeDomSubtreeCost(
2468 DomTreeNode &N,
2469 const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,
2470 SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {
2471 // Don't accumulate cost (or recurse through) blocks not in our block cost
2472 // map and thus not part of the duplication cost being considered.
2473 auto BBCostIt = BBCostMap.find(N.getBlock());
2474 if (BBCostIt == BBCostMap.end())
2475 return 0;
2476
2477 // Lookup this node to see if we already computed its cost.
2478 auto DTCostIt = DTCostMap.find(&N);
2479 if (DTCostIt != DTCostMap.end())
2480 return DTCostIt->second;
2481
2482 // If not, we have to compute it. We can't use insert above and update
2483 // because computing the cost may insert more things into the map.
2484 InstructionCost Cost = std::accumulate(
2485 N.begin(), N.end(), BBCostIt->second,
2486 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2487 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2488 });
2489 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2490 (void)Inserted;
2491 assert(Inserted && "Should not insert a node while visiting children!")(static_cast <bool> (Inserted && "Should not insert a node while visiting children!"
) ? void (0) : __assert_fail ("Inserted && \"Should not insert a node while visiting children!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2491, __extension__ __PRETTY_FUNCTION__))
;
2492 return Cost;
2493}
2494
2495/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2496/// making the following replacement:
2497///
2498/// --code before guard--
2499/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2500/// --code after guard--
2501///
2502/// into
2503///
2504/// --code before guard--
2505/// br i1 %cond, label %guarded, label %deopt
2506///
2507/// guarded:
2508/// --code after guard--
2509///
2510/// deopt:
2511/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2512/// unreachable
2513///
2514/// It also makes all relevant DT and LI updates, so that all structures are in
2515/// valid state after this transform.
2516static BranchInst *
2517turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
2518 SmallVectorImpl<BasicBlock *> &ExitBlocks,
2519 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
2520 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2521 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Turning " <<
*GI << " into a branch.\n"; } } while (false)
;
2522 BasicBlock *CheckBB = GI->getParent();
2523
2524 if (MSSAU && VerifyMemorySSA)
2525 MSSAU->getMemorySSA()->verifyMemorySSA();
2526
2527 // Remove all CheckBB's successors from DomTree. A block can be seen among
2528 // successors more than once, but for DomTree it should be added only once.
2529 SmallPtrSet<BasicBlock *, 4> Successors;
2530 for (auto *Succ : successors(CheckBB))
2531 if (Successors.insert(Succ).second)
2532 DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2533
2534 Instruction *DeoptBlockTerm =
2535 SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
2536 BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2537 // SplitBlockAndInsertIfThen inserts control flow that branches to
2538 // DeoptBlockTerm if the condition is true. We want the opposite.
2539 CheckBI->swapSuccessors();
2540
2541 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2542 GuardedBlock->setName("guarded");
2543 CheckBI->getSuccessor(1)->setName("deopt");
2544 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2545
2546 // We now have a new exit block.
2547 ExitBlocks.push_back(CheckBI->getSuccessor(1));
2548
2549 if (MSSAU)
2550 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2551
2552 GI->moveBefore(DeoptBlockTerm);
2553 GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
2554
2555 // Add new successors of CheckBB into DomTree.
2556 for (auto *Succ : successors(CheckBB))
2557 DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
2558
2559 // Now the blocks that used to be CheckBB's successors are GuardedBlock's
2560 // successors.
2561 for (auto *Succ : Successors)
2562 DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
2563
2564 // Make proper changes to DT.
2565 DT.applyUpdates(DTUpdates);
2566 // Inform LI of a new loop block.
2567 L.addBasicBlockToLoop(GuardedBlock, LI);
2568
2569 if (MSSAU) {
2570 MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2571 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2572 if (VerifyMemorySSA)
2573 MSSAU->getMemorySSA()->verifyMemorySSA();
2574 }
2575
2576 ++NumGuards;
2577 return CheckBI;
2578}
2579
2580/// Cost multiplier is a way to limit potentially exponential behavior
2581/// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
2582/// candidates available. Also accounting for the number of "sibling" loops with
2583/// the idea to account for previous unswitches that already happened on this
2584/// cluster of loops. There was an attempt to keep this formula simple,
2585/// just enough to limit the worst case behavior. Even if it is not that simple
2586/// now it is still not an attempt to provide a detailed heuristic size
2587/// prediction.
2588///
2589/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2590/// unswitch candidates, making adequate predictions instead of wild guesses.
2591/// That requires knowing not just the number of "remaining" candidates but
2592/// also costs of unswitching for each of these candidates.
2593static int CalculateUnswitchCostMultiplier(
2594 Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
2595 ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *>>>
2596 UnswitchCandidates) {
2597
2598 // Guards and other exiting conditions do not contribute to exponential
2599 // explosion as soon as they dominate the latch (otherwise there might be
2600 // another path to the latch remaining that does not allow to eliminate the
2601 // loop copy on unswitch).
2602 BasicBlock *Latch = L.getLoopLatch();
2603 BasicBlock *CondBlock = TI.getParent();
2604 if (DT.dominates(CondBlock, Latch) &&
2605 (isGuard(&TI) ||
2606 llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
2607 return L.contains(SuccBB);
2608 }) <= 1)) {
2609 NumCostMultiplierSkipped++;
2610 return 1;
2611 }
2612
2613 auto *ParentL = L.getParentLoop();
2614 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2615 : std::distance(LI.begin(), LI.end()));
2616 // Count amount of clones that all the candidates might cause during
2617 // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
2618 int UnswitchedClones = 0;
2619 for (auto Candidate : UnswitchCandidates) {
2620 Instruction *CI = Candidate.first;
2621 BasicBlock *CondBlock = CI->getParent();
2622 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2623 if (isGuard(CI)) {
2624 if (!SkipExitingSuccessors)
2625 UnswitchedClones++;
2626 continue;
2627 }
2628 int NonExitingSuccessors = llvm::count_if(
2629 successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
2630 return !SkipExitingSuccessors || L.contains(SuccBB);
2631 });
2632 UnswitchedClones += Log2_32(NonExitingSuccessors);
2633 }
2634
2635 // Ignore up to the "unscaled candidates" number of unswitch candidates
2636 // when calculating the power-of-two scaling of the cost. The main idea
2637 // with this control is to allow a small number of unswitches to happen
2638 // and rely more on siblings multiplier (see below) when the number
2639 // of candidates is small.
2640 unsigned ClonesPower =
2641 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2642
2643 // Allowing top-level loops to spread a bit more than nested ones.
2644 int SiblingsMultiplier =
2645 std::max((ParentL ? SiblingsCount
2646 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2647 1);
2648 // Compute the cost multiplier in a way that won't overflow by saturating
2649 // at an upper bound.
2650 int CostMultiplier;
2651 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2652 SiblingsMultiplier > UnswitchThreshold)
2653 CostMultiplier = UnswitchThreshold;
2654 else
2655 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2656 (int)UnswitchThreshold);
2657
2658 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplierdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed multiplier "
<< CostMultiplier << " (siblings " << SiblingsMultiplier
<< " * clones " << (1 << ClonesPower) <<
")" << " for unswitch candidate: " << TI <<
"\n"; } } while (false)
2659 << " (siblings " << SiblingsMultiplier << " * clones "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed multiplier "
<< CostMultiplier << " (siblings " << SiblingsMultiplier
<< " * clones " << (1 << ClonesPower) <<
")" << " for unswitch candidate: " << TI <<
"\n"; } } while (false)
2660 << (1 << ClonesPower) << ")"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed multiplier "
<< CostMultiplier << " (siblings " << SiblingsMultiplier
<< " * clones " << (1 << ClonesPower) <<
")" << " for unswitch candidate: " << TI <<
"\n"; } } while (false)
2661 << " for unswitch candidate: " << TI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed multiplier "
<< CostMultiplier << " (siblings " << SiblingsMultiplier
<< " * clones " << (1 << ClonesPower) <<
")" << " for unswitch candidate: " << TI <<
"\n"; } } while (false)
;
2662 return CostMultiplier;
2663}
2664
2665static bool unswitchBestCondition(
2666 Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2667 AAResults &AA, TargetTransformInfo &TTI,
2668 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2669 ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
2670 // Collect all invariant conditions within this loop (as opposed to an inner
2671 // loop which would be handled when visiting that inner loop).
2672 SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
2673 UnswitchCandidates;
2674
2675 // Whether or not we should also collect guards in the loop.
2676 bool CollectGuards = false;
2677 if (UnswitchGuards) {
1
Assuming the condition is false
2
Taking false branch
2678 auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2679 Intrinsic::getName(Intrinsic::experimental_guard));
2680 if (GuardDecl && !GuardDecl->use_empty())
2681 CollectGuards = true;
2682 }
2683
2684 IVConditionInfo PartialIVInfo;
3
Calling implicit default constructor for 'IVConditionInfo'
5
Returning from default constructor for 'IVConditionInfo'
2685 for (auto *BB : L.blocks()) {
6
Assuming '__begin1' is equal to '__end1'
2686 if (LI.getLoopFor(BB) != &L)
2687 continue;
2688
2689 if (CollectGuards)
2690 for (auto &I : *BB)
2691 if (isGuard(&I)) {
2692 auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
2693 // TODO: Support AND, OR conditions and partial unswitching.
2694 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2695 UnswitchCandidates.push_back({&I, {Cond}});
2696 }
2697
2698 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2699 // We can only consider fully loop-invariant switch conditions as we need
2700 // to completely eliminate the switch after unswitching.
2701 if (!isa<Constant>(SI->getCondition()) &&
2702 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2703 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2704 continue;
2705 }
2706
2707 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2708 if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
2709 BI->getSuccessor(0) == BI->getSuccessor(1))
2710 continue;
2711
2712 // If BI's condition is 'select _, true, false', simplify it to confuse
2713 // matchers
2714 Value *Cond = BI->getCondition(), *CondNext;
2715 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
2716 Cond = CondNext;
2717 BI->setCondition(Cond);
2718
2719 if (L.isLoopInvariant(BI->getCondition())) {
2720 UnswitchCandidates.push_back({BI, {BI->getCondition()}});
2721 continue;
2722 }
2723
2724 Instruction &CondI = *cast<Instruction>(BI->getCondition());
2725 if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
2726 TinyPtrVector<Value *> Invariants =
2727 collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
2728 if (Invariants.empty())
2729 continue;
2730
2731 UnswitchCandidates.push_back({BI, std::move(Invariants)});
2732 continue;
2733 }
2734 }
2735
2736 Instruction *PartialIVCondBranch = nullptr;
2737 if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
7
Assuming 'MSSAU' is null
8
Taking false branch
2738 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2739 return TerminatorAndInvariants.first == L.getHeader()->getTerminator();
2740 })) {
2741 MemorySSA *MSSA = MSSAU->getMemorySSA();
2742 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2743 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "simple-loop-unswitch: Found partially invariant condition "
<< *Info->InstToDuplicate[0] << "\n"; } } while
(false)
2744 dbgs() << "simple-loop-unswitch: Found partially invariant condition "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "simple-loop-unswitch: Found partially invariant condition "
<< *Info->InstToDuplicate[0] << "\n"; } } while
(false)
2745 << *Info->InstToDuplicate[0] << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "simple-loop-unswitch: Found partially invariant condition "
<< *Info->InstToDuplicate[0] << "\n"; } } while
(false)
;
2746 PartialIVInfo = *Info;
2747 PartialIVCondBranch = L.getHeader()->getTerminator();
2748 TinyPtrVector<Value *> ValsToDuplicate;
2749 for (auto *Inst : Info->InstToDuplicate)
2750 ValsToDuplicate.push_back(Inst);
2751 UnswitchCandidates.push_back(
2752 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2753 }
2754 }
2755
2756 // If we didn't find any candidates, we're done.
2757 if (UnswitchCandidates.empty())
9
Calling 'SmallVectorBase::empty'
12
Returning from 'SmallVectorBase::empty'
13
Taking false branch
2758 return false;
2759
2760 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
2761 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
2762 // irreducible control flow into reducible control flow and introduce new
2763 // loops "out of thin air". If we ever discover important use cases for doing
2764 // this, we can add support to loop unswitch, but it is a lot of complexity
2765 // for what seems little or no real world benefit.
2766 LoopBlocksRPO RPOT(&L);
2767 RPOT.perform(&LI);
2768 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
14
Calling 'containsIrreducibleCFG<const llvm::BasicBlock *, llvm::LoopBlocksRPO, llvm::LoopInfo, llvm::GraphTraits<const llvm::BasicBlock *>>'
16
Returning from 'containsIrreducibleCFG<const llvm::BasicBlock *, llvm::LoopBlocksRPO, llvm::LoopInfo, llvm::GraphTraits<const llvm::BasicBlock *>>'
17
Taking false branch
2769 return false;
2770
2771 SmallVector<BasicBlock *, 4> ExitBlocks;
2772 L.getUniqueExitBlocks(ExitBlocks);
2773
2774 // We cannot unswitch if exit blocks contain a cleanuppad instruction as we
2775 // don't know how to split those exit blocks.
2776 // FIXME: We should teach SplitBlock to handle this and remove this
2777 // restriction.
2778 for (auto *ExitBB : ExitBlocks) {
18
Assuming '__begin1' is equal to '__end1'
2779 if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI())) {
2780 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Cannot unswitch because of cleanuppad in exit block\n"
; } } while (false)
2781 dbgs() << "Cannot unswitch because of cleanuppad in exit block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Cannot unswitch because of cleanuppad in exit block\n"
; } } while (false)
;
2782 return false;
2783 }
2784 }
2785
2786 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Considering " <<
UnswitchCandidates.size() << " non-trivial loop invariant conditions for unswitching.\n"
; } } while (false)
19
Assuming 'DebugFlag' is false
20
Loop condition is false. Exiting loop
2787 dbgs() << "Considering " << UnswitchCandidates.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Considering " <<
UnswitchCandidates.size() << " non-trivial loop invariant conditions for unswitching.\n"
; } } while (false)
2788 << " non-trivial loop invariant conditions for unswitching.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Considering " <<
UnswitchCandidates.size() << " non-trivial loop invariant conditions for unswitching.\n"
; } } while (false)
;
2789
2790 // Given that unswitching these terminators will require duplicating parts of
2791 // the loop, so we need to be able to model that cost. Compute the ephemeral
2792 // values and set up a data structure to hold per-BB costs. We cache each
2793 // block's cost so that we don't recompute this when considering different
2794 // subsets of the loop for duplication during unswitching.
2795 SmallPtrSet<const Value *, 4> EphValues;
2796 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
2797 SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;
2798
2799 // Compute the cost of each block, as well as the total loop cost. Also, bail
2800 // out if we see instructions which are incompatible with loop unswitching
2801 // (convergent, noduplicate, or cross-basic-block tokens).
2802 // FIXME: We might be able to safely handle some of these in non-duplicated
2803 // regions.
2804 TargetTransformInfo::TargetCostKind CostKind =
2805 L.getHeader()->getParent()->hasMinSize()
21
Assuming the condition is false
22
'?' condition is false
2806 ? TargetTransformInfo::TCK_CodeSize
2807 : TargetTransformInfo::TCK_SizeAndLatency;
2808 InstructionCost LoopCost = 0;
2809 for (auto *BB : L.blocks()) {
23
Assuming '__begin1' is equal to '__end1'
2810 InstructionCost Cost = 0;
2811 for (auto &I : *BB) {
2812 if (EphValues.count(&I))
2813 continue;
2814
2815 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
2816 return false;
2817 if (auto *CB = dyn_cast<CallBase>(&I))
2818 if (CB->isConvergent() || CB->cannotDuplicate())
2819 return false;
2820
2821 Cost += TTI.getUserCost(&I, CostKind);
2822 }
2823 assert(Cost >= 0 && "Must not have negative costs!")(static_cast <bool> (Cost >= 0 && "Must not have negative costs!"
) ? void (0) : __assert_fail ("Cost >= 0 && \"Must not have negative costs!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2823, __extension__ __PRETTY_FUNCTION__))
;
2824 LoopCost += Cost;
2825 assert(LoopCost >= 0 && "Must not have negative loop costs!")(static_cast <bool> (LoopCost >= 0 && "Must not have negative loop costs!"
) ? void (0) : __assert_fail ("LoopCost >= 0 && \"Must not have negative loop costs!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2825, __extension__ __PRETTY_FUNCTION__))
;
2826 BBCostMap[BB] = Cost;
2827 }
2828 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Total loop cost: "
<< LoopCost << "\n"; } } while (false)
;
24
Assuming 'DebugFlag' is false
25
Loop condition is false. Exiting loop
2829
2830 // Now we find the best candidate by searching for the one with the following
2831 // properties in order:
2832 //
2833 // 1) An unswitching cost below the threshold
2834 // 2) The smallest number of duplicated unswitch candidates (to avoid
2835 // creating redundant subsequent unswitching)
2836 // 3) The smallest cost after unswitching.
2837 //
2838 // We prioritize reducing fanout of unswitch candidates provided the cost
2839 // remains below the threshold because this has a multiplicative effect.
2840 //
2841 // This requires memoizing each dominator subtree to avoid redundant work.
2842 //
2843 // FIXME: Need to actually do the number of candidates part above.
2844 SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;
2845 // Given a terminator which might be unswitched, computes the non-duplicated
2846 // cost for that terminator.
2847 auto ComputeUnswitchedCost = [&](Instruction &TI,
2848 bool FullUnswitch) -> InstructionCost {
2849 BasicBlock &BB = *TI.getParent();
2850 SmallPtrSet<BasicBlock *, 4> Visited;
2851
2852 InstructionCost Cost = 0;
2853 for (BasicBlock *SuccBB : successors(&BB)) {
2854 // Don't count successors more than once.
2855 if (!Visited.insert(SuccBB).second)
30
Assuming field 'second' is true
31
Taking false branch
2856 continue;
2857
2858 // If this is a partial unswitch candidate, then it must be a conditional
2859 // branch with a condition of either `or`, `and`, their corresponding
2860 // select forms or partially invariant instructions. In that case, one of
2861 // the successors is necessarily duplicated, so don't even try to remove
2862 // its cost.
2863 if (!FullUnswitch
31.1
'FullUnswitch' is false
31.1
'FullUnswitch' is false
31.1
'FullUnswitch' is false
31.1
'FullUnswitch' is false
31.1
'FullUnswitch' is false
) {
32
Taking true branch
2864 auto &BI = cast<BranchInst>(TI);
33
'TI' is a 'BranchInst'
2865 if (match(BI.getCondition(), m_LogicalAnd())) {
34
Calling 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 28>>'
41
Returning from 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 28>>'
42
Taking false branch
2866 if (SuccBB == BI.getSuccessor(1))
2867 continue;
2868 } else if (match(BI.getCondition(), m_LogicalOr())) {
43
Calling 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 29>>'
50
Returning from 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 29>>'
51
Taking false branch
2869 if (SuccBB == BI.getSuccessor(0))
2870 continue;
2871 } else if (!PartialIVInfo.InstToDuplicate.empty()) {
52
Calling 'SmallVectorBase::empty'
55
Returning from 'SmallVectorBase::empty'
56
Taking true branch
2872 if (PartialIVInfo.KnownValue->isOneValue() &&
57
Called C++ object pointer is null
2873 SuccBB == BI.getSuccessor(1))
2874 continue;
2875 else if (!PartialIVInfo.KnownValue->isOneValue() &&
2876 SuccBB == BI.getSuccessor(0))
2877 continue;
2878 }
2879 }
2880
2881 // This successor's domtree will not need to be duplicated after
2882 // unswitching if the edge to the successor dominates it (and thus the
2883 // entire tree). This essentially means there is no other path into this
2884 // subtree and so it will end up live in only one clone of the loop.
2885 if (SuccBB->getUniquePredecessor() ||
2886 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2887 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
2888 })) {
2889 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
2890 assert(Cost <= LoopCost &&(static_cast <bool> (Cost <= LoopCost && "Non-duplicated cost should never exceed total loop cost!"
) ? void (0) : __assert_fail ("Cost <= LoopCost && \"Non-duplicated cost should never exceed total loop cost!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2891, __extension__ __PRETTY_FUNCTION__))
2891 "Non-duplicated cost should never exceed total loop cost!")(static_cast <bool> (Cost <= LoopCost && "Non-duplicated cost should never exceed total loop cost!"
) ? void (0) : __assert_fail ("Cost <= LoopCost && \"Non-duplicated cost should never exceed total loop cost!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2891, __extension__ __PRETTY_FUNCTION__))
;
2892 }
2893 }
2894
2895 // Now scale the cost by the number of unique successors minus one. We
2896 // subtract one because there is already at least one copy of the entire
2897 // loop. This is computing the new cost of unswitching a condition.
2898 // Note that guards always have 2 unique successors that are implicit and
2899 // will be materialized if we decide to unswitch it.
2900 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
2901 assert(SuccessorsCount > 1 &&(static_cast <bool> (SuccessorsCount > 1 && "Cannot unswitch a condition without multiple distinct successors!"
) ? void (0) : __assert_fail ("SuccessorsCount > 1 && \"Cannot unswitch a condition without multiple distinct successors!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2902, __extension__ __PRETTY_FUNCTION__))
2902 "Cannot unswitch a condition without multiple distinct successors!")(static_cast <bool> (SuccessorsCount > 1 && "Cannot unswitch a condition without multiple distinct successors!"
) ? void (0) : __assert_fail ("SuccessorsCount > 1 && \"Cannot unswitch a condition without multiple distinct successors!\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2902, __extension__ __PRETTY_FUNCTION__))
;
2903 return (LoopCost - Cost) * (SuccessorsCount - 1);
2904 };
2905 Instruction *BestUnswitchTI = nullptr;
2906 InstructionCost BestUnswitchCost = 0;
2907 ArrayRef<Value *> BestUnswitchInvariants;
2908 for (auto &TerminatorAndInvariants : UnswitchCandidates) {
26
Assuming '__begin1' is not equal to '__end1'
2909 Instruction &TI = *TerminatorAndInvariants.first;
2910 ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
2911 BranchInst *BI = dyn_cast<BranchInst>(&TI);
27
Assuming the object is a 'BranchInst'
2912 InstructionCost CandidateCost = ComputeUnswitchedCost(
29
Calling 'operator()'
2913 TI, /*FullUnswitch*/ !BI
27.1
'BI' is non-null
27.1
'BI' is non-null
27.1
'BI' is non-null
27.1
'BI' is non-null
27.1
'BI' is non-null
|| (Invariants.size() == 1 &&
28
Assuming the condition is false
2914 Invariants[0] == BI->getCondition()));
2915 // Calculate cost multiplier which is a tool to limit potentially
2916 // exponential behavior of loop-unswitch.
2917 if (EnableUnswitchCostMultiplier) {
2918 int CostMultiplier =
2919 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
2920 assert((static_cast <bool> ((CostMultiplier > 0 && CostMultiplier
<= UnswitchThreshold) && "cost multiplier needs to be in the range of 1..UnswitchThreshold"
) ? void (0) : __assert_fail ("(CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && \"cost multiplier needs to be in the range of 1..UnswitchThreshold\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2922, __extension__ __PRETTY_FUNCTION__))
2921 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&(static_cast <bool> ((CostMultiplier > 0 && CostMultiplier
<= UnswitchThreshold) && "cost multiplier needs to be in the range of 1..UnswitchThreshold"
) ? void (0) : __assert_fail ("(CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && \"cost multiplier needs to be in the range of 1..UnswitchThreshold\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2922, __extension__ __PRETTY_FUNCTION__))
2922 "cost multiplier needs to be in the range of 1..UnswitchThreshold")(static_cast <bool> ((CostMultiplier > 0 && CostMultiplier
<= UnswitchThreshold) && "cost multiplier needs to be in the range of 1..UnswitchThreshold"
) ? void (0) : __assert_fail ("(CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && \"cost multiplier needs to be in the range of 1..UnswitchThreshold\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2922, __extension__ __PRETTY_FUNCTION__))
;
2923 CandidateCost *= CostMultiplier;
2924 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCostdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed cost of "
<< CandidateCost << " (multiplier: " << CostMultiplier
<< ")" << " for unswitch candidate: " << TI
<< "\n"; } } while (false)
2925 << " (multiplier: " << CostMultiplier << ")"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed cost of "
<< CandidateCost << " (multiplier: " << CostMultiplier
<< ")" << " for unswitch candidate: " << TI
<< "\n"; } } while (false)
2926 << " for unswitch candidate: " << TI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed cost of "
<< CandidateCost << " (multiplier: " << CostMultiplier
<< ")" << " for unswitch candidate: " << TI
<< "\n"; } } while (false)
;
2927 } else {
2928 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCostdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed cost of "
<< CandidateCost << " for unswitch candidate: " <<
TI << "\n"; } } while (false)
2929 << " for unswitch candidate: " << TI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed cost of "
<< CandidateCost << " for unswitch candidate: " <<
TI << "\n"; } } while (false)
;
2930 }
2931
2932 if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
2933 BestUnswitchTI = &TI;
2934 BestUnswitchCost = CandidateCost;
2935 BestUnswitchInvariants = Invariants;
2936 }
2937 }
2938 assert(BestUnswitchTI && "Failed to find loop unswitch candidate")(static_cast <bool> (BestUnswitchTI && "Failed to find loop unswitch candidate"
) ? void (0) : __assert_fail ("BestUnswitchTI && \"Failed to find loop unswitch candidate\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2938, __extension__ __PRETTY_FUNCTION__))
;
2939
2940 if (BestUnswitchCost >= UnswitchThreshold) {
2941 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Cannot unswitch, lowest cost found: "
<< BestUnswitchCost << "\n"; } } while (false)
2942 << BestUnswitchCost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Cannot unswitch, lowest cost found: "
<< BestUnswitchCost << "\n"; } } while (false)
;
2943 return false;
2944 }
2945
2946 if (BestUnswitchTI != PartialIVCondBranch)
2947 PartialIVInfo.InstToDuplicate.clear();
2948
2949 // If the best candidate is a guard, turn it into a branch.
2950 if (isGuard(BestUnswitchTI))
2951 BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
2952 ExitBlocks, DT, LI, MSSAU);
2953
2954 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Unswitching non-trivial (cost = "
<< BestUnswitchCost << ") terminator: " <<
*BestUnswitchTI << "\n"; } } while (false)
2955 << BestUnswitchCost << ") terminator: " << *BestUnswitchTIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Unswitching non-trivial (cost = "
<< BestUnswitchCost << ") terminator: " <<
*BestUnswitchTI << "\n"; } } while (false)
2956 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Unswitching non-trivial (cost = "
<< BestUnswitchCost << ") terminator: " <<
*BestUnswitchTI << "\n"; } } while (false)
;
2957 unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
2958 ExitBlocks, PartialIVInfo, DT, LI, AC,
2959 UnswitchCB, SE, MSSAU);
2960 return true;
2961}
2962
2963/// Unswitch control flow predicated on loop invariant conditions.
2964///
2965/// This first hoists all branches or switches which are trivial (IE, do not
2966/// require duplicating any part of the loop) out of the loop body. It then
2967/// looks at other loop invariant control flows and tries to unswitch those as
2968/// well by cloning the loop if the result is small enough.
2969///
2970/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
2971/// also updated based on the unswitch. The `MSSA` analysis is also updated if
2972/// valid (i.e. its use is enabled).
2973///
2974/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
2975/// true, we will attempt to do non-trivial unswitching as well as trivial
2976/// unswitching.
2977///
2978/// The `UnswitchCB` callback provided will be run after unswitching is
2979/// complete, with the first parameter set to `true` if the provided loop
2980/// remains a loop, and a list of new sibling loops created.
2981///
2982/// If `SE` is non-null, we will update that analysis based on the unswitching
2983/// done.
2984static bool
2985unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2986 AAResults &AA, TargetTransformInfo &TTI, bool NonTrivial,
2987 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2988 ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
2989 assert(L.isRecursivelyLCSSAForm(DT, LI) &&(static_cast <bool> (L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.") ? void (0
) : __assert_fail ("L.isRecursivelyLCSSAForm(DT, LI) && \"Loops must be in LCSSA form before unswitching.\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2990, __extension__ __PRETTY_FUNCTION__))
2990 "Loops must be in LCSSA form before unswitching.")(static_cast <bool> (L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.") ? void (0
) : __assert_fail ("L.isRecursivelyLCSSAForm(DT, LI) && \"Loops must be in LCSSA form before unswitching.\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2990, __extension__ __PRETTY_FUNCTION__))
;
2991
2992 // Must be in loop simplified form: we need a preheader and dedicated exits.
2993 if (!L.isLoopSimplifyForm())
2994 return false;
2995
2996 // Try trivial unswitch first before loop over other basic blocks in the loop.
2997 if (unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
2998 // If we unswitched successfully we will want to clean up the loop before
2999 // processing it further so just mark it as unswitched and return.
3000 UnswitchCB(/*CurrentLoopValid*/ true, false, {});
3001 return true;
3002 }
3003
3004 // Check whether we should continue with non-trivial conditions.
3005 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3006 // unswitching for testing and debugging.
3007 // NonTrivial: Parameter that enables non-trivial unswitching for this
3008 // invocation of the transform. But this should be allowed only
3009 // for targets without branch divergence.
3010 //
3011 // FIXME: If divergence analysis becomes available to a loop
3012 // transform, we should allow unswitching for non-trivial uniform
3013 // branches even on targets that have divergence.
3014 // https://bugs.llvm.org/show_bug.cgi?id=48819
3015 bool ContinueWithNonTrivial =
3016 EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
3017 if (!ContinueWithNonTrivial)
3018 return false;
3019
3020 // Skip non-trivial unswitching for optsize functions.
3021 if (L.getHeader()->getParent()->hasOptSize())
3022 return false;
3023
3024 // Skip non-trivial unswitching for loops that cannot be cloned.
3025 if (!L.isSafeToClone())
3026 return false;
3027
3028 // For non-trivial unswitching, because it often creates new loops, we rely on
3029 // the pass manager to iterate on the loops rather than trying to immediately
3030 // reach a fixed point. There is no substantial advantage to iterating
3031 // internally, and if any of the new loops are simplified enough to contain
3032 // trivial unswitching we want to prefer those.
3033
3034 // Try to unswitch the best invariant condition. We prefer this full unswitch to
3035 // a partial unswitch when possible below the threshold.
3036 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU))
3037 return true;
3038
3039 // No other opportunities to unswitch.
3040 return false;
3041}
3042
3043PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
3044 LoopStandardAnalysisResults &AR,
3045 LPMUpdater &U) {
3046 Function &F = *L.getHeader()->getParent();
3047 (void)F;
3048
3049 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << Ldo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Unswitching loop in "
<< F.getName() << ": " << L << "\n";
} } while (false)
3050 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Unswitching loop in "
<< F.getName() << ": " << L << "\n";
} } while (false)
;
3051
3052 // Save the current loop name in a variable so that we can report it even
3053 // after it has been deleted.
3054 std::string LoopName = std::string(L.getName());
3055
3056 auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
3057 bool PartiallyInvariant,
3058 ArrayRef<Loop *> NewLoops) {
3059 // If we did a non-trivial unswitch, we have added new (cloned) loops.
3060 if (!NewLoops.empty())
3061 U.addSiblingLoops(NewLoops);
3062
3063 // If the current loop remains valid, we should revisit it to catch any
3064 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
3065 if (CurrentLoopValid) {
3066 if (PartiallyInvariant) {
3067 // Mark the new loop as partially unswitched, to avoid unswitching on
3068 // the same condition again.
3069 auto &Context = L.getHeader()->getContext();
3070 MDNode *DisableUnswitchMD = MDNode::get(
3071 Context,
3072 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
3073 MDNode *NewLoopID = makePostTransformationMetadata(
3074 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
3075 {DisableUnswitchMD});
3076 L.setLoopID(NewLoopID);
3077 } else
3078 U.revisitCurrentLoop();
3079 } else
3080 U.markLoopAsDeleted(L, LoopName);
3081 };
3082
3083 Optional<MemorySSAUpdater> MSSAU;
3084 if (AR.MSSA) {
3085 MSSAU = MemorySSAUpdater(AR.MSSA);
3086 if (VerifyMemorySSA)
3087 AR.MSSA->verifyMemorySSA();
3088 }
3089 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, NonTrivial,
3090 UnswitchCB, &AR.SE,
3091 MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
3092 return PreservedAnalyses::all();
3093
3094 if (AR.MSSA && VerifyMemorySSA)
3095 AR.MSSA->verifyMemorySSA();
3096
3097 // Historically this pass has had issues with the dominator tree so verify it
3098 // in asserts builds.
3099 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (AR.DT.verify(DominatorTree::VerificationLevel
::Fast)) ? void (0) : __assert_fail ("AR.DT.verify(DominatorTree::VerificationLevel::Fast)"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 3099, __extension__ __PRETTY_FUNCTION__))
;
3100
3101 auto PA = getLoopPassPreservedAnalyses();
3102 if (AR.MSSA)
3103 PA.preserve<MemorySSAAnalysis>();
3104 return PA;
3105}
3106
3107namespace {
3108
3109class SimpleLoopUnswitchLegacyPass : public LoopPass {
3110 bool NonTrivial;
3111
3112public:
3113 static char ID; // Pass ID, replacement for typeid
3114
3115 explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
3116 : LoopPass(ID), NonTrivial(NonTrivial) {
3117 initializeSimpleLoopUnswitchLegacyPassPass(
3118 *PassRegistry::getPassRegistry());
3119 }
3120
3121 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
3122
3123 void getAnalysisUsage(AnalysisUsage &AU) const override {
3124 AU.addRequired<AssumptionCacheTracker>();
3125 AU.addRequired<TargetTransformInfoWrapperPass>();
3126 if (EnableMSSALoopDependency) {
3127 AU.addRequired<MemorySSAWrapperPass>();
3128 AU.addPreserved<MemorySSAWrapperPass>();
3129 }
3130 getLoopAnalysisUsage(AU);
3131 }
3132};
3133
3134} // end anonymous namespace
3135
3136bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
3137 if (skipLoop(L))
3138 return false;
3139
3140 Function &F = *L->getHeader()->getParent();
3141
3142 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *Ldo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Unswitching loop in "
<< F.getName() << ": " << *L << "\n"
; } } while (false)
3143 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Unswitching loop in "
<< F.getName() << ": " << *L << "\n"
; } } while (false)
;
3144
3145 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3146 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3147 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3148 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3149 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3150 MemorySSA *MSSA = nullptr;
3151 Optional<MemorySSAUpdater> MSSAU;
3152 if (EnableMSSALoopDependency) {
3153 MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3154 MSSAU = MemorySSAUpdater(MSSA);
3155 }
3156
3157 auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3158 auto *SE = SEWP ? &SEWP->getSE() : nullptr;
3159
3160 auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
3161 ArrayRef<Loop *> NewLoops) {
3162 // If we did a non-trivial unswitch, we have added new (cloned) loops.
3163 for (auto *NewL : NewLoops)
3164 LPM.addLoop(*NewL);
3165
3166 // If the current loop remains valid, re-add it to the queue. This is
3167 // a little wasteful as we'll finish processing the current loop as well,
3168 // but it is the best we can do in the old PM.
3169 if (CurrentLoopValid) {
3170 // If the current loop has been unswitched using a partially invariant
3171 // condition, we should not re-add the current loop to avoid unswitching
3172 // on the same condition again.
3173 if (!PartiallyInvariant)
3174 LPM.addLoop(*L);
3175 } else
3176 LPM.markLoopAsDeleted(*L);
3177 };
3178
3179 if (MSSA && VerifyMemorySSA)
3180 MSSA->verifyMemorySSA();
3181
3182 bool Changed =
3183 unswitchLoop(*L, DT, LI, AC, AA, TTI, NonTrivial, UnswitchCB, SE,
3184 MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
3185
3186 if (MSSA && VerifyMemorySSA)
3187 MSSA->verifyMemorySSA();
3188
3189 // Historically this pass has had issues with the dominator tree so verify it
3190 // in asserts builds.
3191 assert(DT.verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (DT.verify(DominatorTree::VerificationLevel
::Fast)) ? void (0) : __assert_fail ("DT.verify(DominatorTree::VerificationLevel::Fast)"
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 3191, __extension__ __PRETTY_FUNCTION__))
;
3192
3193 return Changed;
3194}
3195
3196char SimpleLoopUnswitchLegacyPass::ID = 0;
3197INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",static void *initializeSimpleLoopUnswitchLegacyPassPassOnce(PassRegistry
&Registry) {
3198 "Simple unswitch loops", false, false)static void *initializeSimpleLoopUnswitchLegacyPassPassOnce(PassRegistry
&Registry) {
3199INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry);
3200INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
3201INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
3202INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
3203INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)initializeMemorySSAWrapperPassPass(Registry);
3204INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry);
3205INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",PassInfo *PI = new PassInfo( "Simple unswitch loops", "simple-loop-unswitch"
, &SimpleLoopUnswitchLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<SimpleLoopUnswitchLegacyPass>), false,
false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializeSimpleLoopUnswitchLegacyPassPassFlag
; void llvm::initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeSimpleLoopUnswitchLegacyPassPassFlag
, initializeSimpleLoopUnswitchLegacyPassPassOnce, std::ref(Registry
)); }
3206 "Simple unswitch loops", false, false)PassInfo *PI = new PassInfo( "Simple unswitch loops", "simple-loop-unswitch"
, &SimpleLoopUnswitchLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<SimpleLoopUnswitchLegacyPass>), false,
false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializeSimpleLoopUnswitchLegacyPassPassFlag
; void llvm::initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeSimpleLoopUnswitchLegacyPassPassFlag
, initializeSimpleLoopUnswitchLegacyPassPassOnce, std::ref(Registry
)); }
3207
3208Pass *llvm::createSimpleLoopUnswitchLegacyPass(bool NonTrivial) {
3209 return new SimpleLoopUnswitchLegacyPass(NonTrivial);
3210}

/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include/llvm/Transforms/Utils/LoopUtils.h

1//===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines some loop transformation utilities.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
14#define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
15
16#include "llvm/ADT/StringRef.h"
17#include "llvm/Analysis/IVDescriptors.h"
18#include "llvm/Analysis/TargetTransformInfo.h"
19#include "llvm/Transforms/Utils/ValueMapper.h"
20
21namespace llvm {
22
23template <typename T> class DomTreeNodeBase;
24using DomTreeNode = DomTreeNodeBase<BasicBlock>;
25class AAResults;
26class AliasSet;
27class AliasSetTracker;
28class BasicBlock;
29class BlockFrequencyInfo;
30class ICFLoopSafetyInfo;
31class IRBuilderBase;
32class Loop;
33class LoopInfo;
34class MemoryAccess;
35class MemorySSA;
36class MemorySSAUpdater;
37class OptimizationRemarkEmitter;
38class PredIteratorCache;
39class ScalarEvolution;
40class ScalarEvolutionExpander;
41class SCEV;
42class SCEVExpander;
43class TargetLibraryInfo;
44class LPPassManager;
45class Instruction;
46struct RuntimeCheckingPtrGroup;
47typedef std::pair<const RuntimeCheckingPtrGroup *,
48 const RuntimeCheckingPtrGroup *>
49 RuntimePointerCheck;
50
51template <typename T> class Optional;
52template <typename T, unsigned N> class SmallSetVector;
53template <typename T, unsigned N> class SmallVector;
54template <typename T> class SmallVectorImpl;
55template <typename T, unsigned N> class SmallPriorityWorklist;
56
57BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
58 MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
59
60/// Ensure that all exit blocks of the loop are dedicated exits.
61///
62/// For any loop exit block with non-loop predecessors, we split the loop
63/// predecessors to use a dedicated loop exit block. We update the dominator
64/// tree and loop info if provided, and will preserve LCSSA if requested.
65bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
66 MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
67
68/// Ensures LCSSA form for every instruction from the Worklist in the scope of
69/// innermost containing loop.
70///
71/// For the given instruction which have uses outside of the loop, an LCSSA PHI
72/// node is inserted and the uses outside the loop are rewritten to use this
73/// node.
74///
75/// LoopInfo and DominatorTree are required and, since the routine makes no
76/// changes to CFG, preserved.
77///
78/// Returns true if any modifications are made.
79///
80/// This function may introduce unused PHI nodes. If \p PHIsToRemove is not
81/// nullptr, those are added to it (before removing, the caller has to check if
82/// they still do not have any uses). Otherwise the PHIs are directly removed.
83bool formLCSSAForInstructions(
84 SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT,
85 const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder,
86 SmallVectorImpl<PHINode *> *PHIsToRemove = nullptr);
87
88/// Put loop into LCSSA form.
89///
90/// Looks at all instructions in the loop which have uses outside of the
91/// current loop. For each, an LCSSA PHI node is inserted and the uses outside
92/// the loop are rewritten to use this node. Sub-loops must be in LCSSA form
93/// already.
94///
95/// LoopInfo and DominatorTree are required and preserved.
96///
97/// If ScalarEvolution is passed in, it will be preserved.
98///
99/// Returns true if any modifications are made to the loop.
100bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
101 ScalarEvolution *SE);
102
103/// Put a loop nest into LCSSA form.
104///
105/// This recursively forms LCSSA for a loop nest.
106///
107/// LoopInfo and DominatorTree are required and preserved.
108///
109/// If ScalarEvolution is passed in, it will be preserved.
110///
111/// Returns true if any modifications are made to the loop.
112bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
113 ScalarEvolution *SE);
114
115/// Flags controlling how much is checked when sinking or hoisting
116/// instructions. The number of memory access in the loop (and whether there
117/// are too many) is determined in the constructors when using MemorySSA.
118class SinkAndHoistLICMFlags {
119public:
120 // Explicitly set limits.
121 SinkAndHoistLICMFlags(unsigned LicmMssaOptCap,
122 unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
123 Loop *L = nullptr, MemorySSA *MSSA = nullptr);
124 // Use default limits.
125 SinkAndHoistLICMFlags(bool IsSink, Loop *L = nullptr,
126 MemorySSA *MSSA = nullptr);
127
128 void setIsSink(bool B) { IsSink = B; }
129 bool getIsSink() { return IsSink; }
130 bool tooManyMemoryAccesses() { return NoOfMemAccTooLarge; }
131 bool tooManyClobberingCalls() { return LicmMssaOptCounter >= LicmMssaOptCap; }
132 void incrementClobberingCalls() { ++LicmMssaOptCounter; }
133
134protected:
135 bool NoOfMemAccTooLarge = false;
136 unsigned LicmMssaOptCounter = 0;
137 unsigned LicmMssaOptCap;
138 unsigned LicmMssaNoAccForPromotionCap;
139 bool IsSink;
140};
141
142/// Walk the specified region of the CFG (defined by all blocks
143/// dominated by the specified block, and that are in the current loop) in
144/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
145/// uses before definitions, allowing us to sink a loop body in one pass without
146/// iteration. Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
147/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
148/// instructions of the loop and loop safety information as
149/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
150bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
151 BlockFrequencyInfo *, TargetLibraryInfo *,
152 TargetTransformInfo *, Loop *, AliasSetTracker *,
153 MemorySSAUpdater *, ICFLoopSafetyInfo *,
154 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
155
156/// Walk the specified region of the CFG (defined by all blocks
157/// dominated by the specified block, and that are in the current loop) in depth
158/// first order w.r.t the DominatorTree. This allows us to visit definitions
159/// before uses, allowing us to hoist a loop body in one pass without iteration.
160/// Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
161/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
162/// instructions of the loop and loop safety information as arguments.
163/// Diagnostics is emitted via \p ORE. It returns changed status.
164bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
165 BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
166 AliasSetTracker *, MemorySSAUpdater *, ScalarEvolution *,
167 ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
168 OptimizationRemarkEmitter *);
169
/// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is in fact dead.
/// The function requires a number of prerequisites to be present:
173/// - The loop needs to be in LCSSA form
174/// - The loop needs to have a Preheader
175/// - A unique dedicated exit block must exist
176///
177/// This also updates the relevant analysis information in \p DT, \p SE, \p LI
178/// and \p MSSA if pointers to those are provided.
179/// It also updates the loop PM if an updater struct is provided.
180
181void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
182 LoopInfo *LI, MemorySSA *MSSA = nullptr);
183
184/// Remove the backedge of the specified loop. Handles loop nests and general
185/// loop structures subject to the precondition that the loop has no parent
186/// loop and has a single latch block. Preserves all listed analyses.
187void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
188 LoopInfo &LI, MemorySSA *MSSA);
189
190/// Try to promote memory values to scalars by sinking stores out of
191/// the loop and moving loads to before the loop. We do this by looping over
192/// the stores in the loop, looking for stores to Must pointers which are
193/// loop invariant. It takes a set of must-alias values, Loop exit blocks
194/// vector, loop exit blocks insertion point vector, PredIteratorCache,
195/// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
196/// of the loop and loop safety information as arguments.
197/// Diagnostics is emitted via \p ORE. It returns changed status.
198bool promoteLoopAccessesToScalars(
199 const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
200 SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
201 PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
202 Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
203 OptimizationRemarkEmitter *);
204
205/// Does a BFS from a given node to all of its children inside a given loop.
206/// The returned vector of nodes includes the starting point.
207SmallVector<DomTreeNode *, 16> collectChildrenInLoop(DomTreeNode *N,
208 const Loop *CurLoop);
209
210/// Returns the instructions that use values defined in the loop.
211SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L);
212
213/// Find a combination of metadata ("llvm.loop.vectorize.width" and
214/// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct a
215/// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found
216/// then None is returned.
217Optional<ElementCount>
218getOptionalElementCountLoopAttribute(const Loop *TheLoop);
219
220/// Create a new loop identifier for a loop created from a loop transformation.
221///
222/// @param OrigLoopID The loop ID of the loop before the transformation.
223/// @param FollowupAttrs List of attribute names that contain attributes to be
224/// added to the new loop ID.
225/// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited
226/// from the original loop. The following values
227/// are considered:
228/// nullptr : Inherit all attributes from @p OrigLoopID.
229/// "" : Do not inherit any attribute from @p OrigLoopID; only use
230/// those specified by a followup attribute.
231/// "<prefix>": Inherit all attributes except those which start with
232/// <prefix>; commonly used to remove metadata for the
233/// applied transformation.
234/// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return
235/// None.
236///
237/// @return The loop ID for the after-transformation loop. The following values
238/// can be returned:
239/// None : No followup attribute was found; it is up to the
240/// transformation to choose attributes that make sense.
241/// @p OrigLoopID: The original identifier can be reused.
242/// nullptr : The new loop has no attributes.
243/// MDNode* : A new unique loop identifier.
244Optional<MDNode *>
245makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef<StringRef> FollowupAttrs,
246 const char *InheritOptionsAttrsPrefix = "",
247 bool AlwaysNew = false);
248
249/// Look for the loop attribute that disables all transformation heuristic.
250bool hasDisableAllTransformsHint(const Loop *L);
251
252/// Look for the loop attribute that disables the LICM transformation heuristics.
253bool hasDisableLICMTransformsHint(const Loop *L);
254
/// The mode sets how eagerly a transformation should be applied.
///
/// Encoded as a small bitset: TM_Force combines with TM_Enable / TM_Disable
/// to express user-directed ("forced") variants of either decision.
enum TransformationMode {
  /// The pass can use heuristics to determine whether a transformation should
  /// be applied.
  TM_Unspecified,

  /// The transformation should be applied without considering a cost model.
  TM_Enable,

  /// The transformation should not be applied.
  TM_Disable,

  /// Force is a flag and should not be used alone.
  TM_Force = 0x04,

  /// The transformation was directed by the user, e.g. by a #pragma in
  /// the source code. If the transformation could not be applied, a
  /// warning should be emitted.
  TM_ForcedByUser = TM_Enable | TM_Force,

  /// The transformation must not be applied. For instance, `#pragma clang loop
  /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike
  /// general loop metadata, it must not be dropped. Most passes should not
  /// behave differently under TM_Disable and TM_SuppressedByUser.
  TM_SuppressedByUser = TM_Disable | TM_Force
};
281
282/// @{
283/// Get the mode for LLVM's supported loop transformations.
284TransformationMode hasUnrollTransformation(const Loop *L);
285TransformationMode hasUnrollAndJamTransformation(const Loop *L);
286TransformationMode hasVectorizeTransformation(const Loop *L);
287TransformationMode hasDistributeTransformation(const Loop *L);
288TransformationMode hasLICMVersioningTransformation(const Loop *L);
289/// @}
290
291/// Set input string into loop metadata by keeping other values intact.
292/// If the string is already in loop metadata update value if it is
293/// different.
294void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
295 unsigned V = 0);
296
297/// Returns a loop's estimated trip count based on branch weight metadata.
298/// In addition if \p EstimatedLoopInvocationWeight is not null it is
299/// initialized with weight of loop's latch leading to the exit.
300/// Returns 0 when the count is estimated to be 0, or None when a meaningful
301/// estimate can not be made.
302Optional<unsigned>
303getLoopEstimatedTripCount(Loop *L,
304 unsigned *EstimatedLoopInvocationWeight = nullptr);
305
306/// Set a loop's branch weight metadata to reflect that loop has \p
307/// EstimatedTripCount iterations and \p EstimatedLoopInvocationWeight exits
308/// through latch. Returns true if metadata is successfully updated, false
309/// otherwise. Note that loop must have a latch block which controls loop exit
310/// in order to succeed.
311bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
312 unsigned EstimatedLoopInvocationWeight);
313
314/// Check inner loop (L) backedge count is known to be invariant on all
315/// iterations of its outer loop. If the loop has no parent, this is trivially
316/// true.
317bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE);
318
319/// Helper to consistently add the set of standard passes to a loop pass's \c
320/// AnalysisUsage.
321///
322/// All loop passes should call this as part of implementing their \c
323/// getAnalysisUsage.
324void getLoopAnalysisUsage(AnalysisUsage &AU);
325
326/// Returns true if is legal to hoist or sink this instruction disregarding the
327/// possible introduction of faults. Reasoning about potential faulting
328/// instructions is the responsibility of the caller since it is challenging to
329/// do efficiently from within this routine.
330/// \p TargetExecutesOncePerLoop is true only when it is guaranteed that the
331/// target executes at most once per execution of the loop body. This is used
332/// to assess the legality of duplicating atomic loads. Generally, this is
333/// true when moving out of loop and not true when moving into loops.
334/// If \p ORE is set use it to emit optimization remarks.
335bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
336 Loop *CurLoop, AliasSetTracker *CurAST,
337 MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
338 SinkAndHoistLICMFlags *LICMFlags = nullptr,
339 OptimizationRemarkEmitter *ORE = nullptr);
340
341/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
342/// The Builder's fast-math-flags must be set to propagate the expected values.
343Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
344 Value *Right);
345
346/// Generates an ordered vector reduction using extracts to reduce the value.
347Value *getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
348 unsigned Op, RecurKind MinMaxKind = RecurKind::None,
349 ArrayRef<Value *> RedOps = None);
350
351/// Generates a vector reduction using shufflevectors to reduce the value.
352/// Fast-math-flags are propagated using the IRBuilder's setting.
353Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
354 RecurKind MinMaxKind = RecurKind::None,
355 ArrayRef<Value *> RedOps = None);
356
357/// Create a target reduction of the given vector. The reduction operation
358/// is described by the \p Opcode parameter. min/max reductions require
359/// additional information supplied in \p RdxKind.
360/// The target is queried to determine if intrinsics or shuffle sequences are
361/// required to implement the reduction.
362/// Fast-math-flags are propagated using the IRBuilder's setting.
363Value *createSimpleTargetReduction(IRBuilderBase &B,
364 const TargetTransformInfo *TTI, Value *Src,
365 RecurKind RdxKind,
366 ArrayRef<Value *> RedOps = None);
367
368/// Create a generic target reduction using a recurrence descriptor \p Desc
369/// The target is queried to determine if intrinsics or shuffle sequences are
370/// required to implement the reduction.
371/// Fast-math-flags are propagated using the RecurrenceDescriptor.
372Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
373 const RecurrenceDescriptor &Desc, Value *Src);
374
375/// Create an ordered reduction intrinsic using the given recurrence
376/// descriptor \p Desc.
377Value *createOrderedReduction(IRBuilderBase &B,
378 const RecurrenceDescriptor &Desc, Value *Src,
379 Value *Start);
380
381/// Get the intersection (logical and) of all of the potential IR flags
382/// of each scalar operation (VL) that will be converted into a vector (I).
383/// If OpValue is non-null, we only consider operations similar to OpValue
384/// when intersecting.
385/// Flag set: NSW, NUW, exact, and all of fast-math.
386void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr);
387
388/// Returns true if we can prove that \p S is defined and always negative in
389/// loop \p L.
390bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE);
391
392/// Returns true if we can prove that \p S is defined and always non-negative in
393/// loop \p L.
394bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
395 ScalarEvolution &SE);
396
397/// Returns true if \p S is defined and never is equal to signed/unsigned max.
398bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
399 bool Signed);
400
401/// Returns true if \p S is defined and never is equal to signed/unsigned min.
402bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
403 bool Signed);
404
405enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
406
407/// If the final value of any expressions that are recurrent in the loop can
408/// be computed, substitute the exit values from the loop into any instructions
409/// outside of the loop that use the final values of the current expressions.
410/// Return the number of loop exit values that have been replaced, and the
411/// corresponding phi node will be added to DeadInsts.
412int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
413 ScalarEvolution *SE, const TargetTransformInfo *TTI,
414 SCEVExpander &Rewriter, DominatorTree *DT,
415 ReplaceExitVal ReplaceExitValue,
416 SmallVector<WeakTrackingVH, 16> &DeadInsts);
417
418/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
419/// \p OrigLoop and the following distribution of \p OrigLoop iteration among \p
420/// UnrolledLoop and \p RemainderLoop. \p UnrolledLoop receives weights that
421/// reflect TC/UF iterations, and \p RemainderLoop receives weights that reflect
422/// the remaining TC%UF iterations.
423///
424/// Note that \p OrigLoop may be equal to either \p UnrolledLoop or \p
425/// RemainderLoop in which case weights for \p OrigLoop are updated accordingly.
426/// Note also behavior is undefined if \p UnrolledLoop and \p RemainderLoop are
427/// equal. \p UF must be greater than zero.
428/// If \p OrigLoop has no profile info associated nothing happens.
429///
430/// This utility may be useful for such optimizations as unroller and
431/// vectorizer as it's typical transformation for them.
432void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
433 Loop *RemainderLoop, uint64_t UF);
434
435/// Utility that implements appending of loops onto a worklist given a range.
436/// We want to process loops in postorder, but the worklist is a LIFO data
437/// structure, so we append to it in *reverse* postorder.
438/// For trees, a preorder traversal is a viable reverse postorder, so we
439/// actually append using a preorder walk algorithm.
440template <typename RangeT>
441void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist<Loop *, 4> &);
442/// Utility that implements appending of loops onto a worklist given a range.
443/// It has the same behavior as appendLoopsToWorklist, but assumes the range of
444/// loops has already been reversed, so it processes loops in the given order.
445template <typename RangeT>
446void appendReversedLoopsToWorklist(RangeT &&,
447 SmallPriorityWorklist<Loop *, 4> &);
448
449/// Utility that implements appending of loops onto a worklist given LoopInfo.
450/// Calls the templated utility taking a Range of loops, handing it the Loops
451/// in LoopInfo, iterated in reverse. This is because the loops are stored in
452/// RPO w.r.t. the control flow graph in LoopInfo. For the purpose of unrolling,
453/// loop deletion, and LICM, we largely want to work forward across the CFG so
454/// that we visit defs before uses and can propagate simplifications from one
455/// loop nest into the next. Calls appendReversedLoopsToWorklist with the
456/// already reversed loops in LI.
457/// FIXME: Consider changing the order in LoopInfo.
458void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &);
459
460/// Recursively clone the specified loop and all of its children,
461/// mapping the blocks with the specified map.
462Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
463 LoopInfo *LI, LPPassManager *LPM);
464
465/// Add code that checks at runtime if the accessed arrays in \p PointerChecks
466/// overlap.
467///
468/// Returns a pair of instructions where the first element is the first
469/// instruction generated in possibly a sequence of instructions and the
470/// second value is the final comparator value or NULL if no check is needed.
471std::pair<Instruction *, Instruction *>
472addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
473 const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
474 SCEVExpander &Expander);
475
476/// Struct to hold information about a partially invariant condition.
477struct IVConditionInfo {
478 /// Instructions that need to be duplicated and checked for the unswitching
479 /// condition.
480 SmallVector<Instruction *> InstToDuplicate;
481
482 /// Constant to indicate for which value the condition is invariant.
483 Constant *KnownValue = nullptr;
4
Null pointer value stored to 'PartialIVInfo.KnownValue'
484
485 /// True if the partially invariant path is no-op (=does not have any
486 /// side-effects and no loop value is used outside the loop).
487 bool PathIsNoop = true;
488
489 /// If the partially invariant path reaches a single exit block, ExitForPath
490 /// is set to that block. Otherwise it is nullptr.
491 BasicBlock *ExitForPath = nullptr;
492};
493
494/// Check if the loop header has a conditional branch that is not
495/// loop-invariant, because it involves load instructions. If all paths from
496/// either the true or false successor to the header or loop exists do not
497/// modify the memory feeding the condition, perform 'partial unswitching'. That
498/// is, duplicate the instructions feeding the condition in the pre-header. Then
499/// unswitch on the duplicated condition. The condition is now known in the
500/// unswitched version for the 'invariant' path through the original loop.
501///
502/// If the branch condition of the header is partially invariant, return a pair
503/// containing the instructions to duplicate and a boolean Constant to update
504/// the condition in the loops created for the true or false successors.
505Optional<IVConditionInfo> hasPartialIVCondition(Loop &L, unsigned MSSAThreshold,
506 MemorySSA &MSSA, AAResults &AA);
507
508} // end namespace llvm
509
510#endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H

/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include/llvm/ADT/SmallVector.h

1//===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the SmallVector class.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_ADT_SMALLVECTOR_H
14#define LLVM_ADT_SMALLVECTOR_H
15
16#include "llvm/ADT/iterator_range.h"
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/ErrorHandling.h"
19#include "llvm/Support/MemAlloc.h"
20#include "llvm/Support/type_traits.h"
21#include <algorithm>
22#include <cassert>
23#include <cstddef>
24#include <cstdlib>
25#include <cstring>
26#include <initializer_list>
27#include <iterator>
28#include <limits>
29#include <memory>
30#include <new>
31#include <type_traits>
32#include <utility>
33
34namespace llvm {
35
36/// This is all the stuff common to all SmallVectors.
37///
38/// The template parameter specifies the type which should be used to hold the
39/// Size and Capacity of the SmallVector, so it can be adjusted.
40/// Using 32 bit size is desirable to shrink the size of the SmallVector.
41/// Using 64 bit size is desirable for cases like SmallVector<char>, where a
42/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
43/// buffering bitcode output - which can exceed 4GB.
44template <class Size_T> class SmallVectorBase {
45protected:
46 void *BeginX;
47 Size_T Size = 0, Capacity;
48
49 /// The maximum value of the Size_T used.
50 static constexpr size_t SizeTypeMax() {
51 return std::numeric_limits<Size_T>::max();
52 }
53
54 SmallVectorBase() = delete;
55 SmallVectorBase(void *FirstEl, size_t TotalCapacity)
56 : BeginX(FirstEl), Capacity(TotalCapacity) {}
57
58 /// This is a helper for \a grow() that's out of line to reduce code
59 /// duplication. This function will report a fatal error if it can't grow at
60 /// least to \p MinSize.
61 void *mallocForGrow(size_t MinSize, size_t TSize, size_t &NewCapacity);
62
63 /// This is an implementation of the grow() method which only works
64 /// on POD-like data types and is out of line to reduce code duplication.
65 /// This function will report a fatal error if it cannot increase capacity.
66 void grow_pod(void *FirstEl, size_t MinSize, size_t TSize);
67
68public:
69 size_t size() const { return Size; }
70 size_t capacity() const { return Capacity; }
71
72 LLVM_NODISCARD[[clang::warn_unused_result]] bool empty() const { return !Size; }
10
Assuming field 'Size' is not equal to 0
11
Returning zero, which participates in a condition later
53
Assuming field 'Size' is not equal to 0, which participates in a condition later
54
Returning zero, which participates in a condition later
73
74 /// Set the array size to \p N, which the current array must have enough
75 /// capacity for.
76 ///
77 /// This does not construct or destroy any elements in the vector.
78 ///
79 /// Clients can use this in conjunction with capacity() to write past the end
80 /// of the buffer when they know that more elements are available, and only
81 /// update the size later. This avoids the cost of value initializing elements
82 /// which will only be overwritten.
83 void set_size(size_t N) {
84 assert(N <= capacity())(static_cast <bool> (N <= capacity()) ? void (0) : __assert_fail
("N <= capacity()", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include/llvm/ADT/SmallVector.h"
, 84, __extension__ __PRETTY_FUNCTION__))
;
85 Size = N;
86 }
87};
88
89template <class T>
90using SmallVectorSizeType =
91 typename std::conditional<sizeof(T) < 4 && sizeof(void *) >= 8, uint64_t,
92 uint32_t>::type;
93
94/// Figure out the offset of the first element.
95template <class T, typename = void> struct SmallVectorAlignmentAndSize {
96 alignas(SmallVectorBase<SmallVectorSizeType<T>>) char Base[sizeof(
97 SmallVectorBase<SmallVectorSizeType<T>>)];
98 alignas(T) char FirstEl[sizeof(T)];
99};
100
101/// This is the part of SmallVectorTemplateBase which does not depend on whether
102/// the type T is a POD. The extra dummy template argument is used by ArrayRef
103/// to avoid unnecessarily requiring T to be complete.
104template <typename T, typename = void>
105class SmallVectorTemplateCommon
106 : public SmallVectorBase<SmallVectorSizeType<T>> {
107 using Base = SmallVectorBase<SmallVectorSizeType<T>>;
108
109 /// Find the address of the first element. For this pointer math to be valid
110 /// with small-size of 0 for T with lots of alignment, it's important that
111 /// SmallVectorStorage is properly-aligned even for small-size of 0.
112 void *getFirstEl() const {
113 return const_cast<void *>(reinterpret_cast<const void *>(
114 reinterpret_cast<const char *>(this) +
115 offsetof(SmallVectorAlignmentAndSize<T>, FirstEl)__builtin_offsetof(SmallVectorAlignmentAndSize<T>, FirstEl
)
));
116 }
117 // Space after 'FirstEl' is clobbered, do not add any instance vars after it.
118
119protected:
120 SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {}
121
122 void grow_pod(size_t MinSize, size_t TSize) {
123 Base::grow_pod(getFirstEl(), MinSize, TSize);
124 }
125
126 /// Return true if this is a smallvector which has not had dynamic
127 /// memory allocated for it.
128 bool isSmall() const { return this->BeginX == getFirstEl(); }
129
130 /// Put this vector in a state of being small.
131 void resetToSmall() {
132 this->BeginX = getFirstEl();
133 this->Size = this->Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
134 }
135
136 /// Return true if V is an internal reference to the given range.
137 bool isReferenceToRange(const void *V, const void *First, const void *Last) const {
138 // Use std::less to avoid UB.
139 std::less<> LessThan;
140 return !LessThan(V, First) && LessThan(V, Last);
141 }
142
143 /// Return true if V is an internal reference to this vector.
144 bool isReferenceToStorage(const void *V) const {
145 return isReferenceToRange(V, this->begin(), this->end());
146 }
147
148 /// Return true if First and Last form a valid (possibly empty) range in this
149 /// vector's storage.
150 bool isRangeInStorage(const void *First, const void *Last) const {
151 // Use std::less to avoid UB.
152 std::less<> LessThan;
153 return !LessThan(First, this->begin()) && !LessThan(Last, First) &&
154 !LessThan(this->end(), Last);
155 }
156
157 /// Return true unless Elt will be invalidated by resizing the vector to
158 /// NewSize.
159 bool isSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
160 // Past the end.
161 if (LLVM_LIKELY(!isReferenceToStorage(Elt))__builtin_expect((bool)(!isReferenceToStorage(Elt)), true))
162 return true;
163
164 // Return false if Elt will be destroyed by shrinking.
165 if (NewSize <= this->size())
166 return Elt < this->begin() + NewSize;
167
168 // Return false if we need to grow.
169 return NewSize <= this->capacity();
170 }
171
172 /// Check whether Elt will be invalidated by resizing the vector to NewSize.
173 void assertSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
174 assert(isSafeToReferenceAfterResize(Elt, NewSize) &&(static_cast <bool> (isSafeToReferenceAfterResize(Elt, NewSize
) && "Attempting to reference an element of the vector in an operation "
"that invalidates it") ? void (0) : __assert_fail ("isSafeToReferenceAfterResize(Elt, NewSize) && \"Attempting to reference an element of the vector in an operation \" \"that invalidates it\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include/llvm/ADT/SmallVector.h"
, 176, __extension__ __PRETTY_FUNCTION__))
175 "Attempting to reference an element of the vector in an operation "(static_cast <bool> (isSafeToReferenceAfterResize(Elt, NewSize
) && "Attempting to reference an element of the vector in an operation "
"that invalidates it") ? void (0) : __assert_fail ("isSafeToReferenceAfterResize(Elt, NewSize) && \"Attempting to reference an element of the vector in an operation \" \"that invalidates it\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include/llvm/ADT/SmallVector.h"
, 176, __extension__ __PRETTY_FUNCTION__))
176 "that invalidates it")(static_cast <bool> (isSafeToReferenceAfterResize(Elt, NewSize
) && "Attempting to reference an element of the vector in an operation "
"that invalidates it") ? void (0) : __assert_fail ("isSafeToReferenceAfterResize(Elt, NewSize) && \"Attempting to reference an element of the vector in an operation \" \"that invalidates it\""
, "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include/llvm/ADT/SmallVector.h"
, 176, __extension__ __PRETTY_FUNCTION__))
;
177 }
178
179 /// Check whether Elt will be invalidated by increasing the size of the
180 /// vector by N.
181 void assertSafeToAdd(const void *Elt, size_t N = 1) {
182 this->assertSafeToReferenceAfterResize(Elt, this->size() + N);
183 }
184
185 /// Check whether any part of the range will be invalidated by clearing.
186 void assertSafeToReferenceAfterClear(const T *From, const T *To) {
187 if (From == To)
188 return;
189 this->assertSafeToReferenceAfterResize(From, 0);
190 this->assertSafeToReferenceAfterResize(To - 1, 0);
191 }
192 template <
193 class ItTy,
194 std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
195 bool> = false>
196 void assertSafeToReferenceAfterClear(ItTy, ItTy) {}
197
198 /// Check whether any part of the range will be invalidated by growing.
199 void assertSafeToAddRange(const T *From, const T *To) {
200 if (From == To)
201 return;
202 this->assertSafeToAdd(From, To - From);
203 this->assertSafeToAdd(To - 1, To - From);
204 }
205 template <
206 class ItTy,
207 std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
208 bool> = false>
209 void assertSafeToAddRange(ItTy, ItTy) {}
210
211 /// Reserve enough space to add one element, and return the updated element
212 /// pointer in case it was a reference to the storage.
213 template <class U>
214 static const T *reserveForParamAndGetAddressImpl(U *This, const T &Elt,
215 size_t N) {
216 size_t NewSize = This->size() + N;
217 if (LLVM_LIKELY(NewSize <= This->capacity())__builtin_expect((bool)(NewSize <= This->capacity()), true
)
)
218 return &Elt;
219
220 bool ReferencesStorage = false;
221 int64_t Index = -1;
222 if (!U::TakesParamByValue) {
223 if (LLVM_UNLIKELY(This->isReferenceToStorage(&Elt))__builtin_expect((bool)(This->isReferenceToStorage(&Elt
)), false)
) {
224 ReferencesStorage = true;
225 Index = &Elt - This->begin();
226 }
227 }
228 This->grow(NewSize);
229 return ReferencesStorage ? This->begin() + Index : &Elt;
230 }
231
232public:
233 using size_type = size_t;
234 using difference_type = ptrdiff_t;
235 using value_type = T;
236 using iterator = T *;
237 using const_iterator = const T *;
238
239 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
240 using reverse_iterator = std::reverse_iterator<iterator>;
241
242 using reference = T &;
243 using const_reference = const T &;
244 using pointer = T *;
245 using const_pointer = const T *;
246
247 using Base::capacity;
248 using Base::empty;
249 using Base::size;
250
251 // forward iterator creation methods.
252 iterator begin() { return (iterator)this->BeginX; }
253 const_iterator begin() const { return (const_iterator)this->BeginX; }
254 iterator end() { return begin() + size(); }
255 const_iterator end() const { return begin() + size(); }
256
257 // reverse iterator creation methods.
258 reverse_iterator rbegin() { return reverse_iterator(end()); }
259 const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); }
260 reverse_iterator rend() { return reverse_iterator(begin()); }
261 const_reverse_iterator rend() const { return const_reverse_iterator(begin());}
262
263 size_type size_in_bytes() const { return size() * sizeof(T); }
264 size_type max_size() const {
265 return std::min(this->SizeTypeMax(), size_type(-1) / sizeof(T));
266 }
267
268 size_t capacity_in_bytes() const { return capacity() * sizeof(T); }
269
270 /// Return a pointer to the vector's buffer, even if empty().
271 pointer data() { return pointer(begin()); }
272 /// Return a pointer to the vector's buffer, even if empty().
273 const_pointer data() const { return const_pointer(begin()); }
274
275 reference operator[](size_type idx) {
276 assert(idx < size())(static_cast <bool> (idx < size()) ? void (0) : __assert_fail
("idx < size()", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include/llvm/ADT/SmallVector.h"
, 276, __extension__ __PRETTY_FUNCTION__))
;
277 return begin()[idx];
278 }
279 const_reference operator[](size_type idx) const {
280 assert(idx < size())(static_cast <bool> (idx < size()) ? void (0) : __assert_fail
("idx < size()", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include/llvm/ADT/SmallVector.h"
, 280, __extension__ __PRETTY_FUNCTION__))
;
281 return begin()[idx];
282 }
283
284 reference front() {
285 assert(!empty())(static_cast <bool> (!empty()) ? void (0) : __assert_fail
("!empty()", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include/llvm/ADT/SmallVector.h"
, 285, __extension__ __PRETTY_FUNCTION__))
;
286 return begin()[0];
287 }
288 const_reference front() const {
289 assert(!empty())(static_cast <bool> (!empty()) ? void (0) : __assert_fail
("!empty()", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include/llvm/ADT/SmallVector.h"
, 289, __extension__ __PRETTY_FUNCTION__))
;
290 return begin()[0];
291 }
292
293 reference back() {
294 assert(!empty())(static_cast <bool> (!empty()) ? void (0) : __assert_fail
("!empty()", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include/llvm/ADT/SmallVector.h"
, 294, __extension__ __PRETTY_FUNCTION__))
;
295 return end()[-1];
296 }
297 const_reference back() const {
298 assert(!empty())(static_cast <bool> (!empty()) ? void (0) : __assert_fail
("!empty()", "/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include/llvm/ADT/SmallVector.h"
, 298, __extension__ __PRETTY_FUNCTION__))
;
299 return end()[-1];
300 }
301};
302
303/// SmallVectorTemplateBase<TriviallyCopyable = false> - This is where we put
304/// method implementations that are designed to work with non-trivial T's.
305///
306/// We approximate is_trivially_copyable with trivial move/copy construction and
307/// trivial destruction. While the standard doesn't specify that you're allowed
308/// copy these types with memcpy, there is no way for the type to observe this.
309/// This catches the important case of std::pair<POD, POD>, which is not
310/// trivially assignable.
311template <typename T, bool = (is_trivially_copy_constructible<T>::value) &&
312 (is_trivially_move_constructible<T>::value) &&
313 std::is_trivially_destructible<T>::value>
314class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
315 friend class SmallVectorTemplateCommon<T>;
316
317protected:
318 static constexpr bool TakesParamByValue = false;
319 using ValueParamT = const T &;
320
321 SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
322
323 static void destroy_range(T *S, T *E) {
324 while (S != E) {
325 --E;
326 E->~T();
327 }
328 }
329
330 /// Move the range [I, E) into the uninitialized memory starting with "Dest",
331 /// constructing elements as needed.
332 template<typename It1, typename It2>
333 static void uninitialized_move(It1 I, It1 E, It2 Dest) {
334 std::uninitialized_copy(std::make_move_iterator(I),
335 std::make_move_iterator(E), Dest);
336 }
337
338 /// Copy the range [I, E) onto the uninitialized memory starting with "Dest",
339 /// constructing elements as needed.
340 template<typename It1, typename It2>
341 static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
342 std::uninitialized_copy(I, E, Dest);
343 }
344
345 /// Grow the allocated memory (without initializing new elements), doubling
346 /// the size of the allocated memory. Guarantees space for at least one more
347 /// element, or MinSize more elements if specified.
348 void grow(size_t MinSize = 0);
349
350 /// Create a new allocation big enough for \p MinSize and pass back its size
351 /// in \p NewCapacity. This is the first section of \a grow().
352 T *mallocForGrow(size_t MinSize, size_t &NewCapacity) {
353 return static_cast<T *>(
354 SmallVectorBase<SmallVectorSizeType<T>>::mallocForGrow(
355 MinSize, sizeof(T), NewCapacity));
356 }
357
358 /// Move existing elements over to the new allocation \p NewElts, the middle
359 /// section of \a grow().
360 void moveElementsForGrow(T *NewElts);
361
362 /// Transfer ownership of the allocation, finishing up \a grow().
363 void takeAllocationForGrow(T *NewElts, size_t NewCapacity);
364
365 /// Reserve enough space to add one element, and return the updated element
366 /// pointer in case it was a reference to the storage.
367 const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
368 return this->reserveForParamAndGetAddressImpl(this, Elt, N);
369 }
370
371 /// Reserve enough space to add one element, and return the updated element
372 /// pointer in case it was a reference to the storage.
373 T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
374 return const_cast<T *>(
375 this->reserveForParamAndGetAddressImpl(this, Elt, N));
376 }
377
378 static T &&forward_value_param(T &&V) { return std::move(V); }
379 static const T &forward_value_param(const T &V) { return V; }
380
381 void growAndAssign(size_t NumElts, const T &Elt) {
382 // Grow manually in case Elt is an internal reference.
383 size_t NewCapacity;
384 T *NewElts = mallocForGrow(NumElts, NewCapacity);
385 std::uninitialized_fill_n(NewElts, NumElts, Elt);
386 this->destroy_range(this->begin(), this->end());
387 takeAllocationForGrow(NewElts, NewCapacity);
388 this->set_size(NumElts);
389 }
390
391 template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
392 // Grow manually in case one of Args is an internal reference.
393 size_t NewCapacity;
394 T *NewElts = mallocForGrow(0, NewCapacity);
395 ::new ((void *)(NewElts + this->size())) T(std::forward<ArgTypes>(Args)...);
396 moveElementsForGrow(NewElts);
397 takeAllocationForGrow(NewElts, NewCapacity);
398 this->set_size(this->size() + 1);
399 return this->back();
400 }
401
402public:
403 void push_back(const T &Elt) {
404 const T *EltPtr = reserveForParamAndGetAddress(Elt);
405 ::new ((void *)this->end()) T(*EltPtr);
406 this->set_size(this->size() + 1);
407 }
408
409 void push_back(T &&Elt) {
410 T *EltPtr = reserveForParamAndGetAddress(Elt);
411 ::new ((void *)this->end()) T(::std::move(*EltPtr));
412 this->set_size(this->size() + 1);
413 }
414
415 void pop_back() {
416 this->set_size(this->size() - 1);
417 this->end()->~T();
418 }
419};
420
421// Define this out-of-line to dissuade the C++ compiler from inlining it.
422template <typename T, bool TriviallyCopyable>
423void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
424 size_t NewCapacity;
425 T *NewElts = mallocForGrow(MinSize, NewCapacity);
426 moveElementsForGrow(NewElts);
427 takeAllocationForGrow(NewElts, NewCapacity);
428}
429
430// Define this out-of-line to dissuade the C++ compiler from inlining it.
431template <typename T, bool TriviallyCopyable>
432void SmallVectorTemplateBase<T, TriviallyCopyable>::moveElementsForGrow(
433 T *NewElts) {
434 // Move the elements over.
435 this->uninitialized_move(this->begin(), this->end(), NewElts);
436
437 // Destroy the original elements.
438 destroy_range(this->begin(), this->end());
439}
440
441// Define this out-of-line to dissuade the C++ compiler from inlining it.
442template <typename T, bool TriviallyCopyable>
443void SmallVectorTemplateBase<T, TriviallyCopyable>::takeAllocationForGrow(
444 T *NewElts, size_t NewCapacity) {
445 // If this wasn't grown from the inline copy, deallocate the old space.
446 if (!this->isSmall())
447 free(this->begin());
448
449 this->BeginX = NewElts;
450 this->Capacity = NewCapacity;
451}
452
/// SmallVectorTemplateBase<TriviallyCopyable = true> - This is where we put
/// method implementations that are designed to work with trivially copyable
/// T's. This allows using memcpy in place of copy/move construction and
/// skipping destruction.
template <typename T>
class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
  friend class SmallVectorTemplateCommon<T>;

protected:
  /// True if it's cheap enough to take parameters by value. Doing so avoids
  /// overhead related to mitigations for reference invalidation.
  static constexpr bool TakesParamByValue = sizeof(T) <= 2 * sizeof(void *);

  /// Either const T& or T, depending on whether it's cheap enough to take
  /// parameters by value.
  using ValueParamT =
      typename std::conditional<TakesParamByValue, T, const T &>::type;

  SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}

  // No need to do a destroy loop for POD's.
  static void destroy_range(T *, T *) {}

  /// Move the range [I, E) onto the uninitialized memory
  /// starting with "Dest", constructing elements into it as needed.
  template<typename It1, typename It2>
  static void uninitialized_move(It1 I, It1 E, It2 Dest) {
    // Just do a copy; for trivially copyable T, move and copy are the same.
    uninitialized_copy(I, E, Dest);
  }

  /// Copy the range [I, E) onto the uninitialized memory
  /// starting with "Dest", constructing elements into it as needed.
  template<typename It1, typename It2>
  static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
    // Arbitrary iterator types; just use the basic implementation.
    std::uninitialized_copy(I, E, Dest);
  }

  /// Copy the range [I, E) onto the uninitialized memory
  /// starting with "Dest", constructing elements into it as needed.
  template <typename T1, typename T2>
  static void uninitialized_copy(
      T1 *I, T1 *E, T2 *Dest,
      std::enable_if_t<std::is_same<typename std::remove_const<T1>::type,
                                    T2>::value> * = nullptr) {
    // Use memcpy for PODs iterated by pointers (which includes SmallVector
    // iterators): std::uninitialized_copy optimizes to memmove, but we can
    // use memcpy here. Note that I and E are iterators and thus might be
    // invalid for memcpy if they are equal.
    if (I != E)
      memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T));
  }

  /// Double the size of the allocated memory, guaranteeing space for at
  /// least one more element or MinSize if specified.
  void grow(size_t MinSize = 0) { this->grow_pod(MinSize, sizeof(T)); }

  /// Reserve enough space to add one element, and return the updated element
  /// pointer in case it was a reference to the storage.
  const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
    return this->reserveForParamAndGetAddressImpl(this, Elt, N);
  }

  /// Reserve enough space to add one element, and return the updated element
  /// pointer in case it was a reference to the storage.
  T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
    return const_cast<T *>(
        this->reserveForParamAndGetAddressImpl(this, Elt, N));
  }

  /// Copy \p V or return a reference, depending on \a ValueParamT.
  static ValueParamT forward_value_param(ValueParamT V) { return V; }

  void growAndAssign(size_t NumElts, T Elt) {
    // Elt has been copied in case it's an internal reference, side-stepping
    // reference invalidation problems without losing the realloc optimization.
    this->set_size(0);
    this->grow(NumElts);
    std::uninitialized_fill_n(this->begin(), NumElts, Elt);
    this->set_size(NumElts);
  }

  template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
    // Use push_back with a copy in case Args has an internal reference,
    // side-stepping reference invalidation problems without losing the realloc
    // optimization.
    push_back(T(std::forward<ArgTypes>(Args)...));
    return this->back();
  }

public:
  void push_back(ValueParamT Elt) {
    // reserveForParamAndGetAddress re-derives Elt's address in case growing
    // relocated the storage an internal reference pointed into.
    const T *EltPtr = reserveForParamAndGetAddress(Elt);
    // T is trivially copyable in this specialization, so a raw memcpy is a
    // valid way to "construct" the new element.
    memcpy(reinterpret_cast<void *>(this->end()), EltPtr, sizeof(T));
    this->set_size(this->size() + 1);
  }

  void pop_back() { this->set_size(this->size() - 1); }
};
553
/// This class consists of common code factored out of the SmallVector class to
/// reduce code duplication based on the SmallVector 'N' template parameter.
template <typename T>
class SmallVectorImpl : public SmallVectorTemplateBase<T> {
  using SuperClass = SmallVectorTemplateBase<T>;

public:
  using iterator = typename SuperClass::iterator;
  using const_iterator = typename SuperClass::const_iterator;
  using reference = typename SuperClass::reference;
  using size_type = typename SuperClass::size_type;

protected:
  using SmallVectorTemplateBase<T>::TakesParamByValue;
  using ValueParamT = typename SuperClass::ValueParamT;

  // Default ctor - Initialize to empty.
  explicit SmallVectorImpl(unsigned N)
      : SmallVectorTemplateBase<T>(N) {}

public:
  SmallVectorImpl(const SmallVectorImpl &) = delete;

  ~SmallVectorImpl() {
    // Subclass has already destructed this vector's elements.
    // If this wasn't grown from the inline copy, deallocate the old space.
    if (!this->isSmall())
      free(this->begin());
  }

  void clear() {
    this->destroy_range(this->begin(), this->end());
    this->Size = 0;
  }

private:
  // Common implementation of resize() and resize_for_overwrite().  When
  // growing, ForOverwrite selects default-initialization ("new T") over
  // value-initialization ("new T()"), leaving trivial types uninitialized.
  template <bool ForOverwrite> void resizeImpl(size_type N) {
    if (N < this->size()) {
      this->pop_back_n(this->size() - N);
    } else if (N > this->size()) {
      this->reserve(N);
      for (auto I = this->end(), E = this->begin() + N; I != E; ++I)
        if (ForOverwrite)
          new (&*I) T;
        else
          new (&*I) T();
      this->set_size(N);
    }
  }

public:
  void resize(size_type N) { resizeImpl<false>(N); }

  /// Like resize, but \ref T is POD, the new values won't be initialized.
  void resize_for_overwrite(size_type N) { resizeImpl<true>(N); }

  void resize(size_type N, ValueParamT NV) {
    if (N == this->size())
      return;

    if (N < this->size()) {
      this->pop_back_n(this->size() - N);
      return;
    }

    // N > this->size(). Defer to append.
    this->append(N - this->size(), NV);
  }

  void reserve(size_type N) {
    if (this->capacity() < N)
      this->grow(N);
  }

  void pop_back_n(size_type NumItems) {
    assert(this->size() >= NumItems);
    this->destroy_range(this->end() - NumItems, this->end());
    this->set_size(this->size() - NumItems);
  }

  LLVM_NODISCARD T pop_back_val() {
    T Result = ::std::move(this->back());
    this->pop_back();
    return Result;
  }

  void swap(SmallVectorImpl &RHS);

  /// Add the specified range to the end of the SmallVector.
  template <typename in_iter,
            typename = std::enable_if_t<std::is_convertible<
                typename std::iterator_traits<in_iter>::iterator_category,
                std::input_iterator_tag>::value>>
  void append(in_iter in_start, in_iter in_end) {
    this->assertSafeToAddRange(in_start, in_end);
    size_type NumInputs = std::distance(in_start, in_end);
    this->reserve(this->size() + NumInputs);
    this->uninitialized_copy(in_start, in_end, this->end());
    this->set_size(this->size() + NumInputs);
  }

  /// Append \p NumInputs copies of \p Elt to the end.
  void append(size_type NumInputs, ValueParamT Elt) {
    // Re-derive Elt's address in case reserving relocated the storage.
    const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumInputs);
    std::uninitialized_fill_n(this->end(), NumInputs, *EltPtr);
    this->set_size(this->size() + NumInputs);
  }

  void append(std::initializer_list<T> IL) {
    append(IL.begin(), IL.end());
  }

  void append(const SmallVectorImpl &RHS) { append(RHS.begin(), RHS.end()); }

  void assign(size_type NumElts, ValueParamT Elt) {
    // Note that Elt could be an internal reference.
    if (NumElts > this->capacity()) {
      this->growAndAssign(NumElts, Elt);
      return;
    }

    // Assign over existing elements.
    std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt);
    if (NumElts > this->size())
      std::uninitialized_fill_n(this->end(), NumElts - this->size(), Elt);
    else if (NumElts < this->size())
      this->destroy_range(this->begin() + NumElts, this->end());
    this->set_size(NumElts);
  }

  // FIXME: Consider assigning over existing elements, rather than clearing &
  // re-initializing them - for all assign(...) variants.

  template <typename in_iter,
            typename = std::enable_if_t<std::is_convertible<
                typename std::iterator_traits<in_iter>::iterator_category,
                std::input_iterator_tag>::value>>
  void assign(in_iter in_start, in_iter in_end) {
    this->assertSafeToReferenceAfterClear(in_start, in_end);
    clear();
    append(in_start, in_end);
  }

  void assign(std::initializer_list<T> IL) {
    clear();
    append(IL);
  }

  void assign(const SmallVectorImpl &RHS) { assign(RHS.begin(), RHS.end()); }

  iterator erase(const_iterator CI) {
    // Just cast away constness because this is a non-const member function.
    iterator I = const_cast<iterator>(CI);

    assert(this->isReferenceToStorage(CI) && "Iterator to erase is out of bounds.");

    iterator N = I;
    // Shift all elts down one.
    std::move(I+1, this->end(), I);
    // Drop the last elt.
    this->pop_back();
    return(N);
  }

  iterator erase(const_iterator CS, const_iterator CE) {
    // Just cast away constness because this is a non-const member function.
    iterator S = const_cast<iterator>(CS);
    iterator E = const_cast<iterator>(CE);

    assert(this->isRangeInStorage(S, E) && "Range to erase is out of bounds.");

    iterator N = S;
    // Shift all elts down.
    iterator I = std::move(E, this->end(), S);
    // Drop the last elts.
    this->destroy_range(I, this->end());
    this->set_size(I - this->begin());
    return(N);
  }

private:
  template <class ArgType> iterator insert_one_impl(iterator I, ArgType &&Elt) {
    // Callers ensure that ArgType is derived from T.
    static_assert(
        std::is_same<std::remove_const_t<std::remove_reference_t<ArgType>>,
                     T>::value,
        "ArgType must be derived from T!");

    if (I == this->end()) {  // Important special case for empty vector.
      this->push_back(::std::forward<ArgType>(Elt));
      return this->end()-1;
    }

    assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.");

    // Grow if necessary.  The insertion position is remembered as an index
    // because growing invalidates iterators into the storage.
    size_t Index = I - this->begin();
    std::remove_reference_t<ArgType> *EltPtr =
        this->reserveForParamAndGetAddress(Elt);
    I = this->begin() + Index;

    ::new ((void*) this->end()) T(::std::move(this->back()));
    // Push everything else over.
    std::move_backward(I, this->end()-1, this->end());
    this->set_size(this->size() + 1);

    // If we just moved the element we're inserting, be sure to update
    // the reference (never happens if TakesParamByValue).
    static_assert(!TakesParamByValue || std::is_same<ArgType, T>::value,
                  "ArgType must be 'T' when taking by value!");
    if (!TakesParamByValue && this->isReferenceToRange(EltPtr, I, this->end()))
      ++EltPtr;

    *I = ::std::forward<ArgType>(*EltPtr);
    return I;
  }

public:
  iterator insert(iterator I, T &&Elt) {
    return insert_one_impl(I, this->forward_value_param(std::move(Elt)));
  }

  iterator insert(iterator I, const T &Elt) {
    return insert_one_impl(I, this->forward_value_param(Elt));
  }

  iterator insert(iterator I, size_type NumToInsert, ValueParamT Elt) {
    // Convert iterator to elt# to avoid invalidating iterator when we reserve()
    size_t InsertElt = I - this->begin();

    if (I == this->end()) {  // Important special case for empty vector.
      append(NumToInsert, Elt);
      return this->begin()+InsertElt;
    }

    assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.");

    // Ensure there is enough space, and get the (maybe updated) address of
    // Elt.
    const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumToInsert);

    // Uninvalidate the iterator.
    I = this->begin()+InsertElt;

    // If there are more elements between the insertion point and the end of the
    // range than there are being inserted, we can use a simple approach to
    // insertion.  Since we already reserved space, we know that this won't
    // reallocate the vector.
    if (size_t(this->end()-I) >= NumToInsert) {
      T *OldEnd = this->end();
      append(std::move_iterator<iterator>(this->end() - NumToInsert),
             std::move_iterator<iterator>(this->end()));

      // Copy the existing elements that get replaced.
      std::move_backward(I, OldEnd-NumToInsert, OldEnd);

      // If we just moved the element we're inserting, be sure to update
      // the reference (never happens if TakesParamByValue).
      if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
        EltPtr += NumToInsert;

      std::fill_n(I, NumToInsert, *EltPtr);
      return I;
    }

    // Otherwise, we're inserting more elements than exist already, and we're
    // not inserting at the end.

    // Move over the elements that we're about to overwrite.
    T *OldEnd = this->end();
    this->set_size(this->size() + NumToInsert);
    size_t NumOverwritten = OldEnd-I;
    this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);

    // If we just moved the element we're inserting, be sure to update
    // the reference (never happens if TakesParamByValue).
    if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
      EltPtr += NumToInsert;

    // Replace the overwritten part.
    std::fill_n(I, NumOverwritten, *EltPtr);

    // Insert the non-overwritten middle part.
    std::uninitialized_fill_n(OldEnd, NumToInsert - NumOverwritten, *EltPtr);
    return I;
  }

  template <typename ItTy,
            typename = std::enable_if_t<std::is_convertible<
                typename std::iterator_traits<ItTy>::iterator_category,
                std::input_iterator_tag>::value>>
  iterator insert(iterator I, ItTy From, ItTy To) {
    // Convert iterator to elt# to avoid invalidating iterator when we reserve()
    size_t InsertElt = I - this->begin();

    if (I == this->end()) {  // Important special case for empty vector.
      append(From, To);
      return this->begin()+InsertElt;
    }

    assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.");

    // Check that the reserve that follows doesn't invalidate the iterators.
    this->assertSafeToAddRange(From, To);

    size_t NumToInsert = std::distance(From, To);

    // Ensure there is enough space.
    reserve(this->size() + NumToInsert);

    // Uninvalidate the iterator.
    I = this->begin()+InsertElt;

    // If there are more elements between the insertion point and the end of the
    // range than there are being inserted, we can use a simple approach to
    // insertion.  Since we already reserved space, we know that this won't
    // reallocate the vector.
    if (size_t(this->end()-I) >= NumToInsert) {
      T *OldEnd = this->end();
      append(std::move_iterator<iterator>(this->end() - NumToInsert),
             std::move_iterator<iterator>(this->end()));

      // Copy the existing elements that get replaced.
      std::move_backward(I, OldEnd-NumToInsert, OldEnd);

      std::copy(From, To, I);
      return I;
    }

    // Otherwise, we're inserting more elements than exist already, and we're
    // not inserting at the end.

    // Move over the elements that we're about to overwrite.
    T *OldEnd = this->end();
    this->set_size(this->size() + NumToInsert);
    size_t NumOverwritten = OldEnd-I;
    this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);

    // Replace the overwritten part.
    for (T *J = I; NumOverwritten > 0; --NumOverwritten) {
      *J = *From;
      ++J; ++From;
    }

    // Insert the non-overwritten middle part.
    this->uninitialized_copy(From, To, OldEnd);
    return I;
  }

  void insert(iterator I, std::initializer_list<T> IL) {
    insert(I, IL.begin(), IL.end());
  }

  template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) {
    if (LLVM_UNLIKELY(this->size() >= this->capacity()))
      return this->growAndEmplaceBack(std::forward<ArgTypes>(Args)...);

    ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...);
    this->set_size(this->size() + 1);
    return this->back();
  }

  SmallVectorImpl &operator=(const SmallVectorImpl &RHS);

  SmallVectorImpl &operator=(SmallVectorImpl &&RHS);

  bool operator==(const SmallVectorImpl &RHS) const {
    if (this->size() != RHS.size()) return false;
    return std::equal(this->begin(), this->end(), RHS.begin());
  }
  bool operator!=(const SmallVectorImpl &RHS) const {
    return !(*this == RHS);
  }

  bool operator<(const SmallVectorImpl &RHS) const {
    return std::lexicographical_compare(this->begin(), this->end(),
                                        RHS.begin(), RHS.end());
  }
};
933
934template <typename T>
935void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) {
936 if (this == &RHS) return;
937
938 // We can only avoid copying elements if neither vector is small.
939 if (!this->isSmall() && !RHS.isSmall()) {
940 std::swap(this->BeginX, RHS.BeginX);
941 std::swap(this->Size, RHS.Size);
942 std::swap(this->Capacity, RHS.Capacity);
943 return;
944 }
945 this->reserve(RHS.size());
946 RHS.reserve(this->size());
947
948 // Swap the shared elements.
949 size_t NumShared = this->size();
950 if (NumShared > RHS.size()) NumShared = RHS.size();
951 for (size_type i = 0; i != NumShared; ++i)
952 std::swap((*this)[i], RHS[i]);
953
954 // Copy over the extra elts.
955 if (this->size() > RHS.size()) {
956 size_t EltDiff = this->size() - RHS.size();
957 this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end());
958 RHS.set_size(RHS.size() + EltDiff);
959 this->destroy_range(this->begin()+NumShared, this->end());
960 this->set_size(NumShared);
961 } else if (RHS.size() > this->size()) {
962 size_t EltDiff = RHS.size() - this->size();
963 this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end());
964 this->set_size(this->size() + EltDiff);
965 this->destroy_range(RHS.begin()+NumShared, RHS.end());
966 RHS.set_size(NumShared);
967 }
968}
969
/// Copy-assignment: reuse already-constructed elements where possible instead
/// of destroying and re-creating the whole vector.
template <typename T>
SmallVectorImpl<T> &SmallVectorImpl<T>::
  operator=(const SmallVectorImpl<T> &RHS) {
  // Avoid self-assignment.
  if (this == &RHS) return *this;

  // If we already have sufficient space, assign the common elements, then
  // destroy any excess.
  size_t RHSSize = RHS.size();
  size_t CurSize = this->size();
  if (CurSize >= RHSSize) {
    // Assign common elements.
    iterator NewEnd;
    if (RHSSize)
      NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin());
    else
      NewEnd = this->begin();

    // Destroy excess elements.
    this->destroy_range(NewEnd, this->end());

    // Trim.
    this->set_size(RHSSize);
    return *this;
  }

  // From here on CurSize < RHSSize, so new elements must be constructed.
  // If we have to grow to have enough elements, destroy the current elements.
  // This allows us to avoid copying them during the grow.
  // FIXME: don't do this if they're efficiently moveable.
  if (this->capacity() < RHSSize) {
    // Destroy current elements.
    this->clear();
    CurSize = 0;
    this->grow(RHSSize);
  } else if (CurSize) {
    // Otherwise, use assignment for the already-constructed elements.
    std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin());
  }

  // Copy construct the new elements in place.
  this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(),
                           this->begin()+CurSize);

  // Set end.
  this->set_size(RHSSize);
  return *this;
}
1017
/// Move-assignment: steal RHS's heap buffer when it has one; otherwise fall
/// back to element-wise move into this vector's storage.
template <typename T>
SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
  // Avoid self-assignment.
  if (this == &RHS) return *this;

  // If the RHS isn't small, clear this vector and then steal its buffer.
  if (!RHS.isSmall()) {
    this->destroy_range(this->begin(), this->end());
    if (!this->isSmall()) free(this->begin());
    this->BeginX = RHS.BeginX;
    this->Size = RHS.Size;
    this->Capacity = RHS.Capacity;
    RHS.resetToSmall();
    return *this;
  }

  // If we already have sufficient space, assign the common elements, then
  // destroy any excess.
  size_t RHSSize = RHS.size();
  size_t CurSize = this->size();
  if (CurSize >= RHSSize) {
    // Assign common elements.
    iterator NewEnd = this->begin();
    if (RHSSize)
      NewEnd = std::move(RHS.begin(), RHS.end(), NewEnd);

    // Destroy excess elements and trim the bounds.
    this->destroy_range(NewEnd, this->end());
    this->set_size(RHSSize);

    // Clear the RHS.
    RHS.clear();

    return *this;
  }

  // From here on CurSize < RHSSize, so new elements must be constructed.
  // If we have to grow to have enough elements, destroy the current elements.
  // This allows us to avoid copying them during the grow.
  // FIXME: this may not actually make any sense if we can efficiently move
  // elements.
  if (this->capacity() < RHSSize) {
    // Destroy current elements.
    this->clear();
    CurSize = 0;
    this->grow(RHSSize);
  } else if (CurSize) {
    // Otherwise, use assignment for the already-constructed elements.
    std::move(RHS.begin(), RHS.begin()+CurSize, this->begin());
  }

  // Move-construct the new elements in place.
  this->uninitialized_move(RHS.begin()+CurSize, RHS.end(),
                           this->begin()+CurSize);

  // Set end.
  this->set_size(RHSSize);

  RHS.clear();
  return *this;
}
1078
/// Storage for the SmallVector elements.  This is specialized for the N=0 case
/// to avoid allocating unnecessary storage.
template <typename T, unsigned N>
struct SmallVectorStorage {
  // Raw, suitably-aligned bytes for N inline elements; elements are
  // constructed into this buffer lazily, so no T constructor runs here.
  alignas(T) char InlineElts[N * sizeof(T)];
};
1085
/// We need the storage to be properly aligned even for small-size of 0 so that
/// the pointer math in \a SmallVectorTemplateCommon::getFirstEl() is
/// well-defined.
template <typename T> struct alignas(T) SmallVectorStorage<T, 0> {}; // Empty, but keeps alignment.
1090
1091/// Forward declaration of SmallVector so that
1092/// calculateSmallVectorDefaultInlinedElements can reference
1093/// `sizeof(SmallVector<T, 0>)`.
1094template <typename T, unsigned N> class LLVM_GSL_OWNER[[gsl::Owner]] SmallVector;
1095
/// Helper class for calculating the default number of inline elements for
/// `SmallVector<T>`.
///
/// This should be migrated to a constexpr function when our minimum
/// compiler support is enough for multi-statement constexpr functions.
template <typename T> struct CalculateSmallVectorDefaultInlinedElements {
  // Parameter controlling the default number of inlined elements
  // for `SmallVector<T>`.
  //
  // The default number of inlined elements ensures that
  // 1. There is at least one inlined element.
  // 2. `sizeof(SmallVector<T>) <= kPreferredSmallVectorSizeof` unless
  // it contradicts 1.
  static constexpr size_t kPreferredSmallVectorSizeof = 64;

  // static_assert that sizeof(T) is not "too big".
  //
  // Because our policy guarantees at least one inlined element, it is possible
  // for an arbitrarily large inlined element to allocate an arbitrarily large
  // amount of inline storage. We generally consider it an antipattern for a
  // SmallVector to allocate an excessive amount of inline storage, so we want
  // to call attention to these cases and make sure that users are making an
  // intentional decision if they request a lot of inline storage.
  //
  // We want this assertion to trigger in pathological cases, but otherwise
  // not be too easy to hit. To accomplish that, the cutoff is actually somewhat
  // larger than kPreferredSmallVectorSizeof (otherwise,
  // `SmallVector<SmallVector<T>>` would be one easy way to trip it, and that
  // pattern seems useful in practice).
  //
  // One wrinkle is that this assertion is in theory non-portable, since
  // sizeof(T) is in general platform-dependent. However, we don't expect this
  // to be much of an issue, because most LLVM development happens on 64-bit
  // hosts, and therefore sizeof(T) is expected to *decrease* when compiled for
  // 32-bit hosts, dodging the issue. The reverse situation, where development
  // happens on a 32-bit host and then fails due to sizeof(T) *increasing* on a
  // 64-bit host, is expected to be very rare.
  static_assert(
      sizeof(T) <= 256,
      "You are trying to use a default number of inlined elements for "
      "`SmallVector<T>` but `sizeof(T)` is really big! Please use an "
      "explicit number of inlined elements with `SmallVector<T, N>` to make "
      "sure you really want that much inline storage.");

  // Discount the size of the header itself when calculating the maximum inline
  // bytes.
  static constexpr size_t PreferredInlineBytes =
      kPreferredSmallVectorSizeof - sizeof(SmallVector<T, 0>);
  static constexpr size_t NumElementsThatFit = PreferredInlineBytes / sizeof(T);
  // Guarantee at least one inline element even when T is larger than the
  // preferred byte budget.
  static constexpr size_t value =
      NumElementsThatFit == 0 ? 1 : NumElementsThatFit;
};
1148
/// This is a 'vector' (really, a variable-sized array), optimized
/// for the case when the array is small. It contains some number of elements
/// in-place, which allows it to avoid heap allocation when the actual number of
/// elements is below that threshold. This allows normal "small" cases to be
/// fast without losing generality for large inputs.
///
/// \note
/// In the absence of a well-motivated choice for the number of inlined
/// elements \p N, it is recommended to use \c SmallVector<T> (that is,
/// omitting the \p N). This will choose a default number of inlined elements
/// reasonable for allocation on the stack (for example, trying to keep \c
/// sizeof(SmallVector<T>) around 64 bytes).
///
/// \warning This does not attempt to be exception safe.
///
/// \see https://llvm.org/docs/ProgrammersManual.html#llvm-adt-smallvector-h
template <typename T,
          unsigned N = CalculateSmallVectorDefaultInlinedElements<T>::value>
class LLVM_GSL_OWNER SmallVector : public SmallVectorImpl<T>,
                                   SmallVectorStorage<T, N> {
public:
  SmallVector() : SmallVectorImpl<T>(N) {}

  ~SmallVector() {
    // Destroy the constructed elements in the vector.  The base class
    // destructor then releases any heap allocation.
    this->destroy_range(this->begin(), this->end());
  }

  /// Construct with \p Size copies of \p Value.
  explicit SmallVector(size_t Size, const T &Value = T())
    : SmallVectorImpl<T>(N) {
    this->assign(Size, Value);
  }

  /// Construct from the iterator range [S, E).
  template <typename ItTy,
            typename = std::enable_if_t<std::is_convertible<
                typename std::iterator_traits<ItTy>::iterator_category,
                std::input_iterator_tag>::value>>
  SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(N) {
    this->append(S, E);
  }

  /// Construct by copying the elements of an iterator_range.
  template <typename RangeTy>
  explicit SmallVector(const iterator_range<RangeTy> &R)
      : SmallVectorImpl<T>(N) {
    this->append(R.begin(), R.end());
  }

  SmallVector(std::initializer_list<T> IL) : SmallVectorImpl<T>(N) {
    this->assign(IL);
  }

  SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(N) {
    if (!RHS.empty())
      SmallVectorImpl<T>::operator=(RHS);
  }

  SmallVector &operator=(const SmallVector &RHS) {
    SmallVectorImpl<T>::operator=(RHS);
    return *this;
  }

  SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(N) {
    if (!RHS.empty())
      SmallVectorImpl<T>::operator=(::std::move(RHS));
  }

  /// Move-construct from any SmallVector with the same element type,
  /// regardless of its inline capacity.
  SmallVector(SmallVectorImpl<T> &&RHS) : SmallVectorImpl<T>(N) {
    if (!RHS.empty())
      SmallVectorImpl<T>::operator=(::std::move(RHS));
  }

  SmallVector &operator=(SmallVector &&RHS) {
    SmallVectorImpl<T>::operator=(::std::move(RHS));
    return *this;
  }

  SmallVector &operator=(SmallVectorImpl<T> &&RHS) {
    SmallVectorImpl<T>::operator=(::std::move(RHS));
    return *this;
  }

  SmallVector &operator=(std::initializer_list<T> IL) {
    this->assign(IL);
    return *this;
  }
};
1235
1236template <typename T, unsigned N>
1237inline size_t capacity_in_bytes(const SmallVector<T, N> &X) {
1238 return X.capacity_in_bytes();
1239}
1240
1241/// Given a range of type R, iterate the entire range and return a
1242/// SmallVector with elements of the vector. This is useful, for example,
1243/// when you want to iterate a range and then sort the results.
1244template <unsigned Size, typename R>
1245SmallVector<typename std::remove_const<typename std::remove_reference<
1246 decltype(*std::begin(std::declval<R &>()))>::type>::type,
1247 Size>
1248to_vector(R &&Range) {
1249 return {std::begin(Range), std::end(Range)};
1250}
1251
1252} // end namespace llvm
1253
1254namespace std {
1255
1256 /// Implement std::swap in terms of SmallVector swap.
1257 template<typename T>
1258 inline void
1259 swap(llvm::SmallVectorImpl<T> &LHS, llvm::SmallVectorImpl<T> &RHS) {
1260 LHS.swap(RHS);
1261 }
1262
1263 /// Implement std::swap in terms of SmallVector swap.
1264 template<typename T, unsigned N>
1265 inline void
1266 swap(llvm::SmallVector<T, N> &LHS, llvm::SmallVector<T, N> &RHS) {
1267 LHS.swap(RHS);
1268 }
1269
1270} // end namespace std
1271
1272#endif // LLVM_ADT_SMALLVECTOR_H

/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include/llvm/Analysis/CFG.h

1//===-- Analysis/CFG.h - BasicBlock Analyses --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This family of functions performs analyses on basic blocks, and instructions
10// contained within basic blocks.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_CFG_H
15#define LLVM_ANALYSIS_CFG_H
16
17#include "llvm/ADT/GraphTraits.h"
18#include "llvm/ADT/SmallPtrSet.h"
19#include <utility>
20
21namespace llvm {
22
23class BasicBlock;
24class DominatorTree;
25class Function;
26class Instruction;
27class LoopInfo;
28template <typename T> class SmallVectorImpl;
29
30/// Analyze the specified function to find all of the loop backedges in the
31/// function and return them. This is a relatively cheap (compared to
32/// computing dominators and loop info) analysis.
33///
34/// The output is added to Result, as pairs of <from,to> edge info.
35void FindFunctionBackedges(
36 const Function &F,
37 SmallVectorImpl<std::pair<const BasicBlock *, const BasicBlock *> > &
38 Result);
39
40/// Search for the specified successor of basic block BB and return its position
41/// in the terminator instruction's list of successors. It is an error to call
42/// this with a block that is not a successor.
43unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ);
44
45/// Return true if the specified edge is a critical edge. Critical edges are
46/// edges from a block with multiple successors to a block with multiple
47/// predecessors.
48///
49bool isCriticalEdge(const Instruction *TI, unsigned SuccNum,
50 bool AllowIdenticalEdges = false);
51bool isCriticalEdge(const Instruction *TI, const BasicBlock *Succ,
52 bool AllowIdenticalEdges = false);
53
54/// Determine whether instruction 'To' is reachable from 'From', without passing
55/// through any blocks in ExclusionSet, returning true if uncertain.
56///
57/// Determine whether there is a path from From to To within a single function.
58/// Returns false only if we can prove that once 'From' has been executed then
59/// 'To' can not be executed. Conservatively returns true.
60///
61/// This function is linear with respect to the number of blocks in the CFG,
62/// walking down successors from From to reach To, with a fixed threshold.
63/// Using DT or LI allows us to answer more quickly. LI reduces the cost of
64/// an entire loop of any number of blocks to be the same as the cost of a
65/// single block. DT reduces the cost by allowing the search to terminate when
66/// we find a block that dominates the block containing 'To'. DT is most useful
67/// on branchy code but not loops, and LI is most useful on code with loops but
68/// does not help on branchy code outside loops.
69bool isPotentiallyReachable(
70 const Instruction *From, const Instruction *To,
71 const SmallPtrSetImpl<BasicBlock *> *ExclusionSet = nullptr,
72 const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
73
74/// Determine whether block 'To' is reachable from 'From', returning
75/// true if uncertain.
76///
77/// Determine whether there is a path from From to To within a single function.
78/// Returns false only if we can prove that once 'From' has been reached then
79/// 'To' can not be executed. Conservatively returns true.
80bool isPotentiallyReachable(
81 const BasicBlock *From, const BasicBlock *To,
82 const SmallPtrSetImpl<BasicBlock *> *ExclusionSet = nullptr,
83 const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
84
85/// Determine whether there is at least one path from a block in
86/// 'Worklist' to 'StopBB', returning true if uncertain.
87///
88/// Determine whether there is a path from at least one block in Worklist to
89/// StopBB within a single function. Returns false only if we can prove that
90/// once any block in 'Worklist' has been reached then 'StopBB' can not be
91/// executed. Conservatively returns true.
92bool isPotentiallyReachableFromMany(SmallVectorImpl<BasicBlock *> &Worklist,
93 BasicBlock *StopBB,
94 const DominatorTree *DT = nullptr,
95 const LoopInfo *LI = nullptr);
96
97/// Determine whether there is at least one path from a block in
98/// 'Worklist' to 'StopBB' without passing through any blocks in
99/// 'ExclusionSet', returning true if uncertain.
100///
101/// Determine whether there is a path from at least one block in Worklist to
102/// StopBB within a single function without passing through any of the blocks
103/// in 'ExclusionSet'. Returns false only if we can prove that once any block
104/// in 'Worklist' has been reached then 'StopBB' can not be executed.
105/// Conservatively returns true.
106bool isPotentiallyReachableFromMany(
107 SmallVectorImpl<BasicBlock *> &Worklist, BasicBlock *StopBB,
108 const SmallPtrSetImpl<BasicBlock *> *ExclusionSet,
109 const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
110
111/// Return true if the control flow in \p RPOTraversal is irreducible.
112///
113/// This is a generic implementation to detect CFG irreducibility based on loop
114/// info analysis. It can be used for any kind of CFG (Loop, MachineLoop,
115/// Function, MachineFunction, etc.) by providing an RPO traversal (\p
116/// RPOTraversal) and the loop info analysis (\p LI) of the CFG. This utility
117/// function is only recommended when loop info analysis is available. If loop
118/// info analysis isn't available, please, don't compute it explicitly for this
119/// purpose. There are more efficient ways to detect CFG irreducibility that
120/// don't require recomputing loop info analysis (e.g., T1/T2 or Tarjan's
121/// algorithm).
122///
123/// Requirements:
124/// 1) GraphTraits must be implemented for NodeT type. It is used to access
125/// NodeT successors.
126// 2) \p RPOTraversal must be a valid reverse post-order traversal of the
127/// target CFG with begin()/end() iterator interfaces.
128/// 3) \p LI must be a valid LoopInfoBase that contains up-to-date loop
129/// analysis information of the CFG.
130///
131/// This algorithm uses the information about reducible loop back-edges already
132/// computed in \p LI. When a back-edge is found during the RPO traversal, the
133/// algorithm checks whether the back-edge is one of the reducible back-edges in
134/// loop info. If it isn't, the CFG is irreducible. For example, for the CFG
135/// below (canonical irreducible graph) loop info won't contain any loop, so the
136/// algorithm will return that the CFG is irreducible when checking the B <-
137/// -> C back-edge.
138///
139/// (A->B, A->C, B->C, C->B, C->D)
140/// A
141/// / \
142/// B<- ->C
143/// |
144/// D
145///
146template <class NodeT, class RPOTraversalT, class LoopInfoT,
147 class GT = GraphTraits<NodeT>>
148bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI) {
149 /// Check whether the edge (\p Src, \p Dst) is a reducible loop backedge
150 /// according to LI. I.e., check if there exists a loop that contains Src and
151 /// where Dst is the loop header.
152 auto isProperBackedge = [&](NodeT Src, NodeT Dst) {
153 for (const auto *Lp = LI.getLoopFor(Src); Lp; Lp = Lp->getParentLoop()) {
154 if (Lp->getHeader() == Dst)
155 return true;
156 }
157 return false;
158 };
159
160 SmallPtrSet<NodeT, 32> Visited;
161 for (NodeT Node : RPOTraversal) {
162 Visited.insert(Node);
163 for (NodeT Succ : make_range(GT::child_begin(Node), GT::child_end(Node))) {
164 // Succ hasn't been visited yet
165 if (!Visited.count(Succ))
166 continue;
167 // We already visited Succ, thus Node->Succ must be a backedge. Check that
168 // the head matches what we have in the loop information. Otherwise, we
169 // have an irreducible graph.
170 if (!isProperBackedge(Node, Succ))
171 return true;
172 }
173 }
174
175 return false;
15
Returning zero, which participates in a condition later
176}
177} // End llvm namespace
178
179#endif

/build/llvm-toolchain-snapshot-13~++20210613111130+5be314f79ba7/llvm/include/llvm/IR/PatternMatch.h

1//===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides a simple and efficient mechanism for performing general
10// tree-based pattern matches on the LLVM IR. The power of these routines is
11// that it allows you to write concise patterns that are expressive and easy to
12// understand. The other major advantage of this is that it allows you to
13// trivially capture/bind elements in the pattern to variables. For example,
14// you can do something like this:
15//
16// Value *Exp = ...
17// Value *X, *Y; ConstantInt *C1, *C2; // (X & C1) | (Y & C2)
18// if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)),
19// m_And(m_Value(Y), m_ConstantInt(C2))))) {
20// ... Pattern is matched and variables are bound ...
21// }
22//
23// This is primarily useful to things like the instruction combiner, but can
24// also be useful for static analysis tools or code generators.
25//
26//===----------------------------------------------------------------------===//
27
28#ifndef LLVM_IR_PATTERNMATCH_H
29#define LLVM_IR_PATTERNMATCH_H
30
31#include "llvm/ADT/APFloat.h"
32#include "llvm/ADT/APInt.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/InstrTypes.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/IntrinsicInst.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Operator.h"
42#include "llvm/IR/Value.h"
43#include "llvm/Support/Casting.h"
44#include <cstdint>
45
46namespace llvm {
47namespace PatternMatch {
48
/// Entry point of the matcher framework: apply pattern \p P to value \p V.
/// Patterns are taken by const-ref so temporaries can be passed, but a match
/// may bind captured sub-values into the pattern, hence the const_cast.
template <typename Val, typename Pattern> bool match(Val *V, const Pattern &P) {
  return const_cast<Pattern &>(P).match(V);
}
52
53template <typename Pattern> bool match(ArrayRef<int> Mask, const Pattern &P) {
54 return const_cast<Pattern &>(P).match(Mask);
55}
56
/// Matcher wrapper that only matches when the value has exactly one use and
/// the wrapped sub-pattern also matches it.
template <typename SubPattern_t> struct OneUse_match {
  SubPattern_t SubPattern;

  OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}

  template <typename OpTy> bool match(OpTy *V) {
    return V->hasOneUse() && SubPattern.match(V);
  }
};
66
67template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
68 return SubPattern;
69}
70
71template <typename Class> struct class_match {
72 template <typename ITy> bool match(ITy *V) { return isa<Class>(V); }
73};
74
75/// Match an arbitrary value and ignore it.
76inline class_match<Value> m_Value() { return class_match<Value>(); }
77
78/// Match an arbitrary unary operation and ignore it.
79inline class_match<UnaryOperator> m_UnOp() {
80 return class_match<UnaryOperator>();
81}
82
83/// Match an arbitrary binary operation and ignore it.
84inline class_match<BinaryOperator> m_BinOp() {
85 return class_match<BinaryOperator>();
86}
87
88/// Matches any compare instruction and ignore it.
89inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); }
90
91struct undef_match {
92 static bool check(const Value *V) {
93 if (isa<UndefValue>(V))
94 return true;
95
96 const auto *CA = dyn_cast<ConstantAggregate>(V);
97 if (!CA)
98 return false;
99
100 SmallPtrSet<const ConstantAggregate *, 8> Seen;
101 SmallVector<const ConstantAggregate *, 8> Worklist;
102
103 // Either UndefValue, PoisonValue, or an aggregate that only contains
104 // these is accepted by matcher.
105 // CheckValue returns false if CA cannot satisfy this constraint.
106 auto CheckValue = [&](const ConstantAggregate *CA) {
107 for (const Value *Op : CA->operand_values()) {
108 if (isa<UndefValue>(Op))
109 continue;
110
111 const auto *CA = dyn_cast<ConstantAggregate>(Op);
112 if (!CA)
113 return false;
114 if (Seen.insert(CA).second)
115 Worklist.emplace_back(CA);
116 }
117
118 return true;
119 };
120
121 if (!CheckValue(CA))
122 return false;
123
124 while (!Worklist.empty()) {
125 if (!CheckValue(Worklist.pop_back_val()))
126 return false;
127 }
128 return true;
129 }
130 template <typename ITy> bool match(ITy *V) { return check(V); }
131};
132
133/// Match an arbitrary undef constant. This matches poison as well.
134/// If this is an aggregate and contains a non-aggregate element that is
135/// neither undef nor poison, the aggregate is not matched.
136inline auto m_Undef() { return undef_match(); }
137
138/// Match an arbitrary poison constant.
139inline class_match<PoisonValue> m_Poison() { return class_match<PoisonValue>(); }
140
141/// Match an arbitrary Constant and ignore it.
142inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
143
144/// Match an arbitrary ConstantInt and ignore it.