Bug Summary

File:build/source/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
Warning:line 2954, column 21
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name SimpleLoopUnswitch.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Transforms/Scalar -I /build/source/llvm/lib/Transforms/Scalar -I include -I /build/source/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include 
-fmacro-prefix-map=/build/source/build-llvm=build-llvm -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm=build-llvm -fcoverage-prefix-map=/build/source/= -source-date-epoch 1668078801 -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-11-10-135928-647445-1 -x c++ /build/source/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

/build/source/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
14#include "llvm/ADT/SmallPtrSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
18#include "llvm/Analysis/AssumptionCache.h"
19#include "llvm/Analysis/BlockFrequencyInfo.h"
20#include "llvm/Analysis/CFG.h"
21#include "llvm/Analysis/CodeMetrics.h"
22#include "llvm/Analysis/GuardUtils.h"
23#include "llvm/Analysis/LoopAnalysisManager.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/LoopIterator.h"
26#include "llvm/Analysis/LoopPass.h"
27#include "llvm/Analysis/MemorySSA.h"
28#include "llvm/Analysis/MemorySSAUpdater.h"
29#include "llvm/Analysis/MustExecute.h"
30#include "llvm/Analysis/ProfileSummaryInfo.h"
31#include "llvm/Analysis/ScalarEvolution.h"
32#include "llvm/Analysis/TargetTransformInfo.h"
33#include "llvm/Analysis/ValueTracking.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/Constant.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/Dominators.h"
38#include "llvm/IR/Function.h"
39#include "llvm/IR/IRBuilder.h"
40#include "llvm/IR/InstrTypes.h"
41#include "llvm/IR/Instruction.h"
42#include "llvm/IR/Instructions.h"
43#include "llvm/IR/IntrinsicInst.h"
44#include "llvm/IR/PatternMatch.h"
45#include "llvm/IR/Use.h"
46#include "llvm/IR/Value.h"
47#include "llvm/InitializePasses.h"
48#include "llvm/Pass.h"
49#include "llvm/Support/Casting.h"
50#include "llvm/Support/CommandLine.h"
51#include "llvm/Support/Debug.h"
52#include "llvm/Support/ErrorHandling.h"
53#include "llvm/Support/GenericDomTree.h"
54#include "llvm/Support/InstructionCost.h"
55#include "llvm/Support/raw_ostream.h"
56#include "llvm/Transforms/Scalar/LoopPassManager.h"
57#include "llvm/Transforms/Utils/BasicBlockUtils.h"
58#include "llvm/Transforms/Utils/Cloning.h"
59#include "llvm/Transforms/Utils/Local.h"
60#include "llvm/Transforms/Utils/LoopUtils.h"
61#include "llvm/Transforms/Utils/ValueMapper.h"
62#include <algorithm>
63#include <cassert>
64#include <iterator>
65#include <numeric>
66#include <utility>
67
// Debug category name: the LLVM_DEBUG output of this file is filtered on it and
// the statistics below are registered under it.
68#define DEBUG_TYPE"simple-loop-unswitch" "simple-loop-unswitch"
69
70using namespace llvm;
71using namespace llvm::PatternMatch;
72
// Pass statistics. In this annotated listing each STATISTIC(...) invocation is
// followed by the text of its expansion, a static llvm::Statistic object that
// carries the debug category, the counter name and its description.
73STATISTIC(NumBranches, "Number of branches unswitched")static llvm::Statistic NumBranches = {"simple-loop-unswitch",
"NumBranches", "Number of branches unswitched"}
;
74STATISTIC(NumSwitches, "Number of switches unswitched")static llvm::Statistic NumSwitches = {"simple-loop-unswitch",
"NumSwitches", "Number of switches unswitched"}
;
75STATISTIC(NumGuards, "Number of guards turned into branches for unswitching")static llvm::Statistic NumGuards = {"simple-loop-unswitch", "NumGuards"
, "Number of guards turned into branches for unswitching"}
;
76STATISTIC(NumTrivial, "Number of unswitches that are trivial")static llvm::Statistic NumTrivial = {"simple-loop-unswitch", "NumTrivial"
, "Number of unswitches that are trivial"}
;
// This invocation spans three source lines, so its expansion text is repeated
// after each of them.
77STATISTIC(static llvm::Statistic NumCostMultiplierSkipped = {"simple-loop-unswitch"
, "NumCostMultiplierSkipped", "Number of unswitch candidates that had their cost multiplier skipped"
}
78 NumCostMultiplierSkipped,static llvm::Statistic NumCostMultiplierSkipped = {"simple-loop-unswitch"
, "NumCostMultiplierSkipped", "Number of unswitch candidates that had their cost multiplier skipped"
}
79 "Number of unswitch candidates that had their cost multiplier skipped")static llvm::Statistic NumCostMultiplierSkipped = {"simple-loop-unswitch"
, "NumCostMultiplierSkipped", "Number of unswitch candidates that had their cost multiplier skipped"
}
;
80
// Hidden (cl::Hidden) command-line options for this pass. The cl::init value is
// each option's default and the cl::desc string states what it controls.

// Default: false.
81static cl::opt<bool> EnableNonTrivialUnswitch(
82 "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
83 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
84 "following the configuration passed into the pass."));
85
// Default: 50.
86static cl::opt<int>
87 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
88 cl::desc("The cost threshold for unswitching a loop."));
89
// Default: true.
90static cl::opt<bool> EnableUnswitchCostMultiplier(
91 "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
92 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
93 "explosion in nontrivial unswitch."));
// Default: 2.
94static cl::opt<int> UnswitchSiblingsToplevelDiv(
95 "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
96 cl::desc("Toplevel siblings divisor for cost multiplier."));
// Default: 8.
97static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
98 "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
99 cl::desc("Number of unswitch candidates that are ignored when calculating "
100 "cost multiplier."));
// Default: true.
101static cl::opt<bool> UnswitchGuards(
102 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
103 cl::desc("If enabled, simple loop unswitching will also consider "
104 "llvm.experimental.guard intrinsics as unswitch candidates."));
// Default: false.
105static cl::opt<bool> DropNonTrivialImplicitNullChecks(
106 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
107 cl::init(false), cl::Hidden,
108 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
109 "null checks to save time analyzing if we can keep it."));
// Default: 100.
110static cl::opt<unsigned>
111 MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
112 cl::desc("Max number of memory uses to explore during "
113 "partial unswitching analysis"),
114 cl::init(100), cl::Hidden);
// Default: true.
115static cl::opt<bool> FreezeLoopUnswitchCond(
116 "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
117 cl::desc("If enabled, the freeze instruction will be added to condition "
118 "of loop unswitch to prevent miscompilation."));
119
120namespace {
/// Record describing one candidate for non-trivial unswitching: an
/// instruction, a list of invariant values associated with it, and an optional
/// cost.
121struct NonTrivialUnswitchCandidate {
// The candidate instruction (presumably the terminator to unswitch, going by
// the field name -- confirm against the users of this struct).
122 Instruction *TI = nullptr;
// Values stored alongside TI; copied from the constructor argument.
123 TinyPtrVector<Value *> Invariants;
// Cost of the candidate; stays None unless the constructor is given one.
124 Optional<InstructionCost> Cost;
125 NonTrivialUnswitchCandidate(Instruction *TI, ArrayRef<Value *> Invariants,
126 Optional<InstructionCost> Cost = None)
127 : TI(TI), Invariants(Invariants), Cost(Cost) {};
128};
129} // end anonymous namespace.
130
131// Helper to skip (select x, true, false), which matches both a logical AND and
132// OR and can confuse code that tries to determine if \p Cond is either a
133// logical AND or OR but not both.
// Such selects are peeled repeatedly and the innermost condition operand is
// returned; a \p Cond that does not match the pattern is returned unchanged.
134static Value *skipTrivialSelect(Value *Cond) {
135 Value *CondNext;
136 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
137 Cond = CondNext;
138 return Cond;
139}
140
141/// Collect all of the loop invariant input values transitively used by the
142/// homogeneous instruction graph from a given root.
143///
144/// This essentially walks from a root recursively through loop variant operands
145/// which perform the same logical operation (AND or OR) and finds all
146/// inputs which are loop invariant. For some operations these can be
147/// re-associated and unswitched out of the loop entirely.
///
/// \p Root must not itself be invariant in \p L (asserted). Constant operands
/// are skipped. \p LI is not referenced by the body. The result is not
/// de-duplicated, so a value used by several visited instructions can appear
/// more than once.
148static TinyPtrVector<Value *>
149collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root,
150 const LoopInfo &LI) {
151 assert(!L.isLoopInvariant(&Root) &&(static_cast <bool> (!L.isLoopInvariant(&Root) &&
"Only need to walk the graph if root itself is not invariant."
) ? void (0) : __assert_fail ("!L.isLoopInvariant(&Root) && \"Only need to walk the graph if root itself is not invariant.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 152, __extension__
__PRETTY_FUNCTION__))
152 "Only need to walk the graph if root itself is not invariant.")(static_cast <bool> (!L.isLoopInvariant(&Root) &&
"Only need to walk the graph if root itself is not invariant."
) ? void (0) : __assert_fail ("!L.isLoopInvariant(&Root) && \"Only need to walk the graph if root itself is not invariant.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 152, __extension__
__PRETTY_FUNCTION__))
;
153 TinyPtrVector<Value *> Invariants;
154
// Record which logical form the root has; only operands of that same form are
// walked further below.
155 bool IsRootAnd = match(&Root, m_LogicalAnd());
156 bool IsRootOr = match(&Root, m_LogicalOr());
157
158 // Build a worklist and recurse through operators collecting invariants.
// Visited keeps an instruction from being queued more than once.
159 SmallVector<Instruction *, 4> Worklist;
160 SmallPtrSet<Instruction *, 8> Visited;
161 Worklist.push_back(&Root);
162 Visited.insert(&Root);
163 do {
164 Instruction &I = *Worklist.pop_back_val();
165 for (Value *OpV : I.operand_values()) {
166 // Skip constants as unswitching isn't interesting for them.
167 if (isa<Constant>(OpV))
168 continue;
169
170 // Add it to our result if loop invariant.
171 if (L.isLoopInvariant(OpV)) {
172 Invariants.push_back(OpV);
173 continue;
174 }
175
176 // If not an instruction with the same opcode, nothing we can do.
// Trivial selects are looked through before the operand's form is tested.
177 Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV));
178
179 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
180 (IsRootOr && match(OpI, m_LogicalOr())))) {
181 // Visit this operand.
182 if (Visited.insert(OpI).second)
183 Worklist.push_back(OpI);
184 }
185 }
186 } while (!Worklist.empty());
187
188 return Invariants;
189}
190
/// Replace every use of \p Invariant whose user is an instruction contained in
/// \p L with \p Replacement. Uses by non-instructions or by instructions
/// outside the loop are left untouched. \p Invariant must not be a constant
/// (asserted).
191static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
192 Constant &Replacement) {
193 assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?")(static_cast <bool> (!isa<Constant>(Invariant) &&
"Why are we unswitching on a constant?") ? void (0) : __assert_fail
("!isa<Constant>(Invariant) && \"Why are we unswitching on a constant?\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 193, __extension__
__PRETTY_FUNCTION__))
;
194
195 // Replace uses of the invariant in the loop with the given constant.
196 // We use make_early_inc_range as set invalidates the iterator.
197 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
198 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
199
200 // Replace this use within the loop body.
201 if (UserI && L.contains(UserI))
202 U.set(&Replacement);
203 }
204}
205
206/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
207/// incoming values along this edge.
///
/// Walks the leading PHI nodes of \p ExitBB and returns false as soon as one
/// has an incoming value for \p ExitingBB that is not invariant in \p L.
/// Returns true on reaching the first non-PHI instruction.
208static bool areLoopExitPHIsLoopInvariant(const Loop &L,
209 const BasicBlock &ExitingBB,
210 const BasicBlock &ExitBB) {
211 for (const Instruction &I : ExitBB) {
212 auto *PN = dyn_cast<PHINode>(&I);
213 if (!PN)
214 // No more PHIs to check.
215 return true;
216
217 // If the incoming value for this edge isn't loop invariant the unswitch
218 // won't be trivial.
219 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
220 return false;
221 }
// Only reached if the block held no non-PHI instruction at all.
222 llvm_unreachable("Basic blocks should never be empty!")::llvm::llvm_unreachable_internal("Basic blocks should never be empty!"
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 222)
;
223}
224
225/// Combine the loop invariant values \p Invariants into one condition built at
226/// the end of \p BB and conditionally branch on that condition. We only
227/// branch on a single value.
///
/// When \p InsertFreeze is set, each invariant that
/// isGuaranteedNotToBeUndefOrPoison() does not accept (queried with \p AC,
/// \p I and \p DT) is wrapped in a freeze named "<name>.fr". With \p Direction
/// true the values are OR'ed and the true edge goes to \p UnswitchedSucc;
/// otherwise they are AND'ed and the true edge goes to \p NormalSucc.
228static void buildPartialUnswitchConditionalBranch(
229 BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
230 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
231 const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
232 IRBuilder<> IRB(&BB);
233
// Freeze the invariants that need it, keeping the original order.
234 SmallVector<Value *> FrozenInvariants;
235 for (Value *Inv : Invariants) {
236 if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
237 Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
238 FrozenInvariants.push_back(Inv);
239 }
240
241 Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
242 : IRB.CreateAnd(FrozenInvariants);
243 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
244 Direction ? &NormalSucc : &UnswitchedSucc);
245}
246
247/// Copy a set of loop invariant values, and conditionally branch on them.
///
/// Every element of \p ToDuplicate must be an Instruction (cast<> is used).
/// The instructions are cloned in reverse order and appended to \p BB, with
/// operands remapped to the clones made so far. The branch condition is the
/// clone of ToDuplicate[0]; with \p Direction true the true edge goes to
/// \p UnswitchedSucc, otherwise to \p NormalSucc. When \p MSSAU is non-null, a
/// clone whose original has a MemoryUse gets a new memory access as well.
248static void buildPartialInvariantUnswitchConditionalBranch(
249 BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
250 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
251 MemorySSAUpdater *MSSAU) {
// Maps each original instruction to its clone in BB.
252 ValueToValueMapTy VMap;
253 for (auto *Val : reverse(ToDuplicate)) {
254 Instruction *Inst = cast<Instruction>(Val);
255 Instruction *NewInst = Inst->clone();
256 BB.getInstList().insert(BB.end(), NewInst);
257 RemapInstruction(NewInst, VMap,
258 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
259 VMap[Val] = NewInst;
260
// Nothing further to do for this clone when MemorySSA is not being updated.
261 if (!MSSAU)
262 continue;
263
264 MemorySSA *MSSA = MSSAU->getMemorySSA();
265 if (auto *MemUse =
266 dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
267 auto *DefiningAccess = MemUse->getDefiningAccess();
268 // Get the first defining access before the loop.
269 while (L.contains(DefiningAccess->getBlock())) {
270 // If the defining access is a MemoryPhi, get the incoming
271 // value for the pre-header as defining access.
272 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
273 DefiningAccess =
274 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
275 else
276 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
277 }
278 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
279 NewInst->getParent(),
280 MemorySSA::BeforeTerminator);
281 }
282 }
283
284 IRBuilder<> IRB(&BB);
// The first element of ToDuplicate supplies the branch condition.
285 Value *Cond = VMap[ToDuplicate[0]];
286 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
287 Direction ? &NormalSucc : &UnswitchedSucc);
288}
289
290/// Rewrite the PHI nodes in an unswitched loop exit basic block.
291///
292/// Requires that the loop exit and unswitched basic block are the same, and
293/// that the exiting block was a unique predecessor of that block. Rewrites the
294/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
295/// PHI nodes from the old preheader that now contains the unswitched
296/// terminator.
///
/// Every incoming block of every PHI in \p UnswitchedBB must be
/// \p OldExitingBB (asserted); each one is replaced by \p OldPH.
297static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
298 BasicBlock &OldExitingBB,
299 BasicBlock &OldPH) {
300 for (PHINode &PN : UnswitchedBB.phis()) {
301 // When the loop exit is directly unswitched we just need to update the
302 // incoming basic block. We loop to handle weird cases with repeated
303 // incoming blocks, but expect to typically only have one operand here.
304 for (auto i : seq<int>(0, PN.getNumOperands())) {
305 assert(PN.getIncomingBlock(i) == &OldExitingBB &&(static_cast <bool> (PN.getIncomingBlock(i) == &OldExitingBB
&& "Found incoming block different from unique predecessor!"
) ? void (0) : __assert_fail ("PN.getIncomingBlock(i) == &OldExitingBB && \"Found incoming block different from unique predecessor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 306, __extension__
__PRETTY_FUNCTION__))
306 "Found incoming block different from unique predecessor!")(static_cast <bool> (PN.getIncomingBlock(i) == &OldExitingBB
&& "Found incoming block different from unique predecessor!"
) ? void (0) : __assert_fail ("PN.getIncomingBlock(i) == &OldExitingBB && \"Found incoming block different from unique predecessor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 306, __extension__
__PRETTY_FUNCTION__))
;
307 PN.setIncomingBlock(i, &OldPH);
308 }
309 }
310}
311
312/// Rewrite the PHI nodes in the loop exit basic block and the split off
313/// unswitched block.
314///
315/// Because the exit block remains an exit from the loop, this rewrites the
316/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
317/// nodes into the unswitched basic block to select between the value in the
318/// old preheader and the loop exit.
///
/// \p ExitBB and \p UnswitchedBB must be different blocks (asserted). The
/// entries for \p OldExitingBB are removed from the old PHIs only when
/// \p FullUnswitch is true; otherwise the old PHIs keep them.
319static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
320 BasicBlock &UnswitchedBB,
321 BasicBlock &OldExitingBB,
322 BasicBlock &OldPH,
323 bool FullUnswitch) {
324 assert(&ExitBB != &UnswitchedBB &&(static_cast <bool> (&ExitBB != &UnswitchedBB &&
"Must have different loop exit and unswitched blocks!") ? void
(0) : __assert_fail ("&ExitBB != &UnswitchedBB && \"Must have different loop exit and unswitched blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 325, __extension__
__PRETTY_FUNCTION__))
325 "Must have different loop exit and unswitched blocks!")(static_cast <bool> (&ExitBB != &UnswitchedBB &&
"Must have different loop exit and unswitched blocks!") ? void
(0) : __assert_fail ("&ExitBB != &UnswitchedBB && \"Must have different loop exit and unswitched blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 325, __extension__
__PRETTY_FUNCTION__))
;
// The first instruction of UnswitchedBB is the insertion point handed to every
// new PHI created below.
326 Instruction *InsertPt = &*UnswitchedBB.begin();
327 for (PHINode &PN : ExitBB.phis()) {
328 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
329 PN.getName() + ".split", InsertPt);
330
331 // Walk backwards over the old PHI node's inputs to minimize the cost of
332 // removing each one. We have to do this weird loop manually so that we
333 // create the same number of new incoming edges in the new PHI as we expect
334 // each case-based edge to be included in the unswitched switch in some
335 // cases.
336 // FIXME: This is really, really gross. It would be much cleaner if LLVM
337 // allowed us to create a single entry for a predecessor block without
338 // having separate entries for each "edge" even though these edges are
339 // required to produce identical results.
340 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
341 if (PN.getIncomingBlock(i) != &OldExitingBB)
342 continue;
343
// Read the value before the entry is (possibly) removed from the old PHI.
344 Value *Incoming = PN.getIncomingValue(i);
345 if (FullUnswitch)
346 // No more edge from the old exiting block to the exit block.
347 PN.removeIncomingValue(i);
348
349 NewPN->addIncoming(Incoming, &OldPH);
350 }
351
352 // Now replace the old PHI with the new one and wire the old one in as an
353 // input to the new one.
354 PN.replaceAllUsesWith(NewPN);
355 NewPN->addIncoming(&PN, &ExitBB);
356 }
357}
358
359/// Hoist the current loop up to the innermost loop containing a remaining exit.
360///
361/// Because we've removed an exit from the loop, we may have changed the set of
362/// loops reachable and need to move the current loop up the loop nest or even
363/// to an entirely separate nest.
///
/// Does nothing when \p L has no parent loop or when the computed new parent
/// equals the current one. Otherwise \p L and \p Preheader are re-registered
/// in \p LI, and LCSSA form and dedicated exit blocks are re-formed for every
/// loop that no longer contains \p L. \p SE is passed to formLCSSA and
/// \p MSSAU to formDedicatedExitBlocks.
364static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
365 DominatorTree &DT, LoopInfo &LI,
366 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
367 // If the loop is already at the top level, we can't hoist it anywhere.
368 Loop *OldParentL = L.getParentLoop();
369 if (!OldParentL)
370 return;
371
// Choose the new parent among the loops of L's exit blocks: a candidate
// replaces the current choice when the current choice contains it. The result
// stays null when no exit block lies in any loop.
372 SmallVector<BasicBlock *, 4> Exits;
373 L.getExitBlocks(Exits);
374 Loop *NewParentL = nullptr;
375 for (auto *ExitBB : Exits)
376 if (Loop *ExitL = LI.getLoopFor(ExitBB))
377 if (!NewParentL || NewParentL->contains(ExitL))
378 NewParentL = ExitL;
379
380 if (NewParentL == OldParentL)
381 return;
382
383 // The new parent loop (if different) should always contain the old one.
384 if (NewParentL)
385 assert(NewParentL->contains(OldParentL) &&(static_cast <bool> (NewParentL->contains(OldParentL
) && "Can only hoist this loop up the nest!") ? void (
0) : __assert_fail ("NewParentL->contains(OldParentL) && \"Can only hoist this loop up the nest!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 386, __extension__
__PRETTY_FUNCTION__))
386 "Can only hoist this loop up the nest!")(static_cast <bool> (NewParentL->contains(OldParentL
) && "Can only hoist this loop up the nest!") ? void (
0) : __assert_fail ("NewParentL->contains(OldParentL) && \"Can only hoist this loop up the nest!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 386, __extension__
__PRETTY_FUNCTION__))
;
387
388 // The preheader will need to move with the body of this loop. However,
389 // because it isn't in this loop we also need to update the primary loop map.
390 assert(OldParentL == LI.getLoopFor(&Preheader) &&(static_cast <bool> (OldParentL == LI.getLoopFor(&Preheader
) && "Parent loop of this loop should contain this loop's preheader!"
) ? void (0) : __assert_fail ("OldParentL == LI.getLoopFor(&Preheader) && \"Parent loop of this loop should contain this loop's preheader!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 391, __extension__
__PRETTY_FUNCTION__))
391 "Parent loop of this loop should contain this loop's preheader!")(static_cast <bool> (OldParentL == LI.getLoopFor(&Preheader
) && "Parent loop of this loop should contain this loop's preheader!"
) ? void (0) : __assert_fail ("OldParentL == LI.getLoopFor(&Preheader) && \"Parent loop of this loop should contain this loop's preheader!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 391, __extension__
__PRETTY_FUNCTION__))
;
392 LI.changeLoopFor(&Preheader, NewParentL);
393
394 // Remove this loop from its old parent.
395 OldParentL->removeChildLoop(&L);
396
397 // Add the loop either to the new parent or as a top-level loop.
398 if (NewParentL)
399 NewParentL->addChildLoop(&L);
400 else
401 LI.addTopLevelLoop(&L);
402
403 // Remove this loops blocks from the old parent and every other loop up the
404 // nest until reaching the new parent. Also update all of these
405 // no-longer-containing loops to reflect the nesting change.
406 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
407 OldContainingL = OldContainingL->getParentLoop()) {
// Both the block vector and the block set of the loop are purged of the
// preheader and of L's blocks.
408 llvm::erase_if(OldContainingL->getBlocksVector(),
409 [&](const BasicBlock *BB) {
410 return BB == &Preheader || L.contains(BB);
411 });
412
413 OldContainingL->getBlocksSet().erase(&Preheader);
414 for (BasicBlock *BB : L.blocks())
415 OldContainingL->getBlocksSet().erase(BB);
416
417 // Because we just hoisted a loop out of this one, we have essentially
418 // created new exit paths from it. That means we need to form LCSSA PHI
419 // nodes for values used in the no-longer-nested loop.
420 formLCSSA(*OldContainingL, DT, &LI, SE);
421
422 // We shouldn't need to form dedicated exits because the exit introduced
423 // here is the (just split by unswitching) preheader. However, after trivial
424 // unswitching it is possible to get new non-dedicated exits out of parent
425 // loop so let's conservatively form dedicated exit blocks and figure out
426 // if we can optimize later.
427 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
428 /*PreserveLCSSA*/ true);
429 }
430}
431
432// Return the top-most loop containing ExitBB and having ExitBB as exiting block
433// or the loop containing ExitBB, if there is no parent loop containing ExitBB
434// as exiting block.
// If LI.getLoopFor(ExitBB) yields null, the walk below does not run and that
// null is returned.
435static const Loop *getTopMostExitingLoop(const BasicBlock *ExitBB,
436 const LoopInfo &LI) {
437 const Loop *TopMost = LI.getLoopFor(ExitBB);
438 const Loop *Current = TopMost;
// Walk outwards through the parent loops; the last loop for which ExitBB is
// an exiting block wins.
439 while (Current) {
440 if (Current->isLoopExiting(ExitBB))
441 TopMost = Current;
442 Current = Current->getParentLoop();
443 }
444 return TopMost;
445}
446
447/// Unswitch a trivial branch if the condition is loop invariant.
448///
449/// This routine should only be called when loop code leading to the branch has
450/// been validated as trivial (no side effects). This routine checks if the
451/// condition is invariant and one of the successors is a loop exit. This
452/// allows us to unswitch without duplicating the loop, making it trivial.
453///
454/// If this routine fails to unswitch the branch it returns false.
455///
456/// If the branch can be unswitched, this routine splits the preheader and
457/// hoists the branch above that split. Preserves loop simplified form
458/// (splitting the exit block as necessary). It simplifies the branch within
459/// the loop to an unconditional branch but doesn't remove it entirely. Further
460/// cleanup can be done with some simplifycfg like pass.
461///
462/// If `SE` is not null, it will be updated based on the potential loop SCEVs
463/// invalidated by this.
464static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
465 LoopInfo &LI, ScalarEvolution *SE,
466 MemorySSAUpdater *MSSAU) {
467 assert(BI.isConditional() && "Can only unswitch a conditional branch!")(static_cast <bool> (BI.isConditional() && "Can only unswitch a conditional branch!"
) ? void (0) : __assert_fail ("BI.isConditional() && \"Can only unswitch a conditional branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 467, __extension__
__PRETTY_FUNCTION__))
;
468 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Trying to unswitch branch: "
<< BI << "\n"; } } while (false)
;
469
470 // The loop invariant values that we want to unswitch.
471 TinyPtrVector<Value *> Invariants;
472
473 // When true, we're fully unswitching the branch rather than just unswitching
474 // some input conditions to the branch.
475 bool FullUnswitch = false;
476
477 Value *Cond = skipTrivialSelect(BI.getCondition());
478 if (L.isLoopInvariant(Cond)) {
479 Invariants.push_back(Cond);
480 FullUnswitch = true;
481 } else {
482 if (auto *CondInst = dyn_cast<Instruction>(Cond))
483 Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
484 if (Invariants.empty()) {
485 LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Couldn't find invariant inputs!\n"
; } } while (false)
;
486 return false;
487 }
488 }
489
490 // Check that one of the branch's successors exits, and which one.
491 bool ExitDirection = true;
492 int LoopExitSuccIdx = 0;
493 auto *LoopExitBB = BI.getSuccessor(0);
494 if (L.contains(LoopExitBB)) {
495 ExitDirection = false;
496 LoopExitSuccIdx = 1;
497 LoopExitBB = BI.getSuccessor(1);
498 if (L.contains(LoopExitBB)) {
499 LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Branch doesn't exit the loop!\n"
; } } while (false)
;
500 return false;
501 }
502 }
503 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
504 auto *ParentBB = BI.getParent();
505 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
506 LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Loop exit PHI's aren't loop-invariant!\n"
; } } while (false)
;
507 return false;
508 }
509
510 // When unswitching only part of the branch's condition, we need the exit
511 // block to be reached directly from the partially unswitched input. This can
512 // be done when the exit block is along the true edge and the branch condition
513 // is a graph of `or` operations, or the exit block is along the false edge
514 // and the condition is a graph of `and` operations.
515 if (!FullUnswitch) {
516 if (ExitDirection ? !match(Cond, m_LogicalOr())
517 : !match(Cond, m_LogicalAnd())) {
518 LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Branch condition is in improper form for "
"non-full unswitch!\n"; } } while (false)
519 "non-full unswitch!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Branch condition is in improper form for "
"non-full unswitch!\n"; } } while (false)
;
520 return false;
521 }
522 }
523
524 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
525 dbgs() << " unswitching trivial invariant conditions for: " << BIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
526 << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
527 for (Value *Invariant : Invariants) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
528 dbgs() << " " << *Invariant << " == true";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
529 if (Invariant != Invariants.back())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
530 dbgs() << " ||";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
531 dbgs() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
532 }do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
533 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
;
534
535 // If we have scalar evolutions, we need to invalidate them including this
536 // loop, the loop containing the exit block and the topmost parent loop
537 // exiting via LoopExitBB.
538 if (SE) {
539 if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
540 SE->forgetLoop(ExitL);
541 else
542 // Forget the entire nest as this exits the entire nest.
543 SE->forgetTopmostLoop(&L);
544 SE->forgetBlockAndLoopDispositions();
545 }
546
547 if (MSSAU && VerifyMemorySSA)
548 MSSAU->getMemorySSA()->verifyMemorySSA();
549
550 // Split the preheader, so that we know that there is a safe place to insert
551 // the conditional branch. We will change the preheader to have a conditional
552 // branch on LoopCond.
553 BasicBlock *OldPH = L.getLoopPreheader();
554 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
555
556 // Now that we have a place to insert the conditional branch, create a place
557 // to branch to: this is the exit block out of the loop that we are
558 // unswitching. We need to split this if there are other loop predecessors.
559 // Because the loop is in simplified form, *any* other predecessor is enough.
560 BasicBlock *UnswitchedBB;
561 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
562 assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&(static_cast <bool> (LoopExitBB->getUniquePredecessor
() == BI.getParent() && "A branch's parent isn't a predecessor!"
) ? void (0) : __assert_fail ("LoopExitBB->getUniquePredecessor() == BI.getParent() && \"A branch's parent isn't a predecessor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 563, __extension__
__PRETTY_FUNCTION__))
563 "A branch's parent isn't a predecessor!")(static_cast <bool> (LoopExitBB->getUniquePredecessor
() == BI.getParent() && "A branch's parent isn't a predecessor!"
) ? void (0) : __assert_fail ("LoopExitBB->getUniquePredecessor() == BI.getParent() && \"A branch's parent isn't a predecessor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 563, __extension__
__PRETTY_FUNCTION__))
;
564 UnswitchedBB = LoopExitBB;
565 } else {
566 UnswitchedBB =
567 SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
568 }
569
570 if (MSSAU && VerifyMemorySSA)
571 MSSAU->getMemorySSA()->verifyMemorySSA();
572
573 // Actually move the invariant uses into the unswitched position. If possible,
574 // we do this by moving the instructions, but when doing partial unswitching
575 // we do it by building a new merge of the values in the unswitched position.
576 OldPH->getTerminator()->eraseFromParent();
577 if (FullUnswitch) {
578 // If fully unswitching, we can use the existing branch instruction.
579 // Splice it into the old PH to gate reaching the new preheader and re-point
580 // its successors.
581 OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
582 BI);
583 BI.setCondition(Cond);
584 if (MSSAU) {
585 // Temporarily clone the terminator, to make MSSA update cheaper by
586 // separating "insert edge" updates from "remove edge" ones.
587 ParentBB->getInstList().push_back(BI.clone());
588 } else {
589 // Create a new unconditional branch that will continue the loop as a new
590 // terminator.
591 BranchInst::Create(ContinueBB, ParentBB);
592 }
593 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
594 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
595 } else {
596 // Only unswitching a subset of inputs to the condition, so we will need to
597 // build a new branch that merges the invariant inputs.
598 if (ExitDirection)
599 assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) &&(static_cast <bool> (match(skipTrivialSelect(BI.getCondition
()), m_LogicalOr()) && "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
"condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) && \"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the \" \"condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 601, __extension__
__PRETTY_FUNCTION__))
600 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "(static_cast <bool> (match(skipTrivialSelect(BI.getCondition
()), m_LogicalOr()) && "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
"condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) && \"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the \" \"condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 601, __extension__
__PRETTY_FUNCTION__))
601 "condition!")(static_cast <bool> (match(skipTrivialSelect(BI.getCondition
()), m_LogicalOr()) && "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
"condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) && \"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the \" \"condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 601, __extension__
__PRETTY_FUNCTION__))
;
602 else
603 assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) &&(static_cast <bool> (match(skipTrivialSelect(BI.getCondition
()), m_LogicalAnd()) && "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
" condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) && \"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the\" \" condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 605, __extension__
__PRETTY_FUNCTION__))
604 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"(static_cast <bool> (match(skipTrivialSelect(BI.getCondition
()), m_LogicalAnd()) && "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
" condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) && \"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the\" \" condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 605, __extension__
__PRETTY_FUNCTION__))
605 " condition!")(static_cast <bool> (match(skipTrivialSelect(BI.getCondition
()), m_LogicalAnd()) && "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
" condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) && \"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the\" \" condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 605, __extension__
__PRETTY_FUNCTION__))
;
606 buildPartialUnswitchConditionalBranch(
607 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
608 FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
609 }
610
611 // Update the dominator tree with the added edge.
612 DT.insertEdge(OldPH, UnswitchedBB);
613
614 // After the dominator tree was updated with the added edge, update MemorySSA
615 // if available.
616 if (MSSAU) {
617 SmallVector<CFGUpdate, 1> Updates;
618 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
619 MSSAU->applyInsertUpdates(Updates, DT);
620 }
621
622 // Finish updating dominator tree and memory ssa for full unswitch.
623 if (FullUnswitch) {
624 if (MSSAU) {
625 // Remove the cloned branch instruction.
626 ParentBB->getTerminator()->eraseFromParent();
627 // Create unconditional branch now.
628 BranchInst::Create(ContinueBB, ParentBB);
629 MSSAU->removeEdge(ParentBB, LoopExitBB);
630 }
631 DT.deleteEdge(ParentBB, LoopExitBB);
632 }
633
634 if (MSSAU && VerifyMemorySSA)
635 MSSAU->getMemorySSA()->verifyMemorySSA();
636
637 // Rewrite the relevant PHI nodes.
638 if (UnswitchedBB == LoopExitBB)
639 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
640 else
641 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
642 *ParentBB, *OldPH, FullUnswitch);
643
644 // The constant we can replace all of our invariants with inside the loop
645 // body. If any of the invariants have a value other than this the loop won't
646 // be entered.
647 ConstantInt *Replacement = ExitDirection
648 ? ConstantInt::getFalse(BI.getContext())
649 : ConstantInt::getTrue(BI.getContext());
650
651 // Since this is an i1 condition we can also trivially replace uses of it
652 // within the loop with a constant.
653 for (Value *Invariant : Invariants)
654 replaceLoopInvariantUses(L, Invariant, *Replacement);
655
656 // If this was full unswitching, we may have changed the nesting relationship
657 // for this loop so hoist it to its correct parent if needed.
658 if (FullUnswitch)
659 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
660
661 if (MSSAU && VerifyMemorySSA)
662 MSSAU->getMemorySSA()->verifyMemorySSA();
663
664 LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " done: unswitching trivial branch...\n"
; } } while (false)
;
665 ++NumTrivial;
666 ++NumBranches;
667 return true;
668}
669
670/// Unswitch a trivial switch if the condition is loop invariant.
671///
672/// This routine should only be called when loop code leading to the switch has
673/// been validated as trivial (no side effects). This routine checks if the
674/// condition is invariant and that at least one of the successors is a loop
675/// exit. This allows us to unswitch without duplicating the loop, making it
676/// trivial.
677///
678/// If this routine fails to unswitch the switch it returns false.
679///
680/// If the switch can be unswitched, this routine splits the preheader and
681/// copies the switch above that split. If the default case is one of the
682/// exiting cases, it copies the non-exiting cases and points them at the new
683/// preheader. If the default case is not exiting, it copies the exiting cases
684/// and points the default at the preheader. It preserves loop simplified form
685/// (splitting the exit blocks as necessary). It simplifies the switch within
686/// the loop by removing now-dead cases. If the default case is one of those
687/// unswitched, it replaces its destination with a new basic block containing
688/// only unreachable. Such basic blocks, while technically loop exits, are not
689/// considered for unswitching so this is a stable transform and the same
690/// switch will not be revisited. If after unswitching there is only a single
691/// in-loop successor, the switch is further simplified to an unconditional
692/// branch. Still more cleanup can be done with some simplifycfg like pass.
693///
694/// If `SE` is not null, it will be updated based on the potential loop SCEVs
695/// invalidated by this.
696static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
697 LoopInfo &LI, ScalarEvolution *SE,
698 MemorySSAUpdater *MSSAU) {
699 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Trying to unswitch switch: "
<< SI << "\n"; } } while (false)
;
700 Value *LoopCond = SI.getCondition();
701
702 // If this isn't switching on an invariant condition, we can't unswitch it.
703 if (!L.isLoopInvariant(LoopCond))
704 return false;
705
706 auto *ParentBB = SI.getParent();
707
708 // The same check must be used both for the default and the exit cases. We
709 // should never leave edges from the switch instruction to a basic block that
710 // we are unswitching, hence the condition used to determine the default case
711 // needs to also be used to populate ExitCaseIndices, which is then used to
712 // remove cases from the switch.
713 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
714 // BBToCheck is not an exit block if it is inside loop L.
715 if (L.contains(&BBToCheck))
716 return false;
717 // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
718 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
719 return false;
720 // We do not unswitch a block that only has an unreachable statement, as
721 // it's possible this is a previously unswitched block. Only unswitch if
722 // either the terminator is not unreachable, or, if it is, it's not the only
723 // instruction in the block.
724 auto *TI = BBToCheck.getTerminator();
725 bool isUnreachable = isa<UnreachableInst>(TI);
726 return !isUnreachable ||
727 (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
728 };
729
730 SmallVector<int, 4> ExitCaseIndices;
731 for (auto Case : SI.cases())
732 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
733 ExitCaseIndices.push_back(Case.getCaseIndex());
734 BasicBlock *DefaultExitBB = nullptr;
735 SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
736 SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
737 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
738 DefaultExitBB = SI.getDefaultDest();
739 } else if (ExitCaseIndices.empty())
740 return false;
741
742 LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " unswitching trivial switch...\n"
; } } while (false)
;
743
744 if (MSSAU && VerifyMemorySSA)
745 MSSAU->getMemorySSA()->verifyMemorySSA();
746
747 // We may need to invalidate SCEVs for the outermost loop reached by any of
748 // the exits.
749 Loop *OuterL = &L;
750
751 if (DefaultExitBB) {
752 // Clear out the default destination temporarily to allow accurate
753 // predecessor lists to be examined below.
754 SI.setDefaultDest(nullptr);
755 // Check the loop containing this exit.
756 Loop *ExitL = LI.getLoopFor(DefaultExitBB);
757 if (!ExitL || ExitL->contains(OuterL))
758 OuterL = ExitL;
759 }
760
761 // Store the exit cases into a separate data structure and remove them from
762 // the switch.
763 SmallVector<std::tuple<ConstantInt *, BasicBlock *,
764 SwitchInstProfUpdateWrapper::CaseWeightOpt>,
765 4> ExitCases;
766 ExitCases.reserve(ExitCaseIndices.size());
767 SwitchInstProfUpdateWrapper SIW(SI);
768 // We walk the case indices backwards so that we remove the last case first
769 // and don't disrupt the earlier indices.
770 for (unsigned Index : reverse(ExitCaseIndices)) {
771 auto CaseI = SI.case_begin() + Index;
772 // Compute the outer loop from this exit.
773 Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
774 if (!ExitL || ExitL->contains(OuterL))
775 OuterL = ExitL;
776 // Save the value of this case.
777 auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
778 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
779 // Delete the unswitched cases.
780 SIW.removeCase(CaseI);
781 }
782
783 if (SE) {
784 if (OuterL)
785 SE->forgetLoop(OuterL);
786 else
787 SE->forgetTopmostLoop(&L);
788 }
789
790 // Check if after this all of the remaining cases point at the same
791 // successor.
792 BasicBlock *CommonSuccBB = nullptr;
793 if (SI.getNumCases() > 0 &&
794 all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
795 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
796 }))
797 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
798 if (!DefaultExitBB) {
799 // If we're not unswitching the default, we need it to match any cases to
800 // have a common successor or if we have no cases it is the common
801 // successor.
802 if (SI.getNumCases() == 0)
803 CommonSuccBB = SI.getDefaultDest();
804 else if (SI.getDefaultDest() != CommonSuccBB)
805 CommonSuccBB = nullptr;
806 }
807
808 // Split the preheader, so that we know that there is a safe place to insert
809 // the switch.
810 BasicBlock *OldPH = L.getLoopPreheader();
811 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
812 OldPH->getTerminator()->eraseFromParent();
813
814 // Now add the unswitched switch.
815 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
816 SwitchInstProfUpdateWrapper NewSIW(*NewSI);
817
818 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
819 // First, we split any exit blocks with remaining in-loop predecessors. Then
820 // we update the PHIs in one of two ways depending on if there was a split.
821 // We walk in reverse so that we split in the same order as the cases
822 // appeared. This is purely for convenience of reading the resulting IR, but
823 // it doesn't cost anything really.
824 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
825 SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
826 // Handle the default exit if necessary.
827 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
828 // ranges aren't quite powerful enough yet.
829 if (DefaultExitBB) {
830 if (pred_empty(DefaultExitBB)) {
831 UnswitchedExitBBs.insert(DefaultExitBB);
832 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
833 } else {
834 auto *SplitBB =
835 SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
836 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
837 *ParentBB, *OldPH,
838 /*FullUnswitch*/ true);
839 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
840 }
841 }
842 // Note that we must use a reference in the for loop so that we update the
843 // container.
844 for (auto &ExitCase : reverse(ExitCases)) {
845 // Grab a reference to the exit block in the pair so that we can update it.
846 BasicBlock *ExitBB = std::get<1>(ExitCase);
847
848 // If this case is the last edge into the exit block, we can simply reuse it
849 // as it will no longer be a loop exit. No mapping necessary.
850 if (pred_empty(ExitBB)) {
851 // Only rewrite once.
852 if (UnswitchedExitBBs.insert(ExitBB).second)
853 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
854 continue;
855 }
856
857 // Otherwise we need to split the exit block so that we retain an exit
858 // block from the loop and a target for the unswitched condition.
859 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
860 if (!SplitExitBB) {
861 // If this is the first time we see this, do the split and remember it.
862 SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
863 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
864 *ParentBB, *OldPH,
865 /*FullUnswitch*/ true);
866 }
867 // Update the case pair to point to the split block.
868 std::get<1>(ExitCase) = SplitExitBB;
869 }
870
871 // Now add the unswitched cases. We do this in reverse order as we built them
872 // in reverse order.
873 for (auto &ExitCase : reverse(ExitCases)) {
874 ConstantInt *CaseVal = std::get<0>(ExitCase);
875 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
876
877 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
878 }
879
880 // If the default was unswitched, re-point it and add explicit cases for
881 // entering the loop.
882 if (DefaultExitBB) {
883 NewSIW->setDefaultDest(DefaultExitBB);
884 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
885
886 // We removed all the exit cases, so we just copy the cases to the
887 // unswitched switch.
888 for (const auto &Case : SI.cases())
889 NewSIW.addCase(Case.getCaseValue(), NewPH,
890 SIW.getSuccessorWeight(Case.getSuccessorIndex()));
891 } else if (DefaultCaseWeight) {
892 // We have to set branch weight of the default case.
893 uint64_t SW = *DefaultCaseWeight;
894 for (const auto &Case : SI.cases()) {
895 auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
896 assert(W &&(static_cast <bool> (W && "case weight must be defined as default case weight is defined"
) ? void (0) : __assert_fail ("W && \"case weight must be defined as default case weight is defined\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 897, __extension__
__PRETTY_FUNCTION__))
897 "case weight must be defined as default case weight is defined")(static_cast <bool> (W && "case weight must be defined as default case weight is defined"
) ? void (0) : __assert_fail ("W && \"case weight must be defined as default case weight is defined\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 897, __extension__
__PRETTY_FUNCTION__))
;
898 SW += *W;
899 }
900 NewSIW.setSuccessorWeight(0, SW);
901 }
902
903 // If we ended up with a common successor for every path through the switch
904 // after unswitching, rewrite it to an unconditional branch to make it easy
905 // to recognize. Otherwise we potentially have to recognize the default case
906 // pointing at unreachable and other complexity.
907 if (CommonSuccBB) {
908 BasicBlock *BB = SI.getParent();
909 // We may have had multiple edges to this common successor block, so remove
910 // them as predecessors. We skip the first one, either the default or the
911 // actual first case.
912 bool SkippedFirst = DefaultExitBB == nullptr;
913 for (auto Case : SI.cases()) {
914 assert(Case.getCaseSuccessor() == CommonSuccBB &&(static_cast <bool> (Case.getCaseSuccessor() == CommonSuccBB
&& "Non-common successor!") ? void (0) : __assert_fail
("Case.getCaseSuccessor() == CommonSuccBB && \"Non-common successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 915, __extension__
__PRETTY_FUNCTION__))
915 "Non-common successor!")(static_cast <bool> (Case.getCaseSuccessor() == CommonSuccBB
&& "Non-common successor!") ? void (0) : __assert_fail
("Case.getCaseSuccessor() == CommonSuccBB && \"Non-common successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 915, __extension__
__PRETTY_FUNCTION__))
;
916 (void)Case;
917 if (!SkippedFirst) {
918 SkippedFirst = true;
919 continue;
920 }
921 CommonSuccBB->removePredecessor(BB,
922 /*KeepOneInputPHIs*/ true);
923 }
924 // Now nuke the switch and replace it with a direct branch.
925 SIW.eraseFromParent();
926 BranchInst::Create(CommonSuccBB, BB);
927 } else if (DefaultExitBB) {
928 assert(SI.getNumCases() > 0 &&(static_cast <bool> (SI.getNumCases() > 0 &&
"If we had no cases we'd have a common successor!") ? void (
0) : __assert_fail ("SI.getNumCases() > 0 && \"If we had no cases we'd have a common successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 929, __extension__
__PRETTY_FUNCTION__))
929 "If we had no cases we'd have a common successor!")(static_cast <bool> (SI.getNumCases() > 0 &&
"If we had no cases we'd have a common successor!") ? void (
0) : __assert_fail ("SI.getNumCases() > 0 && \"If we had no cases we'd have a common successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 929, __extension__
__PRETTY_FUNCTION__))
;
930 // Move the last case to the default successor. This is valid as if the
931 // default got unswitched it cannot be reached. This has the advantage of
932 // being simple and keeping the number of edges from this switch to
933 // successors the same, and avoiding any PHI update complexity.
934 auto LastCaseI = std::prev(SI.case_end());
935
936 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
937 SIW.setSuccessorWeight(
938 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
939 SIW.removeCase(LastCaseI);
940 }
941
942 // Walk the unswitched exit blocks and the unswitched split blocks and update
943 // the dominator tree based on the CFG edits. While we are walking unordered
944 // containers here, the API for applyUpdates takes an unordered list of
945 // updates and requires them to not contain duplicates.
946 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
947 for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
948 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
949 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
950 }
951 for (auto SplitUnswitchedPair : SplitExitBBMap) {
952 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
953 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
954 }
955
956 if (MSSAU) {
957 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
958 if (VerifyMemorySSA)
959 MSSAU->getMemorySSA()->verifyMemorySSA();
960 } else {
961 DT.applyUpdates(DTUpdates);
962 }
963
964 assert(DT.verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (DT.verify(DominatorTree::VerificationLevel
::Fast)) ? void (0) : __assert_fail ("DT.verify(DominatorTree::VerificationLevel::Fast)"
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 964, __extension__
__PRETTY_FUNCTION__))
;
965
966 // We may have changed the nesting relationship for this loop so hoist it to
967 // its correct parent if needed.
968 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
969
970 if (MSSAU && VerifyMemorySSA)
971 MSSAU->getMemorySSA()->verifyMemorySSA();
972
973 ++NumTrivial;
974 ++NumSwitches;
975 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " done: unswitching trivial switch...\n"
; } } while (false)
;
976 return true;
977}
978
979/// This routine scans the loop to find a branch or switch which occurs before
980/// any side effects occur. These can potentially be unswitched without
981/// duplicating the loop. If a branch or switch is successfully unswitched the
982/// scanning continues to see if subsequent branches or switches have become
983/// trivial. Once all trivial candidates have been unswitched, this routine
984/// returns.
985///
986/// The return value indicates whether anything was unswitched (and therefore
987/// changed).
988///
989/// If `SE` is not null, it will be updated based on the potential loop SCEVs
990/// invalidated by this.
991static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
992 LoopInfo &LI, ScalarEvolution *SE,
993 MemorySSAUpdater *MSSAU) {
994 bool Changed = false;
995
996 // If loop header has only one reachable successor we should keep looking for
997 // trivial condition candidates in the successor as well. An alternative is
998 // to constant fold conditions and merge successors into loop header (then we
999 // only need to check header's terminator). The reason for not doing this in
1000 // LoopUnswitch pass is that it could potentially break LoopPassManager's
1001 // invariants. Folding dead branches could either eliminate the current loop
1002 // or make other loops unreachable. LCSSA form might also not be preserved
1003 // after deleting branches. The following code keeps traversing loop header's
1004 // successors until it finds the trivial condition candidate (condition that
1005 // is not a constant). Since unswitching generates branches with constant
1006 // conditions, this scenario could be very common in practice.
1007 BasicBlock *CurrentBB = L.getHeader();
1008 SmallPtrSet<BasicBlock *, 8> Visited;
1009 Visited.insert(CurrentBB);
1010 do {
1011 // Check if there are any side-effecting instructions (e.g. stores, calls,
1012 // volatile loads) in the part of the loop that the code *would* execute
1013 // without unswitching.
1014 if (MSSAU) // Possible early exit with MSSA
1015 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1016 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1017 return Changed;
1018 if (llvm::any_of(*CurrentBB,
1019 [](Instruction &I) { return I.mayHaveSideEffects(); }))
1020 return Changed;
1021
1022 Instruction *CurrentTerm = CurrentBB->getTerminator();
1023
1024 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1025 // Don't bother trying to unswitch past a switch with a constant
1026 // condition. This should be removed prior to running this pass by
1027 // simplifycfg.
1028 if (isa<Constant>(SI->getCondition()))
1029 return Changed;
1030
1031 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1032 // Couldn't unswitch this one so we're done.
1033 return Changed;
1034
1035 // Mark that we managed to unswitch something.
1036 Changed = true;
1037
1038 // If unswitching turned the terminator into an unconditional branch then
1039 // we can continue. The unswitching logic specifically works to fold any
1040 // cases it can into an unconditional branch to make it easier to
1041 // recognize here.
1042 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1043 if (!BI || BI->isConditional())
1044 return Changed;
1045
1046 CurrentBB = BI->getSuccessor(0);
1047 continue;
1048 }
1049
1050 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1051 if (!BI)
1052 // We do not understand other terminator instructions.
1053 return Changed;
1054
1055 // Don't bother trying to unswitch past an unconditional branch or a branch
1056 // with a constant value. These should be removed by simplifycfg prior to
1057 // running this pass.
1058 if (!BI->isConditional() ||
1059 isa<Constant>(skipTrivialSelect(BI->getCondition())))
1060 return Changed;
1061
1062 // Found a trivial condition candidate: non-foldable conditional branch. If
1063 // we fail to unswitch this, we can't do anything else that is trivial.
1064 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1065 return Changed;
1066
1067 // Mark that we managed to unswitch something.
1068 Changed = true;
1069
1070 // If we only unswitched some of the conditions feeding the branch, we won't
1071 // have collapsed it to a single successor.
1072 BI = cast<BranchInst>(CurrentBB->getTerminator());
1073 if (BI->isConditional())
1074 return Changed;
1075
1076 // Follow the newly unconditional branch into its successor.
1077 CurrentBB = BI->getSuccessor(0);
1078
1079 // When continuing, if we exit the loop or reach a previous visited block,
1080 // then we can not reach any trivial condition candidates (unfoldable
1081 // branch instructions or switch instructions) and no unswitch can happen.
1082 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1083
1084 return Changed;
1085}
1086
1087/// Build the cloned blocks for an unswitched copy of the given loop.
1088///
1089/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1090/// after the split block (`SplitBB`) that will be used to select between the
1091/// cloned and original loop.
1092///
1093/// This routine handles cloning all of the necessary loop blocks and exit
1094/// blocks including rewriting their instructions and the relevant PHI nodes.
1095/// Any loop blocks or exit blocks which are dominated by a different successor
1096/// than the one for this clone of the loop blocks can be trivially skipped. We
1097/// use the `DominatingSucc` map to determine whether a block satisfies that
1098/// property with a simple map lookup.
1099///
1100/// It also correctly creates the unconditional branch in the cloned
1101/// unswitched parent block to only point at the unswitched successor.
1102///
1103/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1104/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1105/// the cloned blocks (and their loops) are left without full `LoopInfo`
1106/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1107/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1108/// instead the caller must recompute an accurate DT. It *does* correctly
1109/// update the `AssumptionCache` provided in `AC`.
1110static BasicBlock *buildClonedLoopBlocks(
1111 Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1112 ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1113 BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1114 const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1115 ValueToValueMapTy &VMap,
1116 SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
1117 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
1118 SmallVector<BasicBlock *, 4> NewBlocks;
1119 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1120
1121 // We will need to clone a bunch of blocks, wrap up the clone operation in
1122 // a helper.
1123 auto CloneBlock = [&](BasicBlock *OldBB) {
1124 // Clone the basic block and insert it before the new preheader.
1125 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1126 NewBB->moveBefore(LoopPH);
1127
1128 // Record this block and the mapping.
1129 NewBlocks.push_back(NewBB);
1130 VMap[OldBB] = NewBB;
1131
1132 return NewBB;
1133 };
1134
1135 // We skip cloning blocks when they have a dominating succ that is not the
1136 // succ we are cloning for.
1137 auto SkipBlock = [&](BasicBlock *BB) {
1138 auto It = DominatingSucc.find(BB);
1139 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1140 };
1141
1142 // First, clone the preheader.
1143 auto *ClonedPH = CloneBlock(LoopPH);
1144
1145 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1146 for (auto *LoopBB : L.blocks())
1147 if (!SkipBlock(LoopBB))
1148 CloneBlock(LoopBB);
1149
1150 // Split all the loop exit edges so that when we clone the exit blocks, if
1151 // any of the exit blocks are *also* a preheader for some other loop, we
1152 // don't create multiple predecessors entering the loop header.
1153 for (auto *ExitBB : ExitBlocks) {
1154 if (SkipBlock(ExitBB))
1155 continue;
1156
1157 // When we are going to clone an exit, we don't need to clone all the
1158 // instructions in the exit block and we want to ensure we have an easy
1159 // place to merge the CFG, so split the exit first. This is always safe to
1160 // do because there cannot be any non-loop predecessors of a loop exit in
1161 // loop simplified form.
1162 auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
1163
1164 // Rearrange the names to make it easier to write test cases by having the
1165 // exit block carry the suffix rather than the merge block carrying the
1166 // suffix.
1167 MergeBB->takeName(ExitBB);
1168 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1169
1170 // Now clone the original exit block.
1171 auto *ClonedExitBB = CloneBlock(ExitBB);
1172 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&(static_cast <bool> (ClonedExitBB->getTerminator()->
getNumSuccessors() == 1 && "Exit block should have been split to have one successor!"
) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getNumSuccessors() == 1 && \"Exit block should have been split to have one successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1173, __extension__
__PRETTY_FUNCTION__))
1173 "Exit block should have been split to have one successor!")(static_cast <bool> (ClonedExitBB->getTerminator()->
getNumSuccessors() == 1 && "Exit block should have been split to have one successor!"
) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getNumSuccessors() == 1 && \"Exit block should have been split to have one successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1173, __extension__
__PRETTY_FUNCTION__))
;
1174 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&(static_cast <bool> (ClonedExitBB->getTerminator()->
getSuccessor(0) == MergeBB && "Cloned exit block has the wrong successor!"
) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB && \"Cloned exit block has the wrong successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1175, __extension__
__PRETTY_FUNCTION__))
1175 "Cloned exit block has the wrong successor!")(static_cast <bool> (ClonedExitBB->getTerminator()->
getSuccessor(0) == MergeBB && "Cloned exit block has the wrong successor!"
) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB && \"Cloned exit block has the wrong successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1175, __extension__
__PRETTY_FUNCTION__))
;
1176
1177 // Remap any cloned instructions and create a merge phi node for them.
1178 for (auto ZippedInsts : llvm::zip_first(
1179 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1180 llvm::make_range(ClonedExitBB->begin(),
1181 std::prev(ClonedExitBB->end())))) {
1182 Instruction &I = std::get<0>(ZippedInsts);
1183 Instruction &ClonedI = std::get<1>(ZippedInsts);
1184
1185 // The only instructions in the exit block should be PHI nodes and
1186 // potentially a landing pad.
1187 assert((static_cast <bool> ((isa<PHINode>(I) || isa<LandingPadInst
>(I) || isa<CatchPadInst>(I)) && "Bad instruction in exit block!"
) ? void (0) : __assert_fail ("(isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) && \"Bad instruction in exit block!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1189, __extension__
__PRETTY_FUNCTION__))
1188 (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&(static_cast <bool> ((isa<PHINode>(I) || isa<LandingPadInst
>(I) || isa<CatchPadInst>(I)) && "Bad instruction in exit block!"
) ? void (0) : __assert_fail ("(isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) && \"Bad instruction in exit block!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1189, __extension__
__PRETTY_FUNCTION__))
1189 "Bad instruction in exit block!")(static_cast <bool> ((isa<PHINode>(I) || isa<LandingPadInst
>(I) || isa<CatchPadInst>(I)) && "Bad instruction in exit block!"
) ? void (0) : __assert_fail ("(isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) && \"Bad instruction in exit block!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1189, __extension__
__PRETTY_FUNCTION__))
;
1190 // We should have a value map between the instruction and its clone.
1191 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!")(static_cast <bool> (VMap.lookup(&I) == &ClonedI
&& "Mismatch in the value map!") ? void (0) : __assert_fail
("VMap.lookup(&I) == &ClonedI && \"Mismatch in the value map!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1191, __extension__
__PRETTY_FUNCTION__))
;
1192
1193 auto *MergePN =
1194 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
1195 &*MergeBB->getFirstInsertionPt());
1196 I.replaceAllUsesWith(MergePN);
1197 MergePN->addIncoming(&I, ExitBB);
1198 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1199 }
1200 }
1201
1202 // Rewrite the instructions in the cloned blocks to refer to the instructions
1203 // in the cloned blocks. We have to do this as a second pass so that we have
1204 // everything available. Also, we have inserted new instructions which may
1205 // include assume intrinsics, so we update the assumption cache while
1206 // processing this.
1207 for (auto *ClonedBB : NewBlocks)
1208 for (Instruction &I : *ClonedBB) {
1209 RemapInstruction(&I, VMap,
1210 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1211 if (auto *II = dyn_cast<AssumeInst>(&I))
1212 AC.registerAssumption(II);
1213 }
1214
1215 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1216 // have spurious incoming values.
1217 for (auto *LoopBB : L.blocks())
1218 if (SkipBlock(LoopBB))
1219 for (auto *SuccBB : successors(LoopBB))
1220 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1221 for (PHINode &PN : ClonedSuccBB->phis())
1222 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1223
1224 // Remove the cloned parent as a predecessor of any successor we ended up
1225 // cloning other than the unswitched one.
1226 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1227 for (auto *SuccBB : successors(ParentBB)) {
1228 if (SuccBB == UnswitchedSuccBB)
1229 continue;
1230
1231 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1232 if (!ClonedSuccBB)
1233 continue;
1234
1235 ClonedSuccBB->removePredecessor(ClonedParentBB,
1236 /*KeepOneInputPHIs*/ true);
1237 }
1238
1239 // Replace the cloned branch with an unconditional branch to the cloned
1240 // unswitched successor.
1241 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1242 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1243 // Trivial Simplification. If Terminator is a conditional branch and
1244 // condition becomes dead - erase it.
1245 Value *ClonedConditionToErase = nullptr;
1246 if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1247 ClonedConditionToErase = BI->getCondition();
1248 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1249 ClonedConditionToErase = SI->getCondition();
1250
1251 ClonedTerminator->eraseFromParent();
1252 BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1253
1254 if (ClonedConditionToErase)
1255 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1256 MSSAU);
1257
1258 // If there are duplicate entries in the PHI nodes because of multiple edges
1259 // to the unswitched successor, we need to nuke all but one as we replaced it
1260 // with a direct branch.
1261 for (PHINode &PN : ClonedSuccBB->phis()) {
1262 bool Found = false;
1263 // Loop over the incoming operands backwards so we can easily delete as we
1264 // go without invalidating the index.
1265 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1266 if (PN.getIncomingBlock(i) != ClonedParentBB)
1267 continue;
1268 if (!Found) {
1269 Found = true;
1270 continue;
1271 }
1272 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1273 }
1274 }
1275
1276 // Record the domtree updates for the new blocks.
1277 SmallPtrSet<BasicBlock *, 4> SuccSet;
1278 for (auto *ClonedBB : NewBlocks) {
1279 for (auto *SuccBB : successors(ClonedBB))
1280 if (SuccSet.insert(SuccBB).second)
1281 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1282 SuccSet.clear();
1283 }
1284
1285 return ClonedPH;
1286}
1287
1288/// Recursively clone the specified loop and all of its children.
1289///
1290/// The target parent loop for the clone should be provided, or can be null if
1291/// the clone is a top-level loop. While cloning, all the blocks are mapped
1292/// with the provided value map. The entire original loop must be present in
1293/// the value map. The cloned loop is returned.
1294static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1295 const ValueToValueMapTy &VMap, LoopInfo &LI) {
1296 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1297 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!")(static_cast <bool> (ClonedL.getBlocks().empty() &&
"Must start with an empty loop!") ? void (0) : __assert_fail
("ClonedL.getBlocks().empty() && \"Must start with an empty loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1297, __extension__
__PRETTY_FUNCTION__))
;
1298 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1299 for (auto *BB : OrigL.blocks()) {
1300 auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1301 ClonedL.addBlockEntry(ClonedBB);
1302 if (LI.getLoopFor(BB) == &OrigL)
1303 LI.changeLoopFor(ClonedBB, &ClonedL);
1304 }
1305 };
1306
1307 // We specially handle the first loop because it may get cloned into
1308 // a different parent and because we most commonly are cloning leaf loops.
1309 Loop *ClonedRootL = LI.AllocateLoop();
1310 if (RootParentL)
1311 RootParentL->addChildLoop(ClonedRootL);
1312 else
1313 LI.addTopLevelLoop(ClonedRootL);
1314 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1315
1316 if (OrigRootL.isInnermost())
1317 return ClonedRootL;
1318
1319 // If we have a nest, we can quickly clone the entire loop nest using an
1320 // iterative approach because it is a tree. We keep the cloned parent in the
1321 // data structure to avoid repeatedly querying through a map to find it.
1322 SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1323 // Build up the loops to clone in reverse order as we'll clone them from the
1324 // back.
1325 for (Loop *ChildL : llvm::reverse(OrigRootL))
1326 LoopsToClone.push_back({ClonedRootL, ChildL});
1327 do {
1328 Loop *ClonedParentL, *L;
1329 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1330 Loop *ClonedL = LI.AllocateLoop();
1331 ClonedParentL->addChildLoop(ClonedL);
1332 AddClonedBlocksToLoop(*L, *ClonedL);
1333 for (Loop *ChildL : llvm::reverse(*L))
1334 LoopsToClone.push_back({ClonedL, ChildL});
1335 } while (!LoopsToClone.empty());
1336
1337 return ClonedRootL;
1338}
1339
1340/// Build the cloned loops of an original loop from unswitching.
1341///
1342/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1343/// operation. We need to re-verify that there even is a loop (as the backedge
1344/// may not have been cloned), and even if there are remaining backedges the
1345/// backedge set may be different. However, we know that each child loop is
1346/// undisturbed, we only need to find where to place each child loop within
1347/// either any parent loop or within a cloned version of the original loop.
1348///
1349/// Because child loops may end up cloned outside of any cloned version of the
1350/// original loop, multiple cloned sibling loops may be created. All of them
1351/// are returned so that the newly introduced loop nest roots can be
1352/// identified.
1353static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1354 const ValueToValueMapTy &VMap, LoopInfo &LI,
1355 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1356 Loop *ClonedL = nullptr;
1357
1358 auto *OrigPH = OrigL.getLoopPreheader();
1359 auto *OrigHeader = OrigL.getHeader();
1360
1361 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1362 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1363
1364 // We need to know the loops of the cloned exit blocks to even compute the
1365 // accurate parent loop. If we only clone exits to some parent of the
1366 // original parent, we want to clone into that outer loop. We also keep track
1367 // of the loops that our cloned exit blocks participate in.
1368 Loop *ParentL = nullptr;
1369 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1370 SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
1371 ClonedExitsInLoops.reserve(ExitBlocks.size());
1372 for (auto *ExitBB : ExitBlocks)
1373 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1374 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1375 ExitLoopMap[ClonedExitBB] = ExitL;
1376 ClonedExitsInLoops.push_back(ClonedExitBB);
1377 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1378 ParentL = ExitL;
1379 }
1380 assert((!ParentL || ParentL == OrigL.getParentLoop() ||(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop
() || ParentL->contains(OrigL.getParentLoop())) &&
"The computed parent loop should always contain (or be) the parent of "
"the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1383, __extension__
__PRETTY_FUNCTION__))
1381 ParentL->contains(OrigL.getParentLoop())) &&(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop
() || ParentL->contains(OrigL.getParentLoop())) &&
"The computed parent loop should always contain (or be) the parent of "
"the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1383, __extension__
__PRETTY_FUNCTION__))
1382 "The computed parent loop should always contain (or be) the parent of "(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop
() || ParentL->contains(OrigL.getParentLoop())) &&
"The computed parent loop should always contain (or be) the parent of "
"the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1383, __extension__
__PRETTY_FUNCTION__))
1383 "the original loop.")(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop
() || ParentL->contains(OrigL.getParentLoop())) &&
"The computed parent loop should always contain (or be) the parent of "
"the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1383, __extension__
__PRETTY_FUNCTION__))
;
1384
1385 // We build the set of blocks dominated by the cloned header from the set of
1386 // cloned blocks out of the original loop. While not all of these will
1387 // necessarily be in the cloned loop, it is enough to establish that they
1388 // aren't in unreachable cycles, etc.
1389 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1390 for (auto *BB : OrigL.blocks())
1391 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1392 ClonedLoopBlocks.insert(ClonedBB);
1393
1394 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1395 // skipped cloning some region of this loop which can in turn skip some of
1396 // the backedges so we have to rebuild the blocks in the loop based on the
1397 // backedges that remain after cloning.
1398 SmallVector<BasicBlock *, 16> Worklist;
1399 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1400 for (auto *Pred : predecessors(ClonedHeader)) {
1401 // The only possible non-loop header predecessor is the preheader because
1402 // we know we cloned the loop in simplified form.
1403 if (Pred == ClonedPH)
1404 continue;
1405
1406 // Because the loop was in simplified form, the only non-loop predecessor
1407 // should be the preheader.
1408 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "(static_cast <bool> (ClonedLoopBlocks.count(Pred) &&
"Found a predecessor of the loop " "header other than the preheader "
"that is not part of the loop!") ? void (0) : __assert_fail (
"ClonedLoopBlocks.count(Pred) && \"Found a predecessor of the loop \" \"header other than the preheader \" \"that is not part of the loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1410, __extension__
__PRETTY_FUNCTION__))
1409 "header other than the preheader "(static_cast <bool> (ClonedLoopBlocks.count(Pred) &&
"Found a predecessor of the loop " "header other than the preheader "
"that is not part of the loop!") ? void (0) : __assert_fail (
"ClonedLoopBlocks.count(Pred) && \"Found a predecessor of the loop \" \"header other than the preheader \" \"that is not part of the loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1410, __extension__
__PRETTY_FUNCTION__))
1410 "that is not part of the loop!")(static_cast <bool> (ClonedLoopBlocks.count(Pred) &&
"Found a predecessor of the loop " "header other than the preheader "
"that is not part of the loop!") ? void (0) : __assert_fail (
"ClonedLoopBlocks.count(Pred) && \"Found a predecessor of the loop \" \"header other than the preheader \" \"that is not part of the loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1410, __extension__
__PRETTY_FUNCTION__))
;
1411
1412 // Insert this block into the loop set and on the first visit (and if it
1413 // isn't the header we're currently walking) put it into the worklist to
1414 // recurse through.
1415 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1416 Worklist.push_back(Pred);
1417 }
1418
1419 // If we had any backedges then there *is* a cloned loop. Put the header into
1420 // the loop set and then walk the worklist backwards to find all the blocks
1421 // that remain within the loop after cloning.
1422 if (!BlocksInClonedLoop.empty()) {
1423 BlocksInClonedLoop.insert(ClonedHeader);
1424
1425 while (!Worklist.empty()) {
1426 BasicBlock *BB = Worklist.pop_back_val();
1427 assert(BlocksInClonedLoop.count(BB) &&(static_cast <bool> (BlocksInClonedLoop.count(BB) &&
"Didn't put block into the loop set!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count(BB) && \"Didn't put block into the loop set!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1428, __extension__
__PRETTY_FUNCTION__))
1428 "Didn't put block into the loop set!")(static_cast <bool> (BlocksInClonedLoop.count(BB) &&
"Didn't put block into the loop set!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count(BB) && \"Didn't put block into the loop set!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1428, __extension__
__PRETTY_FUNCTION__))
;
1429
1430 // Insert any predecessors that are in the possible set into the cloned
1431 // set, and if the insert is successful, add them to the worklist. Note
1432 // that we filter on the blocks that are definitely reachable via the
1433 // backedge to the loop header so we may prune out dead code within the
1434 // cloned loop.
1435 for (auto *Pred : predecessors(BB))
1436 if (ClonedLoopBlocks.count(Pred) &&
1437 BlocksInClonedLoop.insert(Pred).second)
1438 Worklist.push_back(Pred);
1439 }
1440
1441 ClonedL = LI.AllocateLoop();
1442 if (ParentL) {
1443 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1444 ParentL->addChildLoop(ClonedL);
1445 } else {
1446 LI.addTopLevelLoop(ClonedL);
1447 }
1448 NonChildClonedLoops.push_back(ClonedL);
1449
1450 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1451 // We don't want to just add the cloned loop blocks based on how we
1452 // discovered them. The original order of blocks was carefully built in
1453 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1454 // that logic, we just re-walk the original blocks (and those of the child
1455 // loops) and filter them as we add them into the cloned loop.
1456 for (auto *BB : OrigL.blocks()) {
1457 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1458 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1459 continue;
1460
1461 // Directly add the blocks that are only in this loop.
1462 if (LI.getLoopFor(BB) == &OrigL) {
1463 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1464 continue;
1465 }
1466
1467 // We want to manually add it to this loop and parents.
1468 // Registering it with LoopInfo will happen when we clone the top
1469 // loop for this block.
1470 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1471 PL->addBlockEntry(ClonedBB);
1472 }
1473
1474 // Now add each child loop whose header remains within the cloned loop. All
1475 // of the blocks within the loop must satisfy the same constraints as the
1476 // header so once we pass the header checks we can just clone the entire
1477 // child loop nest.
1478 for (Loop *ChildL : OrigL) {
1479 auto *ClonedChildHeader =
1480 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1481 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1482 continue;
1483
1484#ifndef NDEBUG
1485 // We should never have a cloned child loop header but fail to have
1486 // all of the blocks for that child loop.
1487 for (auto *ChildLoopBB : ChildL->blocks())
1488 assert(BlocksInClonedLoop.count((static_cast <bool> (BlocksInClonedLoop.count( cast<
BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer "
"loop but not all of its blocks!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1491, __extension__
__PRETTY_FUNCTION__))
1489 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&(static_cast <bool> (BlocksInClonedLoop.count( cast<
BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer "
"loop but not all of its blocks!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1491, __extension__
__PRETTY_FUNCTION__))
1490 "Child cloned loop has a header within the cloned outer "(static_cast <bool> (BlocksInClonedLoop.count( cast<
BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer "
"loop but not all of its blocks!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1491, __extension__
__PRETTY_FUNCTION__))
1491 "loop but not all of its blocks!")(static_cast <bool> (BlocksInClonedLoop.count( cast<
BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer "
"loop but not all of its blocks!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1491, __extension__
__PRETTY_FUNCTION__))
;
1492#endif
1493
1494 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1495 }
1496 }
1497
1498 // Now that we've handled all the components of the original loop that were
1499 // cloned into a new loop, we still need to handle anything from the original
1500 // loop that wasn't in a cloned loop.
1501
1502 // Figure out what blocks are left to place within any loop nest containing
1503 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1504 // them.
1505 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1506 if (BlocksInClonedLoop.empty())
1507 UnloopedBlockSet.insert(ClonedPH);
1508 for (auto *ClonedBB : ClonedLoopBlocks)
1509 if (!BlocksInClonedLoop.count(ClonedBB))
1510 UnloopedBlockSet.insert(ClonedBB);
1511
1512 // Copy the cloned exits and sort them in ascending loop depth, we'll work
1513 // backwards across these to process them inside out. The order shouldn't
1514 // matter as we're just trying to build up the map from inside-out; we use
1515 // the map in a more stably ordered way below.
1516 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1517 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1518 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1519 ExitLoopMap.lookup(RHS)->getLoopDepth();
1520 });
1521
1522 // Populate the existing ExitLoopMap with everything reachable from each
1523 // exit, starting from the inner most exit.
1524 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1525 assert(Worklist.empty() && "Didn't clear worklist!")(static_cast <bool> (Worklist.empty() && "Didn't clear worklist!"
) ? void (0) : __assert_fail ("Worklist.empty() && \"Didn't clear worklist!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1525, __extension__
__PRETTY_FUNCTION__))
;
1526
1527 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1528 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1529
1530 // Walk the CFG back until we hit the cloned PH adding everything reachable
1531 // and in the unlooped set to this exit block's loop.
1532 Worklist.push_back(ExitBB);
1533 do {
1534 BasicBlock *BB = Worklist.pop_back_val();
1535 // We can stop recursing at the cloned preheader (if we get there).
1536 if (BB == ClonedPH)
1537 continue;
1538
1539 for (BasicBlock *PredBB : predecessors(BB)) {
1540 // If this pred has already been moved to our set or is part of some
1541 // (inner) loop, no update needed.
1542 if (!UnloopedBlockSet.erase(PredBB)) {
1543 assert((static_cast <bool> ((BlocksInClonedLoop.count(PredBB) ||
ExitLoopMap.count(PredBB)) && "Predecessor not mapped to a loop!"
) ? void (0) : __assert_fail ("(BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && \"Predecessor not mapped to a loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1545, __extension__
__PRETTY_FUNCTION__))
1544 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&(static_cast <bool> ((BlocksInClonedLoop.count(PredBB) ||
ExitLoopMap.count(PredBB)) && "Predecessor not mapped to a loop!"
) ? void (0) : __assert_fail ("(BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && \"Predecessor not mapped to a loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1545, __extension__
__PRETTY_FUNCTION__))
1545 "Predecessor not mapped to a loop!")(static_cast <bool> ((BlocksInClonedLoop.count(PredBB) ||
ExitLoopMap.count(PredBB)) && "Predecessor not mapped to a loop!"
) ? void (0) : __assert_fail ("(BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && \"Predecessor not mapped to a loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1545, __extension__
__PRETTY_FUNCTION__))
;
1546 continue;
1547 }
1548
1549 // We just insert into the loop set here. We'll add these blocks to the
1550 // exit loop after we build up the set in an order that doesn't rely on
1551 // predecessor order (which in turn relies on use list order).
1552 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1553 (void)Inserted;
1554 assert(Inserted && "Should only visit an unlooped block once!")(static_cast <bool> (Inserted && "Should only visit an unlooped block once!"
) ? void (0) : __assert_fail ("Inserted && \"Should only visit an unlooped block once!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1554, __extension__
__PRETTY_FUNCTION__))
;
1555
1556 // And recurse through to its predecessors.
1557 Worklist.push_back(PredBB);
1558 }
1559 } while (!Worklist.empty());
1560 }
1561
1562 // Now that the ExitLoopMap gives a mapping for all the non-looping cloned
1563 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1564 // in their original order adding them to the correct loop.
1565
1566 // We need a stable insertion order. We use the order of the original loop
1567 // order and map into the correct parent loop.
1568 for (auto *BB : llvm::concat<BasicBlock *const>(
1569 makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1570 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1571 OuterL->addBasicBlockToLoop(BB, LI);
1572
1573#ifndef NDEBUG
1574 for (auto &BBAndL : ExitLoopMap) {
1575 auto *BB = BBAndL.first;
1576 auto *OuterL = BBAndL.second;
1577 assert(LI.getLoopFor(BB) == OuterL &&(static_cast <bool> (LI.getLoopFor(BB) == OuterL &&
"Failed to put all blocks into outer loops!") ? void (0) : __assert_fail
("LI.getLoopFor(BB) == OuterL && \"Failed to put all blocks into outer loops!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1578, __extension__
__PRETTY_FUNCTION__))
1578 "Failed to put all blocks into outer loops!")(static_cast <bool> (LI.getLoopFor(BB) == OuterL &&
"Failed to put all blocks into outer loops!") ? void (0) : __assert_fail
("LI.getLoopFor(BB) == OuterL && \"Failed to put all blocks into outer loops!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1578, __extension__
__PRETTY_FUNCTION__))
;
1579 }
1580#endif
1581
1582 // Now that all the blocks are placed into the correct containing loop in the
1583 // absence of child loops, find all the potentially cloned child loops and
1584 // clone them into whatever outer loop we placed their header into.
1585 for (Loop *ChildL : OrigL) {
1586 auto *ClonedChildHeader =
1587 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1588 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1589 continue;
1590
1591#ifndef NDEBUG
1592 for (auto *ChildLoopBB : ChildL->blocks())
1593 assert(VMap.count(ChildLoopBB) &&(static_cast <bool> (VMap.count(ChildLoopBB) &&
"Cloned a child loop header but not all of that loops blocks!"
) ? void (0) : __assert_fail ("VMap.count(ChildLoopBB) && \"Cloned a child loop header but not all of that loops blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1594, __extension__
__PRETTY_FUNCTION__))
1594 "Cloned a child loop header but not all of that loops blocks!")(static_cast <bool> (VMap.count(ChildLoopBB) &&
"Cloned a child loop header but not all of that loops blocks!"
) ? void (0) : __assert_fail ("VMap.count(ChildLoopBB) && \"Cloned a child loop header but not all of that loops blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1594, __extension__
__PRETTY_FUNCTION__))
;
1595#endif
1596
1597 NonChildClonedLoops.push_back(cloneLoopNest(
1598 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1599 }
1600}
1601
1602static void
1603deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1604 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1605 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1606 // Find all the dead clones, and remove them from their successors.
1607 SmallVector<BasicBlock *, 16> DeadBlocks;
1608 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1609 for (const auto &VMap : VMaps)
1610 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1611 if (!DT.isReachableFromEntry(ClonedBB)) {
1612 for (BasicBlock *SuccBB : successors(ClonedBB))
1613 SuccBB->removePredecessor(ClonedBB);
1614 DeadBlocks.push_back(ClonedBB);
1615 }
1616
1617 // Remove all MemorySSA in the dead blocks
1618 if (MSSAU) {
1619 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1620 DeadBlocks.end());
1621 MSSAU->removeBlocks(DeadBlockSet);
1622 }
1623
1624 // Drop any remaining references to break cycles.
1625 for (BasicBlock *BB : DeadBlocks)
1626 BB->dropAllReferences();
1627 // Erase them from the IR.
1628 for (BasicBlock *BB : DeadBlocks)
1629 BB->eraseFromParent();
1630}
1631
1632static void
1633deleteDeadBlocksFromLoop(Loop &L,
1634 SmallVectorImpl<BasicBlock *> &ExitBlocks,
1635 DominatorTree &DT, LoopInfo &LI,
1636 MemorySSAUpdater *MSSAU,
1637 ScalarEvolution *SE,
1638 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
1639 // Find all the dead blocks tied to this loop, and remove them from their
1640 // successors.
1641 SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1642
1643 // Start with loop/exit blocks and get a transitive closure of reachable dead
1644 // blocks.
1645 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1646 ExitBlocks.end());
1647 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1648 while (!DeathCandidates.empty()) {
1649 auto *BB = DeathCandidates.pop_back_val();
1650 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1651 for (BasicBlock *SuccBB : successors(BB)) {
1652 SuccBB->removePredecessor(BB);
1653 DeathCandidates.push_back(SuccBB);
1654 }
1655 DeadBlockSet.insert(BB);
1656 }
1657 }
1658
1659 // Remove all MemorySSA in the dead blocks
1660 if (MSSAU)
1661 MSSAU->removeBlocks(DeadBlockSet);
1662
1663 // Filter out the dead blocks from the exit blocks list so that it can be
1664 // used in the caller.
1665 llvm::erase_if(ExitBlocks,
1666 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1667
1668 // Walk from this loop up through its parents removing all of the dead blocks.
1669 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1670 for (auto *BB : DeadBlockSet)
1671 ParentL->getBlocksSet().erase(BB);
1672 llvm::erase_if(ParentL->getBlocksVector(),
1673 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1674 }
1675
1676 // Now delete the dead child loops. This raw delete will clear them
1677 // recursively.
1678 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1679 if (!DeadBlockSet.count(ChildL->getHeader()))
1680 return false;
1681
1682 assert(llvm::all_of(ChildL->blocks(),(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1687, __extension__
__PRETTY_FUNCTION__))
1683 [&](BasicBlock *ChildBB) {(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1687, __extension__
__PRETTY_FUNCTION__))
1684 return DeadBlockSet.count(ChildBB);(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1687, __extension__
__PRETTY_FUNCTION__))
1685 }) &&(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1687, __extension__
__PRETTY_FUNCTION__))
1686 "If the child loop header is dead all blocks in the child loop must "(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1687, __extension__
__PRETTY_FUNCTION__))
1687 "be dead as well!")(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1687, __extension__
__PRETTY_FUNCTION__))
;
1688 DestroyLoopCB(*ChildL, ChildL->getName());
1689 if (SE)
1690 SE->forgetBlockAndLoopDispositions();
1691 LI.destroy(ChildL);
1692 return true;
1693 });
1694
1695 // Remove the loop mappings for the dead blocks and drop all the references
1696 // from these blocks to others to handle cyclic references as we start
1697 // deleting the blocks themselves.
1698 for (auto *BB : DeadBlockSet) {
1699 // Check that the dominator tree has already been updated.
1700 assert(!DT.getNode(BB) && "Should already have cleared domtree!")(static_cast <bool> (!DT.getNode(BB) && "Should already have cleared domtree!"
) ? void (0) : __assert_fail ("!DT.getNode(BB) && \"Should already have cleared domtree!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1700, __extension__
__PRETTY_FUNCTION__))
;
1701 LI.changeLoopFor(BB, nullptr);
1702 // Drop all uses of the instructions to make sure we won't have dangling
1703 // uses in other blocks.
1704 for (auto &I : *BB)
1705 if (!I.use_empty())
1706 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1707 BB->dropAllReferences();
1708 }
1709
1710 // Actually delete the blocks now that they've been fully unhooked from the
1711 // IR.
1712 for (auto *BB : DeadBlockSet)
1713 BB->eraseFromParent();
1714}
1715
1716/// Recompute the set of blocks in a loop after unswitching.
1717///
1718/// This walks from the original headers predecessors to rebuild the loop. We
1719/// take advantage of the fact that new blocks can't have been added, and so we
1720/// filter by the original loop's blocks. This also handles potentially
1721/// unreachable code that we don't want to explore but might be found examining
1722/// the predecessors of the header.
1723///
1724/// If the original loop is no longer a loop, this will return an empty set. If
1725/// it remains a loop, all the blocks within it will be added to the set
1726/// (including those blocks in inner loops).
1727static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
1728 LoopInfo &LI) {
1729 SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
1730
1731 auto *PH = L.getLoopPreheader();
1732 auto *Header = L.getHeader();
1733
1734 // A worklist to use while walking backwards from the header.
1735 SmallVector<BasicBlock *, 16> Worklist;
1736
1737 // First walk the predecessors of the header to find the backedges. This will
1738 // form the basis of our walk.
1739 for (auto *Pred : predecessors(Header)) {
1740 // Skip the preheader.
1741 if (Pred == PH)
1742 continue;
1743
1744 // Because the loop was in simplified form, the only non-loop predecessor
1745 // is the preheader.
1746 assert(L.contains(Pred) && "Found a predecessor of the loop header other "(static_cast <bool> (L.contains(Pred) && "Found a predecessor of the loop header other "
"than the preheader that is not part of the " "loop!") ? void
(0) : __assert_fail ("L.contains(Pred) && \"Found a predecessor of the loop header other \" \"than the preheader that is not part of the \" \"loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1748, __extension__
__PRETTY_FUNCTION__))
1747 "than the preheader that is not part of the "(static_cast <bool> (L.contains(Pred) && "Found a predecessor of the loop header other "
"than the preheader that is not part of the " "loop!") ? void
(0) : __assert_fail ("L.contains(Pred) && \"Found a predecessor of the loop header other \" \"than the preheader that is not part of the \" \"loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1748, __extension__
__PRETTY_FUNCTION__))
1748 "loop!")(static_cast <bool> (L.contains(Pred) && "Found a predecessor of the loop header other "
"than the preheader that is not part of the " "loop!") ? void
(0) : __assert_fail ("L.contains(Pred) && \"Found a predecessor of the loop header other \" \"than the preheader that is not part of the \" \"loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1748, __extension__
__PRETTY_FUNCTION__))
;
1749
1750 // Insert this block into the loop set and on the first visit and, if it
1751 // isn't the header we're currently walking, put it into the worklist to
1752 // recurse through.
1753 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1754 Worklist.push_back(Pred);
1755 }
1756
1757 // If no backedges were found, we're done.
1758 if (LoopBlockSet.empty())
1759 return LoopBlockSet;
1760
1761 // We found backedges, recurse through them to identify the loop blocks.
1762 while (!Worklist.empty()) {
1763 BasicBlock *BB = Worklist.pop_back_val();
1764 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!")(static_cast <bool> (LoopBlockSet.count(BB) && "Didn't put block into the loop set!"
) ? void (0) : __assert_fail ("LoopBlockSet.count(BB) && \"Didn't put block into the loop set!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1764, __extension__
__PRETTY_FUNCTION__))
;
1765
1766 // No need to walk past the header.
1767 if (BB == Header)
1768 continue;
1769
1770 // Because we know the inner loop structure remains valid we can use the
1771 // loop structure to jump immediately across the entire nested loop.
1772 // Further, because it is in loop simplified form, we can directly jump
1773 // to its preheader afterward.
1774 if (Loop *InnerL = LI.getLoopFor(BB))
1775 if (InnerL != &L) {
1776 assert(L.contains(InnerL) &&(static_cast <bool> (L.contains(InnerL) && "Should not reach a loop *outside* this loop!"
) ? void (0) : __assert_fail ("L.contains(InnerL) && \"Should not reach a loop *outside* this loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1777, __extension__
__PRETTY_FUNCTION__))
1777 "Should not reach a loop *outside* this loop!")(static_cast <bool> (L.contains(InnerL) && "Should not reach a loop *outside* this loop!"
) ? void (0) : __assert_fail ("L.contains(InnerL) && \"Should not reach a loop *outside* this loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1777, __extension__
__PRETTY_FUNCTION__))
;
1778 // The preheader is the only possible predecessor of the loop so
1779 // insert it into the set and check whether it was already handled.
1780 auto *InnerPH = InnerL->getLoopPreheader();
1781 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "(static_cast <bool> (L.contains(InnerPH) && "Cannot contain an inner loop block "
"but not contain the inner loop " "preheader!") ? void (0) :
__assert_fail ("L.contains(InnerPH) && \"Cannot contain an inner loop block \" \"but not contain the inner loop \" \"preheader!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1783, __extension__
__PRETTY_FUNCTION__))
1782 "but not contain the inner loop "(static_cast <bool> (L.contains(InnerPH) && "Cannot contain an inner loop block "
"but not contain the inner loop " "preheader!") ? void (0) :
__assert_fail ("L.contains(InnerPH) && \"Cannot contain an inner loop block \" \"but not contain the inner loop \" \"preheader!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1783, __extension__
__PRETTY_FUNCTION__))
1783 "preheader!")(static_cast <bool> (L.contains(InnerPH) && "Cannot contain an inner loop block "
"but not contain the inner loop " "preheader!") ? void (0) :
__assert_fail ("L.contains(InnerPH) && \"Cannot contain an inner loop block \" \"but not contain the inner loop \" \"preheader!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1783, __extension__
__PRETTY_FUNCTION__))
;
1784 if (!LoopBlockSet.insert(InnerPH).second)
1785 // The only way to reach the preheader is through the loop body
1786 // itself so if it has been visited the loop is already handled.
1787 continue;
1788
1789 // Insert all of the blocks (other than those already present) into
1790 // the loop set. We expect at least the block that led us to find the
1791 // inner loop to be in the block set, but we may also have other loop
1792 // blocks if they were already enqueued as predecessors of some other
1793 // outer loop block.
1794 for (auto *InnerBB : InnerL->blocks()) {
1795 if (InnerBB == BB) {
1796 assert(LoopBlockSet.count(InnerBB) &&(static_cast <bool> (LoopBlockSet.count(InnerBB) &&
"Block should already be in the set!") ? void (0) : __assert_fail
("LoopBlockSet.count(InnerBB) && \"Block should already be in the set!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1797, __extension__
__PRETTY_FUNCTION__))
1797 "Block should already be in the set!")(static_cast <bool> (LoopBlockSet.count(InnerBB) &&
"Block should already be in the set!") ? void (0) : __assert_fail
("LoopBlockSet.count(InnerBB) && \"Block should already be in the set!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1797, __extension__
__PRETTY_FUNCTION__))
;
1798 continue;
1799 }
1800
1801 LoopBlockSet.insert(InnerBB);
1802 }
1803
1804 // Add the preheader to the worklist so we will continue past the
1805 // loop body.
1806 Worklist.push_back(InnerPH);
1807 continue;
1808 }
1809
1810 // Insert any predecessors that were in the original loop into the new
1811 // set, and if the insert is successful, add them to the worklist.
1812 for (auto *Pred : predecessors(BB))
1813 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1814 Worklist.push_back(Pred);
1815 }
1816
1817 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!")(static_cast <bool> (LoopBlockSet.count(Header) &&
"Cannot fail to add the header!") ? void (0) : __assert_fail
("LoopBlockSet.count(Header) && \"Cannot fail to add the header!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1817, __extension__
__PRETTY_FUNCTION__))
;
1818
1819 // We've found all the blocks participating in the loop, return our completed
1820 // set.
1821 return LoopBlockSet;
1822}
1823
1824/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1825///
1826/// The removal may have removed some child loops entirely but cannot have
1827/// disturbed any remaining child loops. However, they may need to be hoisted
1828/// to the parent loop (or to be top-level loops). The original loop may be
1829/// completely removed.
1830///
1831/// The sibling loops resulting from this update are returned. If the original
1832/// loop remains a valid loop, it will be the first entry in this list with all
1833/// of the newly sibling loops following it.
1834///
1835/// Returns true if the loop remains a loop after unswitching, and false if it
1836/// is no longer a loop after unswitching (and should not continue to be
1837/// referenced).
1838static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1839 LoopInfo &LI,
1840 SmallVectorImpl<Loop *> &HoistedLoops,
1841 ScalarEvolution *SE) {
1842 auto *PH = L.getLoopPreheader();
1843
1844 // Compute the actual parent loop from the exit blocks. Because we may have
1845 // pruned some exits the loop may be different from the original parent.
1846 Loop *ParentL = nullptr;
1847 SmallVector<Loop *, 4> ExitLoops;
1848 SmallVector<BasicBlock *, 4> ExitsInLoops;
1849 ExitsInLoops.reserve(ExitBlocks.size());
1850 for (auto *ExitBB : ExitBlocks)
1851 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1852 ExitLoops.push_back(ExitL);
1853 ExitsInLoops.push_back(ExitBB);
1854 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1855 ParentL = ExitL;
1856 }
1857
1858 // Recompute the blocks participating in this loop. This may be empty if it
1859 // is no longer a loop.
1860 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1861
1862 // If we still have a loop, we need to re-set the loop's parent as the exit
1863 // block set changing may have moved it within the loop nest. Note that this
1864 // can only happen when this loop has a parent as it can only hoist the loop
1865 // *up* the nest.
1866 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1867 // Remove this loop's (original) blocks from all of the intervening loops.
1868 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1869 IL = IL->getParentLoop()) {
1870 IL->getBlocksSet().erase(PH);
1871 for (auto *BB : L.blocks())
1872 IL->getBlocksSet().erase(BB);
1873 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1874 return BB == PH || L.contains(BB);
1875 });
1876 }
1877
1878 LI.changeLoopFor(PH, ParentL);
1879 L.getParentLoop()->removeChildLoop(&L);
1880 if (ParentL)
1881 ParentL->addChildLoop(&L);
1882 else
1883 LI.addTopLevelLoop(&L);
1884 }
1885
1886 // Now we update all the blocks which are no longer within the loop.
1887 auto &Blocks = L.getBlocksVector();
1888 auto BlocksSplitI =
1889 LoopBlockSet.empty()
1890 ? Blocks.begin()
1891 : std::stable_partition(
1892 Blocks.begin(), Blocks.end(),
1893 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1894
1895 // Before we erase the list of unlooped blocks, build a set of them.
1896 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1897 if (LoopBlockSet.empty())
1898 UnloopedBlocks.insert(PH);
1899
1900 // Now erase these blocks from the loop.
1901 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1902 L.getBlocksSet().erase(BB);
1903 Blocks.erase(BlocksSplitI, Blocks.end());
1904
1905 // Sort the exits in ascending loop depth, we'll work backwards across these
1906 // to process them inside out.
1907 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1908 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1909 });
1910
1911 // We'll build up a set for each exit loop.
1912 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1913 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1914
1915 auto RemoveUnloopedBlocksFromLoop =
1916 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1917 for (auto *BB : UnloopedBlocks)
1918 L.getBlocksSet().erase(BB);
1919 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1920 return UnloopedBlocks.count(BB);
1921 });
1922 };
1923
1924 SmallVector<BasicBlock *, 16> Worklist;
1925 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1926 assert(Worklist.empty() && "Didn't clear worklist!")(static_cast <bool> (Worklist.empty() && "Didn't clear worklist!"
) ? void (0) : __assert_fail ("Worklist.empty() && \"Didn't clear worklist!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1926, __extension__
__PRETTY_FUNCTION__))
;
1927 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!")(static_cast <bool> (NewExitLoopBlocks.empty() &&
"Didn't clear loop set!") ? void (0) : __assert_fail ("NewExitLoopBlocks.empty() && \"Didn't clear loop set!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1927, __extension__
__PRETTY_FUNCTION__))
;
1928
1929 // Grab the next exit block, in decreasing loop depth order.
1930 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1931 Loop &ExitL = *LI.getLoopFor(ExitBB);
1932 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!")(static_cast <bool> (ExitL.contains(&L) && "Exit loop must contain the inner loop!"
) ? void (0) : __assert_fail ("ExitL.contains(&L) && \"Exit loop must contain the inner loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1932, __extension__
__PRETTY_FUNCTION__))
;
1933
1934 // Erase all of the unlooped blocks from the loops between the previous
1935 // exit loop and this exit loop. This works because the ExitInLoops list is
1936 // sorted in increasing order of loop depth and thus we visit loops in
1937 // decreasing order of loop depth.
1938 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1939 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1940
1941 // Walk the CFG back until we hit the cloned PH adding everything reachable
1942 // and in the unlooped set to this exit block's loop.
1943 Worklist.push_back(ExitBB);
1944 do {
1945 BasicBlock *BB = Worklist.pop_back_val();
1946 // We can stop recursing at the cloned preheader (if we get there).
1947 if (BB == PH)
1948 continue;
1949
1950 for (BasicBlock *PredBB : predecessors(BB)) {
1951 // If this pred has already been moved to our set or is part of some
1952 // (inner) loop, no update needed.
1953 if (!UnloopedBlocks.erase(PredBB)) {
1954 assert((NewExitLoopBlocks.count(PredBB) ||(static_cast <bool> ((NewExitLoopBlocks.count(PredBB) ||
ExitL.contains(LI.getLoopFor(PredBB))) && "Predecessor not in a nested loop (or already visited)!"
) ? void (0) : __assert_fail ("(NewExitLoopBlocks.count(PredBB) || ExitL.contains(LI.getLoopFor(PredBB))) && \"Predecessor not in a nested loop (or already visited)!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1956, __extension__
__PRETTY_FUNCTION__))
1955 ExitL.contains(LI.getLoopFor(PredBB))) &&(static_cast <bool> ((NewExitLoopBlocks.count(PredBB) ||
ExitL.contains(LI.getLoopFor(PredBB))) && "Predecessor not in a nested loop (or already visited)!"
) ? void (0) : __assert_fail ("(NewExitLoopBlocks.count(PredBB) || ExitL.contains(LI.getLoopFor(PredBB))) && \"Predecessor not in a nested loop (or already visited)!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1956, __extension__
__PRETTY_FUNCTION__))
1956 "Predecessor not in a nested loop (or already visited)!")(static_cast <bool> ((NewExitLoopBlocks.count(PredBB) ||
ExitL.contains(LI.getLoopFor(PredBB))) && "Predecessor not in a nested loop (or already visited)!"
) ? void (0) : __assert_fail ("(NewExitLoopBlocks.count(PredBB) || ExitL.contains(LI.getLoopFor(PredBB))) && \"Predecessor not in a nested loop (or already visited)!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1956, __extension__
__PRETTY_FUNCTION__))
;
1957 continue;
1958 }
1959
1960 // We just insert into the loop set here. We'll add these blocks to the
1961 // exit loop after we build up the set in a deterministic order rather
1962 // than the predecessor-influenced visit order.
1963 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
1964 (void)Inserted;
1965 assert(Inserted && "Should only visit an unlooped block once!")(static_cast <bool> (Inserted && "Should only visit an unlooped block once!"
) ? void (0) : __assert_fail ("Inserted && \"Should only visit an unlooped block once!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1965, __extension__
__PRETTY_FUNCTION__))
;
1966
1967 // And recurse through to its predecessors.
1968 Worklist.push_back(PredBB);
1969 }
1970 } while (!Worklist.empty());
1971
1972 // If blocks in this exit loop were directly part of the original loop (as
1973 // opposed to a child loop) update the map to point to this exit loop. This
1974 // just updates a map and so the fact that the order is unstable is fine.
1975 for (auto *BB : NewExitLoopBlocks)
1976 if (Loop *BBL = LI.getLoopFor(BB))
1977 if (BBL == &L || !L.contains(BBL))
1978 LI.changeLoopFor(BB, &ExitL);
1979
1980 // We will remove the remaining unlooped blocks from this loop in the next
1981 // iteration or below.
1982 NewExitLoopBlocks.clear();
1983 }
1984
1985 // Any remaining unlooped blocks are no longer part of any loop unless they
1986 // are part of some child loop.
1987 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
1988 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1989 for (auto *BB : UnloopedBlocks)
1990 if (Loop *BBL = LI.getLoopFor(BB))
1991 if (BBL == &L || !L.contains(BBL))
1992 LI.changeLoopFor(BB, nullptr);
1993
1994 // Sink all the child loops whose headers are no longer in the loop set to
1995 // the parent (or to be top level loops). We reach into the loop and directly
1996 // update its subloop vector to make this batch update efficient.
1997 auto &SubLoops = L.getSubLoopsVector();
1998 auto SubLoopsSplitI =
1999 LoopBlockSet.empty()
2000 ? SubLoops.begin()
2001 : std::stable_partition(
2002 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2003 return LoopBlockSet.count(SubL->getHeader());
2004 });
2005 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2006 HoistedLoops.push_back(HoistedL);
2007 HoistedL->setParentLoop(nullptr);
2008
2009 // To compute the new parent of this hoisted loop we look at where we
2010 // placed the preheader above. We can't lookup the header itself because we
2011 // retained the mapping from the header to the hoisted loop. But the
2012 // preheader and header should have the exact same new parent computed
2013 // based on the set of exit blocks from the original loop as the preheader
2014 // is a predecessor of the header and so reached in the reverse walk. And
2015 // because the loops were all in simplified form the preheader of the
2016 // hoisted loop can't be part of some *other* loop.
2017 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2018 NewParentL->addChildLoop(HoistedL);
2019 else
2020 LI.addTopLevelLoop(HoistedL);
2021 }
2022 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2023
2024 // Actually delete the loop if nothing remained within it.
2025 if (Blocks.empty()) {
2026 assert(SubLoops.empty() &&(static_cast <bool> (SubLoops.empty() && "Failed to remove all subloops from the original loop!"
) ? void (0) : __assert_fail ("SubLoops.empty() && \"Failed to remove all subloops from the original loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2027, __extension__
__PRETTY_FUNCTION__))
2027 "Failed to remove all subloops from the original loop!")(static_cast <bool> (SubLoops.empty() && "Failed to remove all subloops from the original loop!"
) ? void (0) : __assert_fail ("SubLoops.empty() && \"Failed to remove all subloops from the original loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2027, __extension__
__PRETTY_FUNCTION__))
;
2028 if (Loop *ParentL = L.getParentLoop())
2029 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2030 else
2031 LI.removeLoop(llvm::find(LI, &L));
2032 // markLoopAsDeleted for L should be triggered by the caller (it is typically
2033 // done by using the UnswitchCB callback).
2034 if (SE)
2035 SE->forgetBlockAndLoopDispositions();
2036 LI.destroy(&L);
2037 return false;
2038 }
2039
2040 return true;
2041}
2042
2043/// Helper to visit a dominator subtree, invoking a callable on each node.
2044///
2045/// Returning false at any point will stop walking past that node of the tree.
2046template <typename CallableT>
2047void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2048 SmallVector<DomTreeNode *, 4> DomWorklist;
2049 DomWorklist.push_back(DT[BB]);
2050#ifndef NDEBUG
2051 SmallPtrSet<DomTreeNode *, 4> Visited;
2052 Visited.insert(DT[BB]);
2053#endif
2054 do {
2055 DomTreeNode *N = DomWorklist.pop_back_val();
2056
2057 // Visit this node.
2058 if (!Callable(N->getBlock()))
2059 continue;
2060
2061 // Accumulate the child nodes.
2062 for (DomTreeNode *ChildN : *N) {
2063 assert(Visited.insert(ChildN).second &&(static_cast <bool> (Visited.insert(ChildN).second &&
"Cannot visit a node twice when walking a tree!") ? void (0)
: __assert_fail ("Visited.insert(ChildN).second && \"Cannot visit a node twice when walking a tree!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2064, __extension__
__PRETTY_FUNCTION__))
2064 "Cannot visit a node twice when walking a tree!")(static_cast <bool> (Visited.insert(ChildN).second &&
"Cannot visit a node twice when walking a tree!") ? void (0)
: __assert_fail ("Visited.insert(ChildN).second && \"Cannot visit a node twice when walking a tree!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2064, __extension__
__PRETTY_FUNCTION__))
;
2065 DomWorklist.push_back(ChildN);
2066 }
2067 } while (!DomWorklist.empty());
2068}
2069
2070static void unswitchNontrivialInvariants(
2071 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2072 IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2073 AssumptionCache &AC,
2074 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2075 ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2076 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2077 auto *ParentBB = TI.getParent();
2078 BranchInst *BI = dyn_cast<BranchInst>(&TI);
2079 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2080
2081 // We can only unswitch switches, conditional branches with an invariant
2082 // condition, or combining invariant conditions with an instruction or
2083 // partially invariant instructions.
2084 assert((SI || (BI && BI->isConditional())) &&(static_cast <bool> ((SI || (BI && BI->isConditional
())) && "Can only unswitch switches and conditional branch!"
) ? void (0) : __assert_fail ("(SI || (BI && BI->isConditional())) && \"Can only unswitch switches and conditional branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2085, __extension__
__PRETTY_FUNCTION__))
2085 "Can only unswitch switches and conditional branch!")(static_cast <bool> ((SI || (BI && BI->isConditional
())) && "Can only unswitch switches and conditional branch!"
) ? void (0) : __assert_fail ("(SI || (BI && BI->isConditional())) && \"Can only unswitch switches and conditional branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2085, __extension__
__PRETTY_FUNCTION__))
;
2086 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2087 bool FullUnswitch =
2088 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2089 !PartiallyInvariant);
2090 if (FullUnswitch)
2091 assert(Invariants.size() == 1 &&(static_cast <bool> (Invariants.size() == 1 && "Cannot have other invariants with full unswitching!"
) ? void (0) : __assert_fail ("Invariants.size() == 1 && \"Cannot have other invariants with full unswitching!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2092, __extension__
__PRETTY_FUNCTION__))
2092 "Cannot have other invariants with full unswitching!")(static_cast <bool> (Invariants.size() == 1 && "Cannot have other invariants with full unswitching!"
) ? void (0) : __assert_fail ("Invariants.size() == 1 && \"Cannot have other invariants with full unswitching!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2092, __extension__
__PRETTY_FUNCTION__))
;
2093 else
2094 assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&(static_cast <bool> (isa<Instruction>(skipTrivialSelect
(BI->getCondition())) && "Partial unswitching requires an instruction as the condition!"
) ? void (0) : __assert_fail ("isa<Instruction>(skipTrivialSelect(BI->getCondition())) && \"Partial unswitching requires an instruction as the condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2095, __extension__
__PRETTY_FUNCTION__))
2095 "Partial unswitching requires an instruction as the condition!")(static_cast <bool> (isa<Instruction>(skipTrivialSelect
(BI->getCondition())) && "Partial unswitching requires an instruction as the condition!"
) ? void (0) : __assert_fail ("isa<Instruction>(skipTrivialSelect(BI->getCondition())) && \"Partial unswitching requires an instruction as the condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2095, __extension__
__PRETTY_FUNCTION__))
;
2096
2097 if (MSSAU && VerifyMemorySSA)
2098 MSSAU->getMemorySSA()->verifyMemorySSA();
2099
2100 // Constant and BBs tracking the cloned and continuing successor. When we are
2101 // unswitching the entire condition, this can just be trivially chosen to
2102 // unswitch towards `true`. However, when we are unswitching a set of
2103 // invariants combined with `and` or `or` or partially invariant instructions,
2104 // the combining operation determines the best direction to unswitch: we want
2105 // to unswitch the direction that will collapse the branch.
2106 bool Direction = true;
2107 int ClonedSucc = 0;
2108 if (!FullUnswitch) {
2109 Value *Cond = skipTrivialSelect(BI->getCondition());
2110 (void)Cond;
2111 assert(((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) ||(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match
(Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions "
"can combine invariants being unswitched.") ? void (0) : __assert_fail
("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2114, __extension__
__PRETTY_FUNCTION__))
2112 PartiallyInvariant) &&(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match
(Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions "
"can combine invariants being unswitched.") ? void (0) : __assert_fail
("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2114, __extension__
__PRETTY_FUNCTION__))
2113 "Only `or`, `and`, an `select`, partially invariant instructions "(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match
(Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions "
"can combine invariants being unswitched.") ? void (0) : __assert_fail
("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2114, __extension__
__PRETTY_FUNCTION__))
2114 "can combine invariants being unswitched.")(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match
(Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions "
"can combine invariants being unswitched.") ? void (0) : __assert_fail
("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2114, __extension__
__PRETTY_FUNCTION__))
;
2115 if (!match(Cond, m_LogicalOr())) {
2116 if (match(Cond, m_LogicalAnd()) ||
2117 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2118 Direction = false;
2119 ClonedSucc = 1;
2120 }
2121 }
2122 }
2123
2124 BasicBlock *RetainedSuccBB =
2125 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2126 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2127 if (BI)
2128 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2129 else
2130 for (auto Case : SI->cases())
2131 if (Case.getCaseSuccessor() != RetainedSuccBB)
2132 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2133
2134 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&(static_cast <bool> (!UnswitchedSuccBBs.count(RetainedSuccBB
) && "Should not unswitch the same successor we are retaining!"
) ? void (0) : __assert_fail ("!UnswitchedSuccBBs.count(RetainedSuccBB) && \"Should not unswitch the same successor we are retaining!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2135, __extension__
__PRETTY_FUNCTION__))
2135 "Should not unswitch the same successor we are retaining!")(static_cast <bool> (!UnswitchedSuccBBs.count(RetainedSuccBB
) && "Should not unswitch the same successor we are retaining!"
) ? void (0) : __assert_fail ("!UnswitchedSuccBBs.count(RetainedSuccBB) && \"Should not unswitch the same successor we are retaining!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2135, __extension__
__PRETTY_FUNCTION__))
;
2136
2137 // The branch should be in this exact loop. Any inner loop's invariant branch
2138 // should be handled by unswitching that inner loop. The caller of this
2139 // routine should filter out any candidates that remain (but were skipped for
2140 // whatever reason).
2141 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!")(static_cast <bool> (LI.getLoopFor(ParentBB) == &L &&
"Branch in an inner loop!") ? void (0) : __assert_fail ("LI.getLoopFor(ParentBB) == &L && \"Branch in an inner loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2141, __extension__
__PRETTY_FUNCTION__))
;
2142
2143 // Compute the parent loop now before we start hacking on things.
2144 Loop *ParentL = L.getParentLoop();
2145 // Get blocks in RPO order for MSSA update, before changing the CFG.
2146 LoopBlocksRPO LBRPO(&L);
2147 if (MSSAU)
2148 LBRPO.perform(&LI);
2149
2150 // Compute the outer-most loop containing one of our exit blocks. This is the
2151 // furthest up our loopnest which can be mutated, which we will use below to
2152 // update things.
2153 Loop *OuterExitL = &L;
2154 SmallVector<BasicBlock *, 4> ExitBlocks;
2155 L.getUniqueExitBlocks(ExitBlocks);
2156 for (auto *ExitBB : ExitBlocks) {
2157 Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
2158 if (!NewOuterExitL) {
2159 // We exited the entire nest with this block, so we're done.
2160 OuterExitL = nullptr;
2161 break;
2162 }
2163 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2164 OuterExitL = NewOuterExitL;
2165 }
2166
2167 // At this point, we're definitely going to unswitch something so invalidate
2168 // any cached information in ScalarEvolution for the outer most loop
2169 // containing an exit block and all nested loops.
2170 if (SE) {
2171 if (OuterExitL)
2172 SE->forgetLoop(OuterExitL);
2173 else
2174 SE->forgetTopmostLoop(&L);
2175 SE->forgetBlockAndLoopDispositions();
2176 }
2177
2178 bool InsertFreeze = false;
2179 if (FreezeLoopUnswitchCond) {
2180 ICFLoopSafetyInfo SafetyInfo;
2181 SafetyInfo.computeLoopSafetyInfo(&L);
2182 InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L);
2183 }
2184
2185 // If the edge from this terminator to a successor dominates that successor,
2186 // store a map from each block in its dominator subtree to it. This lets us
2187 // tell when cloning for a particular successor if a block is dominated by
2188 // some *other* successor with a single data structure. We use this to
2189 // significantly reduce cloning.
2190 SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
2191 for (auto *SuccBB : llvm::concat<BasicBlock *const>(
2192 makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs))
2193 if (SuccBB->getUniquePredecessor() ||
2194 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2195 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2196 }))
2197 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2198 DominatingSucc[BB] = SuccBB;
2199 return true;
2200 });
2201
2202 // Split the preheader, so that we know that there is a safe place to insert
2203 // the conditional branch. We will change the preheader to have a conditional
2204 // branch on LoopCond. The original preheader will become the split point
2205 // between the unswitched versions, and we will have a new preheader for the
2206 // original loop.
2207 BasicBlock *SplitBB = L.getLoopPreheader();
2208 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2209
2210 // Keep track of the dominator tree updates needed.
2211 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2212
2213 // Clone the loop for each unswitched successor.
2214 SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
2215 VMaps.reserve(UnswitchedSuccBBs.size());
2216 SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
2217 for (auto *SuccBB : UnswitchedSuccBBs) {
2218 VMaps.emplace_back(new ValueToValueMapTy());
2219 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2220 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2221 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
2222 }
2223
2224 // Drop metadata if we may break its semantics by moving this instr into the
2225 // split block.
2226 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2227 if (DropNonTrivialImplicitNullChecks)
2228 // Do not spend time trying to understand if we can keep it, just drop it
2229 // to save compile time.
2230 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2231 else {
2232 // It is only legal to preserve make.implicit metadata if we are
2233 // guaranteed no reach implicit null check after following this branch.
2234 ICFLoopSafetyInfo SafetyInfo;
2235 SafetyInfo.computeLoopSafetyInfo(&L);
2236 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2237 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2238 }
2239 }
2240
2241 // The stitching of the branched code back together depends on whether we're
2242 // doing full unswitching or not with the exception that we always want to
2243 // nuke the initial terminator placed in the split block.
2244 SplitBB->getTerminator()->eraseFromParent();
2245 if (FullUnswitch) {
2246 // Splice the terminator from the original loop and rewrite its
2247 // successors.
2248 SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
2249
2250 // Keep a clone of the terminator for MSSA updates.
2251 Instruction *NewTI = TI.clone();
2252 ParentBB->getInstList().push_back(NewTI);
2253
2254 // First wire up the moved terminator to the preheaders.
2255 if (BI) {
2256 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2257 BI->setSuccessor(ClonedSucc, ClonedPH);
2258 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2259 Value *Cond = skipTrivialSelect(BI->getCondition());
2260 if (InsertFreeze) {
2261 if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT))
2262 Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI);
2263 }
2264 BI->setCondition(Cond);
2265 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2266 } else {
2267 assert(SI && "Must either be a branch or switch!")(static_cast <bool> (SI && "Must either be a branch or switch!"
) ? void (0) : __assert_fail ("SI && \"Must either be a branch or switch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2267, __extension__
__PRETTY_FUNCTION__))
;
2268
2269 // Walk the cases and directly update their successors.
2270 assert(SI->getDefaultDest() == RetainedSuccBB &&(static_cast <bool> (SI->getDefaultDest() == RetainedSuccBB
&& "Not retaining default successor!") ? void (0) : __assert_fail
("SI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2271, __extension__
__PRETTY_FUNCTION__))
2271 "Not retaining default successor!")(static_cast <bool> (SI->getDefaultDest() == RetainedSuccBB
&& "Not retaining default successor!") ? void (0) : __assert_fail
("SI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2271, __extension__
__PRETTY_FUNCTION__))
;
2272 SI->setDefaultDest(LoopPH);
2273 for (const auto &Case : SI->cases())
2274 if (Case.getCaseSuccessor() == RetainedSuccBB)
2275 Case.setSuccessor(LoopPH);
2276 else
2277 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2278
2279 if (InsertFreeze) {
2280 auto Cond = SI->getCondition();
2281 if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, SI, &DT))
2282 SI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", SI));
2283 }
2284 // We need to use the set to populate domtree updates as even when there
2285 // are multiple cases pointing at the same successor we only want to
2286 // remove and insert one edge in the domtree.
2287 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2288 DTUpdates.push_back(
2289 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2290 }
2291
2292 if (MSSAU) {
2293 DT.applyUpdates(DTUpdates);
2294 DTUpdates.clear();
2295
2296 // Remove all but one edge to the retained block and all unswitched
2297 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2298 // when we know we only keep a single edge for each case.
2299 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2300 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2301 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2302
2303 for (auto &VMap : VMaps)
2304 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2305 /*IgnoreIncomingWithNoClones=*/true);
2306 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2307
2308 // Remove all edges to unswitched blocks.
2309 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2310 MSSAU->removeEdge(ParentBB, SuccBB);
2311 }
2312
2313 // Now unhook the successor relationship as we'll be replacing
2314 // the terminator with a direct branch. This is much simpler for branches
2315 // than switches so we handle those first.
2316 if (BI) {
2317 // Remove the parent as a predecessor of the unswitched successor.
2318 assert(UnswitchedSuccBBs.size() == 1 &&(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2319, __extension__
__PRETTY_FUNCTION__))
2319 "Only one possible unswitched block for a branch!")(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2319, __extension__
__PRETTY_FUNCTION__))
;
2320 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2321 UnswitchedSuccBB->removePredecessor(ParentBB,
2322 /*KeepOneInputPHIs*/ true);
2323 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2324 } else {
2325 // Note that we actually want to remove the parent block as a predecessor
2326 // of *every* case successor. The case successor is either unswitched,
2327 // completely eliminating an edge from the parent to that successor, or it
2328 // is a duplicate edge to the retained successor as the retained successor
2329 // is always the default successor and as we'll replace this with a direct
2330 // branch we no longer need the duplicate entries in the PHI nodes.
2331 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2332 assert(NewSI->getDefaultDest() == RetainedSuccBB &&(static_cast <bool> (NewSI->getDefaultDest() == RetainedSuccBB
&& "Not retaining default successor!") ? void (0) : __assert_fail
("NewSI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2333, __extension__
__PRETTY_FUNCTION__))
2333 "Not retaining default successor!")(static_cast <bool> (NewSI->getDefaultDest() == RetainedSuccBB
&& "Not retaining default successor!") ? void (0) : __assert_fail
("NewSI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2333, __extension__
__PRETTY_FUNCTION__))
;
2334 for (const auto &Case : NewSI->cases())
2335 Case.getCaseSuccessor()->removePredecessor(
2336 ParentBB,
2337 /*KeepOneInputPHIs*/ true);
2338
2339 // We need to use the set to populate domtree updates as even when there
2340 // are multiple cases pointing at the same successor we only want to
2341 // remove and insert one edge in the domtree.
2342 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2343 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2344 }
2345
2346 // After MSSAU update, remove the cloned terminator instruction NewTI.
2347 ParentBB->getTerminator()->eraseFromParent();
2348
2349 // Create a new unconditional branch to the continuing block (as opposed to
2350 // the one cloned).
2351 BranchInst::Create(RetainedSuccBB, ParentBB);
2352 } else {
2353 assert(BI && "Only branches have partial unswitching.")(static_cast <bool> (BI && "Only branches have partial unswitching."
) ? void (0) : __assert_fail ("BI && \"Only branches have partial unswitching.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2353, __extension__
__PRETTY_FUNCTION__))
;
2354 assert(UnswitchedSuccBBs.size() == 1 &&(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2355, __extension__
__PRETTY_FUNCTION__))
2355 "Only one possible unswitched block for a branch!")(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2355, __extension__
__PRETTY_FUNCTION__))
;
2356 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2357 // When doing a partial unswitch, we have to do a bit more work to build up
2358 // the branch in the split block.
2359 if (PartiallyInvariant)
2360 buildPartialInvariantUnswitchConditionalBranch(
2361 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2362 else {
2363 buildPartialUnswitchConditionalBranch(
2364 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2365 FreezeLoopUnswitchCond, BI, &AC, DT);
2366 }
2367 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2368
2369 if (MSSAU) {
2370 DT.applyUpdates(DTUpdates);
2371 DTUpdates.clear();
2372
2373 // Perform MSSA cloning updates.
2374 for (auto &VMap : VMaps)
2375 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2376 /*IgnoreIncomingWithNoClones=*/true);
2377 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2378 }
2379 }
2380
2381 // Apply the updates accumulated above to get an up-to-date dominator tree.
2382 DT.applyUpdates(DTUpdates);
2383
2384 // Now that we have an accurate dominator tree, first delete the dead cloned
2385 // blocks so that we can accurately build any cloned loops. It is important to
2386 // not delete the blocks from the original loop yet because we still want to
2387 // reference the original loop to understand the cloned loop's structure.
2388 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2389
2390 // Build the cloned loop structure itself. This may be substantially
2391 // different from the original structure due to the simplified CFG. This also
2392 // handles inserting all the cloned blocks into the correct loops.
2393 SmallVector<Loop *, 4> NonChildClonedLoops;
2394 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2395 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2396
2397 // Now that our cloned loops have been built, we can update the original loop.
2398 // First we delete the dead blocks from it and then we rebuild the loop
2399 // structure taking these deletions into account.
2400 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE,DestroyLoopCB);
2401
2402 if (MSSAU && VerifyMemorySSA)
2403 MSSAU->getMemorySSA()->verifyMemorySSA();
2404
2405 SmallVector<Loop *, 4> HoistedLoops;
2406 bool IsStillLoop =
2407 rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2408
2409 if (MSSAU && VerifyMemorySSA)
2410 MSSAU->getMemorySSA()->verifyMemorySSA();
2411
2412 // This transformation has a high risk of corrupting the dominator tree, and
2413 // the below steps to rebuild loop structures will result in hard to debug
2414 // errors in that case so verify that the dominator tree is sane first.
2415 // FIXME: Remove this when the bugs stop showing up and rely on existing
2416 // verification steps.
2417 assert(DT.verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (DT.verify(DominatorTree::VerificationLevel
::Fast)) ? void (0) : __assert_fail ("DT.verify(DominatorTree::VerificationLevel::Fast)"
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2417, __extension__
__PRETTY_FUNCTION__))
;
2418
2419 if (BI && !PartiallyInvariant) {
2420 // If we unswitched a branch which collapses the condition to a known
2421 // constant we want to replace all the uses of the invariants within both
2422 // the original and cloned blocks. We do this here so that we can use the
2423 // now updated dominator tree to identify which side the users are on.
2424 assert(UnswitchedSuccBBs.size() == 1 &&(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2425, __extension__
__PRETTY_FUNCTION__))
2425 "Only one possible unswitched block for a branch!")(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2425, __extension__
__PRETTY_FUNCTION__))
;
2426 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2427
2428 // When considering multiple partially-unswitched invariants
2429 // we cant just go replace them with constants in both branches.
2430 //
2431 // For 'AND' we infer that true branch ("continue") means true
2432 // for each invariant operand.
2433 // For 'OR' we can infer that false branch ("continue") means false
2434 // for each invariant operand.
2435 // So it happens that for multiple-partial case we dont replace
2436 // in the unswitched branch.
2437 bool ReplaceUnswitched =
2438 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2439
2440 ConstantInt *UnswitchedReplacement =
2441 Direction ? ConstantInt::getTrue(BI->getContext())
2442 : ConstantInt::getFalse(BI->getContext());
2443 ConstantInt *ContinueReplacement =
2444 Direction ? ConstantInt::getFalse(BI->getContext())
2445 : ConstantInt::getTrue(BI->getContext());
2446 for (Value *Invariant : Invariants) {
2447 assert(!isa<Constant>(Invariant) &&(static_cast <bool> (!isa<Constant>(Invariant) &&
"Should not be replacing constant values!") ? void (0) : __assert_fail
("!isa<Constant>(Invariant) && \"Should not be replacing constant values!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2448, __extension__
__PRETTY_FUNCTION__))
2448 "Should not be replacing constant values!")(static_cast <bool> (!isa<Constant>(Invariant) &&
"Should not be replacing constant values!") ? void (0) : __assert_fail
("!isa<Constant>(Invariant) && \"Should not be replacing constant values!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2448, __extension__
__PRETTY_FUNCTION__))
;
2449 // Use make_early_inc_range here as set invalidates the iterator.
2450 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2451 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2452 if (!UserI)
2453 continue;
2454
2455 // Replace it with the 'continue' side if in the main loop body, and the
2456 // unswitched if in the cloned blocks.
2457 if (DT.dominates(LoopPH, UserI->getParent()))
2458 U.set(ContinueReplacement);
2459 else if (ReplaceUnswitched &&
2460 DT.dominates(ClonedPH, UserI->getParent()))
2461 U.set(UnswitchedReplacement);
2462 }
2463 }
2464 }
2465
2466 // We can change which blocks are exit blocks of all the cloned sibling
2467 // loops, the current loop, and any parent loops which shared exit blocks
2468 // with the current loop. As a consequence, we need to re-form LCSSA for
2469 // them. But we shouldn't need to re-form LCSSA for any child loops.
2470 // FIXME: This could be made more efficient by tracking which exit blocks are
2471 // new, and focusing on them, but that isn't likely to be necessary.
2472 //
2473 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2474 // loop nest and update every loop that could have had its exits changed. We
2475 // also need to cover any intervening loops. We add all of these loops to
2476 // a list and sort them by loop depth to achieve this without updating
2477 // unnecessary loops.
2478 auto UpdateLoop = [&](Loop &UpdateL) {
2479#ifndef NDEBUG
2480 UpdateL.verifyLoop();
2481 for (Loop *ChildL : UpdateL) {
2482 ChildL->verifyLoop();
2483 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&(static_cast <bool> (ChildL->isRecursivelyLCSSAForm(
DT, LI) && "Perturbed a child loop's LCSSA form!") ? void
(0) : __assert_fail ("ChildL->isRecursivelyLCSSAForm(DT, LI) && \"Perturbed a child loop's LCSSA form!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2484, __extension__
__PRETTY_FUNCTION__))
2484 "Perturbed a child loop's LCSSA form!")(static_cast <bool> (ChildL->isRecursivelyLCSSAForm(
DT, LI) && "Perturbed a child loop's LCSSA form!") ? void
(0) : __assert_fail ("ChildL->isRecursivelyLCSSAForm(DT, LI) && \"Perturbed a child loop's LCSSA form!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2484, __extension__
__PRETTY_FUNCTION__))
;
2485 }
2486#endif
2487 // First build LCSSA for this loop so that we can preserve it when
2488 // forming dedicated exits. We don't want to perturb some other loop's
2489 // LCSSA while doing that CFG edit.
2490 formLCSSA(UpdateL, DT, &LI, SE);
2491
2492 // For loops reached by this loop's original exit blocks we may
2493 // introduced new, non-dedicated exits. At least try to re-form dedicated
2494 // exits for these loops. This may fail if they couldn't have dedicated
2495 // exits to start with.
2496 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2497 };
2498
2499 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2500 // and we can do it in any order as they don't nest relative to each other.
2501 //
2502 // Also check if any of the loops we have updated have become top-level loops
2503 // as that will necessitate widening the outer loop scope.
2504 for (Loop *UpdatedL :
2505 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2506 UpdateLoop(*UpdatedL);
2507 if (UpdatedL->isOutermost())
2508 OuterExitL = nullptr;
2509 }
2510 if (IsStillLoop) {
2511 UpdateLoop(L);
2512 if (L.isOutermost())
2513 OuterExitL = nullptr;
2514 }
2515
2516 // If the original loop had exit blocks, walk up through the outer most loop
2517 // of those exit blocks to update LCSSA and form updated dedicated exits.
2518 if (OuterExitL != &L)
2519 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2520 OuterL = OuterL->getParentLoop())
2521 UpdateLoop(*OuterL);
2522
2523#ifndef NDEBUG
2524 // Verify the entire loop structure to catch any incorrect updates before we
2525 // progress in the pass pipeline.
2526 LI.verify(DT);
2527#endif
2528
2529 // Now that we've unswitched something, make callbacks to report the changes.
2530 // For that we need to merge together the updated loops and the cloned loops
2531 // and check whether the original loop survived.
2532 SmallVector<Loop *, 4> SibLoops;
2533 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2534 if (UpdatedL->getParentLoop() == ParentL)
2535 SibLoops.push_back(UpdatedL);
2536 UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);
2537
2538 if (MSSAU && VerifyMemorySSA)
2539 MSSAU->getMemorySSA()->verifyMemorySSA();
2540
2541 if (BI)
2542 ++NumBranches;
2543 else
2544 ++NumSwitches;
2545}
2546
2547/// Recursively compute the cost of a dominator subtree based on the per-block
2548/// cost map provided.
2549///
2550/// The recursive computation is memozied into the provided DT-indexed cost map
2551/// to allow querying it for most nodes in the domtree without it becoming
2552/// quadratic.
2553static InstructionCost computeDomSubtreeCost(
2554 DomTreeNode &N,
2555 const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,
2556 SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {
2557 // Don't accumulate cost (or recurse through) blocks not in our block cost
2558 // map and thus not part of the duplication cost being considered.
2559 auto BBCostIt = BBCostMap.find(N.getBlock());
2560 if (BBCostIt == BBCostMap.end())
2561 return 0;
2562
2563 // Lookup this node to see if we already computed its cost.
2564 auto DTCostIt = DTCostMap.find(&N);
2565 if (DTCostIt != DTCostMap.end())
2566 return DTCostIt->second;
2567
2568 // If not, we have to compute it. We can't use insert above and update
2569 // because computing the cost may insert more things into the map.
2570 InstructionCost Cost = std::accumulate(
2571 N.begin(), N.end(), BBCostIt->second,
2572 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2573 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2574 });
2575 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2576 (void)Inserted;
2577 assert(Inserted && "Should not insert a node while visiting children!")(static_cast <bool> (Inserted && "Should not insert a node while visiting children!"
) ? void (0) : __assert_fail ("Inserted && \"Should not insert a node while visiting children!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2577, __extension__
__PRETTY_FUNCTION__))
;
2578 return Cost;
2579}
2580
2581/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2582/// making the following replacement:
2583///
2584/// --code before guard--
2585/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2586/// --code after guard--
2587///
2588/// into
2589///
2590/// --code before guard--
2591/// br i1 %cond, label %guarded, label %deopt
2592///
2593/// guarded:
2594/// --code after guard--
2595///
2596/// deopt:
2597/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2598/// unreachable
2599///
2600/// It also makes all relevant DT and LI updates, so that all structures are in
2601/// valid state after this transform.
2602static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
2603 DominatorTree &DT, LoopInfo &LI,
2604 MemorySSAUpdater *MSSAU) {
2605 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2606 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Turning " <<
*GI << " into a branch.\n"; } } while (false)
;
2607 BasicBlock *CheckBB = GI->getParent();
2608
2609 if (MSSAU && VerifyMemorySSA)
2610 MSSAU->getMemorySSA()->verifyMemorySSA();
2611
2612 // Remove all CheckBB's successors from DomTree. A block can be seen among
2613 // successors more than once, but for DomTree it should be added only once.
2614 SmallPtrSet<BasicBlock *, 4> Successors;
2615 for (auto *Succ : successors(CheckBB))
2616 if (Successors.insert(Succ).second)
2617 DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2618
2619 Instruction *DeoptBlockTerm =
2620 SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
2621 BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2622 // SplitBlockAndInsertIfThen inserts control flow that branches to
2623 // DeoptBlockTerm if the condition is true. We want the opposite.
2624 CheckBI->swapSuccessors();
2625
2626 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2627 GuardedBlock->setName("guarded");
2628 CheckBI->getSuccessor(1)->setName("deopt");
2629 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2630
2631 if (MSSAU)
2632 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2633
2634 GI->moveBefore(DeoptBlockTerm);
2635 GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
2636
2637 // Add new successors of CheckBB into DomTree.
2638 for (auto *Succ : successors(CheckBB))
2639 DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
2640
2641 // Now the blocks that used to be CheckBB's successors are GuardedBlock's
2642 // successors.
2643 for (auto *Succ : Successors)
2644 DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
2645
2646 // Make proper changes to DT.
2647 DT.applyUpdates(DTUpdates);
2648 // Inform LI of a new loop block.
2649 L.addBasicBlockToLoop(GuardedBlock, LI);
2650
2651 if (MSSAU) {
2652 MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2653 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2654 if (VerifyMemorySSA)
2655 MSSAU->getMemorySSA()->verifyMemorySSA();
2656 }
2657
2658 ++NumGuards;
2659 return CheckBI;
2660}
2661
2662/// Cost multiplier is a way to limit potentially exponential behavior
2663/// of loop-unswitch. Cost is multiplied in proportion to 2^number of unswitch
2664/// candidates available. Also accounting for the number of "sibling" loops with
2665/// the idea to account for previous unswitches that already happened on this
2666/// cluster of loops. There was an attempt to keep this formula simple,
2667/// just enough to limit the worst case behavior. Even if it is not that simple
2668/// now it is still not an attempt to provide a detailed heuristic size
2669/// prediction.
2670///
2671/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2672/// unswitch candidates, making adequate predictions instead of wild guesses.
2673/// That requires knowing not just the number of "remaining" candidates but
2674/// also costs of unswitching for each of these candidates.
2675static int CalculateUnswitchCostMultiplier(
2676 const Instruction &TI, const Loop &L, const LoopInfo &LI,
2677 const DominatorTree &DT,
2678 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2679
2680 // Guards and other exiting conditions do not contribute to exponential
2681 // explosion as soon as they dominate the latch (otherwise there might be
2682 // another path to the latch remaining that does not allow to eliminate the
2683 // loop copy on unswitch).
2684 const BasicBlock *Latch = L.getLoopLatch();
2685 const BasicBlock *CondBlock = TI.getParent();
2686 if (DT.dominates(CondBlock, Latch) &&
2687 (isGuard(&TI) ||
2688 llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2689 return L.contains(SuccBB);
2690 }) <= 1)) {
2691 NumCostMultiplierSkipped++;
2692 return 1;
2693 }
2694
2695 auto *ParentL = L.getParentLoop();
2696 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2697 : std::distance(LI.begin(), LI.end()));
2698 // Count the number of clones that all the candidates might cause during
2699 // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
2700 int UnswitchedClones = 0;
2701 for (auto Candidate : UnswitchCandidates) {
2702 const Instruction *CI = Candidate.TI;
2703 const BasicBlock *CondBlock = CI->getParent();
2704 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2705 if (isGuard(CI)) {
2706 if (!SkipExitingSuccessors)
2707 UnswitchedClones++;
2708 continue;
2709 }
2710 int NonExitingSuccessors =
2711 llvm::count_if(successors(CondBlock),
2712 [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2713 return !SkipExitingSuccessors || L.contains(SuccBB);
2714 });
2715 UnswitchedClones += Log2_32(NonExitingSuccessors);
2716 }
2717
2718 // Ignore up to the "unscaled candidates" number of unswitch candidates
2719 // when calculating the power-of-two scaling of the cost. The main idea
2720 // with this control is to allow a small number of unswitches to happen
2721 // and rely more on siblings multiplier (see below) when the number
2722 // of candidates is small.
2723 unsigned ClonesPower =
2724 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2725
2726 // Allowing top-level loops to spread a bit more than nested ones.
2727 int SiblingsMultiplier =
2728 std::max((ParentL ? SiblingsCount
2729 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2730 1);
2731 // Compute the cost multiplier in a way that won't overflow by saturating
2732 // at an upper bound.
2733 int CostMultiplier;
2734 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2735 SiblingsMultiplier > UnswitchThreshold)
2736 CostMultiplier = UnswitchThreshold;
2737 else
2738 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2739 (int)UnswitchThreshold);
2740
2741 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplierdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed multiplier "
<< CostMultiplier << " (siblings " << SiblingsMultiplier
<< " * clones " << (1 << ClonesPower) <<
")" << " for unswitch candidate: " << TI <<
"\n"; } } while (false)
2742 << " (siblings " << SiblingsMultiplier << " * clones "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed multiplier "
<< CostMultiplier << " (siblings " << SiblingsMultiplier
<< " * clones " << (1 << ClonesPower) <<
")" << " for unswitch candidate: " << TI <<
"\n"; } } while (false)
2743 << (1 << ClonesPower) << ")"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed multiplier "
<< CostMultiplier << " (siblings " << SiblingsMultiplier
<< " * clones " << (1 << ClonesPower) <<
")" << " for unswitch candidate: " << TI <<
"\n"; } } while (false)
2744 << " for unswitch candidate: " << TI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed multiplier "
<< CostMultiplier << " (siblings " << SiblingsMultiplier
<< " * clones " << (1 << ClonesPower) <<
")" << " for unswitch candidate: " << TI <<
"\n"; } } while (false)
;
2745 return CostMultiplier;
2746}
2747
2748static bool collectUnswitchCandidates(
2749 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
2750 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
2751 const Loop &L, const LoopInfo &LI, AAResults &AA,
2752 const MemorySSAUpdater *MSSAU) {
2753 assert(UnswitchCandidates.empty() && "Should be!")(static_cast <bool> (UnswitchCandidates.empty() &&
"Should be!") ? void (0) : __assert_fail ("UnswitchCandidates.empty() && \"Should be!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2753, __extension__
__PRETTY_FUNCTION__))
;
22
'?' condition is true
2754 // Whether or not we should also collect guards in the loop.
2755 bool CollectGuards = false;
2756 if (UnswitchGuards) {
23
Assuming the condition is false
24
Taking false branch
2757 auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2758 Intrinsic::getName(Intrinsic::experimental_guard));
2759 if (GuardDecl && !GuardDecl->use_empty())
2760 CollectGuards = true;
2761 }
2762
2763 for (auto *BB : L.blocks()) {
25
Assuming '__begin1' is not equal to '__end1'
2764 if (LI.getLoopFor(BB) != &L)
26
Assuming the condition is false
27
Taking false branch
2765 continue;
2766
2767 if (CollectGuards
27.1
'CollectGuards' is false
27.1
'CollectGuards' is false
27.1
'CollectGuards' is false
)
28
Taking false branch
2768 for (auto &I : *BB)
2769 if (isGuard(&I)) {
2770 auto *Cond =
2771 skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
2772 // TODO: Support AND, OR conditions and partial unswitching.
2773 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2774 UnswitchCandidates.push_back({&I, {Cond}});
2775 }
2776
2777 if (auto *SI
29.1
'SI' is non-null
29.1
'SI' is non-null
29.1
'SI' is non-null
= dyn_cast<SwitchInst>(BB->getTerminator())) {
29
Assuming the object is a 'CastReturnType'
2778 // We can only consider fully loop-invariant switch conditions as we need
2779 // to completely eliminate the switch after unswitching.
2780 if (!isa<Constant>(SI->getCondition()) &&
30
Assuming the object is not a 'Constant'
33
Taking true branch
2781 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
31
Assuming the condition is true
32
Assuming the condition is true
2782 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2783 continue;
34
Execution continues on line 2763
2784 }
2785
2786 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2787 if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
2788 BI->getSuccessor(0) == BI->getSuccessor(1))
2789 continue;
2790
2791 Value *Cond = skipTrivialSelect(BI->getCondition());
2792 if (isa<Constant>(Cond))
2793 continue;
2794
2795 if (L.isLoopInvariant(Cond)) {
2796 UnswitchCandidates.push_back({BI, {Cond}});
2797 continue;
2798 }
2799
2800 Instruction &CondI = *cast<Instruction>(Cond);
2801 if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
2802 TinyPtrVector<Value *> Invariants =
2803 collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
2804 if (Invariants.empty())
2805 continue;
2806
2807 UnswitchCandidates.push_back({BI, std::move(Invariants)});
2808 continue;
2809 }
2810 }
2811
2812 if (MSSAU
34.1
'MSSAU' is null
34.1
'MSSAU' is null
34.1
'MSSAU' is null
&& !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
2813 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2814 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2815 })) {
2816 MemorySSA *MSSA = MSSAU->getMemorySSA();
2817 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2818 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "simple-loop-unswitch: Found partially invariant condition "
<< *Info->InstToDuplicate[0] << "\n"; } } while
(false)
2819 dbgs() << "simple-loop-unswitch: Found partially invariant condition "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "simple-loop-unswitch: Found partially invariant condition "
<< *Info->InstToDuplicate[0] << "\n"; } } while
(false)
2820 << *Info->InstToDuplicate[0] << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "simple-loop-unswitch: Found partially invariant condition "
<< *Info->InstToDuplicate[0] << "\n"; } } while
(false)
;
2821 PartialIVInfo = *Info;
2822 PartialIVCondBranch = L.getHeader()->getTerminator();
2823 TinyPtrVector<Value *> ValsToDuplicate;
2824 llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
2825 UnswitchCandidates.push_back(
2826 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2827 }
2828 }
2829 return !UnswitchCandidates.empty();
35
Returning without writing to 'PartialIVInfo.KnownValue'
2830}
2831
2832static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) {
2833 if (!L.isSafeToClone())
2834 return false;
2835 for (auto *BB : L.blocks())
2836 for (auto &I : *BB) {
2837 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
2838 return false;
2839 if (auto *CB = dyn_cast<CallBase>(&I)) {
2840 assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().")(static_cast <bool> (!CB->cannotDuplicate() &&
"Checked by L.isSafeToClone().") ? void (0) : __assert_fail (
"!CB->cannotDuplicate() && \"Checked by L.isSafeToClone().\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2840, __extension__
__PRETTY_FUNCTION__))
;
2841 if (CB->isConvergent())
2842 return false;
2843 }
2844 }
2845
2846 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
2847 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
2848 // irreducible control flow into reducible control flow and introduce new
2849 // loops "out of thin air". If we ever discover important use cases for doing
2850 // this, we can add support to loop unswitch, but it is a lot of complexity
2851 // for what seems little or no real world benefit.
2852 LoopBlocksRPO RPOT(&L);
2853 RPOT.perform(&LI);
2854 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
2855 return false;
2856
2857 SmallVector<BasicBlock *, 4> ExitBlocks;
2858 L.getUniqueExitBlocks(ExitBlocks);
2859 // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
2860 // instruction as we don't know how to split those exit blocks.
2861 // FIXME: We should teach SplitBlock to handle this and remove this
2862 // restriction.
2863 for (auto *ExitBB : ExitBlocks) {
2864 auto *I = ExitBB->getFirstNonPHI();
2865 if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
2866 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
"in exit block\n"; } } while (false)
2867 "in exit block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
"in exit block\n"; } } while (false)
;
2868 return false;
2869 }
2870 }
2871
2872 return true;
2873}
2874
2875static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
2876 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
2877 const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
2878 const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
2879 // Unswitching these terminators will require duplicating parts of
2880 // the loop, so we need to be able to model that cost. Compute the ephemeral
2881 // values and set up a data structure to hold per-BB costs. We cache each
2882 // block's cost so that we don't recompute this when considering different
2883 // subsets of the loop for duplication during unswitching.
2884 SmallPtrSet<const Value *, 4> EphValues;
2885 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
2886 SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;
2887
2888 // Compute the cost of each block, as well as the total loop cost. Also, bail
2889 // out if we see instructions which are incompatible with loop unswitching
2890 // (convergent, noduplicate, or cross-basic-block tokens).
2891 // FIXME: We might be able to safely handle some of these in non-duplicated
2892 // regions.
2893 TargetTransformInfo::TargetCostKind CostKind =
2894 L.getHeader()->getParent()->hasMinSize()
41
Assuming the condition is false
42
'?' condition is false
2895 ? TargetTransformInfo::TCK_CodeSize
2896 : TargetTransformInfo::TCK_SizeAndLatency;
2897 InstructionCost LoopCost = 0;
2898 for (auto *BB : L.blocks()) {
43
Assuming '__begin1' is equal to '__end1'
2899 InstructionCost Cost = 0;
2900 for (auto &I : *BB) {
2901 if (EphValues.count(&I))
2902 continue;
2903 Cost += TTI.getInstructionCost(&I, CostKind);
2904 }
2905 assert(Cost >= 0 && "Must not have negative costs!")(static_cast <bool> (Cost >= 0 && "Must not have negative costs!"
) ? void (0) : __assert_fail ("Cost >= 0 && \"Must not have negative costs!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2905, __extension__
__PRETTY_FUNCTION__))
;
2906 LoopCost += Cost;
2907 assert(LoopCost >= 0 && "Must not have negative loop costs!")(static_cast <bool> (LoopCost >= 0 && "Must not have negative loop costs!"
) ? void (0) : __assert_fail ("LoopCost >= 0 && \"Must not have negative loop costs!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2907, __extension__
__PRETTY_FUNCTION__))
;
2908 BBCostMap[BB] = Cost;
2909 }
2910 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Total loop cost: "
<< LoopCost << "\n"; } } while (false)
;
44
Assuming 'DebugFlag' is false
45
Loop condition is false. Exiting loop
2911
2912 // Now we find the best candidate by searching for the one with the following
2913 // properties in order:
2914 //
2915 // 1) An unswitching cost below the threshold
2916 // 2) The smallest number of duplicated unswitch candidates (to avoid
2917 // creating redundant subsequent unswitching)
2918 // 3) The smallest cost after unswitching.
2919 //
2920 // We prioritize reducing fanout of unswitch candidates provided the cost
2921 // remains below the threshold because this has a multiplicative effect.
2922 //
2923 // This requires memoizing each dominator subtree to avoid redundant work.
2924 //
2925 // FIXME: Need to actually do the number of candidates part above.
2926 SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;
2927 // Given a terminator which might be unswitched, computes the non-duplicated
2928 // cost for that terminator.
2929 auto ComputeUnswitchedCost = [&](Instruction &TI,
2930 bool FullUnswitch) -> InstructionCost {
2931 BasicBlock &BB = *TI.getParent();
2932 SmallPtrSet<BasicBlock *, 4> Visited;
2933
2934 InstructionCost Cost = 0;
2935 for (BasicBlock *SuccBB : successors(&BB)) {
2936 // Don't count successors more than once.
2937 if (!Visited.insert(SuccBB).second)
50
Assuming field 'second' is true
51
Taking false branch
2938 continue;
2939
2940 // If this is a partial unswitch candidate, then it must be a conditional
2941 // branch with a condition of either `or`, `and`, their corresponding
2942 // select forms or partially invariant instructions. In that case, one of
2943 // the successors is necessarily duplicated, so don't even try to remove
2944 // its cost.
2945 if (!FullUnswitch
51.1
'FullUnswitch' is false
51.1
'FullUnswitch' is false
51.1
'FullUnswitch' is false
) {
52
Taking true branch
2946 auto &BI = cast<BranchInst>(TI);
53
'TI' is a 'BranchInst'
2947 Value *Cond = skipTrivialSelect(BI.getCondition());
2948 if (match(Cond, m_LogicalAnd())) {
54
Assuming the condition is false
55
Taking false branch
2949 if (SuccBB == BI.getSuccessor(1))
2950 continue;
2951 } else if (match(Cond, m_LogicalOr())) {
56
Assuming the condition is false
2952 if (SuccBB == BI.getSuccessor(0))
2953 continue;
2954 } else if ((PartialIVInfo.KnownValue->isOneValue() &&
57
Called C++ object pointer is null
2955 SuccBB == BI.getSuccessor(0)) ||
2956 (!PartialIVInfo.KnownValue->isOneValue() &&
2957 SuccBB == BI.getSuccessor(1)))
2958 continue;
2959 }
2960
2961 // This successor's domtree will not need to be duplicated after
2962 // unswitching if the edge to the successor dominates it (and thus the
2963 // entire tree). This essentially means there is no other path into this
2964 // subtree and so it will end up live in only one clone of the loop.
2965 if (SuccBB->getUniquePredecessor() ||
2966 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2967 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
2968 })) {
2969 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
2970 assert(Cost <= LoopCost &&(static_cast <bool> (Cost <= LoopCost && "Non-duplicated cost should never exceed total loop cost!"
) ? void (0) : __assert_fail ("Cost <= LoopCost && \"Non-duplicated cost should never exceed total loop cost!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2971, __extension__
__PRETTY_FUNCTION__))
2971 "Non-duplicated cost should never exceed total loop cost!")(static_cast <bool> (Cost <= LoopCost && "Non-duplicated cost should never exceed total loop cost!"
) ? void (0) : __assert_fail ("Cost <= LoopCost && \"Non-duplicated cost should never exceed total loop cost!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2971, __extension__
__PRETTY_FUNCTION__))
;
2972 }
2973 }
2974
2975 // Now scale the cost by the number of unique successors minus one. We
2976 // subtract one because there is already at least one copy of the entire
2977 // loop. This is computing the new cost of unswitching a condition.
2978 // Note that guards always have 2 unique successors that are implicit and
2979 // will be materialized if we decide to unswitch it.
2980 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
2981 assert(SuccessorsCount > 1 &&(static_cast <bool> (SuccessorsCount > 1 && "Cannot unswitch a condition without multiple distinct successors!"
) ? void (0) : __assert_fail ("SuccessorsCount > 1 && \"Cannot unswitch a condition without multiple distinct successors!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2982, __extension__
__PRETTY_FUNCTION__))
2982 "Cannot unswitch a condition without multiple distinct successors!")(static_cast <bool> (SuccessorsCount > 1 && "Cannot unswitch a condition without multiple distinct successors!"
) ? void (0) : __assert_fail ("SuccessorsCount > 1 && \"Cannot unswitch a condition without multiple distinct successors!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2982, __extension__
__PRETTY_FUNCTION__))
;
2983 return (LoopCost - Cost) * (SuccessorsCount - 1);
2984 };
2985
2986 Optional<NonTrivialUnswitchCandidate> Best;
2987 for (auto &Candidate : UnswitchCandidates) {
46
Assuming '__begin1' is not equal to '__end1'
2988 Instruction &TI = *Candidate.TI;
2989 ArrayRef<Value *> Invariants = Candidate.Invariants;
2990 BranchInst *BI = dyn_cast<BranchInst>(&TI);
47
Assuming the object is a 'CastReturnType'
2991 InstructionCost CandidateCost = ComputeUnswitchedCost(
49
Calling 'operator()'
2992 TI, /*FullUnswitch*/ !BI
47.1
'BI' is non-null
47.1
'BI' is non-null
47.1
'BI' is non-null
||
2993 (Invariants.size() == 1 &&
48
Assuming the condition is false
2994 Invariants[0] == skipTrivialSelect(BI->getCondition())));
2995 // Calculate cost multiplier which is a tool to limit potentially
2996 // exponential behavior of loop-unswitch.
2997 if (EnableUnswitchCostMultiplier) {
2998 int CostMultiplier =
2999 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3000 assert((static_cast <bool> ((CostMultiplier > 0 && CostMultiplier
<= UnswitchThreshold) && "cost multiplier needs to be in the range of 1..UnswitchThreshold"
) ? void (0) : __assert_fail ("(CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && \"cost multiplier needs to be in the range of 1..UnswitchThreshold\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3002, __extension__
__PRETTY_FUNCTION__))
3001 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&(static_cast <bool> ((CostMultiplier > 0 && CostMultiplier
<= UnswitchThreshold) && "cost multiplier needs to be in the range of 1..UnswitchThreshold"
) ? void (0) : __assert_fail ("(CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && \"cost multiplier needs to be in the range of 1..UnswitchThreshold\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3002, __extension__
__PRETTY_FUNCTION__))
3002 "cost multiplier needs to be in the range of 1..UnswitchThreshold")(static_cast <bool> ((CostMultiplier > 0 && CostMultiplier
<= UnswitchThreshold) && "cost multiplier needs to be in the range of 1..UnswitchThreshold"
) ? void (0) : __assert_fail ("(CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && \"cost multiplier needs to be in the range of 1..UnswitchThreshold\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3002, __extension__
__PRETTY_FUNCTION__))
;
3003 CandidateCost *= CostMultiplier;
3004 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCostdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed cost of "
<< CandidateCost << " (multiplier: " << CostMultiplier
<< ")" << " for unswitch candidate: " << TI
<< "\n"; } } while (false)
3005 << " (multiplier: " << CostMultiplier << ")"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed cost of "
<< CandidateCost << " (multiplier: " << CostMultiplier
<< ")" << " for unswitch candidate: " << TI
<< "\n"; } } while (false)
3006 << " for unswitch candidate: " << TI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed cost of "
<< CandidateCost << " (multiplier: " << CostMultiplier
<< ")" << " for unswitch candidate: " << TI
<< "\n"; } } while (false)
;
3007 } else {
3008 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCostdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed cost of "
<< CandidateCost << " for unswitch candidate: " <<
TI << "\n"; } } while (false)
3009 << " for unswitch candidate: " << TI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed cost of "
<< CandidateCost << " for unswitch candidate: " <<
TI << "\n"; } } while (false)
;
3010 }
3011
3012 if (!Best || CandidateCost < Best->Cost) {
3013 Best = Candidate;
3014 Best->Cost = CandidateCost;
3015 }
3016 }
3017 assert(Best && "Must be!")(static_cast <bool> (Best && "Must be!") ? void
(0) : __assert_fail ("Best && \"Must be!\"", "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 3017, __extension__ __PRETTY_FUNCTION__))
;
3018 return *Best;
3019}
3020
3021static bool unswitchBestCondition(
3022 Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
3023 AAResults &AA, TargetTransformInfo &TTI,
3024 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
3025 ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
3026 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
3027 // Collect all invariant conditions within this loop (as opposed to an inner
3028 // loop which would be handled when visiting that inner loop).
3029 SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;
3030 IVConditionInfo PartialIVInfo;
18
Calling implicit default constructor for 'IVConditionInfo'
20
Returning from default constructor for 'IVConditionInfo'
3031 Instruction *PartialIVCondBranch = nullptr;
3032 // If we didn't find any candidates, we're done.
3033 if (!collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
21
Calling 'collectUnswitchCandidates'
36
Returning from 'collectUnswitchCandidates'
3034 PartialIVCondBranch, L, LI, AA, MSSAU))
3035 return false;
3036
3037 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Considering " <<
UnswitchCandidates.size() << " non-trivial loop invariant conditions for unswitching.\n"
; } } while (false)
37
Taking false branch
38
Assuming 'DebugFlag' is false
39
Loop condition is false. Exiting loop
3038 dbgs() << "Considering " << UnswitchCandidates.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Considering " <<
UnswitchCandidates.size() << " non-trivial loop invariant conditions for unswitching.\n"
; } } while (false)
3039 << " non-trivial loop invariant conditions for unswitching.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Considering " <<
UnswitchCandidates.size() << " non-trivial loop invariant conditions for unswitching.\n"
; } } while (false)
;
3040
3041 NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
40
Calling 'findBestNonTrivialUnswitchCandidate'
3042 UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3043
3044 assert(Best.TI && "Failed to find loop unswitch candidate")(static_cast <bool> (Best.TI && "Failed to find loop unswitch candidate"
) ? void (0) : __assert_fail ("Best.TI && \"Failed to find loop unswitch candidate\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3044, __extension__
__PRETTY_FUNCTION__))
;
3045 assert(Best.Cost && "Failed to compute cost")(static_cast <bool> (Best.Cost && "Failed to compute cost"
) ? void (0) : __assert_fail ("Best.Cost && \"Failed to compute cost\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3045, __extension__
__PRETTY_FUNCTION__))
;
3046
3047 if (*Best.Cost >= UnswitchThreshold) {
3048 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Costdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Cannot unswitch, lowest cost found: "
<< *Best.Cost << "\n"; } } while (false)
3049 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Cannot unswitch, lowest cost found: "
<< *Best.Cost << "\n"; } } while (false)
;
3050 return false;
3051 }
3052
3053 if (Best.TI != PartialIVCondBranch)
3054 PartialIVInfo.InstToDuplicate.clear();
3055
3056 // If the best candidate is a guard, turn it into a branch.
3057 if (isGuard(Best.TI))
3058 Best.TI =
3059 turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3060
3061 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Costdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Unswitching non-trivial (cost = "
<< Best.Cost << ") terminator: " << *Best.
TI << "\n"; } } while (false)
3062 << ") terminator: " << *Best.TI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Unswitching non-trivial (cost = "
<< Best.Cost << ") terminator: " << *Best.
TI << "\n"; } } while (false)
;
3063 unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3064 LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB);
3065 return true;
3066}
3067
3068/// Unswitch control flow predicated on loop invariant conditions.
3069///
3070/// This first hoists all branches or switches which are trivial (IE, do not
3071/// require duplicating any part of the loop) out of the loop body. It then
3072/// looks at other loop invariant control flows and tries to unswitch those as
3073/// well by cloning the loop if the result is small enough.
3074///
3075/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3076/// also updated based on the unswitch. The `MSSA` analysis is also updated if
3077/// valid (i.e. its use is enabled).
3078///
3079/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3080/// true, we will attempt to do non-trivial unswitching as well as trivial
3081/// unswitching.
3082///
3083/// The `UnswitchCB` callback provided will be run after unswitching is
3084/// complete, with the first parameter set to `true` if the provided loop
3085/// remains a loop, and a list of new sibling loops created.
3086///
3087/// If `SE` is non-null, we will update that analysis based on the unswitching
3088/// done.
3089static bool
3090unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
3091 AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
3092 bool NonTrivial,
3093 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
3094 ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
3095 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
3096 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
3097 assert(L.isRecursivelyLCSSAForm(DT, LI) &&(static_cast <bool> (L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.") ? void (0
) : __assert_fail ("L.isRecursivelyLCSSAForm(DT, LI) && \"Loops must be in LCSSA form before unswitching.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3098, __extension__
__PRETTY_FUNCTION__))
8
Assuming the condition is true
9
'?' condition is true
3098 "Loops must be in LCSSA form before unswitching.")(static_cast <bool> (L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.") ? void (0
) : __assert_fail ("L.isRecursivelyLCSSAForm(DT, LI) && \"Loops must be in LCSSA form before unswitching.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3098, __extension__
__PRETTY_FUNCTION__))
;
3099
3100 // Must be in loop simplified form: we need a preheader and dedicated exits.
3101 if (!L.isLoopSimplifyForm())
10
Assuming the condition is false
3102 return false;
3103
3104 // Try trivial unswitch first before loop over other basic blocks in the loop.
3105 if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
11
Assuming 'Trivial' is false
3106 // If we unswitched successfully we will want to clean up the loop before
3107 // processing it further so just mark it as unswitched and return.
3108 UnswitchCB(/*CurrentLoopValid*/ true, false, {});
3109 return true;
3110 }
3111
3112 // Check whether we should continue with non-trivial conditions.
3113 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3114 // unswitching for testing and debugging.
3115 // NonTrivial: Parameter that enables non-trivial unswitching for this
3116 // invocation of the transform. But this should be allowed only
3117 // for targets without branch divergence.
3118 //
3119 // FIXME: If divergence analysis becomes available to a loop
3120 // transform, we should allow unswitching for non-trivial uniform
3121 // branches even on targets that have divergence.
3122 // https://bugs.llvm.org/show_bug.cgi?id=48819
3123 bool ContinueWithNonTrivial =
3124 EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
12
Assuming the condition is true
3125 if (!ContinueWithNonTrivial
12.1
'ContinueWithNonTrivial' is true
12.1
'ContinueWithNonTrivial' is true
12.1
'ContinueWithNonTrivial' is true
)
13
Taking false branch
3126 return false;
3127
3128 // Skip non-trivial unswitching for optsize functions.
3129 if (L.getHeader()->getParent()->hasOptSize())
14
Assuming the condition is false
3130 return false;
3131
3132 // Skip cold loops, as unswitching them brings little benefit
3133 // but increases the code size
3134 if (PSI
14.1
'PSI' is null
14.1
'PSI' is null
14.1
'PSI' is null
&& PSI->hasProfileSummary() && BFI &&
3135 PSI->isFunctionColdInCallGraph(L.getHeader()->getParent(), *BFI)) {
3136 LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Skip cold loop: "
<< L << "\n"; } } while (false)
;
3137 return false;
3138 }
3139
3140 // Perform legality checks.
3141 if (!isSafeForNoNTrivialUnswitching(L, LI))
15
Assuming the condition is false
16
Taking false branch
3142 return false;
3143
3144 // For non-trivial unswitching, because it often creates new loops, we rely on
3145 // the pass manager to iterate on the loops rather than trying to immediately
3146 // reach a fixed point. There is no substantial advantage to iterating
3147 // internally, and if any of the new loops are simplified enough to contain
3148 // trivial unswitching we want to prefer those.
3149
3150 // Try to unswitch the best invariant condition. We prefer this full unswitch to
3151 // a partial unswitch when possible below the threshold.
3152 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
17
Calling 'unswitchBestCondition'
3153 DestroyLoopCB))
3154 return true;
3155
3156 // No other opportunities to unswitch.
3157 return false;
3158}
3159
3160PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
3161 LoopStandardAnalysisResults &AR,
3162 LPMUpdater &U) {
3163 Function &F = *L.getHeader()->getParent();
3164 (void)F;
3165 ProfileSummaryInfo *PSI = nullptr;
3166 if (auto OuterProxy
0.1
'OuterProxy' is null
0.1
'OuterProxy' is null
0.1
'OuterProxy' is null
=
3167 AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
3168 .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
3169 PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
3170 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << Ldo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Unswitching loop in "
<< F.getName() << ": " << L << "\n";
} } while (false)
1
Taking false branch
2
Assuming 'DebugFlag' is false
3
Loop condition is false. Exiting loop
3171 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Unswitching loop in "
<< F.getName() << ": " << L << "\n";
} } while (false)
;
3172
3173 // Save the current loop name in a variable so that we can report it even
3174 // after it has been deleted.
3175 std::string LoopName = std::string(L.getName());
3176
3177 auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
3178 bool PartiallyInvariant,
3179 ArrayRef<Loop *> NewLoops) {
3180 // If we did a non-trivial unswitch, we have added new (cloned) loops.
3181 if (!NewLoops.empty())
3182 U.addSiblingLoops(NewLoops);
3183
3184 // If the current loop remains valid, we should revisit it to catch any
3185 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
3186 if (CurrentLoopValid) {
3187 if (PartiallyInvariant) {
3188 // Mark the new loop as partially unswitched, to avoid unswitching on
3189 // the same condition again.
3190 auto &Context = L.getHeader()->getContext();
3191 MDNode *DisableUnswitchMD = MDNode::get(
3192 Context,
3193 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
3194 MDNode *NewLoopID = makePostTransformationMetadata(
3195 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
3196 {DisableUnswitchMD});
3197 L.setLoopID(NewLoopID);
3198 } else
3199 U.revisitCurrentLoop();
3200 } else
3201 U.markLoopAsDeleted(L, LoopName);
3202 };
3203
3204 auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
3205 U.markLoopAsDeleted(L, Name);
3206 };
3207
3208 Optional<MemorySSAUpdater> MSSAU;
3209 if (AR.MSSA) {
4
Assuming field 'MSSA' is null
5
Taking false branch
3210 MSSAU = MemorySSAUpdater(AR.MSSA);
3211 if (VerifyMemorySSA)
3212 AR.MSSA->verifyMemorySSA();
3213 }
3214 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
7
Calling 'unswitchLoop'
3215 UnswitchCB, &AR.SE, MSSAU ? MSSAU.getPointer() : nullptr,
6
'?' condition is false
3216 PSI, AR.BFI, DestroyLoopCB))
3217 return PreservedAnalyses::all();
3218
3219 if (AR.MSSA && VerifyMemorySSA)
3220 AR.MSSA->verifyMemorySSA();
3221
3222 // Historically this pass has had issues with the dominator tree so verify it
3223 // in asserts builds.
3224 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (AR.DT.verify(DominatorTree::VerificationLevel
::Fast)) ? void (0) : __assert_fail ("AR.DT.verify(DominatorTree::VerificationLevel::Fast)"
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3224, __extension__
__PRETTY_FUNCTION__))
;
3225
3226 auto PA = getLoopPassPreservedAnalyses();
3227 if (AR.MSSA)
3228 PA.preserve<MemorySSAAnalysis>();
3229 return PA;
3230}
3231
3232void SimpleLoopUnswitchPass::printPipeline(
3233 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3234 static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline(
3235 OS, MapClassName2PassName);
3236
3237 OS << "<";
3238 OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3239 OS << (Trivial ? "" : "no-") << "trivial";
3240 OS << ">";
3241}
3242
3243namespace {
3244
3245class SimpleLoopUnswitchLegacyPass : public LoopPass {
3246 bool NonTrivial;
3247
3248public:
3249 static char ID; // Pass ID, replacement for typeid
3250
3251 explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
3252 : LoopPass(ID), NonTrivial(NonTrivial) {
3253 initializeSimpleLoopUnswitchLegacyPassPass(
3254 *PassRegistry::getPassRegistry());
3255 }
3256
3257 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
3258
3259 void getAnalysisUsage(AnalysisUsage &AU) const override {
3260 AU.addRequired<AssumptionCacheTracker>();
3261 AU.addRequired<TargetTransformInfoWrapperPass>();
3262 AU.addRequired<MemorySSAWrapperPass>();
3263 AU.addPreserved<MemorySSAWrapperPass>();
3264 getLoopAnalysisUsage(AU);
3265 }
3266};
3267
3268} // end anonymous namespace
3269
3270bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
3271 if (skipLoop(L))
3272 return false;
3273
3274 Function &F = *L->getHeader()->getParent();
3275
3276 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *Ldo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Unswitching loop in "
<< F.getName() << ": " << *L << "\n"
; } } while (false)
3277 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Unswitching loop in "
<< F.getName() << ": " << *L << "\n"
; } } while (false)
;
3278 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3279 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3280 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3281 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3282 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3283 MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3284 MemorySSAUpdater MSSAU(MSSA);
3285
3286 auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3287 auto *SE = SEWP ? &SEWP->getSE() : nullptr;
3288
3289 auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
3290 ArrayRef<Loop *> NewLoops) {
3291 // If we did a non-trivial unswitch, we have added new (cloned) loops.
3292 for (auto *NewL : NewLoops)
3293 LPM.addLoop(*NewL);
3294
3295 // If the current loop remains valid, re-add it to the queue. This is
3296 // a little wasteful as we'll finish processing the current loop as well,
3297 // but it is the best we can do in the old PM.
3298 if (CurrentLoopValid) {
3299 // If the current loop has been unswitched using a partially invariant
3300 // condition, we should not re-add the current loop to avoid unswitching
3301 // on the same condition again.
3302 if (!PartiallyInvariant)
3303 LPM.addLoop(*L);
3304 } else
3305 LPM.markLoopAsDeleted(*L);
3306 };
3307
3308 auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
3309 LPM.markLoopAsDeleted(L);
3310 };
3311
3312 if (VerifyMemorySSA)
3313 MSSA->verifyMemorySSA();
3314 bool Changed =
3315 unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
3316 &MSSAU, nullptr, nullptr, DestroyLoopCB);
3317
3318 if (VerifyMemorySSA)
3319 MSSA->verifyMemorySSA();
3320
3321 // Historically this pass has had issues with the dominator tree so verify it
3322 // in asserts builds.
3323 assert(DT.verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (DT.verify(DominatorTree::VerificationLevel
::Fast)) ? void (0) : __assert_fail ("DT.verify(DominatorTree::VerificationLevel::Fast)"
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3323, __extension__
__PRETTY_FUNCTION__))
;
3324
3325 return Changed;
3326}
3327
3328char SimpleLoopUnswitchLegacyPass::ID = 0;
3329INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",static void *initializeSimpleLoopUnswitchLegacyPassPassOnce(PassRegistry
&Registry) {
3330 "Simple unswitch loops", false, false)static void *initializeSimpleLoopUnswitchLegacyPassPassOnce(PassRegistry
&Registry) {
3331INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry);
3332INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
3333INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
3334INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
3335INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)initializeMemorySSAWrapperPassPass(Registry);
3336INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry);
3337INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",PassInfo *PI = new PassInfo( "Simple unswitch loops", "simple-loop-unswitch"
, &SimpleLoopUnswitchLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<SimpleLoopUnswitchLegacyPass>), false,
false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializeSimpleLoopUnswitchLegacyPassPassFlag
; void llvm::initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeSimpleLoopUnswitchLegacyPassPassFlag
, initializeSimpleLoopUnswitchLegacyPassPassOnce, std::ref(Registry
)); }
3338 "Simple unswitch loops", false, false)PassInfo *PI = new PassInfo( "Simple unswitch loops", "simple-loop-unswitch"
, &SimpleLoopUnswitchLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<SimpleLoopUnswitchLegacyPass>), false,
false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializeSimpleLoopUnswitchLegacyPassPassFlag
; void llvm::initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeSimpleLoopUnswitchLegacyPassPassFlag
, initializeSimpleLoopUnswitchLegacyPassPassOnce, std::ref(Registry
)); }
3339
3340Pass *llvm::createSimpleLoopUnswitchLegacyPass(bool NonTrivial) {
3341 return new SimpleLoopUnswitchLegacyPass(NonTrivial);
3342}

/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/basic_string.h

1// Components for manipulating sequences of characters -*- C++ -*-
2
3// Copyright (C) 1997-2020 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/** @file bits/basic_string.h
26 * This is an internal header file, included by other library headers.
27 * Do not attempt to use it directly. @headername{string}
28 */
29
30//
31// ISO C++ 14882: 21 Strings library
32//
33
34#ifndef _BASIC_STRING_H1
35#define _BASIC_STRING_H1 1
36
37#pragma GCC system_header
38
39#include <ext/atomicity.h>
40#include <ext/alloc_traits.h>
41#include <debug/debug.h>
42
43#if __cplusplus201703L >= 201103L
44#include <initializer_list>
45#endif
46
47#if __cplusplus201703L >= 201703L
48# include <string_view>
49#endif
50
51
52namespace std _GLIBCXX_VISIBILITY(default)__attribute__ ((__visibility__ ("default")))
53{
54_GLIBCXX_BEGIN_NAMESPACE_VERSION
55
56#if _GLIBCXX_USE_CXX11_ABI1
57_GLIBCXX_BEGIN_NAMESPACE_CXX11namespace __cxx11 {
58 /**
59 * @class basic_string basic_string.h <string>
60 * @brief Managing sequences of characters and character-like objects.
61 *
62 * @ingroup strings
63 * @ingroup sequences
64 *
65 * @tparam _CharT Type of character
66 * @tparam _Traits Traits for character type, defaults to
67 * char_traits<_CharT>.
68 * @tparam _Alloc Allocator type, defaults to allocator<_CharT>.
69 *
70 * Meets the requirements of a <a href="tables.html#65">container</a>, a
71 * <a href="tables.html#66">reversible container</a>, and a
72 * <a href="tables.html#67">sequence</a>. Of the
73 * <a href="tables.html#68">optional sequence requirements</a>, only
74 * @c push_back, @c at, and @c %array access are supported.
75 */
76 template<typename _CharT, typename _Traits, typename _Alloc>
77 class basic_string
78 {
79 typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
80 rebind<_CharT>::other _Char_alloc_type;
81 typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
82
83 // Types:
84 public:
85 typedef _Traits traits_type;
86 typedef typename _Traits::char_type value_type;
87 typedef _Char_alloc_type allocator_type;
88 typedef typename _Alloc_traits::size_type size_type;
89 typedef typename _Alloc_traits::difference_type difference_type;
90 typedef typename _Alloc_traits::reference reference;
91 typedef typename _Alloc_traits::const_reference const_reference;
92 typedef typename _Alloc_traits::pointer pointer;
93 typedef typename _Alloc_traits::const_pointer const_pointer;
94 typedef __gnu_cxx::__normal_iterator<pointer, basic_string> iterator;
95 typedef __gnu_cxx::__normal_iterator<const_pointer, basic_string>
96 const_iterator;
97 typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
98 typedef std::reverse_iterator<iterator> reverse_iterator;
99
100 /// Value returned by various member functions when they fail.
101 static const size_type npos = static_cast<size_type>(-1);
102
103 protected:
104 // type used for positions in insert, erase etc.
105#if __cplusplus201703L < 201103L
106 typedef iterator __const_iterator;
107#else
108 typedef const_iterator __const_iterator;
109#endif
110
111 private:
112#if __cplusplus201703L >= 201703L
113 // A helper type for avoiding boiler-plate.
114 typedef basic_string_view<_CharT, _Traits> __sv_type;
115
116 template<typename _Tp, typename _Res>
117 using _If_sv = enable_if_t<
118 __and_<is_convertible<const _Tp&, __sv_type>,
119 __not_<is_convertible<const _Tp*, const basic_string*>>,
120 __not_<is_convertible<const _Tp&, const _CharT*>>>::value,
121 _Res>;
122
123 // Allows an implicit conversion to __sv_type.
124 static __sv_type
125 _S_to_string_view(__sv_type __svt) noexcept
126 { return __svt; }
127
128 // Wraps a string_view by explicit conversion and thus
129 // allows to add an internal constructor that does not
130 // participate in overload resolution when a string_view
131 // is provided.
132 struct __sv_wrapper
133 {
134 explicit __sv_wrapper(__sv_type __sv) noexcept : _M_sv(__sv) { }
135 __sv_type _M_sv;
136 };
137
138 /**
139 * @brief Only internally used: Construct string from a string view
140 * wrapper.
141 * @param __svw string view wrapper.
142 * @param __a Allocator to use.
143 */
144 explicit
145 basic_string(__sv_wrapper __svw, const _Alloc& __a)
146 : basic_string(__svw._M_sv.data(), __svw._M_sv.size(), __a) { }
147#endif
148
149 // Use empty-base optimization: http://www.cantrip.org/emptyopt.html
150 struct _Alloc_hider : allocator_type // TODO check __is_final
151 {
152#if __cplusplus201703L < 201103L
153 _Alloc_hider(pointer __dat, const _Alloc& __a = _Alloc())
154 : allocator_type(__a), _M_p(__dat) { }
155#else
156 _Alloc_hider(pointer __dat, const _Alloc& __a)
157 : allocator_type(__a), _M_p(__dat) { }
158
159 _Alloc_hider(pointer __dat, _Alloc&& __a = _Alloc())
160 : allocator_type(std::move(__a)), _M_p(__dat) { }
161#endif
162
163 pointer _M_p; // The actual data.
164 };
165
166 _Alloc_hider _M_dataplus;
167 size_type _M_string_length;
168
169 enum { _S_local_capacity = 15 / sizeof(_CharT) };
170
171 union
172 {
173 _CharT _M_local_buf[_S_local_capacity + 1];
174 size_type _M_allocated_capacity;
175 };
176
177 void
178 _M_data(pointer __p)
179 { _M_dataplus._M_p = __p; }
180
181 void
182 _M_length(size_type __length)
183 { _M_string_length = __length; }
184
185 pointer
186 _M_data() const
187 { return _M_dataplus._M_p; }
188
189 pointer
190 _M_local_data()
191 {
192#if __cplusplus201703L >= 201103L
193 return std::pointer_traits<pointer>::pointer_to(*_M_local_buf);
194#else
195 return pointer(_M_local_buf);
196#endif
197 }
198
199 const_pointer
200 _M_local_data() const
201 {
202#if __cplusplus201703L >= 201103L
203 return std::pointer_traits<const_pointer>::pointer_to(*_M_local_buf);
204#else
205 return const_pointer(_M_local_buf);
206#endif
207 }
208
209 void
210 _M_capacity(size_type __capacity)
211 { _M_allocated_capacity = __capacity; }
212
213 void
214 _M_set_length(size_type __n)
215 {
216 _M_length(__n);
217 traits_type::assign(_M_data()[__n], _CharT());
218 }
219
220 bool
221 _M_is_local() const
222 { return _M_data() == _M_local_data(); }
223
224 // Create & Destroy
225 pointer
226 _M_create(size_type&, size_type);
227
228 void
229 _M_dispose()
230 {
231 if (!_M_is_local())
232 _M_destroy(_M_allocated_capacity);
233 }
234
235 void
236 _M_destroy(size_type __size) throw()
237 { _Alloc_traits::deallocate(_M_get_allocator(), _M_data(), __size + 1); }
238
239 // _M_construct_aux is used to implement the 21.3.1 para 15 which
240 // requires special behaviour if _InIterator is an integral type
241 template<typename _InIterator>
242 void
243 _M_construct_aux(_InIterator __beg, _InIterator __end,
244 std::__false_type)
245 {
246 typedef typename iterator_traits<_InIterator>::iterator_category _Tag;
247 _M_construct(__beg, __end, _Tag());
248 }
249
250 // _GLIBCXX_RESOLVE_LIB_DEFECTS
251 // 438. Ambiguity in the "do the right thing" clause
252 template<typename _Integer>
253 void
254 _M_construct_aux(_Integer __beg, _Integer __end, std::__true_type)
255 { _M_construct_aux_2(static_cast<size_type>(__beg), __end); }
256
257 void
258 _M_construct_aux_2(size_type __req, _CharT __c)
259 { _M_construct(__req, __c); }
260
261 template<typename _InIterator>
262 void
263 _M_construct(_InIterator __beg, _InIterator __end)
264 {
265 typedef typename std::__is_integer<_InIterator>::__type _Integral;
266 _M_construct_aux(__beg, __end, _Integral());
267 }
268
269 // For Input Iterators, used in istreambuf_iterators, etc.
270 template<typename _InIterator>
271 void
272 _M_construct(_InIterator __beg, _InIterator __end,
273 std::input_iterator_tag);
274
275 // For forward_iterators up to random_access_iterators, used for
276 // string::iterator, _CharT*, etc.
277 template<typename _FwdIterator>
278 void
279 _M_construct(_FwdIterator __beg, _FwdIterator __end,
280 std::forward_iterator_tag);
281
282 void
283 _M_construct(size_type __req, _CharT __c);
284
285 allocator_type&
286 _M_get_allocator()
287 { return _M_dataplus; }
288
289 const allocator_type&
290 _M_get_allocator() const
291 { return _M_dataplus; }
292
293 private:
294
295#ifdef _GLIBCXX_DISAMBIGUATE_REPLACE_INST
296 // The explicit instantiations in misc-inst.cc require this due to
297 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64063
298 template<typename _Tp, bool _Requires =
299 !__are_same<_Tp, _CharT*>::__value
300 && !__are_same<_Tp, const _CharT*>::__value
301 && !__are_same<_Tp, iterator>::__value
302 && !__are_same<_Tp, const_iterator>::__value>
303 struct __enable_if_not_native_iterator
304 { typedef basic_string& __type; };
305 template<typename _Tp>
306 struct __enable_if_not_native_iterator<_Tp, false> { };
307#endif
308
309 size_type
310 _M_check(size_type __pos, const char* __s) const
311 {
312 if (__pos > this->size())
313 __throw_out_of_range_fmt(__N("%s: __pos (which is %zu) > "("%s: __pos (which is %zu) > " "this->size() (which is %zu)"
)
314 "this->size() (which is %zu)")("%s: __pos (which is %zu) > " "this->size() (which is %zu)"
)
,
315 __s, __pos, this->size());
316 return __pos;
317 }
318
319 void
320 _M_check_length(size_type __n1, size_type __n2, const char* __s) const
321 {
322 if (this->max_size() - (this->size() - __n1) < __n2)
323 __throw_length_error(__N(__s)(__s));
324 }
325
326
327 // NB: _M_limit doesn't check for a bad __pos value.
328 size_type
329 _M_limit(size_type __pos, size_type __off) const _GLIBCXX_NOEXCEPTnoexcept
330 {
331 const bool __testoff = __off < this->size() - __pos;
332 return __testoff ? __off : this->size() - __pos;
333 }
334
335 // True if _Rep and source do not overlap.
336 bool
337 _M_disjunct(const _CharT* __s) const _GLIBCXX_NOEXCEPTnoexcept
338 {
339 return (less<const _CharT*>()(__s, _M_data())
340 || less<const _CharT*>()(_M_data() + this->size(), __s));
341 }
342
343 // When __n = 1 way faster than the general multichar
344 // traits_type::copy/move/assign.
345 static void
346 _S_copy(_CharT* __d, const _CharT* __s, size_type __n)
347 {
348 if (__n == 1)
349 traits_type::assign(*__d, *__s);
350 else
351 traits_type::copy(__d, __s, __n);
352 }
353
354 static void
355 _S_move(_CharT* __d, const _CharT* __s, size_type __n)
356 {
357 if (__n == 1)
358 traits_type::assign(*__d, *__s);
359 else
360 traits_type::move(__d, __s, __n);
361 }
362
363 static void
364 _S_assign(_CharT* __d, size_type __n, _CharT __c)
365 {
366 if (__n == 1)
367 traits_type::assign(*__d, __c);
368 else
369 traits_type::assign(__d, __n, __c);
370 }
371
372 // _S_copy_chars is a separate template to permit specialization
373 // to optimize for the common case of pointers as iterators.
374 template<class _Iterator>
375 static void
376 _S_copy_chars(_CharT* __p, _Iterator __k1, _Iterator __k2)
377 {
378 for (; __k1 != __k2; ++__k1, (void)++__p)
379 traits_type::assign(*__p, *__k1); // These types are off.
380 }
381
382 static void
383 _S_copy_chars(_CharT* __p, iterator __k1, iterator __k2) _GLIBCXX_NOEXCEPTnoexcept
384 { _S_copy_chars(__p, __k1.base(), __k2.base()); }
385
386 static void
387 _S_copy_chars(_CharT* __p, const_iterator __k1, const_iterator __k2)
388 _GLIBCXX_NOEXCEPTnoexcept
389 { _S_copy_chars(__p, __k1.base(), __k2.base()); }
390
391 static void
392 _S_copy_chars(_CharT* __p, _CharT* __k1, _CharT* __k2) _GLIBCXX_NOEXCEPTnoexcept
393 { _S_copy(__p, __k1, __k2 - __k1); }
394
395 static void
396 _S_copy_chars(_CharT* __p, const _CharT* __k1, const _CharT* __k2)
397 _GLIBCXX_NOEXCEPTnoexcept
398 { _S_copy(__p, __k1, __k2 - __k1); }
399
400 static int
401 _S_compare(size_type __n1, size_type __n2) _GLIBCXX_NOEXCEPTnoexcept
402 {
403 const difference_type __d = difference_type(__n1 - __n2);
404
405 if (__d > __gnu_cxx::__numeric_traits<int>::__max)
406 return __gnu_cxx::__numeric_traits<int>::__max;
407 else if (__d < __gnu_cxx::__numeric_traits<int>::__min)
408 return __gnu_cxx::__numeric_traits<int>::__min;
409 else
410 return int(__d);
411 }
412
413 void
414 _M_assign(const basic_string&);
415
416 void
417 _M_mutate(size_type __pos, size_type __len1, const _CharT* __s,
418 size_type __len2);
419
420 void
421 _M_erase(size_type __pos, size_type __n);
422
423 public:
424 // Construct/copy/destroy:
425 // NB: We overload ctors in some cases instead of using default
426 // arguments, per 17.4.4.4 para. 2 item 2.
427
428 /**
429 * @brief Default constructor creates an empty string.
430 */
431 basic_string()
432 _GLIBCXX_NOEXCEPT_IF(is_nothrow_default_constructible<_Alloc>::value)noexcept(is_nothrow_default_constructible<_Alloc>::value
)
433 : _M_dataplus(_M_local_data())
434 { _M_set_length(0); }
435
436 /**
437 * @brief Construct an empty string using allocator @a a.
438 */
439 explicit
440 basic_string(const _Alloc& __a) _GLIBCXX_NOEXCEPTnoexcept
441 : _M_dataplus(_M_local_data(), __a)
442 { _M_set_length(0); }
443
444 /**
445 * @brief Construct string with copy of value of @a __str.
446 * @param __str Source string.
447 */
448 basic_string(const basic_string& __str)
449 : _M_dataplus(_M_local_data(),
450 _Alloc_traits::_S_select_on_copy(__str._M_get_allocator()))
451 { _M_construct(__str._M_data(), __str._M_data() + __str.length()); }
452
453 // _GLIBCXX_RESOLVE_LIB_DEFECTS
454 // 2583. no way to supply an allocator for basic_string(str, pos)
455 /**
456 * @brief Construct string as copy of a substring.
457 * @param __str Source string.
458 * @param __pos Index of first character to copy from.
459 * @param __a Allocator to use.
460 */
461 basic_string(const basic_string& __str, size_type __pos,
462 const _Alloc& __a = _Alloc())
463 : _M_dataplus(_M_local_data(), __a)
464 {
465 const _CharT* __start = __str._M_data()
466 + __str._M_check(__pos, "basic_string::basic_string");
467 _M_construct(__start, __start + __str._M_limit(__pos, npos));
468 }
469
470 /**
471 * @brief Construct string as copy of a substring.
472 * @param __str Source string.
473 * @param __pos Index of first character to copy from.
474 * @param __n Number of characters to copy.
475 */
476 basic_string(const basic_string& __str, size_type __pos,
477 size_type __n)
478 : _M_dataplus(_M_local_data())
479 {
480 const _CharT* __start = __str._M_data()
481 + __str._M_check(__pos, "basic_string::basic_string");
482 _M_construct(__start, __start + __str._M_limit(__pos, __n));
483 }
484
485 /**
486 * @brief Construct string as copy of a substring.
487 * @param __str Source string.
488 * @param __pos Index of first character to copy from.
489 * @param __n Number of characters to copy.
490 * @param __a Allocator to use.
491 */
492 basic_string(const basic_string& __str, size_type __pos,
493 size_type __n, const _Alloc& __a)
494 : _M_dataplus(_M_local_data(), __a)
495 {
496 const _CharT* __start
497 = __str._M_data() + __str._M_check(__pos, "string::string"); // NOTE(review): sibling overloads pass "basic_string::basic_string" here
498 _M_construct(__start, __start + __str._M_limit(__pos, __n)); // length comes from _M_limit(__pos, __n)
499 }
500
501 /**
502 * @brief Construct string initialized by a character %array.
503 * @param __s Source character %array.
504 * @param __n Number of characters to copy.
505 * @param __a Allocator to use (default is default allocator).
506 *
507 * NB: @a __s must have at least @a __n characters, &apos;\\0&apos;
508 * has no special meaning.
 *
 * The range [__s, __s + __n) is passed to _M_construct() as is.
509 */
510 basic_string(const _CharT* __s, size_type __n,
511 const _Alloc& __a = _Alloc())
512 : _M_dataplus(_M_local_data(), __a)
513 { _M_construct(__s, __s + __n); }
514
515 /**
516 * @brief Construct string as copy of a C string.
517 * @param __s Source C string.
518 * @param __a Allocator to use (default is default allocator).
 *
 * For a non-null @a __s the end of the range is
 * __s + traits_type::length(__s). For a null @a __s,
 * traits_type::length() is not called and the end passed to
 * _M_construct() is __s + npos.
519 */
520#if __cpp_deduction_guides && ! defined _GLIBCXX_DEFINING_STRING_INSTANTIATIONS
521 // _GLIBCXX_RESOLVE_LIB_DEFECTS
522 // 3076. basic_string CTAD ambiguity
523 template<typename = _RequireAllocator<_Alloc>>
524#endif
525 basic_string(const _CharT* __s, const _Alloc& __a = _Alloc())
526 : _M_dataplus(_M_local_data(), __a)
527 { _M_construct(__s, __s ? __s + traits_type::length(__s) : __s+npos); }
528
529 /**
530 * @brief Construct string as multiple characters.
531 * @param __n Number of characters.
532 * @param __c Character to use.
533 * @param __a Allocator to use (default is default allocator).
 *
 * When deduction guides are available (and the string instantiations
 * are not being defined) the constructor is a template constrained by
 * _RequireAllocator, see LWG 3076 below.
534 */
535#if __cpp_deduction_guides && ! defined _GLIBCXX_DEFINING_STRING_INSTANTIATIONS
536 // _GLIBCXX_RESOLVE_LIB_DEFECTS
537 // 3076. basic_string CTAD ambiguity
538 template<typename = _RequireAllocator<_Alloc>>
539#endif
540 basic_string(size_type __n, _CharT __c, const _Alloc& __a = _Alloc())
541 : _M_dataplus(_M_local_data(), __a)
542 { _M_construct(__n, __c); }
543
544#if __cplusplus >= 201103L // C++11 and later
545 /**
546 * @brief Move construct string.
547 * @param __str Source string.
548 *
549 * The newly-created string contains the exact contents of @a __str.
550 * @a __str is a valid, but unspecified string.
 *
 * As implemented below, @a __str ends up with its data pointer reset
 * to its own local data and its length set to 0.
551 **/
552 basic_string(basic_string&& __str) noexcept
553 : _M_dataplus(_M_local_data(), std::move(__str._M_get_allocator()))
554 {
555 if (__str._M_is_local())
556 {
557 traits_type::copy(_M_local_buf, __str._M_local_buf, // local case: copy the whole local buffer
558 _S_local_capacity + 1);
559 }
560 else
561 {
562 _M_data(__str._M_data()); // non-local case: adopt the source's pointer...
563 _M_capacity(__str._M_allocated_capacity); // ...and its recorded capacity
564 }
565
566 // Must use _M_length() here not _M_set_length() because
567 // basic_stringbuf relies on writing into unallocated capacity so
568 // we mess up the contents if we put a '\0' in the string.
569 _M_length(__str.length());
570 __str._M_data(__str._M_local_data()); // detach the source from any storage it handed over
571 __str._M_set_length(0);
572 }
573
574 /**
575 * @brief Construct string from an initializer %list.
576 * @param __l std::initializer_list of characters.
577 * @param __a Allocator to use (default is default allocator).
 *
 * The range [__l.begin(), __l.end()) is passed to _M_construct().
578 */
579 basic_string(initializer_list<_CharT> __l, const _Alloc& __a = _Alloc())
580 : _M_dataplus(_M_local_data(), __a)
581 { _M_construct(__l.begin(), __l.end()); }
582
583 basic_string(const basic_string& __str, const _Alloc& __a) // copy of __str that uses the supplied allocator __a
584 : _M_dataplus(_M_local_data(), __a)
585 { _M_construct(__str.begin(), __str.end()); } // characters come from [__str.begin(), __str.end())
586
587 basic_string(basic_string&& __str, const _Alloc& __a) // move-construct from __str using the supplied allocator __a
588 noexcept(_Alloc_traits::_S_always_equal())
589 : _M_dataplus(_M_local_data(), __a)
590 {
591 if (__str._M_is_local())
592 {
593 traits_type::copy(_M_local_buf, __str._M_local_buf, // local case: copy the whole local buffer
594 _S_local_capacity + 1);
595 _M_length(__str.length());
596 __str._M_set_length(0);
597 }
598 else if (_Alloc_traits::_S_always_equal()
599 || __str.get_allocator() == __a)
600 {
601 _M_data(__str._M_data()); // allocators compare equal: adopt the source's pointer
602 _M_length(__str.length());
603 _M_capacity(__str._M_allocated_capacity);
604 __str._M_data(__str._M_local_buf); // source goes back to its local buffer
605 __str._M_set_length(0);
606 }
607 else
608 _M_construct(__str.begin(), __str.end()); // allocators differ: construct from the source's range; __str is not modified on this path
609 }
610
611#endif // C++11
612
613 /**
614 * @brief Construct string as copy of a range.
615 * @param __beg Start of range.
616 * @param __end End of range.
617 * @param __a Allocator to use (default is default allocator).
 *
 * In C++11 and later the template carries an extra defaulted parameter,
 * std::_RequireInputIter<_InputIterator>; otherwise it is unconstrained.
618 */
619#if __cplusplus >= 201103L
620 template<typename _InputIterator,
621 typename = std::_RequireInputIter<_InputIterator>>
622#else
623 template<typename _InputIterator>
624#endif
625 basic_string(_InputIterator __beg, _InputIterator __end,
626 const _Alloc& __a = _Alloc())
627 : _M_dataplus(_M_local_data(), __a)
628 { _M_construct(__beg, __end); }
629
630#if __cplusplus >= 201703L // C++17 and later
631 /**
632 * @brief Construct string from a substring of a string_view.
633 * @param __t Source object convertible to string view.
634 * @param __pos The index of the first character to copy from __t.
635 * @param __n The number of characters to copy from __t.
636 * @param __a Allocator to use.
 *
 * Delegates to another constructor with
 * _S_to_string_view(__t).substr(__pos, __n) and @a __a.
637 */
638 template<typename _Tp, typename = _If_sv<_Tp, void>>
639 basic_string(const _Tp& __t, size_type __pos, size_type __n,
640 const _Alloc& __a = _Alloc())
641 : basic_string(_S_to_string_view(__t).substr(__pos, __n), __a) { }
642
643 /**
644 * @brief Construct string from a string_view.
645 * @param __t Source object convertible to string view.
646 * @param __a Allocator to use (default is default allocator).
 *
 * Delegates to another constructor, passing the view wrapped in
 * __sv_wrapper together with @a __a.
647 */
648 template<typename _Tp, typename = _If_sv<_Tp, void>>
649 explicit
650 basic_string(const _Tp& __t, const _Alloc& __a = _Alloc())
651 : basic_string(__sv_wrapper(_S_to_string_view(__t)), __a) { }
652#endif // C++17
653
654 /**
655 * @brief Destroy the string instance.
 *
 * All of the work is done by _M_dispose().
656 */
657 ~basic_string()
658 { _M_dispose(); }
659
660 /**
661 * @brief Assign the value of @a __str to this string.
662 * @param __str Source string.
 * @return Whatever assign(__str) returns.
663 */
664 basic_string&
665 operator=(const basic_string& __str)
666 {
667 return this->assign(__str);
668 }
669
670 /**
671 * @brief Copy contents of @a __s into this string.
672 * @param __s Source null-terminated string.
 * @return Whatever assign(__s) returns.
673 */
674 basic_string&
675 operator=(const _CharT* __s)
676 { return this->assign(__s); }
677
678 /**
679 * @brief Set value to string of length 1.
680 * @param __c Source character.
 * @return Reference to this string.
681 *
682 * Assigning to a character makes this string length 1 and
683 * (*this)[0] == @a __c.
684 */
685 basic_string&
686 operator=(_CharT __c)
687 {
688 this->assign(1, __c); // one copy of __c
689 return *this;
690 }
691
692#if __cplusplus >= 201103L // C++11 and later
693 /**
694 * @brief Move assign the value of @a __str to this string.
695 * @param __str Source string.
 * @return Reference to this string.
696 *
697 * The contents of @a __str are moved into this string (without copying).
698 * @a __str is a valid, but unspecified string.
 *
 * Note that, as written below, a source held in its local buffer has
 * its characters copied with _S_copy(), and the final else branch
 * falls back to assign(). In every case @a __str is cleared before
 * returning.
699 **/
700 // _GLIBCXX_RESOLVE_LIB_DEFECTS
701 // 2063. Contradictory requirements for string move assignment
702 basic_string&
703 operator=(basic_string&& __str)
704 noexcept(_Alloc_traits::_S_nothrow_move())
705 {
706 if (!_M_is_local() && _Alloc_traits::_S_propagate_on_move_assign()
707 && !_Alloc_traits::_S_always_equal()
708 && _M_get_allocator() != __str._M_get_allocator())
709 {
710 // Destroy existing storage before replacing allocator.
711 _M_destroy(_M_allocated_capacity);
712 _M_data(_M_local_data());
713 _M_set_length(0);
714 }
715 // Replace allocator if POCMA is true.
716 std::__alloc_on_move(_M_get_allocator(), __str._M_get_allocator());
717
718 if (__str._M_is_local())
719 {
720 // We've always got room for a short string, just copy it.
721 if (__str.size())
722 this->_S_copy(_M_data(), __str._M_data(), __str.size());
723 _M_set_length(__str.size());
724 }
725 else if (_Alloc_traits::_S_propagate_on_move_assign()
726 || _Alloc_traits::_S_always_equal()
727 || _M_get_allocator() == __str._M_get_allocator())
728 {
729 // Just move the allocated pointer, our allocator can free it.
730 pointer __data = nullptr; // stays null unless our old storage is handed to __str
731 size_type __capacity; // only assigned, and only read, together with a non-null __data
732 if (!_M_is_local())
733 {
734 if (_Alloc_traits::_S_always_equal())
735 {
736 // __str can reuse our existing storage.
737 __data = _M_data();
738 __capacity = _M_allocated_capacity;
739 }
740 else // __str can't use it, so free it.
741 _M_destroy(_M_allocated_capacity);
742 }
743
744 _M_data(__str._M_data());
745 _M_length(__str.length());
746 _M_capacity(__str._M_allocated_capacity);
747 if (__data)
748 {
749 __str._M_data(__data); // swap our previous storage into the source
750 __str._M_capacity(__capacity);
751 }
752 else
753 __str._M_data(__str._M_local_buf); // nothing to hand over: source returns to its local buffer
754 }
755 else // Need to do a deep copy
756 assign(__str);
757 __str.clear(); // runs on every path
758 return *this;
759 }
760
761 /**
762 * @brief Set value to string constructed from initializer %list.
763 * @param __l std::initializer_list.
 * @return Reference to this string.
764 */
765 basic_string&
766 operator=(initializer_list<_CharT> __l)
767 {
768 this->assign(__l.begin(), __l.size()); // pointer + count form of assign()
769 return *this;
770 }
771#endif // C++11
772
773#if __cplusplus >= 201703L // C++17 and later
774 /**
775 * @brief Set value to string constructed from a string_view.
776 * @param __svt An object convertible to string_view.
 * @return Whatever assign(__svt) returns.
777 */
778 template<typename _Tp>
779 _If_sv<_Tp, basic_string&>
780 operator=(const _Tp& __svt)
781 { return this->assign(__svt); }
782
783 /**
784 * @brief Convert to a string_view.
785 * @return A string_view built from data() and size().
786 */
787 operator __sv_type() const noexcept
788 { return __sv_type(data(), size()); }
789#endif // C++17
790
791 // Iterators:
792 /**
793 * Returns a read/write iterator that points to the first character in
794 * the %string.
795 */
796 iterator
797 begin() _GLIBCXX_NOEXCEPT
798 { return iterator(_M_data()); } // iterator built from _M_data()
799
800 /**
801 * Returns a read-only (constant) iterator that points to the first
802 * character in the %string.
803 */
804 const_iterator
805 begin() const _GLIBCXX_NOEXCEPT
806 { return const_iterator(_M_data()); } // const_iterator built from _M_data()
807
808 /**
809 * Returns a read/write iterator that points one past the last
810 * character in the %string.
811 */
812 iterator
813 end() _GLIBCXX_NOEXCEPT
814 { return iterator(_M_data() + this->size()); } // _M_data() advanced by size()
815
816 /**
817 * Returns a read-only (constant) iterator that points one past the
818 * last character in the %string.
819 */
820 const_iterator
821 end() const _GLIBCXX_NOEXCEPT
822 { return const_iterator(_M_data() + this->size()); } // _M_data() advanced by size()
823
824 /**
825 * Returns a read/write reverse iterator that points to the last
826 * character in the %string. Iteration is done in reverse element
827 * order.
828 */
829 reverse_iterator
830 rbegin() _GLIBCXX_NOEXCEPT
831 { return reverse_iterator(this->end()); } // built from end()
832
833 /**
834 * Returns a read-only (constant) reverse iterator that points
835 * to the last character in the %string. Iteration is done in
836 * reverse element order.
837 */
838 const_reverse_iterator
839 rbegin() const _GLIBCXX_NOEXCEPT
840 { return const_reverse_iterator(this->end()); } // built from end()
841
842 /**
843 * Returns a read/write reverse iterator that points to one before the
844 * first character in the %string. Iteration is done in reverse
845 * element order.
846 */
847 reverse_iterator
848 rend() _GLIBCXX_NOEXCEPT
849 { return reverse_iterator(this->begin()); } // built from begin()
850
851 /**
852 * Returns a read-only (constant) reverse iterator that points
853 * to one before the first character in the %string. Iteration
854 * is done in reverse element order.
855 */
856 const_reverse_iterator
857 rend() const _GLIBCXX_NOEXCEPT
858 { return const_reverse_iterator(this->begin()); } // built from begin()
859
860#if __cplusplus >= 201103L // C++11 and later
861 /**
862 * Returns a read-only (constant) iterator that points to the first
863 * character in the %string.
864 */
865 const_iterator
866 cbegin() const noexcept
867 { return const_iterator(this->_M_data()); } // same expression as the const begin()
868
869 /**
870 * Returns a read-only (constant) iterator that points one past the
871 * last character in the %string.
872 */
873 const_iterator
874 cend() const noexcept
875 { return const_iterator(this->_M_data() + this->size()); } // same expression as the const end()
876
877 /**
878 * Returns a read-only (constant) reverse iterator that points
879 * to the last character in the %string. Iteration is done in
880 * reverse element order.
881 */
882 const_reverse_iterator
883 crbegin() const noexcept
884 { return const_reverse_iterator(this->end()); } // built from end()
885
886 /**
887 * Returns a read-only (constant) reverse iterator that points
888 * to one before the first character in the %string. Iteration
889 * is done in reverse element order.
890 */
891 const_reverse_iterator
892 crend() const noexcept
893 { return const_reverse_iterator(this->begin()); } // built from begin()
894#endif
895
896 public:
897 // Capacity:
898 /// Returns the number of characters in the string, not including any
899 /// null-termination. Reads the stored _M_string_length.
900 size_type
901 size() const _GLIBCXX_NOEXCEPT
902 { return _M_string_length; }
903
904 /// Returns the number of characters in the string, not including any
905 /// null-termination. Returns the same member as size().
906 size_type
907 length() const _GLIBCXX_NOEXCEPT
908 { return _M_string_length; }
909
910 /// Returns the size() of the largest possible %string.
 /// Computed as (allocator max_size - 1) / 2.
911 size_type
912 max_size() const _GLIBCXX_NOEXCEPT
913 { return (_Alloc_traits::max_size(_M_get_allocator()) - 1) / 2; }
914
915 /**
916 * @brief Resizes the %string to the specified number of characters.
917 * @param __n Number of characters the %string should contain.
918 * @param __c Character to fill any new elements.
919 *
920 * This function will %resize the %string to the specified
921 * number of characters. If the number is smaller than the
922 * %string's current size the %string is truncated, otherwise
923 * the %string is extended and new elements are %set to @a __c.
 *
 * Declaration only; the definition is not part of this listing excerpt.
924 */
925 void
926 resize(size_type __n, _CharT __c);
927
928 /**
929 * @brief Resizes the %string to the specified number of characters.
930 * @param __n Number of characters the %string should contain.
931 *
932 * This function will resize the %string to the specified length. If
933 * the new size is smaller than the %string's current size the %string
934 * is truncated, otherwise the %string is extended and new characters
935 * are default-constructed. For basic types such as char, this means
936 * setting them to 0.
 *
 * Implemented as resize(__n, _CharT()).
937 */
938 void
939 resize(size_type __n)
940 { this->resize(__n, _CharT()); }
941
942#if __cplusplus >= 201103L // C++11 and later
943 /// A non-binding request to reduce capacity() to size().
 /// The whole body sits inside #if __cpp_exceptions, so the function is
 /// empty when that macro is not defined.
944 void
945 shrink_to_fit() noexcept
946 {
947#if __cpp_exceptions
948 if (capacity() > size())
949 {
950 try
951 { reserve(0); }
952 catch(...)
953 { } // any exception from reserve() is deliberately swallowed (function is noexcept)
954 }
955#endif
956 }
957#endif
958
959 /**
960 * Returns the total number of characters that the %string can hold
961 * before needing to allocate more memory.
 *
 * This is _S_local_capacity while _M_is_local() holds, and
 * _M_allocated_capacity otherwise.
962 */
963 size_type
964 capacity() const _GLIBCXX_NOEXCEPT
965 {
966 return _M_is_local() ? size_type(_S_local_capacity)
967 : _M_allocated_capacity;
968 }
969
970 /**
971 * @brief Attempt to preallocate enough memory for specified number of
972 * characters.
973 * @param __res_arg Number of characters required.
974 * @throw std::length_error If @a __res_arg exceeds @c max_size().
975 *
976 * This function attempts to reserve enough memory for the
977 * %string to hold the specified number of characters. If the
978 * number requested is more than max_size(), length_error is
979 * thrown.
980 *
981 * The advantage of this function is that if optimal code is a
982 * necessity and the user can determine the string length that will be
983 * required, the user can reserve the memory in %advance, and thus
984 * prevent a possible reallocation of memory and copying of %string
985 * data.
 *
 * Declaration only; the definition is not part of this listing excerpt.
 * @a __res_arg defaults to 0.
986 */
987 void
988 reserve(size_type __res_arg = 0);
989
990 /**
991 * Erases the string, making it empty.
 * Implemented as _M_set_length(0).
992 */
993 void
994 clear() _GLIBCXX_NOEXCEPT
995 { _M_set_length(0); }
996
997 /**
998 * Returns true if the %string is empty. Equivalent to
999 * <code>*this == ""</code>.
1000 */
1001 _GLIBCXX_NODISCARD bool
1002 empty() const _GLIBCXX_NOEXCEPT
1003 { return this->size() == 0; }
1004
1005 // Element access:
1006 /**
1007 * @brief Subscript access to the data contained in the %string.
1008 * @param __pos The index of the character to access.
1009 * @return Read-only (constant) reference to the character.
1010 *
1011 * This operator allows for easy, array-style, data access.
1012 * Note that data access with this operator is unchecked and
1013 * out_of_range lookups are not defined. (For checked lookups
1014 * see at().)
1015 */
1016 const_reference
1017 operator[] (size_type __pos) const _GLIBCXX_NOEXCEPT
1018 {
1019 __glibcxx_assert(__pos <= size()); // __pos == size() is accepted by this assertion
1020 return _M_data()[__pos];
1021 }
1022
1023 /**
1024 * @brief Subscript access to the data contained in the %string.
1025 * @param __pos The index of the character to access.
1026 * @return Read/write reference to the character.
1027 *
1028 * This operator allows for easy, array-style, data access.
1029 * Note that data access with this operator is unchecked and
1030 * out_of_range lookups are not defined. (For checked lookups
1031 * see at().)
1032 */
1033 reference
1034 operator[](size_type __pos)
1035 {
1036 // Allow pos == size() both in C++98 mode, as v3 extension,
1037 // and in C++11 mode.
1038 __glibcxx_assert(__pos <= size());
1039 // In pedantic mode be strict in C++98 mode.
1040 _GLIBCXX_DEBUG_PEDASSERT(__cplusplus >= 201103L || __pos < size());
1041 return _M_data()[__pos]; // no range check beyond the assertions above
1042 }
1043
1044 /**
1045 * @brief Provides access to the data contained in the %string.
1046 * @param __n The index of the character to access.
1047 * @return Read-only (const) reference to the character.
1048 * @throw std::out_of_range If @a __n is an invalid index.
1049 *
1050 * This function provides for safer data access. The parameter is
1051 * first checked that it is in the range of the string. The function
1052 * throws out_of_range if the check fails.
1053 */
1054 const_reference
1055 at(size_type __n) const
1056 {
1057 if (__n >= this->size()) // unlike operator[], __n == size() is rejected
1058 __throw_out_of_range_fmt(__N("basic_string::at: __n "
1059 "(which is %zu) >= this->size() "
1060 "(which is %zu)"),
1061 __n, this->size());
1062 return _M_data()[__n];
1063 }
1064
1065 /**
1066 * @brief Provides access to the data contained in the %string.
1067 * @param __n The index of the character to access.
1068 * @return Read/write reference to the character.
1069 * @throw std::out_of_range If @a __n is an invalid index.
1070 *
1071 * This function provides for safer data access. The parameter is
1072 * first checked that it is in the range of the string. The function
1073 * throws out_of_range if the check fails.
1074 */
1075 reference
1076 at(size_type __n)
1077 {
1078 if (__n >= size()) // unlike operator[], __n == size() is rejected
1079 __throw_out_of_range_fmt(__N("basic_string::at: __n "
1080 "(which is %zu) >= this->size() "
1081 "(which is %zu)"),
1082 __n, this->size());
1083 return _M_data()[__n];
1084 }
1085
1086#if __cplusplus >= 201103L // C++11 and later
1087 /**
1088 * Returns a read/write reference to the data at the first
1089 * element of the %string.
 * The string is asserted to be non-empty.
1090 */
1091 reference
1092 front() noexcept
1093 {
1094 __glibcxx_assert(!empty());
1095 return operator[](0);
1096 }
1097
1098 /**
1099 * Returns a read-only (constant) reference to the data at the first
1100 * element of the %string.
 * The string is asserted to be non-empty.
1101 */
1102 const_reference
1103 front() const noexcept
1104 {
1105 __glibcxx_assert(!empty());
1106 return operator[](0);
1107 }
1108
1109 /**
1110 * Returns a read/write reference to the data at the last
1111 * element of the %string.
 * The string is asserted to be non-empty.
1112 */
1113 reference
1114 back() noexcept
1115 {
1116 __glibcxx_assert(!empty());
1117 return operator[](this->size() - 1); // index of the last character
1118 }
1119
1120 /**
1121 * Returns a read-only (constant) reference to the data at the
1122 * last element of the %string.
 * The string is asserted to be non-empty.
1123 */
1124 const_reference
1125 back() const noexcept
1126 {
1127 __glibcxx_assert(!empty());
1128 return operator[](this->size() - 1); // index of the last character
1129 }
1130#endif
1131
1132 // Modifiers:
1133 /**
1134 * @brief Append a string to this string.
1135 * @param __str The string to append.
1136 * @return Reference to this string.
 *
 * Forwards to append(__str).
1137 */
1138 basic_string&
1139 operator+=(const basic_string& __str)
1140 { return this->append(__str); }
1141
1142 /**
1143 * @brief Append a C string.
1144 * @param __s The C string to append.
1145 * @return Reference to this string.
 *
 * Forwards to append(__s).
1146 */
1147 basic_string&
1148 operator+=(const _CharT* __s)
1149 { return this->append(__s); }
1150
1151 /**
1152 * @brief Append a character.
1153 * @param __c The character to append.
1154 * @return Reference to this string.
 *
 * Forwards to push_back(__c).
1155 */
1156 basic_string&
1157 operator+=(_CharT __c)
1158 {
1159 this->push_back(__c);
1160 return *this;
1161 }
1162
1163#if __cplusplus >= 201103L // C++11 and later
1164 /**
1165 * @brief Append an initializer_list of characters.
1166 * @param __l The initializer_list of characters to be appended.
1167 * @return Reference to this string.
 *
 * Forwards to append(__l.begin(), __l.size()).
1168 */
1169 basic_string&
1170 operator+=(initializer_list<_CharT> __l)
1171 { return this->append(__l.begin(), __l.size()); }
1172#endif // C++11
1173
1174#if __cplusplus >= 201703L // C++17 and later
1175 /**
1176 * @brief Append a string_view.
1177 * @param __svt An object convertible to string_view to be appended.
1178 * @return Reference to this string.
 *
 * Forwards to append(__svt).
1179 */
1180 template<typename _Tp>
1181 _If_sv<_Tp, basic_string&>
1182 operator+=(const _Tp& __svt)
1183 { return this->append(__svt); }
1184#endif // C++17
1185
1186 /**
1187 * @brief Append a string to this string.
1188 * @param __str The string to append.
1189 * @return Reference to this string.
 *
 * Forwards @a __str's data pointer and size to _M_append().
1190 */
1191 basic_string&
1192 append(const basic_string& __str)
1193 { return _M_append(__str._M_data(), __str.size()); }
1194
1195 /**
1196 * @brief Append a substring.
1197 * @param __str The string to append.
1198 * @param __pos Index of the first character of str to append.
1199 * @param __n The number of characters to append.
1200 * @return Reference to this string.
1201 * @throw std::out_of_range if @a __pos is not a valid index.
1202 *
1203 * This function appends @a __n characters from @a __str
1204 * starting at @a __pos to this string. If @a __n is larger
1205 * than the number of available characters in @a __str, the
1206 * remainder of @a __str is appended.
1207 */
1208 basic_string&
1209 append(const basic_string& __str, size_type __pos, size_type __n = npos)
1210 { return _M_append(__str._M_data()
1211 + __str._M_check(__pos, "basic_string::append"), // start offset from _M_check()
1212 __str._M_limit(__pos, __n)); } // count from _M_limit()
1213
1214 /**
1215 * @brief Append a C substring.
1216 * @param __s The C string to append.
1217 * @param __n The number of characters to append.
1218 * @return Reference to this string.
1219 */
1220 basic_string&
1221 append(const _CharT* __s, size_type __n)
1222 {
1223 __glibcxx_requires_string_len(__s, __n);
1224 _M_check_length(size_type(0), __n, "basic_string::append"); // length check: 0 characters replaced, __n added
1225 return _M_append(__s, __n);
1226 }
1227
1228 /**
1229 * @brief Append a C string.
1230 * @param __s The C string to append.
1231 * @return Reference to this string.
1232 */
1233 basic_string&
1234 append(const _CharT* __s)
1235 {
1236 __glibcxx_requires_string(__s);
1237 const size_type __n = traits_type::length(__s); // length measured via the traits
1238 _M_check_length(size_type(0), __n, "basic_string::append"); // length check: 0 characters replaced, __n added
1239 return _M_append(__s, __n);
1240 }
1241
1242 /**
1243 * @brief Append multiple characters.
1244 * @param __n The number of characters to append.
1245 * @param __c The character to use.
1246 * @return Reference to this string.
1247 *
1248 * Appends __n copies of __c to this string.
1249 */
1250 basic_string&
1251 append(size_type __n, _CharT __c)
1252 { return _M_replace_aux(this->size(), size_type(0), __n, __c); } // position size(), zero characters replaced
1253
1254#if __cplusplus >= 201103L // C++11 and later
1255 /**
1256 * @brief Append an initializer_list of characters.
1257 * @param __l The initializer_list of characters to append.
1258 * @return Reference to this string.
 *
 * Forwards to append(__l.begin(), __l.size()).
1259 */
1260 basic_string&
1261 append(initializer_list<_CharT> __l)
1262 { return this->append(__l.begin(), __l.size()); }
1263#endif // C++11
1264
1265 /**
1266 * @brief Append a range of characters.
1267 * @param __first Iterator referencing the first character to append.
1268 * @param __last Iterator marking the end of the range.
1269 * @return Reference to this string.
1270 *
1271 * Appends characters in the range [__first,__last) to this string.
 * Implemented as replace(end(), end(), __first, __last).
1272 */
1273#if __cplusplus >= 201103L
1274 template<class _InputIterator,
1275 typename = std::_RequireInputIter<_InputIterator>>
1276#else
1277 template<class _InputIterator>
1278#endif
1279 basic_string&
1280 append(_InputIterator __first, _InputIterator __last)
1281 { return this->replace(end(), end(), __first, __last); }
1282
1283#if __cplusplus >= 201703L // C++17 and later
1284 /**
1285 * @brief Append a string_view.
1286 * @param __svt An object convertible to string_view to be appended.
1287 * @return Reference to this string.
1288 */
1289 template<typename _Tp>
1290 _If_sv<_Tp, basic_string&>
1291 append(const _Tp& __svt)
1292 {
1293 __sv_type __sv = __svt; // convert once, then use the view's data()/size()
1294 return this->append(__sv.data(), __sv.size());
1295 }
1296
1297 /**
1298 * @brief Append a range of characters from a string_view.
1299 * @param __svt An object convertible to string_view to be appended from.
1300 * @param __pos The position in the string_view to append from.
1301 * @param __n The number of characters to append from the string_view.
1302 * @return Reference to this string.
1303 */
1304 template<typename _Tp>
1305 _If_sv<_Tp, basic_string&>
1306 append(const _Tp& __svt, size_type __pos, size_type __n = npos)
1307 {
1308 __sv_type __sv = __svt; // convert once, then work on the view
1309 return _M_append(__sv.data()
1310 + std::__sv_check(__sv.size(), __pos, "basic_string::append"), // start offset from __sv_check()
1311 std::__sv_limit(__sv.size(), __pos, __n)); // count from __sv_limit()
1312 }
1313#endif // C++17
1314
1315 /**
1316 * @brief Append a single character.
1317 * @param __c Character to append.
1318 */
1319 void
1320 push_back(_CharT __c)
1321 {
1322 const size_type __size = this->size();
1323 if (__size + 1 > this->capacity())
1324 this->_M_mutate(__size, size_type(0), 0, size_type(1)); // not enough capacity for one more character
1325 traits_type::assign(this->_M_data()[__size], __c); // write __c at the old end
1326 this->_M_set_length(__size + 1);
1327 }
1328
1329 /**
1330 * @brief Set value to contents of another string.
1331 * @param __str Source string to use.
1332 * @return Reference to this string.
 *
 * In C++11 and later, when the allocator propagates on copy assignment,
 * storage obtained from a different allocator is released first and the
 * allocator is replaced; the characters are then copied by _M_assign().
1333 */
1334 basic_string&
1335 assign(const basic_string& __str)
1336 {
1337#if __cplusplus >= 201103L
1338 if (_Alloc_traits::_S_propagate_on_copy_assign())
1339 {
1340 if (!_Alloc_traits::_S_always_equal() && !_M_is_local()
1341 && _M_get_allocator() != __str._M_get_allocator())
1342 {
1343 // Propagating allocator cannot free existing storage so must
1344 // deallocate it before replacing current allocator.
1345 if (__str.size() <= _S_local_capacity)
1346 {
1347 _M_destroy(_M_allocated_capacity);
1348 _M_data(_M_local_data());
1349 _M_set_length(0);
1350 }
1351 else
1352 {
1353 const auto __len = __str.size();
1354 auto __alloc = __str._M_get_allocator();
1355 // If this allocation throws there are no effects:
1356 auto __ptr = _Alloc_traits::allocate(__alloc, __len + 1);
1357 _M_destroy(_M_allocated_capacity);
1358 _M_data(__ptr);
1359 _M_capacity(__len);
1360 _M_set_length(__len);
1361 }
1362 }
1363 std::__alloc_on_copy(_M_get_allocator(), __str._M_get_allocator());
1364 }
1365#endif
1366 this->_M_assign(__str);
1367 return *this;
1368 }
1369
1370#if __cplusplus >= 201103L // C++11 and later
1371 /**
1372 * @brief Set value to contents of another string.
1373 * @param __str Source string to use.
1374 * @return Reference to this string.
1375 *
1376 * This function sets this string to the exact contents of @a __str.
1377 * @a __str is a valid, but unspecified string.
 *
 * Implemented by move-assigning @a __str to *this.
1378 */
1379 basic_string&
1380 assign(basic_string&& __str)
1381 noexcept(_Alloc_traits::_S_nothrow_move())
1382 {
1383 // _GLIBCXX_RESOLVE_LIB_DEFECTS
1384 // 2063. Contradictory requirements for string move assignment
1385 return *this = std::move(__str);
1386 }
1387#endif // C++11
1388
1389 /**
1390 * @brief Set value to a substring of a string.
1391 * @param __str The string to use.
1392 * @param __pos Index of the first character of str.
1393 * @param __n Number of characters to use.
1394 * @return Reference to this string.
1395 * @throw std::out_of_range if @a __pos is not a valid index.
1396 *
1397 * This function sets this string to the substring of @a __str
1398 * consisting of @a __n characters at @a __pos. If @a __n is
1399 * larger than the number of available characters in @a
1400 * __str, the remainder of @a __str is used.
1401 */
1402 basic_string&
1403 assign(const basic_string& __str, size_type __pos, size_type __n = npos)
1404 { return _M_replace(size_type(0), this->size(), __str._M_data() // replaces the whole current contents [0, size())
1405 + __str._M_check(__pos, "basic_string::assign"),
1406 __str._M_limit(__pos, __n)); }
1407
1408 /**
1409 * @brief Set value to a C substring.
1410 * @param __s The C string to use.
1411 * @param __n Number of characters to use.
1412 * @return Reference to this string.
1413 *
1414 * This function sets the value of this string to the first @a __n
1415 * characters of @a __s. If @a __n is larger than the number of
1416 * available characters in @a __s, the remainder of @a __s is used.
 *
 * NOTE(review): the body passes @a __n to _M_replace() unchanged; no
 * clamping to the length of @a __s is visible here.
1417 */
1418 basic_string&
1419 assign(const _CharT* __s, size_type __n)
1420 {
1421 __glibcxx_requires_string_len(__s, __n);
1422 return _M_replace(size_type(0), this->size(), __s, __n); // replaces the whole current contents [0, size())
1423 }
1424
1425 /**
1426 * @brief Set value to contents of a C string.
1427 * @param __s The C string to use.
1428 * @return Reference to this string.
1429 *
1430 * This function sets the value of this string to the value of @a __s.
1431 * The data is copied, so there is no dependence on @a __s once the
1432 * function returns.
1433 */
1434 basic_string&
1435 assign(const _CharT* __s)
1436 {
1437 __glibcxx_requires_string(__s);
1438 return _M_replace(size_type(0), this->size(), __s, // replaces the whole current contents [0, size())
1439 traits_type::length(__s));
1440 }
1441
1442 /**
1443 * @brief Set value to multiple characters.
1444 * @param __n Length of the resulting string.
1445 * @param __c The character to use.
1446 * @return Reference to this string.
1447 *
1448 * This function sets the value of this string to @a __n copies of
1449 * character @a __c.
1450 */
1451 basic_string&
1452 assign(size_type __n, _CharT __c)
1453 { return _M_replace_aux(size_type(0), this->size(), __n, __c); } // replaces the whole current contents [0, size())
1454
1455 /**
1456 * @brief Set value to a range of characters.
1457 * @param __first Iterator referencing the first character to assign.
1458 * @param __last Iterator marking the end of the range.
1459 * @return Reference to this string.
1460 *
1461 * Sets value of string to characters in the range [__first,__last).
 * Implemented as replace(begin(), end(), __first, __last).
1462 */
1463#if __cplusplus >= 201103L
1464 template<class _InputIterator,
1465 typename = std::_RequireInputIter<_InputIterator>>
1466#else
1467 template<class _InputIterator>
1468#endif
1469 basic_string&
1470 assign(_InputIterator __first, _InputIterator __last)
1471 { return this->replace(begin(), end(), __first, __last); }
1472
1473#if __cplusplus >= 201103L // C++11 and later
1474 /**
1475 * @brief Set value to an initializer_list of characters.
1476 * @param __l The initializer_list of characters to assign.
1477 * @return Reference to this string.
 *
 * Forwards to assign(__l.begin(), __l.size()).
1478 */
1479 basic_string&
1480 assign(initializer_list<_CharT> __l)
1481 { return this->assign(__l.begin(), __l.size()); }
1482#endif // C++11
1483
1484#if __cplusplus >= 201703L // C++17 and later
1485 /**
1486 * @brief Set value from a string_view.
1487 * @param __svt The source object convertible to string_view.
1488 * @return Reference to this string.
1489 */
1490 template<typename _Tp>
1491 _If_sv<_Tp, basic_string&>
1492 assign(const _Tp& __svt)
1493 {
1494 __sv_type __sv = __svt; // convert once, then use the view's data()/size()
1495 return this->assign(__sv.data(), __sv.size());
1496 }
1497
1498 /**
1499 * @brief Set value from a range of characters in a string_view.
1500 * @param __svt The source object convertible to string_view.
1501 * @param __pos The position in the string_view to assign from.
1502 * @param __n The number of characters to assign.
1503 * @return Reference to this string.
1504 */
1505 template<typename _Tp>
1506 _If_sv<_Tp, basic_string&>
1507 assign(const _Tp& __svt, size_type __pos, size_type __n = npos)
1508 {
1509 __sv_type __sv = __svt; // convert the argument to __sv_type before taking data() and size()
1510 return _M_replace(size_type(0), this->size(), // target span: start 0, length size(), i.e. the whole current contents
1511 __sv.data()
1512 + std::__sv_check(__sv.size(), __pos, "basic_string::assign"), // source pointer is data() advanced by the value __sv_check returns (presumably __pos after validating it against size(); helper not visible here)
1513 std::__sv_limit(__sv.size(), __pos, __n)); // source length comes from __sv_limit (presumably __n clamped to what remains after __pos)
1514 }
1515#endif // C++17
1516
1517#if __cplusplus >= 201103L
1518 /**
1519 * @brief Insert multiple characters.
1520 * @param __p Const_iterator referencing location in string to
1521 * insert at.
1522 * @param __n Number of characters to insert
1523 * @param __c The character to insert.
1524 * @return Iterator referencing the first inserted char.
1525 * @throw std::length_error If new length exceeds @c max_size().
1526 *
1527 * Inserts @a __n copies of character @a __c starting at the
1528 * position referenced by iterator @a __p. If adding
1529 * characters causes the length to exceed max_size(),
1530 * length_error is thrown. The value of the string doesn't
1531 * change if an error is thrown.
1532 */
1533 iterator
1534 insert(const_iterator __p, size_type __n, _CharT __c)
1535 {
1536 _GLIBCXX_DEBUG_PEDASSERT(__p >= begin() && __p <= end()); // assertion macro checking that __p lies in [begin(), end()] (presumably only active in debug/pedantic builds)
1537 const size_type __pos = __p - begin(); // save the offset before the string is modified
1538 this->replace(__p, __p, __n, __c); // the replaced range [__p, __p) is empty, so this only inserts __n copies of __c
1539 return iterator(this->_M_data() + __pos); // result is rebuilt from _M_data() plus the saved offset rather than from __p
1540 }
1541#else
1542 /**
1543 * @brief Insert multiple characters.
1544 * @param __p Iterator referencing location in string to insert at.
1545 * @param __n Number of characters to insert
1546 * @param __c The character to insert.
1547 * @throw std::length_error If new length exceeds @c max_size().
1548 *
1549 * Inserts @a __n copies of character @a __c starting at the
1550 * position referenced by iterator @a __p. If adding
1551 * characters causes the length to exceed max_size(),
1552 * length_error is thrown. The value of the string doesn't
1553 * change if an error is thrown.
1554 */
1555 void // this #else-branch variant returns nothing
1556 insert(iterator __p, size_type __n, _CharT __c)
1557 { this->replace(__p, __p, __n, __c); } // insertion expressed as a replace of the empty range [__p, __p)
1558#endif
1559
1560#if __cplusplus >= 201103L
1561 /**
1562 * @brief Insert a range of characters.
1563 * @param __p Const_iterator referencing location in string to
1564 * insert at.
1565 * @param __beg Start of range.
1566 * @param __end End of range.
1567 * @return Iterator referencing the first inserted char.
1568 * @throw std::length_error If new length exceeds @c max_size().
1569 *
1570 * Inserts characters in range [beg,end). If adding characters
1571 * causes the length to exceed max_size(), length_error is
1572 * thrown. The value of the string doesn't change if an error
1573 * is thrown.
1574 */
1575 template<class _InputIterator,
1576 typename = std::_RequireInputIter<_InputIterator>> // defaulted template parameter built from std::_RequireInputIter (presumably limits the overload to input iterators; helper not visible here)
1577 iterator
1578 insert(const_iterator __p, _InputIterator __beg, _InputIterator __end)
1579 {
1580 _GLIBCXX_DEBUG_PEDASSERT(__p >= begin() && __p <= end()); // assertion macro checking that __p lies in [begin(), end()]
1581 const size_type __pos = __p - begin(); // save the offset before the string is modified
1582 this->replace(__p, __p, __beg, __end); // empty target range [__p, __p), so [__beg, __end) is inserted and nothing is removed
1583 return iterator(this->_M_data() + __pos); // result is rebuilt from _M_data() plus the saved offset
1584 }
1585#else
1586 /**
1587 * @brief Insert a range of characters.
1588 * @param __p Iterator referencing location in string to insert at.
1589 * @param __beg Start of range.
1590 * @param __end End of range.
1591 * @throw std::length_error If new length exceeds @c max_size().
1592 *
1593 * Inserts characters in range [__beg,__end). If adding
1594 * characters causes the length to exceed max_size(),
1595 * length_error is thrown. The value of the string doesn't
1596 * change if an error is thrown.
1597 */
1598 template<class _InputIterator> // #else-branch variant: no extra template parameter
1599 void
1600 insert(iterator __p, _InputIterator __beg, _InputIterator __end)
1601 { this->replace(__p, __p, __beg, __end); } // insertion expressed as a replace of the empty range [__p, __p)
1602#endif
1603
1604#if __cplusplus >= 201103L
1605 /**
1606 * @brief Insert an initializer_list of characters.
1607 * @param __p Iterator referencing location in string to insert at.
1608 * @param __l The initializer_list of characters to insert.
1609 * @throw std::length_error If new length exceeds @c max_size().
1610 */
1611 iterator // returns the result of the iterator-range insert overload it forwards to
1612 insert(const_iterator __p, initializer_list<_CharT> __l)
1613 { return this->insert(__p, __l.begin(), __l.end()); } // the list is passed on as the range [__l.begin(), __l.end())
1614
1615#ifdef _GLIBCXX_DEFINING_STRING_INSTANTIATIONS
1616 // See PR libstdc++/83328
1617 void // only compiled when _GLIBCXX_DEFINING_STRING_INSTANTIATIONS is defined (see the enclosing #ifdef)
1618 insert(iterator __p, initializer_list<_CharT> __l)
1619 {
1620 _GLIBCXX_DEBUG_PEDASSERT(__p >= begin() && __p <= end()); // assertion macro checking that __p lies in [begin(), end()]
1621 this->insert(__p - begin(), __l.begin(), __l.size()); // converts the iterator to an index and forwards to the (index, pointer, count) overload
1622 }
1623#endif
1624#endif // C++11
1625
1626 /**
1627 * @brief Insert value of a string.
1628 * @param __pos1 Position in string to insert at.
1629 * @param __str The string to insert.
1630 * @return Reference to this string.
1631 * @throw std::length_error If new length exceeds @c max_size().
1632 *
1633 * Inserts value of @a __str starting at @a __pos1. If adding
1634 * characters causes the length to exceed max_size(),
1635 * length_error is thrown. The value of the string doesn't
1636 * change if an error is thrown.
1637 */
1638 basic_string&
1639 insert(size_type __pos1, const basic_string& __str)
1640 { return this->replace(__pos1, size_type(0), // zero-length target span at __pos1: nothing is removed, so this is an insertion
1641 __str._M_data(), __str.size()); } // source is the full contents of __str, passed as (pointer, length)
1642
1643 /**
1644 * @brief Insert a substring.
1645 * @param __pos1 Position in string to insert at.
1646 * @param __str The string to insert.
1647 * @param __pos2 Start of characters in str to insert.
1648 * @param __n Number of characters to insert.
1649 * @return Reference to this string.
1650 * @throw std::length_error If new length exceeds @c max_size().
1651 * @throw std::out_of_range If @a __pos1 > size() or
1652 * @a __pos2 > @a __str.size().
1653 *
1654 * Starting at @a __pos1, insert @a __n characters of @a __str
1655 * beginning with @a __pos2. If adding characters causes the
1656 * length to exceed max_size(), length_error is thrown. If @a
1657 * __pos1 is beyond the end of this string or @a __pos2 is
1658 * beyond the end of @a __str, out_of_range is thrown. The
1659 * value of the string doesn't change if an error is thrown.
1660 */
1661 basic_string&
1662 insert(size_type __pos1, const basic_string& __str,
1663 size_type __pos2, size_type __n = npos)
1664 { return this->replace(__pos1, size_type(0), __str._M_data() // zero-length target span at __pos1, so nothing is removed
1665 + __str._M_check(__pos2, "basic_string::insert"), // source pointer is __str's data advanced by the value _M_check returns (presumably __pos2 after a range check on __str; helper not visible here)
1666 __str._M_limit(__pos2, __n)); } // source length comes from __str._M_limit (presumably __n clamped to what remains of __str after __pos2)
1667
1668 /**
1669 * @brief Insert a C substring.
1670 * @param __pos Position in string to insert at.
1671 * @param __s The C string to insert.
1672 * @param __n The number of characters to insert.
1673 * @return Reference to this string.
1674 * @throw std::length_error If new length exceeds @c max_size().
1675 * @throw std::out_of_range If @a __pos is beyond the end of this
1676 * string.
1677 *
1678 * Inserts the first @a __n characters of @a __s starting at @a
1679 * __pos. If adding characters causes the length to exceed
1680 * max_size(), length_error is thrown. If @a __pos is beyond
1681 * end(), out_of_range is thrown. The value of the string
1682 * doesn't change if an error is thrown.
1683 */
1684 basic_string&
1685 insert(size_type __pos, const _CharT* __s, size_type __n)
1686 { return this->replace(__pos, size_type(0), __s, __n); } // replace with a zero-length target span at __pos, so the __n characters at __s are inserted and nothing is removed
1687
1688 /**
1689 * @brief Insert a C string.
1690 * @param __pos Position in string to insert at.
1691 * @param __s The C string to insert.
1692 * @return Reference to this string.
1693 * @throw std::length_error If new length exceeds @c max_size().
1694 * @throw std::out_of_range If @a __pos is beyond the end of this
1695 * string.
1696 *
1697 * Inserts the characters of the C string @a __s starting at @a __pos. If
1698 * adding characters causes the length to exceed max_size(),
1699 * length_error is thrown. If @a __pos is beyond end(), out_of_range is
1700 * thrown. The value of the string doesn't change if an error is
1701 * thrown.
1702 */
1703 basic_string&
1704 insert(size_type __pos, const _CharT* __s)
1705 {
1706 __glibcxx_requires_string(__s); // precondition macro applied to __s (presumably a debug-mode validity check; defined outside this view)
1707 return this->replace(__pos, size_type(0), __s, // zero-length target span at __pos, so nothing is removed
1708 traits_type::length(__s)); // the number of characters inserted is computed with traits_type::length(__s)
1709 }
1710
1711 /**
1712 * @brief Insert multiple characters.
1713 * @param __pos Index in string to insert at.
1714 * @param __n Number of characters to insert
1715 * @param __c The character to insert.
1716 * @return Reference to this string.
1717 * @throw std::length_error If new length exceeds @c max_size().
1718 * @throw std::out_of_range If @a __pos is beyond the end of this
1719 * string.
1720 *
1721 * Inserts @a __n copies of character @a __c starting at index
1722 * @a __pos. If adding characters causes the length to exceed
1723 * max_size(), length_error is thrown. If @a __pos > length(),
1724 * out_of_range is thrown. The value of the string doesn't
1725 * change if an error is thrown.
1726 */
1727 basic_string&
1728 insert(size_type __pos, size_type __n, _CharT __c)
1729 { return _M_replace_aux(_M_check(__pos, "basic_string::insert"), // the position is passed through _M_check first (presumably the source of the out_of_range documented above)
1730 size_type(0), __n, __c); } // zero characters replaced, __n copies of __c inserted
1731
1732 /**
1733 * @brief Insert one character.
1734 * @param __p Iterator referencing position in string to insert at.
1735 * @param __c The character to insert.
1736 * @return Iterator referencing newly inserted char.
1737 * @throw std::length_error If new length exceeds @c max_size().
1738 *
1739 * Inserts character @a __c at position referenced by @a __p.
1740 * If adding character causes the length to exceed max_size(),
1741 * length_error is thrown. If @a __p is beyond end of string,
1742 * out_of_range is thrown. The value of the string doesn't
1743 * change if an error is thrown.
1744 */
1745 iterator
1746 insert(__const_iterator __p, _CharT __c)
1747 {
1748 _GLIBCXX_DEBUG_PEDASSERT(__p >= begin() && __p <= end()); // assertion macro checking that __p lies in [begin(), end()]
1749 const size_type __pos = __p - begin(); // save the offset before the string is modified
1750 _M_replace_aux(__pos, size_type(0), size_type(1), __c); // zero characters replaced, one copy of __c inserted. NOTE(review): no _M_check call is visible here, unlike the index-based overloads; confirm whether the out_of_range mentioned in the comment above can really occur
1751 return iterator(_M_data() + __pos); // result is rebuilt from _M_data() plus the saved offset
1752 }
1753
1754#if __cplusplus >= 201703L
1755 /**
1756 * @brief Insert a string_view.
1757 * @param __pos Position in string to insert at.
1758 * @param __svt The object convertible to string_view to insert.
1759 * @return Reference to this string.
1760 */
1761 template<typename _Tp>
1762 _If_sv<_Tp, basic_string&> // return type is spelled through _If_sv (presumably enables the overload only for string_view-convertible _Tp; defined outside this view)
1763 insert(size_type __pos, const _Tp& __svt)
1764 {
1765 __sv_type __sv = __svt; // convert the argument to __sv_type once
1766 return this->insert(__pos, __sv.data(), __sv.size()); // delegate to the (index, pointer, count) insert overload
1767 }
1768
1769 /**
1770 * @brief Insert a string_view.
1771 * @param __pos1 Position in string to insert at.
1772 * @param __svt The object convertible to string_view to insert from.
1773 * @param __pos2 Start of characters in str to insert.
1774 * @param __n The number of characters to insert.
1775 * @return Reference to this string.
1776 */
1777 template<typename _Tp>
1778 _If_sv<_Tp, basic_string&>
1779 insert(size_type __pos1, const _Tp& __svt,
1780 size_type __pos2, size_type __n = npos)
1781 {
1782 __sv_type __sv = __svt; // convert the argument to __sv_type before taking data() and size()
1783 return this->replace(__pos1, size_type(0), // zero-length target span at __pos1, so nothing is removed
1784 __sv.data()
1785 + std::__sv_check(__sv.size(), __pos2, "basic_string::insert"), // source pointer is data() advanced by the value __sv_check returns (presumably __pos2 after validating it against size())
1786 std::__sv_limit(__sv.size(), __pos2, __n)); // source length comes from __sv_limit (presumably __n clamped to what remains after __pos2)
1787 }
1788#endif // C++17
1789
1790 /**
1791 * @brief Remove characters.
1792 * @param __pos Index of first character to remove (default 0).
1793 * @param __n Number of characters to remove (default remainder).
1794 * @return Reference to this string.
1795 * @throw std::out_of_range If @a __pos is beyond the end of this
1796 * string.
1797 *
1798 * Removes @a __n characters from this string starting at @a
1799 * __pos. The length of the string is reduced by @a __n. If
1800 * there are < @a __n characters to remove, the remainder of
1801 * the string is truncated. If @a __pos is beyond end of string,
1802 * out_of_range is thrown. The value of the string doesn't
1803 * change if an error is thrown.
1804 */
1805 basic_string&
1806 erase(size_type __pos = 0, size_type __n = npos)
1807 {
1808 _M_check(__pos, "basic_string::erase"); // return value discarded: called only for its checking effect (presumably throws the out_of_range documented above)
1809 if (__n == npos)
1810 this->_M_set_length(__pos); // "erase to the end": just set the length to __pos
1811 else if (__n != 0) // __n == 0 falls through with no change to the string
1812 this->_M_erase(__pos, _M_limit(__pos, __n)); // count goes through _M_limit (presumably clamps __n to the characters available after __pos)
1813 return *this;
1814 }
1815
1816 /**
1817 * @brief Remove one character.
1818 * @param __position Iterator referencing the character to remove.
1819 * @return iterator referencing same location after removal.
1820 *
1821 * Removes the character at @a __position from this string. The value
1822 * of the string doesn't change if an error is thrown.
1823 */
1824 iterator
1825 erase(__const_iterator __position)
1826 {
1827 _GLIBCXX_DEBUG_PEDASSERT(__position >= begin() // assertion macro: __position must be in [begin(), end()),
1828 && __position < end()); // strictly before end(), so it refers to an existing character
1829 const size_type __pos = __position - begin(); // save the offset before the string is modified
1830 this->_M_erase(__pos, size_type(1)); // remove exactly one character at that offset
1831 return iterator(_M_data() + __pos); // result is rebuilt from _M_data() plus the saved offset
1832 }
1833
1834 /**
1835 * @brief Remove a range of characters.
1836 * @param __first Iterator referencing the first character to remove.
1837 * @param __last Iterator referencing the end of the range.
1838 * @return Iterator referencing location of first after removal.
1839 *
1840 * Removes the characters in the range [first,last) from this string.
1841 * The value of the string doesn't change if an error is thrown.
1842 */
1843 iterator
1844 erase(__const_iterator __first, __const_iterator __last)
1845 {
1846 _GLIBCXX_DEBUG_PEDASSERT(__first >= begin() && __first <= __last // assertion macro: begin() <= __first <= __last
1847 && __last <= end()); // and __last <= end()
1848 const size_type __pos = __first - begin(); // save the offset before the string is modified
1849 if (__last == end())
1850 this->_M_set_length(__pos); // range reaches the end: just set the length to __pos
1851 else
1852 this->_M_erase(__pos, __last - __first); // otherwise remove __last - __first characters starting at __pos
1853 return iterator(this->_M_data() + __pos); // result is rebuilt from _M_data() plus the saved offset
1854 }
1855
1856#if __cplusplus >= 201103L
1857 /**
1858 * @brief Remove the last character.
1859 *
1860 * The string must be non-empty.
1861 */
1862 void
1863 pop_back() noexcept
1864 {
1865 __glibcxx_assert(!empty()); // assertion macro enforcing the documented precondition that the string is non-empty
1866 _M_erase(size() - 1, 1); // remove one character at index size() - 1, i.e. the last one
1867 }
1868#endif // C++11
1869
1870 /**
1871 * @brief Replace characters with value from another string.
1872 * @param __pos Index of first character to replace.
1873 * @param __n Number of characters to be replaced.
1874 * @param __str String to insert.
1875 * @return Reference to this string.
1876 * @throw std::out_of_range If @a __pos is beyond the end of this
1877 * string.
1878 * @throw std::length_error If new length exceeds @c max_size().
1879 *
1880 * Removes the characters in the range [__pos,__pos+__n) from
1881 * this string. In place, the value of @a __str is inserted.
1882 * If @a __pos is beyond end of string, out_of_range is thrown.
1883 * If the length of the result exceeds max_size(), length_error
1884 * is thrown. The value of the string doesn't change if an
1885 * error is thrown.
1886 */
1887 basic_string&
1888 replace(size_type __pos, size_type __n, const basic_string& __str)
1889 { return this->replace(__pos, __n, __str._M_data(), __str.size()); } // forwards to the (pos, n, pointer, length) overload with the full contents of __str as the source
1890
1891 /**
1892 * @brief Replace characters with value from another string.
1893 * @param __pos1 Index of first character to replace.
1894 * @param __n1 Number of characters to be replaced.
1895 * @param __str String to insert.
1896 * @param __pos2 Index of first character of str to use.
1897 * @param __n2 Number of characters from str to use.
1898 * @return Reference to this string.
1899 * @throw std::out_of_range If @a __pos1 > size() or @a __pos2 >
1900 * __str.size().
1901 * @throw std::length_error If new length exceeds @c max_size().
1902 *
1903 * Removes the characters in the range [__pos1,__pos1 + __n1) from this
1904 * string. In place, the value of @a __str is inserted. If @a __pos1 is
1905 * beyond end of string, out_of_range is thrown. If the length of the
1906 * result exceeds max_size(), length_error is thrown. The value of the
1907 * string doesn't change if an error is thrown.
1908 */
1909 basic_string&
1910 replace(size_type __pos1, size_type __n1, const basic_string& __str,
1911 size_type __pos2, size_type __n2 = npos)
1912 { return this->replace(__pos1, __n1, __str._M_data() // target span (__pos1, __n1) is passed through unchanged
1913 + __str._M_check(__pos2, "basic_string::replace"), // source pointer is __str's data advanced by the value _M_check returns (presumably __pos2 after a range check on __str; helper not visible here)
1914 __str._M_limit(__pos2, __n2)); } // source length comes from __str._M_limit (presumably __n2 clamped to what remains of __str after __pos2)
1915
1916 /**
1917 * @brief Replace characters with value of a C substring.
1918 * @param __pos Index of first character to replace.
1919 * @param __n1 Number of characters to be replaced.
1920 * @param __s C string to insert.
1921 * @param __n2 Number of characters from @a __s to use.
1922 * @return Reference to this string.
1923 * @throw std::out_of_range If @a __pos > size().
1924 * @throw std::length_error If new length exceeds @c max_size().
1925 *
1926 * Removes the characters in the range [__pos,__pos + __n1)
1927 * from this string. In place, the first @a __n2 characters of
1928 * @a __s are inserted, or all of @a __s if @a __n2 is too large. If
1929 * @a __pos is beyond end of string, out_of_range is thrown. If
1930 * the length of result exceeds max_size(), length_error is
1931 * thrown. The value of the string doesn't change if an error
1932 * is thrown.
1933 */
1934 basic_string&
1935 replace(size_type __pos, size_type __n1, const _CharT* __s,
1936 size_type __n2)
1937 {
1938 __glibcxx_requires_string_len(__s, __n2);
1939 re