Bug Summary

File: llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
Warning: line 2876, column 15
Called C++ object pointer is null

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SimpleLoopUnswitch.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/build-llvm/lib/Transforms/Scalar -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/build-llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/build-llvm/lib/Transforms/Scalar -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-06-21-164211-33944-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
14#include "llvm/ADT/SmallPtrSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
18#include "llvm/Analysis/AssumptionCache.h"
19#include "llvm/Analysis/CFG.h"
20#include "llvm/Analysis/CodeMetrics.h"
21#include "llvm/Analysis/GuardUtils.h"
22#include "llvm/Analysis/InstructionSimplify.h"
23#include "llvm/Analysis/LoopAnalysisManager.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/LoopIterator.h"
26#include "llvm/Analysis/LoopPass.h"
27#include "llvm/Analysis/MemorySSA.h"
28#include "llvm/Analysis/MemorySSAUpdater.h"
29#include "llvm/Analysis/MustExecute.h"
30#include "llvm/Analysis/ScalarEvolution.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/Constant.h"
33#include "llvm/IR/Constants.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/IntrinsicInst.h"
41#include "llvm/IR/PatternMatch.h"
42#include "llvm/IR/Use.h"
43#include "llvm/IR/Value.h"
44#include "llvm/InitializePasses.h"
45#include "llvm/Pass.h"
46#include "llvm/Support/Casting.h"
47#include "llvm/Support/CommandLine.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/ErrorHandling.h"
50#include "llvm/Support/GenericDomTree.h"
51#include "llvm/Support/raw_ostream.h"
52#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
53#include "llvm/Transforms/Utils/BasicBlockUtils.h"
54#include "llvm/Transforms/Utils/Cloning.h"
55#include "llvm/Transforms/Utils/Local.h"
56#include "llvm/Transforms/Utils/LoopUtils.h"
57#include "llvm/Transforms/Utils/ValueMapper.h"
58#include <algorithm>
59#include <cassert>
60#include <iterator>
61#include <numeric>
62#include <utility>
63
64#define DEBUG_TYPE "simple-loop-unswitch"
65
66using namespace llvm;
67using namespace llvm::PatternMatch;
68
69STATISTIC(NumBranches, "Number of branches unswitched");
70STATISTIC(NumSwitches, "Number of switches unswitched");
71STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
72STATISTIC(NumTrivial, "Number of unswitches that are trivial");
73STATISTIC(
74    NumCostMultiplierSkipped,
75    "Number of unswitch candidates that had their cost multiplier skipped");
76
77static cl::opt<bool> EnableNonTrivialUnswitch(
78 "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
79 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
80 "following the configuration passed into the pass."));
81
82static cl::opt<int>
83 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
84 cl::desc("The cost threshold for unswitching a loop."));
85
86static cl::opt<bool> EnableUnswitchCostMultiplier(
87 "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
88 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
89 "explosion in nontrivial unswitch."));
90static cl::opt<int> UnswitchSiblingsToplevelDiv(
91 "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
92 cl::desc("Toplevel siblings divisor for cost multiplier."));
93static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
94 "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
95 cl::desc("Number of unswitch candidates that are ignored when calculating "
96 "cost multiplier."));
97static cl::opt<bool> UnswitchGuards(
98 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
99 cl::desc("If enabled, simple loop unswitching will also consider "
100 "llvm.experimental.guard intrinsics as unswitch candidates."));
101static cl::opt<bool> DropNonTrivialImplicitNullChecks(
102 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
103 cl::init(false), cl::Hidden,
104 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
105 "null checks to save time analyzing if we can keep it."));
106static cl::opt<unsigned>
107 MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
108 cl::desc("Max number of memory uses to explore during "
109 "partial unswitching analysis"),
110 cl::init(100), cl::Hidden);
111
112/// Collect all of the loop invariant input values transitively used by the
113/// homogeneous instruction graph from a given root.
114///
115/// This essentially walks from a root recursively through loop variant operands
116/// which have the exact same opcode and finds all inputs which are loop
117/// invariant. For some operations these can be re-associated and unswitched out
118/// of the loop entirely.
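///
/// A hedged illustration (hypothetical IR, not taken from this report):
///
///   %a = and i1 %inv1, %var      ; %inv1 loop invariant, %var loop varying
///   %cond = and i1 %a, %inv2     ; %inv2 loop invariant
///   br i1 %cond, label %body, label %exit
///
/// Walking the `and` graph from %cond collects {%inv1, %inv2}; the varying
/// operand %var and any constant operands are skipped.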
119static TinyPtrVector<Value *>
120collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
121 LoopInfo &LI) {
122  assert(!L.isLoopInvariant(&Root) &&
123         "Only need to walk the graph if root itself is not invariant.");
124 TinyPtrVector<Value *> Invariants;
125
126 bool IsRootAnd = match(&Root, m_LogicalAnd());
127 bool IsRootOr = match(&Root, m_LogicalOr());
128
129 // Build a worklist and recurse through operators collecting invariants.
130 SmallVector<Instruction *, 4> Worklist;
131 SmallPtrSet<Instruction *, 8> Visited;
132 Worklist.push_back(&Root);
133 Visited.insert(&Root);
134 do {
135 Instruction &I = *Worklist.pop_back_val();
136 for (Value *OpV : I.operand_values()) {
137 // Skip constants as unswitching isn't interesting for them.
138 if (isa<Constant>(OpV))
139 continue;
140
141 // Add it to our result if loop invariant.
142 if (L.isLoopInvariant(OpV)) {
143 Invariants.push_back(OpV);
144 continue;
145 }
146
147 // If not an instruction with the same opcode, nothing we can do.
148 Instruction *OpI = dyn_cast<Instruction>(OpV);
149
150 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
151 (IsRootOr && match(OpI, m_LogicalOr())))) {
152 // Visit this operand.
153 if (Visited.insert(OpI).second)
154 Worklist.push_back(OpI);
155 }
156 }
157 } while (!Worklist.empty());
158
159 return Invariants;
160}
161
162static void replaceLoopInvariantUses(Loop &L, Value *Invariant,
163 Constant &Replacement) {
164  assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
165
166 // Replace uses of LIC in the loop with the given constant.
167 // We use make_early_inc_range as set invalidates the iterator.
168 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
169 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
170
171 // Replace this use within the loop body.
172 if (UserI && L.contains(UserI))
173 U.set(&Replacement);
174 }
175}
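// A hedged illustration of the effect (hypothetical IR): in the loop copy
// that is only entered when the invariant is known true, an in-loop use like
//
//   %sel = select i1 %inv, i32 %x, i32 %y
//
// is rewritten to
//
//   %sel = select i1 true, i32 %x, i32 %y
//
// leaving later simplify-cfg-style cleanup to fold the constant condition.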
176
177/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
178/// incoming values along this edge.
179static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
180 BasicBlock &ExitBB) {
181 for (Instruction &I : ExitBB) {
182 auto *PN = dyn_cast<PHINode>(&I);
183 if (!PN)
184 // No more PHIs to check.
185 return true;
186
187 // If the incoming value for this edge isn't loop invariant the unswitch
188 // won't be trivial.
189 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
190 return false;
191 }
192  llvm_unreachable("Basic blocks should never be empty!");
193}
194
195/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
196/// end of \p BB and conditionally branch on the copied condition. We only
197/// branch on a single value.
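///
/// A hedged sketch of the emitted IR (hypothetical names): with
/// Direction == true and invariants %i1 and %i2 this produces roughly
///
///   %cond = or i1 %i1, %i2
///   br i1 %cond, label %unswitched.succ, label %normal.succ
///
/// while Direction == false emits the `and` form with the successors swapped.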
198static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
199 ArrayRef<Value *> Invariants,
200 bool Direction,
201 BasicBlock &UnswitchedSucc,
202 BasicBlock &NormalSucc) {
203 IRBuilder<> IRB(&BB);
204
205 Value *Cond = Direction ? IRB.CreateOr(Invariants) :
206 IRB.CreateAnd(Invariants);
207 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
208 Direction ? &NormalSucc : &UnswitchedSucc);
209}
210
211/// Copy a set of loop invariant values, and conditionally branch on them.
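///
/// A hedged sketch (hypothetical IR): for ToDuplicate = {%c} with
///
///   %c = icmp ult i32 %a, %b     ; %a and %b loop invariant
///
/// a clone of %c is appended to BB and the new conditional branch tests the
/// clone, leaving the original in-loop instruction untouched.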
212static void buildPartialInvariantUnswitchConditionalBranch(
213 BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
214 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
215 MemorySSAUpdater *MSSAU) {
216 ValueToValueMapTy VMap;
217 for (auto *Val : reverse(ToDuplicate)) {
218 Instruction *Inst = cast<Instruction>(Val);
219 Instruction *NewInst = Inst->clone();
220 BB.getInstList().insert(BB.end(), NewInst);
221 RemapInstruction(NewInst, VMap,
222 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
223 VMap[Val] = NewInst;
224
225 if (!MSSAU)
226 continue;
227
228 MemorySSA *MSSA = MSSAU->getMemorySSA();
229 if (auto *MemUse =
230 dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
231 auto *DefiningAccess = MemUse->getDefiningAccess();
232 // Get the first defining access before the loop.
233 while (L.contains(DefiningAccess->getBlock())) {
234 // If the defining access is a MemoryPhi, get the incoming
235 // value for the pre-header as defining access.
236 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
237 DefiningAccess =
238 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
239 else
240 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
241 }
242 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
243 NewInst->getParent(),
244 MemorySSA::BeforeTerminator);
245 }
246 }
247
248 IRBuilder<> IRB(&BB);
249 Value *Cond = VMap[ToDuplicate[0]];
250 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
251 Direction ? &NormalSucc : &UnswitchedSucc);
252}
253
254/// Rewrite the PHI nodes in an unswitched loop exit basic block.
255///
256/// Requires that the loop exit and unswitched basic block are the same, and
257/// that the exiting block was a unique predecessor of that block. Rewrites the
258/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
259/// PHI nodes from the old preheader that now contains the unswitched
260/// terminator.
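///
/// A hedged illustration (hypothetical IR): an LCSSA node in the unswitched
/// block such as
///
///   %lcssa = phi i32 [ %v, %old.exiting ]
///
/// becomes
///
///   %lcssa = phi i32 [ %v, %old.ph ]
///
/// because the old preheader now holds the unswitched terminator reaching it.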
261static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
262 BasicBlock &OldExitingBB,
263 BasicBlock &OldPH) {
264 for (PHINode &PN : UnswitchedBB.phis()) {
265 // When the loop exit is directly unswitched we just need to update the
266 // incoming basic block. We loop to handle weird cases with repeated
267 // incoming blocks, but expect to typically only have one operand here.
268 for (auto i : seq<int>(0, PN.getNumOperands())) {
269      assert(PN.getIncomingBlock(i) == &OldExitingBB &&
270             "Found incoming block different from unique predecessor!");
271 PN.setIncomingBlock(i, &OldPH);
272 }
273 }
274}
275
276/// Rewrite the PHI nodes in the loop exit basic block and the split off
277/// unswitched block.
278///
279/// Because the exit block remains an exit from the loop, this rewrites the
280/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
281/// nodes into the unswitched basic block to select between the value in the
282/// old preheader and the loop exit.
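///
/// A hedged illustration (hypothetical IR): for an exit-block PHI
///
///   %p = phi i32 [ %v, %exiting ], [ %w, %other ]
///
/// full unswitching removes the %exiting input, and the unswitched block gains
///
///   %p.split = phi i32 [ %v, %old.ph ], [ %p, %exit ]
///
/// so uses of %p are redirected to %p.split, which selects between the
/// unswitched path and the remaining loop exit.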
283static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
284 BasicBlock &UnswitchedBB,
285 BasicBlock &OldExitingBB,
286 BasicBlock &OldPH,
287 bool FullUnswitch) {
288  assert(&ExitBB != &UnswitchedBB &&
289         "Must have different loop exit and unswitched blocks!");
290 Instruction *InsertPt = &*UnswitchedBB.begin();
291 for (PHINode &PN : ExitBB.phis()) {
292 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
293 PN.getName() + ".split", InsertPt);
294
295 // Walk backwards over the old PHI node's inputs to minimize the cost of
296 // removing each one. We have to do this weird loop manually so that we
297 // create the same number of new incoming edges in the new PHI as we expect
298 // each case-based edge to be included in the unswitched switch in some
299 // cases.
300 // FIXME: This is really, really gross. It would be much cleaner if LLVM
301 // allowed us to create a single entry for a predecessor block without
302 // having separate entries for each "edge" even though these edges are
303 // required to produce identical results.
304 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
305 if (PN.getIncomingBlock(i) != &OldExitingBB)
306 continue;
307
308 Value *Incoming = PN.getIncomingValue(i);
309 if (FullUnswitch)
310 // No more edge from the old exiting block to the exit block.
311 PN.removeIncomingValue(i);
312
313 NewPN->addIncoming(Incoming, &OldPH);
314 }
315
316 // Now replace the old PHI with the new one and wire the old one in as an
317 // input to the new one.
318 PN.replaceAllUsesWith(NewPN);
319 NewPN->addIncoming(&PN, &ExitBB);
320 }
321}
322
323/// Hoist the current loop up to the innermost loop containing a remaining exit.
324///
325/// Because we've removed an exit from the loop, we may have changed the set of
326/// loops reachable and need to move the current loop up the loop nest or even
327/// to an entirely separate nest.
328static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
329 DominatorTree &DT, LoopInfo &LI,
330 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
331 // If the loop is already at the top level, we can't hoist it anywhere.
332 Loop *OldParentL = L.getParentLoop();
333 if (!OldParentL)
334 return;
335
336 SmallVector<BasicBlock *, 4> Exits;
337 L.getExitBlocks(Exits);
338 Loop *NewParentL = nullptr;
339 for (auto *ExitBB : Exits)
340 if (Loop *ExitL = LI.getLoopFor(ExitBB))
341 if (!NewParentL || NewParentL->contains(ExitL))
342 NewParentL = ExitL;
343
344 if (NewParentL == OldParentL)
345 return;
346
347 // The new parent loop (if different) should always contain the old one.
348 if (NewParentL)
349    assert(NewParentL->contains(OldParentL) &&
350           "Can only hoist this loop up the nest!");
351
352 // The preheader will need to move with the body of this loop. However,
353 // because it isn't in this loop we also need to update the primary loop map.
354  assert(OldParentL == LI.getLoopFor(&Preheader) &&
355         "Parent loop of this loop should contain this loop's preheader!");
356 LI.changeLoopFor(&Preheader, NewParentL);
357
358 // Remove this loop from its old parent.
359 OldParentL->removeChildLoop(&L);
360
361 // Add the loop either to the new parent or as a top-level loop.
362 if (NewParentL)
363 NewParentL->addChildLoop(&L);
364 else
365 LI.addTopLevelLoop(&L);
366
367 // Remove this loops blocks from the old parent and every other loop up the
368 // nest until reaching the new parent. Also update all of these
369 // no-longer-containing loops to reflect the nesting change.
370 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
371 OldContainingL = OldContainingL->getParentLoop()) {
372 llvm::erase_if(OldContainingL->getBlocksVector(),
373 [&](const BasicBlock *BB) {
374 return BB == &Preheader || L.contains(BB);
375 });
376
377 OldContainingL->getBlocksSet().erase(&Preheader);
378 for (BasicBlock *BB : L.blocks())
379 OldContainingL->getBlocksSet().erase(BB);
380
381 // Because we just hoisted a loop out of this one, we have essentially
382 // created new exit paths from it. That means we need to form LCSSA PHI
383 // nodes for values used in the no-longer-nested loop.
384 formLCSSA(*OldContainingL, DT, &LI, SE);
385
386 // We shouldn't need to form dedicated exits because the exit introduced
387 // here is the (just split by unswitching) preheader. However, after trivial
388 // unswitching it is possible to get new non-dedicated exits out of parent
389 // loop so let's conservatively form dedicated exit blocks and figure out
390 // if we can optimize later.
391 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
392 /*PreserveLCSSA*/ true);
393 }
394}
395
396// Return the top-most loop containing ExitBB and having ExitBB as exiting block
397// or the loop containing ExitBB, if there is no parent loop containing ExitBB
398// as exiting block.
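// A hedged illustration (hypothetical nest): for Outer { Mid { Inner } } with
// ExitBB placed in Mid, the walk starts at Mid and climbs the parent chain;
// if both Mid and Outer have ExitBB as an exiting block, Outer is returned,
// otherwise the outermost loop that does (falling back to Mid itself).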
399static Loop *getTopMostExitingLoop(BasicBlock *ExitBB, LoopInfo &LI) {
400 Loop *TopMost = LI.getLoopFor(ExitBB);
401 Loop *Current = TopMost;
402 while (Current) {
403 if (Current->isLoopExiting(ExitBB))
404 TopMost = Current;
405 Current = Current->getParentLoop();
406 }
407 return TopMost;
408}
409
410/// Unswitch a trivial branch if the condition is loop invariant.
411///
412/// This routine should only be called when loop code leading to the branch has
413/// been validated as trivial (no side effects). This routine checks if the
414/// condition is invariant and one of the successors is a loop exit. This
415/// allows us to unswitch without duplicating the loop, making it trivial.
416///
417/// If this routine fails to unswitch the branch it returns false.
418///
419/// If the branch can be unswitched, this routine splits the preheader and
420/// hoists the branch above that split. Preserves loop simplified form
421/// (splitting the exit block as necessary). It simplifies the branch within
422/// the loop to an unconditional branch but doesn't remove it entirely. Further
423/// cleanup can be done with some simplify-cfg like pass.
424///
425/// If `SE` is not null, it will be updated based on the potential loop SCEVs
426/// invalidated by this.
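///
/// A hedged before/after sketch (hypothetical IR). Before, with %inv loop
/// invariant:
///
///   preheader:
///     br label %header
///   header:
///     br i1 %inv, label %exit, label %body
///
/// After, the branch is hoisted above the split preheader and the in-loop
/// copy becomes unconditional:
///
///   preheader:
///     br i1 %inv, label %exit, label %new.ph
///   new.ph:
///     br label %header
///   header:
///     br label %body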
427static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
428 LoopInfo &LI, ScalarEvolution *SE,
429 MemorySSAUpdater *MSSAU) {
430  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
431  LLVM_DEBUG(dbgs() << "  Trying to unswitch branch: " << BI << "\n");
432
433 // The loop invariant values that we want to unswitch.
434 TinyPtrVector<Value *> Invariants;
435
436 // When true, we're fully unswitching the branch rather than just unswitching
437 // some input conditions to the branch.
438 bool FullUnswitch = false;
439
440 if (L.isLoopInvariant(BI.getCondition())) {
441 Invariants.push_back(BI.getCondition());
442 FullUnswitch = true;
443 } else {
444 if (auto *CondInst = dyn_cast<Instruction>(BI.getCondition()))
445 Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
446 if (Invariants.empty()) {
447      LLVM_DEBUG(dbgs() << "   Couldn't find invariant inputs!\n");
448 return false;
449 }
450 }
451
452 // Check that one of the branch's successors exits, and which one.
453 bool ExitDirection = true;
454 int LoopExitSuccIdx = 0;
455 auto *LoopExitBB = BI.getSuccessor(0);
456 if (L.contains(LoopExitBB)) {
457 ExitDirection = false;
458 LoopExitSuccIdx = 1;
459 LoopExitBB = BI.getSuccessor(1);
460 if (L.contains(LoopExitBB)) {
461      LLVM_DEBUG(dbgs() << "   Branch doesn't exit the loop!\n");
462 return false;
463 }
464 }
465 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
466 auto *ParentBB = BI.getParent();
467 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
468    LLVM_DEBUG(dbgs() << "   Loop exit PHI's aren't loop-invariant!\n");
469 return false;
470 }
471
472 // When unswitching only part of the branch's condition, we need the exit
473 // block to be reached directly from the partially unswitched input. This can
474 // be done when the exit block is along the true edge and the branch condition
475 // is a graph of `or` operations, or the exit block is along the false edge
476 // and the condition is a graph of `and` operations.
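  // A hedged illustration (hypothetical IR): with the exit on the true edge,
  //
  //   %c = or i1 %inv, %var
  //   br i1 %c, label %exit, label %body
  //
  // unswitching on %inv alone is sound because %inv == true already forces
  // %c == true; the `and` case is the mirror image for a false-edge exit.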
477 if (!FullUnswitch) {
478 if (ExitDirection ? !match(BI.getCondition(), m_LogicalOr())
479 : !match(BI.getCondition(), m_LogicalAnd())) {
480      LLVM_DEBUG(dbgs() << "   Branch condition is in improper form for "
481                           "non-full unswitch!\n");
482 return false;
483 }
484 }
485
486  LLVM_DEBUG({
487    dbgs() << "    unswitching trivial invariant conditions for: " << BI
488           << "\n";
489    for (Value *Invariant : Invariants) {
490      dbgs() << "      " << *Invariant << " == true";
491      if (Invariant != Invariants.back())
492        dbgs() << " ||";
493      dbgs() << "\n";
494    }
495  });
496
497 // If we have scalar evolutions, we need to invalidate them including this
498 // loop, the loop containing the exit block and the topmost parent loop
499 // exiting via LoopExitBB.
500 if (SE) {
501 if (Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
502 SE->forgetLoop(ExitL);
503 else
504 // Forget the entire nest as this exits the entire nest.
505 SE->forgetTopmostLoop(&L);
506 }
507
508 if (MSSAU && VerifyMemorySSA)
509 MSSAU->getMemorySSA()->verifyMemorySSA();
510
511 // Split the preheader, so that we know that there is a safe place to insert
512 // the conditional branch. We will change the preheader to have a conditional
513 // branch on LoopCond.
514 BasicBlock *OldPH = L.getLoopPreheader();
515 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
516
517 // Now that we have a place to insert the conditional branch, create a place
518 // to branch to: this is the exit block out of the loop that we are
519 // unswitching. We need to split this if there are other loop predecessors.
520 // Because the loop is in simplified form, *any* other predecessor is enough.
521 BasicBlock *UnswitchedBB;
522 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
523    assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
524           "A branch's parent isn't a predecessor!");
525 UnswitchedBB = LoopExitBB;
526 } else {
527 UnswitchedBB =
528 SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
529 }
530
531 if (MSSAU && VerifyMemorySSA)
532 MSSAU->getMemorySSA()->verifyMemorySSA();
533
534 // Actually move the invariant uses into the unswitched position. If possible,
535 // we do this by moving the instructions, but when doing partial unswitching
536 // we do it by building a new merge of the values in the unswitched position.
537 OldPH->getTerminator()->eraseFromParent();
538 if (FullUnswitch) {
539 // If fully unswitching, we can use the existing branch instruction.
540 // Splice it into the old PH to gate reaching the new preheader and re-point
541 // its successors.
542 OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
543 BI);
544 if (MSSAU) {
545 // Temporarily clone the terminator, to make MSSA update cheaper by
546 // separating "insert edge" updates from "remove edge" ones.
547 ParentBB->getInstList().push_back(BI.clone());
548 } else {
549 // Create a new unconditional branch that will continue the loop as a new
550 // terminator.
551 BranchInst::Create(ContinueBB, ParentBB);
552 }
553 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
554 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
555 } else {
556 // Only unswitching a subset of inputs to the condition, so we will need to
557 // build a new branch that merges the invariant inputs.
558 if (ExitDirection)
559      assert(match(BI.getCondition(), m_LogicalOr()) &&
560             "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
561             "condition!");
562    else
563      assert(match(BI.getCondition(), m_LogicalAnd()) &&
564             "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
565             " condition!");
566 buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
567 *UnswitchedBB, *NewPH);
568 }
569
570 // Update the dominator tree with the added edge.
571 DT.insertEdge(OldPH, UnswitchedBB);
572
573 // After the dominator tree was updated with the added edge, update MemorySSA
574 // if available.
575 if (MSSAU) {
576 SmallVector<CFGUpdate, 1> Updates;
577 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
578 MSSAU->applyInsertUpdates(Updates, DT);
579 }
580
581 // Finish updating dominator tree and memory ssa for full unswitch.
582 if (FullUnswitch) {
583 if (MSSAU) {
584 // Remove the cloned branch instruction.
585 ParentBB->getTerminator()->eraseFromParent();
586 // Create unconditional branch now.
587 BranchInst::Create(ContinueBB, ParentBB);
588 MSSAU->removeEdge(ParentBB, LoopExitBB);
589 }
590 DT.deleteEdge(ParentBB, LoopExitBB);
591 }
592
593 if (MSSAU && VerifyMemorySSA)
594 MSSAU->getMemorySSA()->verifyMemorySSA();
595
596 // Rewrite the relevant PHI nodes.
597 if (UnswitchedBB == LoopExitBB)
598 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
599 else
600 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
601 *ParentBB, *OldPH, FullUnswitch);
602
603 // The constant we can replace all of our invariants with inside the loop
604 // body. If any of the invariants have a value other than this the loop won't
605 // be entered.
606 ConstantInt *Replacement = ExitDirection
607 ? ConstantInt::getFalse(BI.getContext())
608 : ConstantInt::getTrue(BI.getContext());
609
610 // Since this is an i1 condition we can also trivially replace uses of it
611 // within the loop with a constant.
612 for (Value *Invariant : Invariants)
613 replaceLoopInvariantUses(L, Invariant, *Replacement);
614
615 // If this was full unswitching, we may have changed the nesting relationship
616 // for this loop so hoist it to its correct parent if needed.
617 if (FullUnswitch)
618 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
619
620 if (MSSAU && VerifyMemorySSA)
621 MSSAU->getMemorySSA()->verifyMemorySSA();
622
623  LLVM_DEBUG(dbgs() << "    done: unswitching trivial branch...\n");
624 ++NumTrivial;
625 ++NumBranches;
626 return true;
627}
628
629/// Unswitch a trivial switch if the condition is loop invariant.
630///
631/// This routine should only be called when loop code leading to the switch has
632/// been validated as trivial (no side effects). This routine checks if the
633/// condition is invariant and that at least one of the successors is a loop
634/// exit. This allows us to unswitch without duplicating the loop, making it
635/// trivial.
636///
637/// If this routine fails to unswitch the switch it returns false.
638///
639/// If the switch can be unswitched, this routine splits the preheader and
640/// copies the switch above that split. If the default case is one of the
641/// exiting cases, it copies the non-exiting cases and points them at the new
642/// preheader. If the default case is not exiting, it copies the exiting cases
643/// and points the default at the preheader. It preserves loop simplified form
644/// (splitting the exit blocks as necessary). It simplifies the switch within
645/// the loop by removing now-dead cases. If the default case is one of those
646/// unswitched, it replaces its destination with a new basic block containing
647/// only unreachable. Such basic blocks, while technically loop exits, are not
648/// considered for unswitching so this is a stable transform and the same
649/// switch will not be revisited. If after unswitching there is only a single
650/// in-loop successor, the switch is further simplified to an unconditional
651/// branch. Still more cleanup can be done with some simplify-cfg like pass.
652///
653/// If `SE` is not null, it will be updated based on the potential loop SCEVs
654/// invalidated by this.
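///
/// A hedged before/after sketch (hypothetical IR). Before, with %inv loop
/// invariant and a non-exiting default:
///
///   header:
///     switch i32 %inv, label %body [ i32 0, label %exit0
///                                    i32 1, label %exit1 ]
///
/// After, a copy of the switch in the old preheader dispatches the exiting
/// cases and the in-loop switch collapses to an unconditional branch:
///
///   old.ph:
///     switch i32 %inv, label %new.ph [ i32 0, label %exit0
///                                      i32 1, label %exit1 ]
///   header:
///     br label %body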
655static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
656 LoopInfo &LI, ScalarEvolution *SE,
657 MemorySSAUpdater *MSSAU) {
658  LLVM_DEBUG(dbgs() << "  Trying to unswitch switch: " << SI << "\n");
659 Value *LoopCond = SI.getCondition();
660
661 // If this isn't switching on an invariant condition, we can't unswitch it.
662 if (!L.isLoopInvariant(LoopCond))
663 return false;
664
665 auto *ParentBB = SI.getParent();
666
667 // The same check must be used both for the default and the exit cases. We
668 // should never leave edges from the switch instruction to a basic block that
669 // we are unswitching, hence the condition used to determine the default case
670 // needs to also be used to populate ExitCaseIndices, which is then used to
671 // remove cases from the switch.
672 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
673 // BBToCheck is not an exit block if it is inside loop L.
674 if (L.contains(&BBToCheck))
675 return false;
676 // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
677 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
678 return false;
679 // We do not unswitch a block that only has an unreachable statement, as
680 // it's possible this is a previously unswitched block. Only unswitch if
681 // either the terminator is not unreachable, or, if it is, it's not the only
682 // instruction in the block.
683 auto *TI = BBToCheck.getTerminator();
684 bool isUnreachable = isa<UnreachableInst>(TI);
685 return !isUnreachable ||
686 (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
687 };
688
689 SmallVector<int, 4> ExitCaseIndices;
690 for (auto Case : SI.cases())
691 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
692 ExitCaseIndices.push_back(Case.getCaseIndex());
693 BasicBlock *DefaultExitBB = nullptr;
694 SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
695 SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
696 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
697 DefaultExitBB = SI.getDefaultDest();
698 } else if (ExitCaseIndices.empty())
699 return false;
700
701  LLVM_DEBUG(dbgs() << "    unswitching trivial switch...\n");
702
703 if (MSSAU && VerifyMemorySSA)
704 MSSAU->getMemorySSA()->verifyMemorySSA();
705
706 // We may need to invalidate SCEVs for the outermost loop reached by any of
707 // the exits.
708 Loop *OuterL = &L;
709
710 if (DefaultExitBB) {
711 // Clear out the default destination temporarily to allow accurate
712 // predecessor lists to be examined below.
713 SI.setDefaultDest(nullptr);
714 // Check the loop containing this exit.
715 Loop *ExitL = LI.getLoopFor(DefaultExitBB);
716 if (!ExitL || ExitL->contains(OuterL))
717 OuterL = ExitL;
718 }
719
720 // Store the exit cases into a separate data structure and remove them from
721 // the switch.
722 SmallVector<std::tuple<ConstantInt *, BasicBlock *,
723 SwitchInstProfUpdateWrapper::CaseWeightOpt>,
724 4> ExitCases;
725 ExitCases.reserve(ExitCaseIndices.size());
726 SwitchInstProfUpdateWrapper SIW(SI);
727 // We walk the case indices backwards so that we remove the last case first
728 // and don't disrupt the earlier indices.
729 for (unsigned Index : reverse(ExitCaseIndices)) {
730 auto CaseI = SI.case_begin() + Index;
731 // Compute the outer loop from this exit.
732 Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
733 if (!ExitL || ExitL->contains(OuterL))
734 OuterL = ExitL;
735 // Save the value of this case.
736 auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
737 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
738 // Delete the unswitched cases.
739 SIW.removeCase(CaseI);
740 }
741
742 if (SE) {
743 if (OuterL)
744 SE->forgetLoop(OuterL);
745 else
746 SE->forgetTopmostLoop(&L);
747 }
748
749 // Check if after this all of the remaining cases point at the same
750 // successor.
751 BasicBlock *CommonSuccBB = nullptr;
752 if (SI.getNumCases() > 0 &&
753 all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
754 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
755 }))
756 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
757 if (!DefaultExitBB) {
758 // If we're not unswitching the default, we need it to match any cases to
759 // have a common successor or if we have no cases it is the common
760 // successor.
761 if (SI.getNumCases() == 0)
762 CommonSuccBB = SI.getDefaultDest();
763 else if (SI.getDefaultDest() != CommonSuccBB)
764 CommonSuccBB = nullptr;
765 }
766
767 // Split the preheader, so that we know that there is a safe place to insert
768 // the switch.
769 BasicBlock *OldPH = L.getLoopPreheader();
770 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
771 OldPH->getTerminator()->eraseFromParent();
772
773 // Now add the unswitched switch.
774 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
775 SwitchInstProfUpdateWrapper NewSIW(*NewSI);
776
777 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
778 // First, we split any exit blocks with remaining in-loop predecessors. Then
779 // we update the PHIs in one of two ways depending on if there was a split.
780 // We walk in reverse so that we split in the same order as the cases
781 // appeared. This is purely for convenience of reading the resulting IR, but
782 // it doesn't cost anything really.
783 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
784 SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
785 // Handle the default exit if necessary.
786 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
787 // ranges aren't quite powerful enough yet.
788 if (DefaultExitBB) {
789 if (pred_empty(DefaultExitBB)) {
790 UnswitchedExitBBs.insert(DefaultExitBB);
791 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
792 } else {
793 auto *SplitBB =
794 SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
795 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
796 *ParentBB, *OldPH,
797 /*FullUnswitch*/ true);
798 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
799 }
800 }
801 // Note that we must use a reference in the for loop so that we update the
802 // container.
803 for (auto &ExitCase : reverse(ExitCases)) {
804 // Grab a reference to the exit block in the pair so that we can update it.
805 BasicBlock *ExitBB = std::get<1>(ExitCase);
806
807 // If this case is the last edge into the exit block, we can simply reuse it
808 // as it will no longer be a loop exit. No mapping necessary.
809 if (pred_empty(ExitBB)) {
810 // Only rewrite once.
811 if (UnswitchedExitBBs.insert(ExitBB).second)
812 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
813 continue;
814 }
815
816 // Otherwise we need to split the exit block so that we retain an exit
817 // block from the loop and a target for the unswitched condition.
818 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
819 if (!SplitExitBB) {
820 // If this is the first time we see this, do the split and remember it.
821 SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
822 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
823 *ParentBB, *OldPH,
824 /*FullUnswitch*/ true);
825 }
826 // Update the case pair to point to the split block.
827 std::get<1>(ExitCase) = SplitExitBB;
828 }
829
830 // Now add the unswitched cases. We do this in reverse order as we built them
831 // in reverse order.
832 for (auto &ExitCase : reverse(ExitCases)) {
833 ConstantInt *CaseVal = std::get<0>(ExitCase);
834 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
835
836 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
837 }
838
839 // If the default was unswitched, re-point it and add explicit cases for
840 // entering the loop.
841 if (DefaultExitBB) {
842 NewSIW->setDefaultDest(DefaultExitBB);
843 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
844
845 // We removed all the exit cases, so we just copy the cases to the
846 // unswitched switch.
847 for (const auto &Case : SI.cases())
848 NewSIW.addCase(Case.getCaseValue(), NewPH,
849 SIW.getSuccessorWeight(Case.getSuccessorIndex()));
850 } else if (DefaultCaseWeight) {
851 // We have to set branch weight of the default case.
852 uint64_t SW = *DefaultCaseWeight;
853 for (const auto &Case : SI.cases()) {
854 auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
855      assert(W &&
856             "case weight must be defined as default case weight is defined");
857 SW += *W;
858 }
859 NewSIW.setSuccessorWeight(0, SW);
860 }
861
862 // If we ended up with a common successor for every path through the switch
863 // after unswitching, rewrite it to an unconditional branch to make it easy
864 // to recognize. Otherwise we potentially have to recognize the default case
865 // pointing at unreachable and other complexity.
866 if (CommonSuccBB) {
867 BasicBlock *BB = SI.getParent();
868 // We may have had multiple edges to this common successor block, so remove
869 // them as predecessors. We skip the first one, either the default or the
870 // actual first case.
871 bool SkippedFirst = DefaultExitBB == nullptr;
872 for (auto Case : SI.cases()) {
873      assert(Case.getCaseSuccessor() == CommonSuccBB &&
874             "Non-common successor!");
875 (void)Case;
876 if (!SkippedFirst) {
877 SkippedFirst = true;
878 continue;
879 }
880 CommonSuccBB->removePredecessor(BB,
881 /*KeepOneInputPHIs*/ true);
882 }
883 // Now nuke the switch and replace it with a direct branch.
884 SIW.eraseFromParent();
885 BranchInst::Create(CommonSuccBB, BB);
886 } else if (DefaultExitBB) {
887    assert(SI.getNumCases() > 0 &&
888           "If we had no cases we'd have a common successor!");
889 // Move the last case to the default successor. This is valid as if the
890 // default got unswitched it cannot be reached. This has the advantage of
891 // being simple and keeping the number of edges from this switch to
892 // successors the same, and avoiding any PHI update complexity.
893 auto LastCaseI = std::prev(SI.case_end());
894
895 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
896 SIW.setSuccessorWeight(
897 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
898 SIW.removeCase(LastCaseI);
899 }
900
901 // Walk the unswitched exit blocks and the unswitched split blocks and update
902 // the dominator tree based on the CFG edits. While we are walking unordered
903 // containers here, the API for applyUpdates takes an unordered list of
904 // updates and requires them to not contain duplicates.
905 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
906 for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
907 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
908 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
909 }
910 for (auto SplitUnswitchedPair : SplitExitBBMap) {
911 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
912 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
913 }
914
915 if (MSSAU) {
916 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
917 if (VerifyMemorySSA)
918 MSSAU->getMemorySSA()->verifyMemorySSA();
919 } else {
920 DT.applyUpdates(DTUpdates);
921 }
922
923 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
924
925 // We may have changed the nesting relationship for this loop so hoist it to
926 // its correct parent if needed.
927 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
928
929 if (MSSAU && VerifyMemorySSA)
930 MSSAU->getMemorySSA()->verifyMemorySSA();
931
932 ++NumTrivial;
933 ++NumSwitches;
934 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
935 return true;
936}
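As a concrete illustration of what this routine does, here is a hedged, source-level sketch. It is not taken from LLVM; the names (`before`, `after`, `Mode`, `A`) are invented, and the rewrite shown is only the source-level analogue of the IR transformation: the exiting cases of a loop-invariant switch are tested once before the loop, and the switch inside the loop collapses to an unconditional edge.

#include <cassert>

// Before trivial unswitching: the switch condition is loop-invariant, it is
// reached before any side effects, and every case but one exits the loop.
static int before(int Mode, const int *A, int N) {
  int Sum = 0;
  for (int I = 0; I < N; ++I) {
    switch (Mode) {
    case 0:
      break;      // stay in the loop
    default:
      return Sum; // every other case exits
    }
    Sum += A[I];
  }
  return Sum;
}

// After trivial unswitching: the exiting cases are checked once in the
// preheader, and the in-loop switch becomes an unconditional branch.
static int after(int Mode, const int *A, int N) {
  int Sum = 0;
  if (Mode != 0)
    return Sum;
  for (int I = 0; I < N; ++I)
    Sum += A[I];
  return Sum;
}

int main() {
  const int A[] = {1, 2, 3};
  assert(before(0, A, 3) == after(0, A, 3));
  assert(before(7, A, 3) == after(7, A, 3));
  return 0;
}

Note that, unlike non-trivial unswitching, the loop body is not duplicated: only the exiting edges are peeled out in front of the loop.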
937
938/// This routine scans the loop to find a branch or switch which occurs before
939/// any side effects occur. These can potentially be unswitched without
940/// duplicating the loop. If a branch or switch is successfully unswitched the
941/// scanning continues to see if subsequent branches or switches have become
942/// trivial. Once all trivial candidates have been unswitched, this routine
943/// returns.
944///
945/// The return value indicates whether anything was unswitched (and therefore
946/// changed).
947///
948/// If `SE` is not null, it will be updated based on the potential loop SCEVs
949/// invalidated by this.
950static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
951 LoopInfo &LI, ScalarEvolution *SE,
952 MemorySSAUpdater *MSSAU) {
953 bool Changed = false;
954
955 // If loop header has only one reachable successor we should keep looking for
956 // trivial condition candidates in the successor as well. An alternative is
957 // to constant fold conditions and merge successors into loop header (then we
958 // only need to check the header's terminator). The reason for not doing this
959 // in the LoopUnswitch pass is that it could potentially break LoopPassManager's
960 // invariants. Folding dead branches could either eliminate the current loop
961 // or make other loops unreachable. LCSSA form might also not be preserved
962 // after deleting branches. The following code keeps traversing the loop
963 // header's successors until it finds a trivial condition candidate (a
964 // condition that is not a constant). Since unswitching generates branches
965 // with constant conditions, this scenario could be very common in practice.
966 BasicBlock *CurrentBB = L.getHeader();
967 SmallPtrSet<BasicBlock *, 8> Visited;
968 Visited.insert(CurrentBB);
969 do {
970 // Check if there are any side-effecting instructions (e.g. stores, calls,
971 // volatile loads) in the part of the loop that the code *would* execute
972 // without unswitching.
973 if (MSSAU) // Possible early exit with MSSA
974 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
975 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
976 return Changed;
977 if (llvm::any_of(*CurrentBB,
978 [](Instruction &I) { return I.mayHaveSideEffects(); }))
979 return Changed;
980
981 Instruction *CurrentTerm = CurrentBB->getTerminator();
982
983 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
984 // Don't bother trying to unswitch past a switch with a constant
985 // condition. This should be removed prior to running this pass by
986 // simplify-cfg.
987 if (isa<Constant>(SI->getCondition()))
988 return Changed;
989
990 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
991 // Couldn't unswitch this one so we're done.
992 return Changed;
993
994 // Mark that we managed to unswitch something.
995 Changed = true;
996
997 // If unswitching turned the terminator into an unconditional branch then
998 // we can continue. The unswitching logic specifically works to fold any
999 // cases it can into an unconditional branch to make it easier to
1000 // recognize here.
1001 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1002 if (!BI || BI->isConditional())
1003 return Changed;
1004
1005 CurrentBB = BI->getSuccessor(0);
1006 continue;
1007 }
1008
1009 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1010 if (!BI)
1011 // We do not understand other terminator instructions.
1012 return Changed;
1013
1014 // Don't bother trying to unswitch past an unconditional branch or a branch
1015 // with a constant value. These should be removed by simplify-cfg prior to
1016 // running this pass.
1017 if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
1018 return Changed;
1019
1020 // Found a trivial condition candidate: non-foldable conditional branch. If
1021 // we fail to unswitch this, we can't do anything else that is trivial.
1022 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1023 return Changed;
1024
1025 // Mark that we managed to unswitch something.
1026 Changed = true;
1027
1028 // If we only unswitched some of the conditions feeding the branch, we won't
1029 // have collapsed it to a single successor.
1030 BI = cast<BranchInst>(CurrentBB->getTerminator());
1031 if (BI->isConditional())
1032 return Changed;
1033
1034 // Follow the newly unconditional branch into its successor.
1035 CurrentBB = BI->getSuccessor(0);
1036
1037 // When continuing, if we exit the loop or reach a previously visited block,
1038 // then we cannot reach any trivial condition candidates (unfoldable
1039 // branch instructions or switch instructions) and no unswitch can happen.
1040 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1041
1042 return Changed;
1043}
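A similarly hedged sketch of what the scan above achieves, again at the source level with invented names: each loop-invariant, side-effect-free exiting condition is peeled out in turn, and after each successful unswitch the terminator left behind is unconditional, so the scan simply continues into the next block.

#include <cassert>
#include <cstdio>

// Before: two invariant conditions guard the body, and no side effects occur
// before either test, so both become trivial unswitching candidates in turn.
static int before(bool P, bool Q, int N) {
  int Count = 0;
  for (int I = 0; I < N; ++I) {
    if (P)
      return Count;
    if (Q)
      return Count;
    ++Count;
  }
  return Count;
}

// After: both tests have been hoisted; the loop header no longer branches on
// anything loop-invariant.
static int after(bool P, bool Q, int N) {
  if (P || Q)
    return 0;
  int Count = 0;
  for (int I = 0; I < N; ++I)
    ++Count;
  return Count;
}

int main() {
  for (int P = 0; P < 2; ++P)
    for (int Q = 0; Q < 2; ++Q)
      assert(before(P, Q, 4) == after(P, Q, 4));
  std::printf("ok\n");
  return 0;
}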
1044
1045/// Build the cloned blocks for an unswitched copy of the given loop.
1046///
1047/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1048/// after the split block (`SplitBB`) that will be used to select between the
1049/// cloned and original loop.
1050///
1051/// This routine handles cloning all of the necessary loop blocks and exit
1052/// blocks including rewriting their instructions and the relevant PHI nodes.
1053/// Any loop blocks or exit blocks which are dominated by a different successor
1054/// than the one for this clone of the loop blocks can be trivially skipped. We
1055/// use the `DominatingSucc` map to determine whether a block satisfies that
1056/// property with a simple map lookup.
1057///
1058/// It also correctly creates the unconditional branch in the cloned
1059/// unswitched parent block to only point at the unswitched successor.
1060///
1061/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1062/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1063/// the cloned blocks (and their loops) are left without full `LoopInfo`
1064 /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1065 /// blocks to it but doesn't create the cloned `DominatorTree` structure;
1066 /// instead the caller must recompute an accurate DT. It *does* correctly
1067/// update the `AssumptionCache` provided in `AC`.
1068static BasicBlock *buildClonedLoopBlocks(
1069 Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1070 ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1071 BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1072 const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1073 ValueToValueMapTy &VMap,
1074 SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
1075 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
1076 SmallVector<BasicBlock *, 4> NewBlocks;
1077 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1078
1079 // We will need to clone a bunch of blocks, wrap up the clone operation in
1080 // a helper.
1081 auto CloneBlock = [&](BasicBlock *OldBB) {
1082 // Clone the basic block and insert it before the new preheader.
1083 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1084 NewBB->moveBefore(LoopPH);
1085
1086 // Record this block and the mapping.
1087 NewBlocks.push_back(NewBB);
1088 VMap[OldBB] = NewBB;
1089
1090 return NewBB;
1091 };
1092
1093 // We skip cloning blocks when they have a dominating succ that is not the
1094 // succ we are cloning for.
1095 auto SkipBlock = [&](BasicBlock *BB) {
1096 auto It = DominatingSucc.find(BB);
1097 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1098 };
1099
1100 // First, clone the preheader.
1101 auto *ClonedPH = CloneBlock(LoopPH);
1102
1103 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1104 for (auto *LoopBB : L.blocks())
1105 if (!SkipBlock(LoopBB))
1106 CloneBlock(LoopBB);
1107
1108 // Split all the loop exit edges so that when we clone the exit blocks, if
1109 // any of the exit blocks are *also* a preheader for some other loop, we
1110 // don't create multiple predecessors entering the loop header.
1111 for (auto *ExitBB : ExitBlocks) {
1112 if (SkipBlock(ExitBB))
1113 continue;
1114
1115 // When we are going to clone an exit, we don't need to clone all the
1116 // instructions in the exit block and we want to ensure we have an easy
1117 // place to merge the CFG, so split the exit first. This is always safe to
1118 // do because there cannot be any non-loop predecessors of a loop exit in
1119 // loop simplified form.
1120 auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
1121
1122 // Rearrange the names to make it easier to write test cases by having the
1123 // exit block carry the suffix rather than the merge block carrying the
1124 // suffix.
1125 MergeBB->takeName(ExitBB);
1126 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1127
1128 // Now clone the original exit block.
1129 auto *ClonedExitBB = CloneBlock(ExitBB);
1130 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1131 "Exit block should have been split to have one successor!");
1132 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1133 "Cloned exit block has the wrong successor!");
1134
1135 // Remap any cloned instructions and create a merge phi node for them.
1136 for (auto ZippedInsts : llvm::zip_first(
1137 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1138 llvm::make_range(ClonedExitBB->begin(),
1139 std::prev(ClonedExitBB->end())))) {
1140 Instruction &I = std::get<0>(ZippedInsts);
1141 Instruction &ClonedI = std::get<1>(ZippedInsts);
1142
1143 // The only instructions in the exit block should be PHI nodes and
1144 // potentially a landing pad.
1145 assert(
1146 (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
1147 "Bad instruction in exit block!");
1148 // We should have a value map between the instruction and its clone.
1149 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1150
1151 auto *MergePN =
1152 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
1153 &*MergeBB->getFirstInsertionPt());
1154 I.replaceAllUsesWith(MergePN);
1155 MergePN->addIncoming(&I, ExitBB);
1156 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1157 }
1158 }
1159
1160 // Rewrite the instructions in the cloned blocks to refer to the instructions
1161 // in the cloned blocks. We have to do this as a second pass so that we have
1162 // everything available. Also, we have inserted new instructions which may
1163 // include assume intrinsics, so we update the assumption cache while
1164 // processing this.
1165 for (auto *ClonedBB : NewBlocks)
1166 for (Instruction &I : *ClonedBB) {
1167 RemapInstruction(&I, VMap,
1168 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1169 if (auto *II = dyn_cast<AssumeInst>(&I))
1170 AC.registerAssumption(II);
1171 }
1172
1173 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1174 // have spurious incoming values.
1175 for (auto *LoopBB : L.blocks())
1176 if (SkipBlock(LoopBB))
1177 for (auto *SuccBB : successors(LoopBB))
1178 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1179 for (PHINode &PN : ClonedSuccBB->phis())
1180 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1181
1182 // Remove the cloned parent as a predecessor of any successor we ended up
1183 // cloning other than the unswitched one.
1184 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1185 for (auto *SuccBB : successors(ParentBB)) {
1186 if (SuccBB == UnswitchedSuccBB)
1187 continue;
1188
1189 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1190 if (!ClonedSuccBB)
1191 continue;
1192
1193 ClonedSuccBB->removePredecessor(ClonedParentBB,
1194 /*KeepOneInputPHIs*/ true);
1195 }
1196
1197 // Replace the cloned branch with an unconditional branch to the cloned
1198 // unswitched successor.
1199 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1200 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1201 // Trivial simplification: if the terminator is a conditional branch and
1202 // its condition becomes dead, erase it.
1203 Value *ClonedConditionToErase = nullptr;
1204 if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1205 ClonedConditionToErase = BI->getCondition();
1206 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1207 ClonedConditionToErase = SI->getCondition();
1208
1209 ClonedTerminator->eraseFromParent();
1210 BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1211
1212 if (ClonedConditionToErase)
1213 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1214 MSSAU);
1215
1216 // If there are duplicate entries in the PHI nodes because of multiple edges
1217 // to the unswitched successor, we need to nuke all but one as we replaced it
1218 // with a direct branch.
1219 for (PHINode &PN : ClonedSuccBB->phis()) {
1220 bool Found = false;
1221 // Loop over the incoming operands backwards so we can easily delete as we
1222 // go without invalidating the index.
1223 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1224 if (PN.getIncomingBlock(i) != ClonedParentBB)
1225 continue;
1226 if (!Found) {
1227 Found = true;
1228 continue;
1229 }
1230 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1231 }
1232 }
1233
1234 // Record the domtree updates for the new blocks.
1235 SmallPtrSet<BasicBlock *, 4> SuccSet;
1236 for (auto *ClonedBB : NewBlocks) {
1237 for (auto *SuccBB : successors(ClonedBB))
1238 if (SuccSet.insert(SuccBB).second)
1239 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1240 SuccSet.clear();
1241 }
1242
1243 return ClonedPH;
1244}
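The two-pass clone-then-remap structure above (clone blocks while filling `VMap`, then `RemapInstruction` over the clones) can be shown with a small, self-contained sketch. This is a toy model, not the LLVM API: `Inst` and the remap loop are invented stand-ins for `Instruction` and `RemapInstruction`, and the point is only why the remap must be a separate second pass.

#include <cstdio>
#include <unordered_map>
#include <vector>

// Toy "instruction": an opcode name plus pointers to operand instructions.
struct Inst {
  const char *Op;
  std::vector<Inst *> Operands;
};

int main() {
  // Original block: C = add(A, B). C's operands point at A and B.
  Inst A = {"a", {}};
  Inst B = {"b", {}};
  Inst C = {"add", {&A, &B}};
  std::vector<Inst *> Block = {&A, &B, &C};

  // Pass 1: clone each instruction and record the old->new mapping. The
  // clones' operands still point into the *original* block here.
  std::unordered_map<Inst *, Inst *> VMap;
  std::vector<Inst> Clones;
  Clones.reserve(Block.size()); // keep pointers into Clones stable
  for (Inst *I : Block) {
    Clones.push_back(*I);
    VMap[I] = &Clones.back();
  }

  // Pass 2: remap operands so clones refer to clones. This has to be a
  // second pass because, during pass 1, an operand's clone may not exist yet
  // (in a loop, an operand can even be defined *after* its use by a PHI).
  for (Inst &I : Clones)
    for (Inst *&Op : I.Operands) {
      auto It = VMap.find(Op);
      if (It != VMap.end())
        Op = It->second;
    }

  Inst *ClonedC = VMap[&C];
  std::printf("clone of %s uses clones of %s and %s\n", ClonedC->Op,
              ClonedC->Operands[0]->Op, ClonedC->Operands[1]->Op);
  return 0;
}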
1245
1246/// Recursively clone the specified loop and all of its children.
1247///
1248/// The target parent loop for the clone should be provided, or can be null if
1249/// the clone is a top-level loop. While cloning, all the blocks are mapped
1250/// with the provided value map. The entire original loop must be present in
1251/// the value map. The cloned loop is returned.
1252static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1253 const ValueToValueMapTy &VMap, LoopInfo &LI) {
1254 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1255 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1256 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1257 for (auto *BB : OrigL.blocks()) {
1258 auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1259 ClonedL.addBlockEntry(ClonedBB);
1260 if (LI.getLoopFor(BB) == &OrigL)
1261 LI.changeLoopFor(ClonedBB, &ClonedL);
1262 }
1263 };
1264
1265 // We specially handle the first loop because it may get cloned into
1266 // a different parent and because we most commonly are cloning leaf loops.
1267 Loop *ClonedRootL = LI.AllocateLoop();
1268 if (RootParentL)
1269 RootParentL->addChildLoop(ClonedRootL);
1270 else
1271 LI.addTopLevelLoop(ClonedRootL);
1272 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1273
1274 if (OrigRootL.isInnermost())
1275 return ClonedRootL;
1276
1277 // If we have a nest, we can quickly clone the entire loop nest using an
1278 // iterative approach because it is a tree. We keep the cloned parent in the
1279 // data structure to avoid repeatedly querying through a map to find it.
1280 SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1281 // Build up the loops to clone in reverse order as we'll clone them from the
1282 // back.
1283 for (Loop *ChildL : llvm::reverse(OrigRootL))
1284 LoopsToClone.push_back({ClonedRootL, ChildL});
1285 do {
1286 Loop *ClonedParentL, *L;
1287 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1288 Loop *ClonedL = LI.AllocateLoop();
1289 ClonedParentL->addChildLoop(ClonedL);
1290 AddClonedBlocksToLoop(*L, *ClonedL);
1291 for (Loop *ChildL : llvm::reverse(*L))
1292 LoopsToClone.push_back({ClonedL, ChildL});
1293 } while (!LoopsToClone.empty());
1294
1295 return ClonedRootL;
1296}
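The iterative nest walk above can be shown in miniature. A hedged sketch with invented types: `Node` stands in for `Loop`, and the worklist of (cloned parent, original child) pairs mirrors `LoopsToClone`, avoiding both recursion and repeated map lookups for the parent.

#include <cstdio>
#include <memory>
#include <utility>
#include <vector>

// Toy loop-nest node: a name and owned children.
struct Node {
  const char *Name;
  std::vector<std::unique_ptr<Node>> Children;
};

// Clone a whole nest iteratively. Each worklist entry carries the already
// cloned parent, so placing a cloned child never requires a lookup.
static std::unique_ptr<Node> cloneNest(const Node &Root) {
  std::unique_ptr<Node> ClonedRoot(new Node{Root.Name, {}});
  std::vector<std::pair<Node *, const Node *>> Worklist;
  for (const auto &Child : Root.Children)
    Worklist.push_back(std::make_pair(ClonedRoot.get(), Child.get()));
  while (!Worklist.empty()) {
    Node *ClonedParent = Worklist.back().first;
    const Node *Orig = Worklist.back().second;
    Worklist.pop_back();
    std::unique_ptr<Node> Cloned(new Node{Orig->Name, {}});
    Node *ClonedRaw = Cloned.get();
    ClonedParent->Children.push_back(std::move(Cloned));
    for (const auto &Child : Orig->Children)
      Worklist.push_back(std::make_pair(ClonedRaw, Child.get()));
  }
  return ClonedRoot;
}

int main() {
  Node Root = {"outer", {}};
  Root.Children.emplace_back(new Node{"inner", {}});
  std::unique_ptr<Node> Clone = cloneNest(Root);
  std::printf("%s contains %s\n", Clone->Name, Clone->Children[0]->Name);
  return 0;
}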
1297
1298/// Build the cloned loops of an original loop from unswitching.
1299///
1300/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1301/// operation. We need to re-verify that there even is a loop (as the backedge
1302/// may not have been cloned), and even if there are remaining backedges the
1303/// backedge set may be different. However, we know that each child loop is
1304 /// undisturbed; we only need to find where to place each child loop within
1305/// either any parent loop or within a cloned version of the original loop.
1306///
1307/// Because child loops may end up cloned outside of any cloned version of the
1308/// original loop, multiple cloned sibling loops may be created. All of them
1309/// are returned so that the newly introduced loop nest roots can be
1310/// identified.
1311static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1312 const ValueToValueMapTy &VMap, LoopInfo &LI,
1313 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1314 Loop *ClonedL = nullptr;
1315
1316 auto *OrigPH = OrigL.getLoopPreheader();
1317 auto *OrigHeader = OrigL.getHeader();
1318
1319 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1320 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1321
1322 // We need to know the loops of the cloned exit blocks to even compute the
1323 // accurate parent loop. If we only clone exits to some parent of the
1324 // original parent, we want to clone into that outer loop. We also keep track
1325 // of the loops that our cloned exit blocks participate in.
1326 Loop *ParentL = nullptr;
1327 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1328 SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
1329 ClonedExitsInLoops.reserve(ExitBlocks.size());
1330 for (auto *ExitBB : ExitBlocks)
1331 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1332 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1333 ExitLoopMap[ClonedExitBB] = ExitL;
1334 ClonedExitsInLoops.push_back(ClonedExitBB);
1335 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1336 ParentL = ExitL;
1337 }
1338 assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1339 ParentL->contains(OrigL.getParentLoop())) &&
1340 "The computed parent loop should always contain (or be) the parent of "
1341 "the original loop.");
1342
1343 // We build the set of blocks dominated by the cloned header from the set of
1344 // cloned blocks out of the original loop. While not all of these will
1345 // necessarily be in the cloned loop, it is enough to establish that they
1346 // aren't in unreachable cycles, etc.
1347 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1348 for (auto *BB : OrigL.blocks())
1349 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1350 ClonedLoopBlocks.insert(ClonedBB);
1351
1352 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1353 // skipped cloning some region of this loop which can in turn skip some of
1354 // the backedges so we have to rebuild the blocks in the loop based on the
1355 // backedges that remain after cloning.
1356 SmallVector<BasicBlock *, 16> Worklist;
1357 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1358 for (auto *Pred : predecessors(ClonedHeader)) {
1359 // The only possible non-loop header predecessor is the preheader because
1360 // we know we cloned the loop in simplified form.
1361 if (Pred == ClonedPH)
1362 continue;
1363
1364 // Because the loop was in simplified form, the only non-loop predecessor
1365 // should be the preheader.
1366 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1367 "header other than the preheader "
1368 "that is not part of the loop!");
1369
1370 // Insert this block into the loop set and, on the first visit (if it
1371 // isn't the header we're currently walking), put it into the worklist to
1372 // recurse through.
1373 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1374 Worklist.push_back(Pred);
1375 }
1376
1377 // If we had any backedges then there *is* a cloned loop. Put the header into
1378 // the loop set and then walk the worklist backwards to find all the blocks
1379 // that remain within the loop after cloning.
1380 if (!BlocksInClonedLoop.empty()) {
1381 BlocksInClonedLoop.insert(ClonedHeader);
1382
1383 while (!Worklist.empty()) {
1384 BasicBlock *BB = Worklist.pop_back_val();
1385 assert(BlocksInClonedLoop.count(BB) &&
1386 "Didn't put block into the loop set!");
1387
1388 // Insert any predecessors that are in the possible set into the cloned
1389 // set, and if the insert is successful, add them to the worklist. Note
1390 // that we filter on the blocks that are definitely reachable via the
1391 // backedge to the loop header so we may prune out dead code within the
1392 // cloned loop.
1393 for (auto *Pred : predecessors(BB))
1394 if (ClonedLoopBlocks.count(Pred) &&
1395 BlocksInClonedLoop.insert(Pred).second)
1396 Worklist.push_back(Pred);
1397 }
1398
1399 ClonedL = LI.AllocateLoop();
1400 if (ParentL) {
1401 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1402 ParentL->addChildLoop(ClonedL);
1403 } else {
1404 LI.addTopLevelLoop(ClonedL);
1405 }
1406 NonChildClonedLoops.push_back(ClonedL);
1407
1408 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1409 // We don't want to just add the cloned loop blocks based on how we
1410 // discovered them. The original order of blocks was carefully built in
1411 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1412 // that logic, we just re-walk the original blocks (and those of the child
1413 // loops) and filter them as we add them into the cloned loop.
1414 for (auto *BB : OrigL.blocks()) {
1415 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1416 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1417 continue;
1418
1419 // Directly add the blocks that are only in this loop.
1420 if (LI.getLoopFor(BB) == &OrigL) {
1421 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1422 continue;
1423 }
1424
1425 // We want to manually add it to this loop and parents.
1426 // Registering it with LoopInfo will happen when we clone the top
1427 // loop for this block.
1428 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1429 PL->addBlockEntry(ClonedBB);
1430 }
1431
1432 // Now add each child loop whose header remains within the cloned loop. All
1433 // of the blocks within the loop must satisfy the same constraints as the
1434 // header so once we pass the header checks we can just clone the entire
1435 // child loop nest.
1436 for (Loop *ChildL : OrigL) {
1437 auto *ClonedChildHeader =
1438 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1439 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1440 continue;
1441
1442#ifndef NDEBUG
1443 // We should never have a cloned child loop header but fail to have
1444 // all of the blocks for that child loop.
1445 for (auto *ChildLoopBB : ChildL->blocks())
1446 assert(BlocksInClonedLoop.count(
1447 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1448 "Child cloned loop has a header within the cloned outer "
1449 "loop but not all of its blocks!");
1450#endif
1451
1452 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1453 }
1454 }
1455
1456 // Now that we've handled all the components of the original loop that were
1457 // cloned into a new loop, we still need to handle anything from the original
1458 // loop that wasn't in a cloned loop.
1459
1460 // Figure out what blocks are left to place within any loop nest containing
1461 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1462 // them.
1463 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1464 if (BlocksInClonedLoop.empty())
1465 UnloopedBlockSet.insert(ClonedPH);
1466 for (auto *ClonedBB : ClonedLoopBlocks)
1467 if (!BlocksInClonedLoop.count(ClonedBB))
1468 UnloopedBlockSet.insert(ClonedBB);
1469
1470 // Copy the cloned exits and sort them in ascending loop depth; we'll work
1471 // backwards across these to process them inside out. The order shouldn't
1472 // matter as we're just trying to build up the map from inside-out; we use
1473 // the map in a more stably ordered way below.
1474 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1475 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1476 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1477 ExitLoopMap.lookup(RHS)->getLoopDepth();
1478 });
1479
1480 // Populate the existing ExitLoopMap with everything reachable from each
1481 // exit, starting from the inner most exit.
1482 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1483 assert(Worklist.empty() && "Didn't clear worklist!");
1484
1485 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1486 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1487
1488 // Walk the CFG back until we hit the cloned PH adding everything reachable
1489 // and in the unlooped set to this exit block's loop.
1490 Worklist.push_back(ExitBB);
1491 do {
1492 BasicBlock *BB = Worklist.pop_back_val();
1493 // We can stop recursing at the cloned preheader (if we get there).
1494 if (BB == ClonedPH)
1495 continue;
1496
1497 for (BasicBlock *PredBB : predecessors(BB)) {
1498 // If this pred has already been moved to our set or is part of some
1499 // (inner) loop, no update needed.
1500 if (!UnloopedBlockSet.erase(PredBB)) {
1501 assert(
1502 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1503 "Predecessor not mapped to a loop!");
1504 continue;
1505 }
1506
1507 // We just insert into the loop set here. We'll add these blocks to the
1508 // exit loop after we build up the set in an order that doesn't rely on
1509 // predecessor order (which in turn relies on use list order).
1510 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1511 (void)Inserted;
1512 assert(Inserted && "Should only visit an unlooped block once!");
1513
1514 // And recurse through to its predecessors.
1515 Worklist.push_back(PredBB);
1516 }
1517 } while (!Worklist.empty());
1518 }
1519
1520 // Now that the ExitLoopMap gives us a mapping for all the non-looping cloned
1521 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1522 // in their original order adding them to the correct loop.
1523
1524 // We need a stable insertion order. We use the original loop's block
1525 // order and map each block into the correct parent loop.
1526 for (auto *BB : llvm::concat<BasicBlock *const>(
1527 makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1528 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1529 OuterL->addBasicBlockToLoop(BB, LI);
1530
1531#ifndef NDEBUG
1532 for (auto &BBAndL : ExitLoopMap) {
1533 auto *BB = BBAndL.first;
1534 auto *OuterL = BBAndL.second;
1535 assert(LI.getLoopFor(BB) == OuterL &&
1536 "Failed to put all blocks into outer loops!");
1537 }
1538#endif
1539
1540 // Now that all the blocks are placed into the correct containing loop in the
1541 // absence of child loops, find all the potentially cloned child loops and
1542 // clone them into whatever outer loop we placed their header into.
1543 for (Loop *ChildL : OrigL) {
1544 auto *ClonedChildHeader =
1545 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1546 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1547 continue;
1548
1549#ifndef NDEBUG
1550 for (auto *ChildLoopBB : ChildL->blocks())
1551 assert(VMap.count(ChildLoopBB) &&
1552 "Cloned a child loop header but not all of that loop's blocks!");
1553#endif
1554
1555 NonChildClonedLoops.push_back(cloneLoopNest(
1556 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1557 }
1558}
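The backedge-driven rebuild above (seed with the cloned header's non-preheader predecessors, then walk predecessors to a fixed point) is easy to demonstrate on a toy CFG. A hedged sketch with invented block numbering, not LLVM data structures:

#include <cstdio>
#include <set>
#include <vector>

int main() {
  // Toy CFG: 0 = preheader, 1 = header, 2 = latch (backedge source),
  // 3 = a former loop block that no longer reaches the backedge after
  // unswitching, 4 = exit. Preds[B] lists the predecessors of block B.
  std::vector<std::vector<int>> Preds = {{}, {0, 2}, {1}, {1}, {2, 3}};
  const int Preheader = 0, Header = 1;

  // Seed with the header's predecessors other than the preheader; these are
  // the surviving backedges.
  std::set<int> LoopBlocks;
  std::vector<int> Worklist;
  for (int P : Preds[Header])
    if (P != Preheader && LoopBlocks.insert(P).second)
      Worklist.push_back(P);

  // If any backedge survived, the loop still exists: add the header and walk
  // predecessors until we reach a fixed point.
  if (!LoopBlocks.empty())
    LoopBlocks.insert(Header);
  while (!Worklist.empty()) {
    int B = Worklist.back();
    Worklist.pop_back();
    if (B == Header)
      continue;
    for (int P : Preds[B])
      if (LoopBlocks.insert(P).second)
        Worklist.push_back(P);
  }

  // Prints blocks 1 and 2; block 3 no longer reaches the backedge, so it has
  // fallen out of the loop even though it was a loop block before cloning.
  for (int B : LoopBlocks)
    std::printf("block %d remains in the loop\n", B);
  return 0;
}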
1559
1560static void
1561deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1562 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1563 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1564 // Find all the dead clones, and remove them from their successors.
1565 SmallVector<BasicBlock *, 16> DeadBlocks;
1566 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1567 for (auto &VMap : VMaps)
1568 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1569 if (!DT.isReachableFromEntry(ClonedBB)) {
1570 for (BasicBlock *SuccBB : successors(ClonedBB))
1571 SuccBB->removePredecessor(ClonedBB);
1572 DeadBlocks.push_back(ClonedBB);
1573 }
1574
1575 // Remove all MemorySSA in the dead blocks
1576 if (MSSAU) {
1577 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1578 DeadBlocks.end());
1579 MSSAU->removeBlocks(DeadBlockSet);
1580 }
1581
1582 // Drop any remaining references to break cycles.
1583 for (BasicBlock *BB : DeadBlocks)
1584 BB->dropAllReferences();
1585 // Erase them from the IR.
1586 for (BasicBlock *BB : DeadBlocks)
1587 BB->eraseFromParent();
1588}
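The deletion order here matters, and the same two-phase pattern (`dropAllReferences` over every dead block, then erase) can be sketched in plain C++. This is a toy model with invented types, not the LLVM use-list machinery; it only shows why references must be dropped across the whole dead set before any block is freed.

#include <cstdio>
#include <vector>

// Toy "block": holds raw pointers to other blocks, the way instructions hold
// uses of values defined in other blocks.
struct Block {
  int Id;
  std::vector<Block *> Refs;
};

int main() {
  // Two dead blocks that reference each other, forming a cycle.
  std::vector<Block *> Dead;
  Dead.push_back(new Block{0, {}});
  Dead.push_back(new Block{1, {}});
  Dead[0]->Refs.push_back(Dead[1]);
  Dead[1]->Refs.push_back(Dead[0]);

  // Phase 1: drop every outgoing reference first. If we instead deleted the
  // blocks one by one, Dead[1] would briefly hold a dangling pointer to the
  // already-freed Dead[0].
  for (Block *B : Dead)
    B->Refs.clear();

  // Phase 2: with all cross-references gone, each block can be freed safely.
  for (Block *B : Dead) {
    std::printf("erasing dead block %d\n", B->Id);
    delete B;
  }
  return 0;
}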
1589
1590static void deleteDeadBlocksFromLoop(Loop &L,
1591 SmallVectorImpl<BasicBlock *> &ExitBlocks,
1592 DominatorTree &DT, LoopInfo &LI,
1593 MemorySSAUpdater *MSSAU) {
1594 // Find all the dead blocks tied to this loop, and remove them from their
1595 // successors.
1596 SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1597
1598 // Start with loop/exit blocks and get a transitive closure of reachable dead
1599 // blocks.
1600 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1601 ExitBlocks.end());
1602 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1603 while (!DeathCandidates.empty()) {
1604 auto *BB = DeathCandidates.pop_back_val();
1605 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1606 for (BasicBlock *SuccBB : successors(BB)) {
1607 SuccBB->removePredecessor(BB);
1608 DeathCandidates.push_back(SuccBB);
1609 }
1610 DeadBlockSet.insert(BB);
1611 }
1612 }
1613
1614 // Remove all MemorySSA in the dead blocks
1615 if (MSSAU)
1616 MSSAU->removeBlocks(DeadBlockSet);
1617
1618 // Filter out the dead blocks from the exit blocks list so that it can be
1619 // used in the caller.
1620 llvm::erase_if(ExitBlocks,
1621 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1622
1623 // Walk from this loop up through its parents removing all of the dead blocks.
1624 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1625 for (auto *BB : DeadBlockSet)
1626 ParentL->getBlocksSet().erase(BB);
1627 llvm::erase_if(ParentL->getBlocksVector(),
1628 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1629 }
1630
1631 // Now delete the dead child loops. This raw delete will clear them
1632 // recursively.
1633 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1634 if (!DeadBlockSet.count(ChildL->getHeader()))
1635 return false;
1636
1637 assert(llvm::all_of(ChildL->blocks(),
1638 [&](BasicBlock *ChildBB) {
1639 return DeadBlockSet.count(ChildBB);
1640 }) &&
1641 "If the child loop header is dead all blocks in the child loop must "
1642 "be dead as well!");
1643 LI.destroy(ChildL);
1644 return true;
1645 });
1646
1647 // Remove the loop mappings for the dead blocks and drop all the references
1648 // from these blocks to others to handle cyclic references as we start
1649 // deleting the blocks themselves.
1650 for (auto *BB : DeadBlockSet) {
1651 // Check that the dominator tree has already been updated.
1652 assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1653 LI.changeLoopFor(BB, nullptr);
1654 // Drop all uses of the instructions to make sure we won't have dangling
1655 // uses in other blocks.
1656 for (auto &I : *BB)
1657 if (!I.use_empty())
1658 I.replaceAllUsesWith(UndefValue::get(I.getType()));
1659 BB->dropAllReferences();
1660 }
1661
1662 // Actually delete the blocks now that they've been fully unhooked from the
1663 // IR.
1664 for (auto *BB : DeadBlockSet)
1665 BB->eraseFromParent();
1666}
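The `DeathCandidates` loop above computes a transitive closure: a block that only became unreachable because one of its predecessors died is discovered when that predecessor pushes its successors. A hedged toy version, with an invented reachability oracle standing in for the dominator tree:

#include <cstdio>
#include <set>
#include <vector>

int main() {
  // Succs[B] lists the successors of block B. Reachable[B] plays the role of
  // DT.isReachableFromEntry(B) after the CFG edits.
  std::vector<std::vector<int>> Succs = {{1}, {}, {3}, {}};
  std::vector<bool> Reachable = {true, true, false, false};

  // Start from the loop's own blocks (here just block 2); block 3 is outside
  // the initial candidate set and is only found through 2's successor list.
  std::set<int> DeadSet;
  std::vector<int> Candidates;
  Candidates.push_back(2);
  while (!Candidates.empty()) {
    int B = Candidates.back();
    Candidates.pop_back();
    if (!DeadSet.count(B) && !Reachable[B]) {
      for (int S : Succs[B])
        Candidates.push_back(S);
      DeadSet.insert(B);
    }
  }

  for (int B : DeadSet)
    std::printf("block %d is dead\n", B); // prints blocks 2 and 3
  return 0;
}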
1667
1668/// Recompute the set of blocks in a loop after unswitching.
1669///
1670 /// This walks from the original header's predecessors to rebuild the loop. We
1671/// take advantage of the fact that new blocks can't have been added, and so we
1672/// filter by the original loop's blocks. This also handles potentially
1673/// unreachable code that we don't want to explore but might be found examining
1674/// the predecessors of the header.
1675///
1676/// If the original loop is no longer a loop, this will return an empty set. If
1677/// it remains a loop, all the blocks within it will be added to the set
1678/// (including those blocks in inner loops).
1679static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
1680 LoopInfo &LI) {
1681 SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
1682
1683 auto *PH = L.getLoopPreheader();
1684 auto *Header = L.getHeader();
1685
1686 // A worklist to use while walking backwards from the header.
1687 SmallVector<BasicBlock *, 16> Worklist;
1688
1689 // First walk the predecessors of the header to find the backedges. This will
1690 // form the basis of our walk.
1691 for (auto *Pred : predecessors(Header)) {
1692 // Skip the preheader.
1693 if (Pred == PH)
1694 continue;
1695
1696 // Because the loop was in simplified form, the only non-loop predecessor
1697 // is the preheader.
1698 assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1699 "than the preheader that is not part of the "
1700 "loop!");
1701
1702 // Insert this block into the loop set and, on the first visit (if it
1703 // isn't the header we're currently walking), put it into the worklist to
1704 // recurse through.
1705 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1706 Worklist.push_back(Pred);
1707 }
1708
1709 // If no backedges were found, we're done.
1710 if (LoopBlockSet.empty())
1711 return LoopBlockSet;
1712
1713 // We found backedges, recurse through them to identify the loop blocks.
1714 while (!Worklist.empty()) {
1715 BasicBlock *BB = Worklist.pop_back_val();
1716 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1717
1718 // No need to walk past the header.
1719 if (BB == Header)
1720 continue;
1721
1722 // Because we know the inner loop structure remains valid we can use the
1723 // loop structure to jump immediately across the entire nested loop.
1724 // Further, because it is in loop simplified form, we can directly jump
1725 // to its preheader afterward.
1726 if (Loop *InnerL = LI.getLoopFor(BB))
1727 if (InnerL != &L) {
1728 assert(L.contains(InnerL) &&
1729 "Should not reach a loop *outside* this loop!");
1730 // The preheader is the only possible predecessor of the loop so
1731 // insert it into the set and check whether it was already handled.
1732 auto *InnerPH = InnerL->getLoopPreheader();
1733 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1734 "but not contain the inner loop "
1735 "preheader!");
1736 if (!LoopBlockSet.insert(InnerPH).second)
1737 // The only way to reach the preheader is through the loop body
1738 // itself, so if it has been visited the loop is already handled.
1739 continue;
1740
1741 // Insert all of the blocks (other than those already present) into
1742 // the loop set. We expect at least the block that led us to find the
1743 // inner loop to be in the block set, but we may also have other loop
1744 // blocks if they were already enqueued as predecessors of some other
1745 // outer loop block.
1746 for (auto *InnerBB : InnerL->blocks()) {
1747 if (InnerBB == BB) {
1748 assert(LoopBlockSet.count(InnerBB) &&
1749 "Block should already be in the set!");
1750 continue;
1751 }
1752
1753 LoopBlockSet.insert(InnerBB);
1754 }
1755
1756 // Add the preheader to the worklist so we will continue past the
1757 // loop body.
1758 Worklist.push_back(InnerPH);
1759 continue;
1760 }
1761
1762 // Insert any predecessors that were in the original loop into the new
1763 // set, and if the insert is successful, add them to the worklist.
1764 for (auto *Pred : predecessors(BB))
1765 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1766 Worklist.push_back(Pred);
1767 }
1768
1769 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1770
1771 // We've found all the blocks participating in the loop, return our completed
1772 // set.
1773 return LoopBlockSet;
1774}
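
For readers unfamiliar with the worklist idiom above, here is a minimal standalone sketch of the same backward walk over predecessor edges. It is not LLVM API: it uses a toy integer-keyed CFG instead of BasicBlock/LoopInfo, and it omits the shortcut across inner loops via their preheaders.

#include <unordered_map>
#include <unordered_set>
#include <vector>

using Block = int;
using Preds = std::unordered_map<Block, std::vector<Block>>;

// Collect all blocks that can reach a latch without passing through the
// header, i.e. the blocks of the natural loop.
std::unordered_set<Block> collectLoopBlocks(Block Header,
                                            const std::vector<Block> &Latches,
                                            const Preds &P) {
  std::unordered_set<Block> LoopBlocks;
  std::vector<Block> Worklist;
  LoopBlocks.insert(Header);
  for (Block L : Latches)
    if (LoopBlocks.insert(L).second)
      Worklist.push_back(L);
  while (!Worklist.empty()) {
    Block B = Worklist.back();
    Worklist.pop_back();
    auto It = P.find(B);
    if (It == P.end())
      continue;
    for (Block Pred : It->second)
      // Insert each block exactly once; never walk past the header.
      if (LoopBlocks.insert(Pred).second && Pred != Header)
        Worklist.push_back(Pred);
  }
  return LoopBlocks;
}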
1775
1776/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1777///
1778/// The removal may have removed some child loops entirely but cannot have
1779/// disturbed any remaining child loops. However, they may need to be hoisted
1780/// to the parent loop (or to be top-level loops). The original loop may be
1781/// completely removed.
1782///
1783/// The sibling loops resulting from this update are returned. If the original
1784/// loop remains a valid loop, it will be the first entry in this list with all
1785/// of the newly sibling loops following it.
1786///
1787/// Returns true if the loop remains a loop after unswitching, and false if it
1788/// is no longer a loop after unswitching (and should not continue to be
1789/// referenced).
1790static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1791 LoopInfo &LI,
1792 SmallVectorImpl<Loop *> &HoistedLoops) {
1793 auto *PH = L.getLoopPreheader();
1794
1795 // Compute the actual parent loop from the exit blocks. Because we may have
1796 // pruned some exits the loop may be different from the original parent.
1797 Loop *ParentL = nullptr;
1798 SmallVector<Loop *, 4> ExitLoops;
1799 SmallVector<BasicBlock *, 4> ExitsInLoops;
1800 ExitsInLoops.reserve(ExitBlocks.size());
1801 for (auto *ExitBB : ExitBlocks)
1802 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1803 ExitLoops.push_back(ExitL);
1804 ExitsInLoops.push_back(ExitBB);
1805 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1806 ParentL = ExitL;
1807 }
1808
1809 // Recompute the blocks participating in this loop. This may be empty if it
1810 // is no longer a loop.
1811 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1812
1813 // If we still have a loop, we need to re-set the loop's parent, as the
1814 // change to the exit block set may have moved it within the loop nest. Note
1815 // that this can only happen when this loop has a parent, as it can only
1816 // hoist the loop *up* the nest.
1817 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1818 // Remove this loop's (original) blocks from all of the intervening loops.
1819 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1820 IL = IL->getParentLoop()) {
1821 IL->getBlocksSet().erase(PH);
1822 for (auto *BB : L.blocks())
1823 IL->getBlocksSet().erase(BB);
1824 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1825 return BB == PH || L.contains(BB);
1826 });
1827 }
1828
1829 LI.changeLoopFor(PH, ParentL);
1830 L.getParentLoop()->removeChildLoop(&L);
1831 if (ParentL)
1832 ParentL->addChildLoop(&L);
1833 else
1834 LI.addTopLevelLoop(&L);
1835 }
1836
1837 // Now we update all the blocks which are no longer within the loop.
1838 auto &Blocks = L.getBlocksVector();
1839 auto BlocksSplitI =
1840 LoopBlockSet.empty()
1841 ? Blocks.begin()
1842 : std::stable_partition(
1843 Blocks.begin(), Blocks.end(),
1844 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1845
1846 // Before we erase the list of unlooped blocks, build a set of them.
1847 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1848 if (LoopBlockSet.empty())
1849 UnloopedBlocks.insert(PH);
1850
1851 // Now erase these blocks from the loop.
1852 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1853 L.getBlocksSet().erase(BB);
1854 Blocks.erase(BlocksSplitI, Blocks.end());
1855
1856 // Sort the exits in ascending loop depth; we'll work backwards across these
1857 // to process them inside out.
1858 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1859 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1860 });
1861
1862 // We'll build up a set for each exit loop.
1863 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1864 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1865
1866 auto RemoveUnloopedBlocksFromLoop =
1867 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1868 for (auto *BB : UnloopedBlocks)
1869 L.getBlocksSet().erase(BB);
1870 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1871 return UnloopedBlocks.count(BB);
1872 });
1873 };
1874
1875 SmallVector<BasicBlock *, 16> Worklist;
1876 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1877 assert(Worklist.empty() && "Didn't clear worklist!");
1878 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1879
1880 // Grab the next exit block, in decreasing loop depth order.
1881 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1882 Loop &ExitL = *LI.getLoopFor(ExitBB);
1883 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1884
1885 // Erase all of the unlooped blocks from the loops between the previous
1886 // exit loop and this exit loop. This works because the ExitsInLoops list is
1887 // sorted in increasing order of loop depth and thus we visit loops in
1888 // decreasing order of loop depth.
1889 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1890 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1891
1892 // Walk the CFG back until we hit the cloned PH adding everything reachable
1893 // and in the unlooped set to this exit block's loop.
1894 Worklist.push_back(ExitBB);
1895 do {
1896 BasicBlock *BB = Worklist.pop_back_val();
1897 // We can stop recursing at the cloned preheader (if we get there).
1898 if (BB == PH)
1899 continue;
1900
1901 for (BasicBlock *PredBB : predecessors(BB)) {
1902 // If this pred has already been moved to our set or is part of some
1903 // (inner) loop, no update needed.
1904 if (!UnloopedBlocks.erase(PredBB)) {
1905 assert((NewExitLoopBlocks.count(PredBB) ||
1906 ExitL.contains(LI.getLoopFor(PredBB))) &&
1907 "Predecessor not in a nested loop (or already visited)!");
1908 continue;
1909 }
1910
1911 // We just insert into the loop set here. We'll add these blocks to the
1912 // exit loop after we build up the set in a deterministic order rather
1913 // than the predecessor-influenced visit order.
1914 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
1915 (void)Inserted;
1916 assert(Inserted && "Should only visit an unlooped block once!");
1917
1918 // And recurse through to its predecessors.
1919 Worklist.push_back(PredBB);
1920 }
1921 } while (!Worklist.empty());
1922
1923 // If blocks in this exit loop were directly part of the original loop (as
1924 // opposed to a child loop), update the map to point to this exit loop. This
1925 // just updates a map and so the fact that the order is unstable is fine.
1926 for (auto *BB : NewExitLoopBlocks)
1927 if (Loop *BBL = LI.getLoopFor(BB))
1928 if (BBL == &L || !L.contains(BBL))
1929 LI.changeLoopFor(BB, &ExitL);
1930
1931 // We will remove the remaining unlooped blocks from this loop in the next
1932 // iteration or below.
1933 NewExitLoopBlocks.clear();
1934 }
1935
1936 // Any remaining unlooped blocks are no longer part of any loop unless they
1937 // are part of some child loop.
1938 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
1939 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1940 for (auto *BB : UnloopedBlocks)
1941 if (Loop *BBL = LI.getLoopFor(BB))
1942 if (BBL == &L || !L.contains(BBL))
1943 LI.changeLoopFor(BB, nullptr);
1944
1945 // Sink all the child loops whose headers are no longer in the loop set to
1946 // the parent (or to be top level loops). We reach into the loop and directly
1947 // update its subloop vector to make this batch update efficient.
1948 auto &SubLoops = L.getSubLoopsVector();
1949 auto SubLoopsSplitI =
1950 LoopBlockSet.empty()
1951 ? SubLoops.begin()
1952 : std::stable_partition(
1953 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
1954 return LoopBlockSet.count(SubL->getHeader());
1955 });
1956 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
1957 HoistedLoops.push_back(HoistedL);
1958 HoistedL->setParentLoop(nullptr);
1959
1960 // To compute the new parent of this hoisted loop we look at where we
1961 // placed the preheader above. We can't look up the header itself because we
1962 // retained the mapping from the header to the hoisted loop. But the
1963 // preheader and header should have the exact same new parent computed
1964 // based on the set of exit blocks from the original loop as the preheader
1965 // is a predecessor of the header and so reached in the reverse walk. And
1966 // because the loops were all in simplified form the preheader of the
1967 // hoisted loop can't be part of some *other* loop.
1968 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
1969 NewParentL->addChildLoop(HoistedL);
1970 else
1971 LI.addTopLevelLoop(HoistedL);
1972 }
1973 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
1974
1975 // Actually delete the loop if nothing remained within it.
1976 if (Blocks.empty()) {
1977 assert(SubLoops.empty() &&
1978 "Failed to remove all subloops from the original loop!");
1979 if (Loop *ParentL = L.getParentLoop())
1980 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
1981 else
1982 LI.removeLoop(llvm::find(LI, &L));
1983 LI.destroy(&L);
1984 return false;
1985 }
1986
1987 return true;
1988}
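
The partition-and-erase idiom that rebuildLoopAfterUnswitch applies twice above (once to the block vector, once to the subloop vector) can be isolated as a small sketch. Here it operates on plain ints rather than BasicBlock pointers, purely for illustration.

#include <algorithm>
#include <unordered_set>
#include <vector>

void removeUnlooped(std::vector<int> &Blocks,
                    const std::unordered_set<int> &LoopBlockSet) {
  // Keep members of LoopBlockSet in front, preserving their relative order.
  auto SplitI = std::stable_partition(
      Blocks.begin(), Blocks.end(),
      [&](int B) { return LoopBlockSet.count(B) != 0; });
  // Everything after the split point has left the loop; erase it in one batch.
  Blocks.erase(SplitI, Blocks.end());
}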
1989
1990/// Helper to visit a dominator subtree, invoking a callable on each node.
1991///
1992/// Returning false at any point will stop walking past that node of the tree.
1993template <typename CallableT>
1994void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
1995 SmallVector<DomTreeNode *, 4> DomWorklist;
1996 DomWorklist.push_back(DT[BB]);
1997#ifndef NDEBUG
1998 SmallPtrSet<DomTreeNode *, 4> Visited;
1999 Visited.insert(DT[BB]);
2000#endif
2001 do {
2002 DomTreeNode *N = DomWorklist.pop_back_val();
2003
2004 // Visit this node.
2005 if (!Callable(N->getBlock()))
2006 continue;
2007
2008 // Accumulate the child nodes.
2009 for (DomTreeNode *ChildN : *N) {
2010 assert(Visited.insert(ChildN).second &&
2011 "Cannot visit a node twice when walking a tree!");
2012 DomWorklist.push_back(ChildN);
2013 }
2014 } while (!DomWorklist.empty());
2015}
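
A hypothetical use of visitDomSubTree: collect the blocks of the dominator subtree rooted at BB that stay inside a loop L. DT, BB, and L are assumed to be in scope; the lambda's return value controls whether the walk descends into a node's children.

SmallVector<BasicBlock *, 8> Dominated;
visitDomSubTree(DT, BB, [&](BasicBlock *N) {
  if (!L.contains(N))
    return false; // Don't descend past blocks outside the loop.
  Dominated.push_back(N);
  return true;
});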
2016
2017static void unswitchNontrivialInvariants(
2018 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2019 SmallVectorImpl<BasicBlock *> &ExitBlocks, IVConditionInfo &PartialIVInfo,
2020 DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2021 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2022 ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
2023 auto *ParentBB = TI.getParent();
2024 BranchInst *BI = dyn_cast<BranchInst>(&TI);
2025 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2026
2027 // We can only unswitch switches, conditional branches with an invariant
2028 // condition, branches whose condition combines invariant conditions via an
2029 // instruction, or partially invariant instructions.
2030 assert((SI || (BI && BI->isConditional())) &&
2031 "Can only unswitch switches and conditional branch!");
2032 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2033 bool FullUnswitch =
2034 SI || (BI->getCondition() == Invariants[0] && !PartiallyInvariant);
2035 if (FullUnswitch)
2036 assert(Invariants.size() == 1 &&
2037 "Cannot have other invariants with full unswitching!");
2038 else
2039 assert(isa<Instruction>(BI->getCondition()) &&
2040 "Partial unswitching requires an instruction as the condition!");
2041
2042 if (MSSAU && VerifyMemorySSA)
2043 MSSAU->getMemorySSA()->verifyMemorySSA();
2044
2045 // Constant and BBs tracking the cloned and continuing successor. When we are
2046 // unswitching the entire condition, this can just be trivially chosen to
2047 // unswitch towards `true`. However, when we are unswitching a set of
2048 // invariants combined with `and` or `or` or partially invariant instructions,
2049 // the combining operation determines the best direction to unswitch: we want
2050 // to unswitch the direction that will collapse the branch.
2051 bool Direction = true;
2052 int ClonedSucc = 0;
2053 if (!FullUnswitch) {
2054 Value *Cond = BI->getCondition();
2055 (void)Cond;
2056 assert(((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) ||
2057 PartiallyInvariant) &&
2058 "Only `or`, `and`, an `select`, partially invariant instructions "
2059 "can combine invariants being unswitched.");
2060 if (!match(BI->getCondition(), m_LogicalOr())) {
2061 if (match(BI->getCondition(), m_LogicalAnd()) ||
2062 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2063 Direction = false;
2064 ClonedSucc = 1;
2065 }
2066 }
2067 }
2068
2069 BasicBlock *RetainedSuccBB =
2070 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2071 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2072 if (BI)
2073 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2074 else
2075 for (auto Case : SI->cases())
2076 if (Case.getCaseSuccessor() != RetainedSuccBB)
2077 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2078
2079 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2080 "Should not unswitch the same successor we are retaining!");
2081
2082 // The branch should be in this exact loop. Any inner loop's invariant branch
2083 // should be handled by unswitching that inner loop. The caller of this
2084 // routine should filter out any candidates that remain (but were skipped for
2085 // whatever reason).
2086 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2087
2088 // Compute the parent loop now before we start hacking on things.
2089 Loop *ParentL = L.getParentLoop();
2090 // Get blocks in RPO order for MSSA update, before changing the CFG.
2091 LoopBlocksRPO LBRPO(&L);
2092 if (MSSAU)
2093 LBRPO.perform(&LI);
2094
2095 // Compute the outer-most loop containing one of our exit blocks. This is the
2096 // furthest up our loopnest which can be mutated, which we will use below to
2097 // update things.
2098 Loop *OuterExitL = &L;
2099 for (auto *ExitBB : ExitBlocks) {
2100 Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
2101 if (!NewOuterExitL) {
2102 // We exited the entire nest with this block, so we're done.
2103 OuterExitL = nullptr;
2104 break;
2105 }
2106 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2107 OuterExitL = NewOuterExitL;
2108 }
2109
2110 // At this point, we're definitely going to unswitch something so invalidate
2111 // any cached information in ScalarEvolution for the outermost loop
2112 // containing an exit block and all nested loops.
2113 if (SE) {
2114 if (OuterExitL)
2115 SE->forgetLoop(OuterExitL);
2116 else
2117 SE->forgetTopmostLoop(&L);
2118 }
2119
2120 // If the edge from this terminator to a successor dominates that successor,
2121 // store a map from each block in its dominator subtree to it. This lets us
2122 // tell when cloning for a particular successor if a block is dominated by
2123 // some *other* successor with a single data structure. We use this to
2124 // significantly reduce cloning.
2125 SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
2126 for (auto *SuccBB : llvm::concat<BasicBlock *const>(
2127 makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs))
2128 if (SuccBB->getUniquePredecessor() ||
2129 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2130 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2131 }))
2132 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2133 DominatingSucc[BB] = SuccBB;
2134 return true;
2135 });
2136
2137 // Split the preheader, so that we know that there is a safe place to insert
2138 // the conditional branch. We will change the preheader to have a conditional
2139 // branch on LoopCond. The original preheader will become the split point
2140 // between the unswitched versions, and we will have a new preheader for the
2141 // original loop.
2142 BasicBlock *SplitBB = L.getLoopPreheader();
2143 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2144
2145 // Keep track of the dominator tree updates needed.
2146 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2147
2148 // Clone the loop for each unswitched successor.
2149 SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
2150 VMaps.reserve(UnswitchedSuccBBs.size());
2151 SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
2152 for (auto *SuccBB : UnswitchedSuccBBs) {
2153 VMaps.emplace_back(new ValueToValueMapTy());
2154 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2155 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2156 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
2157 }
2158
2159 // Drop metadata if we may break its semantics by moving this instr into the
2160 // split block.
2161 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2162 if (DropNonTrivialImplicitNullChecks)
2163 // Do not spend time trying to understand if we can keep it, just drop it
2164 // to save compile time.
2165 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2166 else {
2167 // It is only legal to preserve make.implicit metadata if we are
2168 // guaranteed to reach the implicit null check after following this branch.
2169 ICFLoopSafetyInfo SafetyInfo;
2170 SafetyInfo.computeLoopSafetyInfo(&L);
2171 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2172 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2173 }
2174 }
2175
2176 // The stitching of the branched code back together depends on whether we're
2177 // doing full unswitching or not, with the exception that we always want to
2178 // nuke the initial terminator placed in the split block.
2179 SplitBB->getTerminator()->eraseFromParent();
2180 if (FullUnswitch) {
2181 // Splice the terminator from the original loop and rewrite its
2182 // successors.
2183 SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
2184
2185 // Keep a clone of the terminator for MSSA updates.
2186 Instruction *NewTI = TI.clone();
2187 ParentBB->getInstList().push_back(NewTI);
2188
2189 // First wire up the moved terminator to the preheaders.
2190 if (BI) {
2191 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2192 BI->setSuccessor(ClonedSucc, ClonedPH);
2193 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2194 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2195 } else {
2196 assert(SI && "Must either be a branch or switch!");
2197
2198 // Walk the cases and directly update their successors.
2199 assert(SI->getDefaultDest() == RetainedSuccBB &&
2200 "Not retaining default successor!");
2201 SI->setDefaultDest(LoopPH);
2202 for (auto &Case : SI->cases())
2203 if (Case.getCaseSuccessor() == RetainedSuccBB)
2204 Case.setSuccessor(LoopPH);
2205 else
2206 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2207
2208 // We need to use the set to populate domtree updates as even when there
2209 // are multiple cases pointing at the same successor we only want to
2210 // remove and insert one edge in the domtree.
2211 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2212 DTUpdates.push_back(
2213 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2214 }
2215
2216 if (MSSAU) {
2217 DT.applyUpdates(DTUpdates);
2218 DTUpdates.clear();
2219
2220 // Remove all but one edge to the retained block and all unswitched
2221 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2222 // when we know we only keep a single edge for each case.
2223 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2224 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2225 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2226
2227 for (auto &VMap : VMaps)
2228 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2229 /*IgnoreIncomingWithNoClones=*/true);
2230 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2231
2232 // Remove all edges to unswitched blocks.
2233 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2234 MSSAU->removeEdge(ParentBB, SuccBB);
2235 }
2236
2237 // Now unhook the successor relationship as we'll be replacing
2238 // the terminator with a direct branch. This is much simpler for branches
2239 // than switches so we handle those first.
2240 if (BI) {
2241 // Remove the parent as a predecessor of the unswitched successor.
2242 assert(UnswitchedSuccBBs.size() == 1 &&
2243 "Only one possible unswitched block for a branch!");
2244 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2245 UnswitchedSuccBB->removePredecessor(ParentBB,
2246 /*KeepOneInputPHIs*/ true);
2247 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2248 } else {
2249 // Note that we actually want to remove the parent block as a predecessor
2250 // of *every* case successor. The case successor is either unswitched,
2251 // completely eliminating an edge from the parent to that successor, or it
2252 // is a duplicate edge to the retained successor as the retained successor
2253 // is always the default successor and as we'll replace this with a direct
2254 // branch, we no longer need the duplicate entries in the PHI nodes.
2255 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2256 assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2257 "Not retaining default successor!");
2258 for (auto &Case : NewSI->cases())
2259 Case.getCaseSuccessor()->removePredecessor(
2260 ParentBB,
2261 /*KeepOneInputPHIs*/ true);
2262
2263 // We need to use the set to populate domtree updates as even when there
2264 // are multiple cases pointing at the same successor we only want to
2265 // remove and insert one edge in the domtree.
2266 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2267 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2268 }
2269
2270 // After MSSAU update, remove the cloned terminator instruction NewTI.
2271 ParentBB->getTerminator()->eraseFromParent();
2272
2273 // Create a new unconditional branch to the continuing block (as opposed to
2274 // the one cloned).
2275 BranchInst::Create(RetainedSuccBB, ParentBB);
2276 } else {
2277 assert(BI && "Only branches have partial unswitching.");
2278 assert(UnswitchedSuccBBs.size() == 1 &&
2279 "Only one possible unswitched block for a branch!");
2280 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2281 // When doing a partial unswitch, we have to do a bit more work to build up
2282 // the branch in the split block.
2283 if (PartiallyInvariant)
2284 buildPartialInvariantUnswitchConditionalBranch(
2285 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2286 else
2287 buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
2288 *ClonedPH, *LoopPH);
2289 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2290
2291 if (MSSAU) {
2292 DT.applyUpdates(DTUpdates);
2293 DTUpdates.clear();
2294
2295 // Perform MSSA cloning updates.
2296 for (auto &VMap : VMaps)
2297 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2298 /*IgnoreIncomingWithNoClones=*/true);
2299 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2300 }
2301 }
2302
2303 // Apply the updates accumulated above to get an up-to-date dominator tree.
2304 DT.applyUpdates(DTUpdates);
2305
2306 // Now that we have an accurate dominator tree, first delete the dead cloned
2307 // blocks so that we can accurately build any cloned loops. It is important to
2308 // not delete the blocks from the original loop yet because we still want to
2309 // reference the original loop to understand the cloned loop's structure.
2310 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2311
2312 // Build the cloned loop structure itself. This may be substantially
2313 // different from the original structure due to the simplified CFG. This also
2314 // handles inserting all the cloned blocks into the correct loops.
2315 SmallVector<Loop *, 4> NonChildClonedLoops;
2316 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2317 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2318
2319 // Now that our cloned loops have been built, we can update the original loop.
2320 // First we delete the dead blocks from it and then we rebuild the loop
2321 // structure taking these deletions into account.
2322 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU);
2323
2324 if (MSSAU && VerifyMemorySSA)
2325 MSSAU->getMemorySSA()->verifyMemorySSA();
2326
2327 SmallVector<Loop *, 4> HoistedLoops;
2328 bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
2329
2330 if (MSSAU && VerifyMemorySSA)
2331 MSSAU->getMemorySSA()->verifyMemorySSA();
2332
2333 // This transformation has a high risk of corrupting the dominator tree, and
2334 // the below steps to rebuild loop structures will result in hard-to-debug
2335 // errors in that case, so verify that the dominator tree is sane first.
2336 // FIXME: Remove this when the bugs stop showing up and rely on existing
2337 // verification steps.
2338 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2339
2340 if (BI && !PartiallyInvariant) {
2341 // If we unswitched a branch which collapses the condition to a known
2342 // constant we want to replace all the uses of the invariants within both
2343 // the original and cloned blocks. We do this here so that we can use the
2344 // now updated dominator tree to identify which side the users are on.
2345 assert(UnswitchedSuccBBs.size() == 1 &&
2346 "Only one possible unswitched block for a branch!");
2347 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2348
2349 // When considering multiple partially-unswitched invariants
2350 // we can't just replace them with constants in both branches.
2351 //
2352 // For 'AND' we infer that the true branch ("continue") means true
2353 // for each invariant operand.
2354 // For 'OR' we can infer that the false branch ("continue") means false
2355 // for each invariant operand.
2356 // So for the multiple-partial case we don't replace
2357 // in the unswitched branch.
2358 bool ReplaceUnswitched =
2359 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2360
2361 ConstantInt *UnswitchedReplacement =
2362 Direction ? ConstantInt::getTrue(BI->getContext())
2363 : ConstantInt::getFalse(BI->getContext());
2364 ConstantInt *ContinueReplacement =
2365 Direction ? ConstantInt::getFalse(BI->getContext())
2366 : ConstantInt::getTrue(BI->getContext());
2367 for (Value *Invariant : Invariants)
2368 // Use make_early_inc_range here as set invalidates the iterator.
2369 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2370 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2371 if (!UserI)
2372 continue;
2373
2374 // Replace it with the 'continue' side if in the main loop body, and the
2375 // unswitched if in the cloned blocks.
2376 if (DT.dominates(LoopPH, UserI->getParent()))
2377 U.set(ContinueReplacement);
2378 else if (ReplaceUnswitched &&
2379 DT.dominates(ClonedPH, UserI->getParent()))
2380 U.set(UnswitchedReplacement);
2381 }
2382 }
2383
2384 // We can change which blocks are exit blocks of all the cloned sibling
2385 // loops, the current loop, and any parent loops which shared exit blocks
2386 // with the current loop. As a consequence, we need to re-form LCSSA for
2387 // them. But we shouldn't need to re-form LCSSA for any child loops.
2388 // FIXME: This could be made more efficient by tracking which exit blocks are
2389 // new, and focusing on them, but that isn't likely to be necessary.
2390 //
2391 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2392 // loop nest and update every loop that could have had its exits changed. We
2393 // also need to cover any intervening loops. We add all of these loops to
2394 // a list and sort them by loop depth to achieve this without updating
2395 // unnecessary loops.
2396 auto UpdateLoop = [&](Loop &UpdateL) {
2397#ifndef NDEBUG
2398 UpdateL.verifyLoop();
2399 for (Loop *ChildL : UpdateL) {
2400 ChildL->verifyLoop();
2401 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&(static_cast <bool> (ChildL->isRecursivelyLCSSAForm(
DT, LI) && "Perturbed a child loop's LCSSA form!") ? void
(0) : __assert_fail ("ChildL->isRecursivelyLCSSAForm(DT, LI) && \"Perturbed a child loop's LCSSA form!\""
, "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2402, __extension__ __PRETTY_FUNCTION__))
2402 "Perturbed a child loop's LCSSA form!")(static_cast <bool> (ChildL->isRecursivelyLCSSAForm(
DT, LI) && "Perturbed a child loop's LCSSA form!") ? void
(0) : __assert_fail ("ChildL->isRecursivelyLCSSAForm(DT, LI) && \"Perturbed a child loop's LCSSA form!\""
, "/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 2402, __extension__ __PRETTY_FUNCTION__))
;
2403 }
2404#endif
2405 // First build LCSSA for this loop so that we can preserve it when
2406 // forming dedicated exits. We don't want to perturb some other loop's
2407 // LCSSA while doing that CFG edit.
2408 formLCSSA(UpdateL, DT, &LI, SE);
2409
2410 // For loops reached by this loop's original exit blocks we may
2411 // introduced new, non-dedicated exits. At least try to re-form dedicated
2412 // exits for these loops. This may fail if they couldn't have dedicated
2413 // exits to start with.
2414 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2415 };
2416
2417 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2418 // and we can do it in any order as they don't nest relative to each other.
2419 //
2420 // Also check if any of the loops we have updated have become top-level loops
2421 // as that will necessitate widening the outer loop scope.
2422 for (Loop *UpdatedL :
2423 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2424 UpdateLoop(*UpdatedL);
2425 if (UpdatedL->isOutermost())
2426 OuterExitL = nullptr;
2427 }
2428 if (IsStillLoop) {
2429 UpdateLoop(L);
2430 if (L.isOutermost())
2431 OuterExitL = nullptr;
2432 }
2433
2434 // If the original loop had exit blocks, walk up through the outer most loop
2435 // of those exit blocks to update LCSSA and form updated dedicated exits.
2436 if (OuterExitL != &L)
2437 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2438 OuterL = OuterL->getParentLoop())
2439 UpdateLoop(*OuterL);
2440
2441#ifndef NDEBUG
2442 // Verify the entire loop structure to catch any incorrect updates before we
2443 // progress in the pass pipeline.
2444 LI.verify(DT);
2445#endif
2446
2447 // Now that we've unswitched something, make callbacks to report the changes.
2448 // For that we need to merge together the updated loops and the cloned loops
2449 // and check whether the original loop survived.
2450 SmallVector<Loop *, 4> SibLoops;
2451 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2452 if (UpdatedL->getParentLoop() == ParentL)
2453 SibLoops.push_back(UpdatedL);
2454 UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);
2455
2456 if (MSSAU && VerifyMemorySSA)
2457 MSSAU->getMemorySSA()->verifyMemorySSA();
2458
2459 if (BI)
2460 ++NumBranches;
2461 else
2462 ++NumSwitches;
2463}
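
For intuition about the overall transform, here is a source-level C++ analogue of what full unswitching achieves. This is illustrative only: the pass operates on LLVM IR, and f, g, and Invariant are hypothetical stand-ins.

void f(int);
void g(int);

// Before: the loop-invariant condition is re-tested on every iteration.
void before(bool Invariant, int N) {
  for (int I = 0; I < N; ++I) {
    if (Invariant)
      f(I);
    else
      g(I);
  }
}

// After: the test is hoisted out and each copy of the loop is specialized,
// which is exactly the duplication the cloning machinery above performs.
void after(bool Invariant, int N) {
  if (Invariant)
    for (int I = 0; I < N; ++I)
      f(I);
  else
    for (int I = 0; I < N; ++I)
      g(I);
}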
2464
2465/// Recursively compute the cost of a dominator subtree based on the per-block
2466/// cost map provided.
2467///
2468 /// The recursive computation is memoized into the provided DT-indexed cost map
2469/// to allow querying it for most nodes in the domtree without it becoming
2470/// quadratic.
2471static InstructionCost computeDomSubtreeCost(
2472 DomTreeNode &N,
2473 const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,
2474 SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {
2475 // Don't accumulate cost (or recurse through) blocks not in our block cost
2476 // map and thus not part of the duplication cost being considered.
2477 auto BBCostIt = BBCostMap.find(N.getBlock());
2478 if (BBCostIt == BBCostMap.end())
2479 return 0;
2480
2481 // Lookup this node to see if we already computed its cost.
2482 auto DTCostIt = DTCostMap.find(&N);
2483 if (DTCostIt != DTCostMap.end())
2484 return DTCostIt->second;
2485
2486 // If not, we have to compute it. We can't use insert above and update
2487 // because computing the cost may insert more things into the map.
2488 InstructionCost Cost = std::accumulate(
2489 N.begin(), N.end(), BBCostIt->second,
2490 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2491 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2492 });
2493 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2494 (void)Inserted;
2495 assert(Inserted && "Should not insert a node while visiting children!");
2496 return Cost;
2497}
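
A hypothetical call site for computeDomSubtreeCost: BBCostMap would be populated beforehand with per-block instruction costs (e.g. derived from TTI) for the blocks that would be duplicated, and DT and SuccBB are assumed to be in scope.

SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;
SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;
// ... fill BBCostMap for the candidate blocks ...
InstructionCost SubtreeCost =
    computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);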
2498
2499 /// Turns an llvm.experimental.guard intrinsic into an implicit control flow
2500 /// branch, making the following replacement:
2501///
2502/// --code before guard--
2503/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2504/// --code after guard--
2505///
2506/// into
2507///
2508/// --code before guard--
2509/// br i1 %cond, label %guarded, label %deopt
2510///
2511/// guarded:
2512/// --code after guard--
2513///
2514/// deopt:
2515/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2516/// unreachable
2517///
2518/// It also makes all relevant DT and LI updates, so that all structures are in
2519/// valid state after this transform.
2520static BranchInst *
2521turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
2522 SmallVectorImpl<BasicBlock *> &ExitBlocks,
2523 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
2524 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2525 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2526 BasicBlock *CheckBB = GI->getParent();
2527
2528 if (MSSAU && VerifyMemorySSA)
2529 MSSAU->getMemorySSA()->verifyMemorySSA();
2530
2531 // Remove all CheckBB's successors from DomTree. A block can be seen among
2532 // successors more than once, but for DomTree it should be added only once.
2533 SmallPtrSet<BasicBlock *, 4> Successors;
2534 for (auto *Succ : successors(CheckBB))
2535 if (Successors.insert(Succ).second)
2536 DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2537
2538 Instruction *DeoptBlockTerm =
2539 SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
2540 BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2541 // SplitBlockAndInsertIfThen inserts control flow that branches to
2542 // DeoptBlockTerm if the condition is true. We want the opposite.
2543 CheckBI->swapSuccessors();
2544
2545 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2546 GuardedBlock->setName("guarded");
2547 CheckBI->getSuccessor(1)->setName("deopt");
2548 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2549
2550 // We now have a new exit block.
2551 ExitBlocks.push_back(CheckBI->getSuccessor(1));
2552
2553 if (MSSAU)
2554 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2555
2556 GI->moveBefore(DeoptBlockTerm);
2557 GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
2558
2559 // Add new successors of CheckBB into DomTree.
2560 for (auto *Succ : successors(CheckBB))
2561 DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
2562
2563 // Now the blocks that used to be CheckBB's successors are GuardedBlock's
2564 // successors.
2565 for (auto *Succ : Successors)
2566 DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
2567
2568 // Make proper changes to DT.
2569 DT.applyUpdates(DTUpdates);
2570 // Inform LI of a new loop block.
2571 L.addBasicBlockToLoop(GuardedBlock, LI);
2572
2573 if (MSSAU) {
2574 MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2575 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2576 if (VerifyMemorySSA)
2577 MSSAU->getMemorySSA()->verifyMemorySSA();
2578 }
2579
2580 ++NumGuards;
2581 return CheckBI;
2582}
2583
2584 /// The cost multiplier is a way to limit the potentially exponential behavior
2585 /// of loop-unswitch. Cost is multiplied in proportion to 2^(number of unswitch
2586 /// candidates available). It also accounts for the number of "sibling" loops,
2587 /// the idea being to account for previous unswitches that already happened on
2588 /// this cluster of loops. There was an attempt to keep this formula simple,
2589 /// just enough to limit the worst case behavior. Even if it is not that simple
2590 /// now, it is still not an attempt to provide a detailed heuristic size
2591 /// prediction.
2592///
2593/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2594/// unswitch candidates, making adequate predictions instead of wild guesses.
2595/// That requires knowing not just the number of "remaining" candidates but
2596/// also costs of unswitching for each of these candidates.
2597static int CalculateUnswitchCostMultiplier(
2598 Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
2599 ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *>>>
2600 UnswitchCandidates) {
2601
2602 // Guards and other exiting conditions do not contribute to exponential
2603 // explosion once they dominate the latch (otherwise there might be
2604 // another path to the latch remaining that does not allow eliminating the
2605 // loop copy on unswitch).
2606 BasicBlock *Latch = L.getLoopLatch();
2607 BasicBlock *CondBlock = TI.getParent();
2608 if (DT.dominates(CondBlock, Latch) &&
2609 (isGuard(&TI) ||
2610 llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
2611 return L.contains(SuccBB);
2612 }) <= 1)) {
2613 NumCostMultiplierSkipped++;
2614 return 1;
2615 }
2616
2617 auto *ParentL = L.getParentLoop();
2618 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2619 : std::distance(LI.begin(), LI.end()));
2620 // Count the number of clones that all the candidates might cause during
2621 // unswitching. A branch/guard counts as 1, a switch as log2 of its cases.
2622 int UnswitchedClones = 0;
2623 for (auto Candidate : UnswitchCandidates) {
2624 Instruction *CI = Candidate.first;
2625 BasicBlock *CondBlock = CI->getParent();
2626 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2627 if (isGuard(CI)) {
2628 if (!SkipExitingSuccessors)
2629 UnswitchedClones++;
2630 continue;
2631 }
2632 int NonExitingSuccessors = llvm::count_if(
2633 successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
2634 return !SkipExitingSuccessors || L.contains(SuccBB);
2635 });
2636 UnswitchedClones += Log2_32(NonExitingSuccessors);
2637 }
2638
2639 // Ignore up to the "unscaled candidates" number of unswitch candidates
2640 // when calculating the power-of-two scaling of the cost. The main idea
2641 // with this control is to allow a small number of unswitches to happen
2642 // and rely more on siblings multiplier (see below) when the number
2643 // of candidates is small.
2644 unsigned ClonesPower =
2645 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2646
2647 // Allow top-level loops to spread a bit more than nested ones.
2648 int SiblingsMultiplier =
2649 std::max((ParentL ? SiblingsCount
2650 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2651 1);
2652 // Compute the cost multiplier in a way that won't overflow by saturating
2653 // at an upper bound.
2654 int CostMultiplier;
2655 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2656 SiblingsMultiplier > UnswitchThreshold)
2657 CostMultiplier = UnswitchThreshold;
2658 else
2659 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2660 (int)UnswitchThreshold);
2661
2662 LLVM_DEBUG(dbgs() << "  Computed multiplier " << CostMultiplier
2663                   << " (siblings " << SiblingsMultiplier << " * clones "
2664                   << (1 << ClonesPower) << ")"
2665                   << " for unswitch candidate: " << TI << "\n");
2666 return CostMultiplier;
2667}
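
To make the formula above concrete, here is a small self-contained sketch of the same arithmetic with illustrative inputs. The three constants stand in for the pass's -unswitch-* command-line options and are assumptions of this sketch, not the actual defaults.

#include <algorithm>
#include <iostream>

// Illustrative stand-ins for the pass options (assumed values only).
static const int Threshold = 50;          // UnswitchThreshold
static const int InitialUnscaled = 8;     // UnswitchNumInitialUnscaledCandidates
static const int SiblingsToplevelDiv = 2; // UnswitchSiblingsToplevelDiv

static int log2u(unsigned V) { int L = 0; while (V >>= 1) ++L; return L; }

int main() {
  // A top-level loop (no parent) with 6 sibling loops whose candidates
  // would produce 10 clones in total.
  int SiblingsCount = 6, UnswitchedClones = 10;
  int ClonesPower = std::max(UnswitchedClones - InitialUnscaled, 0);         // 2
  int SiblingsMultiplier = std::max(SiblingsCount / SiblingsToplevelDiv, 1); // 3
  int CostMultiplier;
  if (ClonesPower > log2u(Threshold) || SiblingsMultiplier > Threshold)
    CostMultiplier = Threshold; // saturate to avoid overflow
  else
    CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower), Threshold);
  std::cout << CostMultiplier << "\n"; // prints 12 = 3 * 2^2
}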
2668
2669static bool unswitchBestCondition(
2670 Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2671 AAResults &AA, TargetTransformInfo &TTI,
2672 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2673 ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
2674 // Collect all invariant conditions within this loop (as opposed to an inner
2675 // loop which would be handled when visiting that inner loop).
2676 SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
2677 UnswitchCandidates;
2678
2679 // Whether or not we should also collect guards in the loop.
2680 bool CollectGuards = false;
2681 if (UnswitchGuards) {
1
Assuming the condition is false
2
Taking false branch
2682 auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2683 Intrinsic::getName(Intrinsic::experimental_guard));
2684 if (GuardDecl && !GuardDecl->use_empty())
2685 CollectGuards = true;
2686 }
2687
2688 IVConditionInfo PartialIVInfo;
3
Calling implicit default constructor for 'IVConditionInfo'
5
Returning from default constructor for 'IVConditionInfo'
2689 for (auto *BB : L.blocks()) {
6
Assuming '__begin1' is equal to '__end1'
2690 if (LI.getLoopFor(BB) != &L)
2691 continue;
2692
2693 if (CollectGuards)
2694 for (auto &I : *BB)
2695 if (isGuard(&I)) {
2696 auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
2697 // TODO: Support AND, OR conditions and partial unswitching.
2698 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2699 UnswitchCandidates.push_back({&I, {Cond}});
2700 }
2701
2702 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2703 // We can only consider fully loop-invariant switch conditions as we need
2704 // to completely eliminate the switch after unswitching.
2705 if (!isa<Constant>(SI->getCondition()) &&
2706 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2707 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2708 continue;
2709 }
2710
2711 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2712 if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
2713 BI->getSuccessor(0) == BI->getSuccessor(1))
2714 continue;
2715
2716    // If BI's condition is 'select _, true, false', simplify it so that it
2717    // does not confuse the matchers below.
2718 Value *Cond = BI->getCondition(), *CondNext;
2719 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
2720 Cond = CondNext;
2721 BI->setCondition(Cond);
2722
2723 if (L.isLoopInvariant(BI->getCondition())) {
2724 UnswitchCandidates.push_back({BI, {BI->getCondition()}});
2725 continue;
2726 }
2727
2728 Instruction &CondI = *cast<Instruction>(BI->getCondition());
2729 if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
2730 TinyPtrVector<Value *> Invariants =
2731 collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
2732 if (Invariants.empty())
2733 continue;
2734
2735 UnswitchCandidates.push_back({BI, std::move(Invariants)});
2736 continue;
2737 }
2738 }
2739
2740 Instruction *PartialIVCondBranch = nullptr;
2741 if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
7
Assuming 'MSSAU' is null
8
Taking false branch
2742 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2743 return TerminatorAndInvariants.first == L.getHeader()->getTerminator();
2744 })) {
2745 MemorySSA *MSSA = MSSAU->getMemorySSA();
2746 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2747     LLVM_DEBUG(
2748         dbgs() << "simple-loop-unswitch: Found partially invariant condition "
2749                << *Info->InstToDuplicate[0] << "\n");
2750 PartialIVInfo = *Info;
2751 PartialIVCondBranch = L.getHeader()->getTerminator();
2752 TinyPtrVector<Value *> ValsToDuplicate;
2753 for (auto *Inst : Info->InstToDuplicate)
2754 ValsToDuplicate.push_back(Inst);
2755 UnswitchCandidates.push_back(
2756 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2757 }
2758 }
2759
2760 // If we didn't find any candidates, we're done.
2761 if (UnswitchCandidates.empty())
9
Calling 'SmallVectorBase::empty'
12
Returning from 'SmallVectorBase::empty'
13
Taking false branch
2762 return false;
2763
2764 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
2765 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
2766 // irreducible control flow into reducible control flow and introduce new
2767 // loops "out of thin air". If we ever discover important use cases for doing
2768 // this, we can add support to loop unswitch, but it is a lot of complexity
2769 // for what seems little or no real world benefit.
2770 LoopBlocksRPO RPOT(&L);
2771 RPOT.perform(&LI);
2772 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
14
Calling 'containsIrreducibleCFG<const llvm::BasicBlock *, llvm::LoopBlocksRPO, llvm::LoopInfo, llvm::GraphTraits<const llvm::BasicBlock *>>'
16
Returning from 'containsIrreducibleCFG<const llvm::BasicBlock *, llvm::LoopBlocksRPO, llvm::LoopInfo, llvm::GraphTraits<const llvm::BasicBlock *>>'
17
Taking false branch
2773 return false;
2774
2775 SmallVector<BasicBlock *, 4> ExitBlocks;
2776 L.getUniqueExitBlocks(ExitBlocks);
2777
2778 // We cannot unswitch if exit blocks contain a cleanuppad instruction as we
2779 // don't know how to split those exit blocks.
2780 // FIXME: We should teach SplitBlock to handle this and remove this
2781 // restriction.
2782 for (auto *ExitBB : ExitBlocks) {
18
Assuming '__begin1' is equal to '__end1'
2783 if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI())) {
2784       LLVM_DEBUG(
2785           dbgs() << "Cannot unswitch because of cleanuppad in exit block\n");
2786 return false;
2787 }
2788 }
2789
2790   LLVM_DEBUG(
2791       dbgs() << "Considering " << UnswitchCandidates.size()
2792              << " non-trivial loop invariant conditions for unswitching.\n");
19
Assuming 'DebugFlag' is false
20
Loop condition is false.  Exiting loop
2793
2794  // Unswitching these terminators will require duplicating parts of the
2795  // loop, so we need to be able to model that cost. Compute the ephemeral
2796  // values and set up a data structure to hold per-BB costs. We cache each
2797  // block's cost so that we don't recompute this when considering different
2798  // subsets of the loop for duplication during unswitching.
2799 SmallPtrSet<const Value *, 4> EphValues;
2800 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
2801 SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;
2802
2803 // Compute the cost of each block, as well as the total loop cost. Also, bail
2804 // out if we see instructions which are incompatible with loop unswitching
2805 // (convergent, noduplicate, or cross-basic-block tokens).
2806 // FIXME: We might be able to safely handle some of these in non-duplicated
2807 // regions.
2808 TargetTransformInfo::TargetCostKind CostKind =
2809 L.getHeader()->getParent()->hasMinSize()
21
Assuming the condition is false
22
'?' condition is false
2810 ? TargetTransformInfo::TCK_CodeSize
2811 : TargetTransformInfo::TCK_SizeAndLatency;
2812 InstructionCost LoopCost = 0;
2813 for (auto *BB : L.blocks()) {
23
Assuming '__begin1' is equal to '__end1'
2814 InstructionCost Cost = 0;
2815 for (auto &I : *BB) {
2816 if (EphValues.count(&I))
2817 continue;
2818
2819 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
2820 return false;
2821 if (auto *CB = dyn_cast<CallBase>(&I))
2822 if (CB->isConvergent() || CB->cannotDuplicate())
2823 return false;
2824
2825 Cost += TTI.getUserCost(&I, CostKind);
2826 }
2827     assert(Cost >= 0 && "Must not have negative costs!");
2828     LoopCost += Cost;
2829     assert(LoopCost >= 0 && "Must not have negative loop costs!");
2830 BBCostMap[BB] = Cost;
2831 }
2832   LLVM_DEBUG(dbgs() << "  Total loop cost: " << LoopCost << "\n");
24
Assuming 'DebugFlag' is false
25
Loop condition is false.  Exiting loop
2833
2834 // Now we find the best candidate by searching for the one with the following
2835 // properties in order:
2836 //
2837 // 1) An unswitching cost below the threshold
2838 // 2) The smallest number of duplicated unswitch candidates (to avoid
2839 // creating redundant subsequent unswitching)
2840 // 3) The smallest cost after unswitching.
2841 //
2842 // We prioritize reducing fanout of unswitch candidates provided the cost
2843 // remains below the threshold because this has a multiplicative effect.
2844 //
2845 // This requires memoizing each dominator subtree to avoid redundant work.
2846 //
2847 // FIXME: Need to actually do the number of candidates part above.
2848 SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;
2849 // Given a terminator which might be unswitched, computes the non-duplicated
2850 // cost for that terminator.
2851 auto ComputeUnswitchedCost = [&](Instruction &TI,
2852 bool FullUnswitch) -> InstructionCost {
2853 BasicBlock &BB = *TI.getParent();
2854 SmallPtrSet<BasicBlock *, 4> Visited;
2855
2856 InstructionCost Cost = 0;
2857 for (BasicBlock *SuccBB : successors(&BB)) {
2858 // Don't count successors more than once.
2859 if (!Visited.insert(SuccBB).second)
30
Assuming field 'second' is true
31
Taking false branch
2860 continue;
2861
2862 // If this is a partial unswitch candidate, then it must be a conditional
2863 // branch with a condition of either `or`, `and`, their corresponding
2864 // select forms or partially invariant instructions. In that case, one of
2865 // the successors is necessarily duplicated, so don't even try to remove
2866 // its cost.
2867       if (!FullUnswitch) {
31.1
'FullUnswitch' is false
32
Taking true branch
2868 auto &BI = cast<BranchInst>(TI);
33
'TI' is a 'BranchInst'
2869 if (match(BI.getCondition(), m_LogicalAnd())) {
34
Calling 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 28>>'
41
Returning from 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 28>>'
42
Taking false branch
2870 if (SuccBB == BI.getSuccessor(1))
2871 continue;
2872 } else if (match(BI.getCondition(), m_LogicalOr())) {
43
Calling 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 29>>'
50
Returning from 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 29>>'
51
Taking false branch
2873 if (SuccBB == BI.getSuccessor(0))
2874 continue;
2875 } else if (!PartialIVInfo.InstToDuplicate.empty()) {
52
Calling 'SmallVectorBase::empty'
55
Returning from 'SmallVectorBase::empty'
56
Taking true branch
2876 if (PartialIVInfo.KnownValue->isOneValue() &&
57
Called C++ object pointer is null
2877 SuccBB == BI.getSuccessor(1))
2878 continue;
2879 else if (!PartialIVInfo.KnownValue->isOneValue() &&
2880 SuccBB == BI.getSuccessor(0))
2881 continue;
2882 }
2883 }
2884
2885 // This successor's domtree will not need to be duplicated after
2886 // unswitching if the edge to the successor dominates it (and thus the
2887 // entire tree). This essentially means there is no other path into this
2888 // subtree and so it will end up live in only one clone of the loop.
2889 if (SuccBB->getUniquePredecessor() ||
2890 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2891 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
2892 })) {
2893 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
2894         assert(Cost <= LoopCost &&
2895                "Non-duplicated cost should never exceed total loop cost!");
2896 }
2897 }
2898
2899 // Now scale the cost by the number of unique successors minus one. We
2900 // subtract one because there is already at least one copy of the entire
2901 // loop. This is computing the new cost of unswitching a condition.
2902 // Note that guards always have 2 unique successors that are implicit and
2903 // will be materialized if we decide to unswitch it.
2904 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
2905     assert(SuccessorsCount > 1 &&
2906            "Cannot unswitch a condition without multiple distinct successors!");
2907 return (LoopCost - Cost) * (SuccessorsCount - 1);
2908 };
2909 Instruction *BestUnswitchTI = nullptr;
2910 InstructionCost BestUnswitchCost = 0;
2911 ArrayRef<Value *> BestUnswitchInvariants;
2912 for (auto &TerminatorAndInvariants : UnswitchCandidates) {
26
Assuming '__begin1' is not equal to '__end1'
2913 Instruction &TI = *TerminatorAndInvariants.first;
2914 ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
2915 BranchInst *BI = dyn_cast<BranchInst>(&TI);
27
Assuming the object is a 'BranchInst'
2916     InstructionCost CandidateCost = ComputeUnswitchedCost(
29
Calling 'operator()'
2917         TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 &&
27.1
'BI' is non-null
28
Assuming the condition is false
2918                                      Invariants[0] == BI->getCondition()));
2919 // Calculate cost multiplier which is a tool to limit potentially
2920 // exponential behavior of loop-unswitch.
2921 if (EnableUnswitchCostMultiplier) {
2922 int CostMultiplier =
2923 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
2924       assert(
2925           (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
2926           "cost multiplier needs to be in the range of 1..UnswitchThreshold");
2927 CandidateCost *= CostMultiplier;
2928       LLVM_DEBUG(dbgs() << "  Computed cost of " << CandidateCost
2929                         << " (multiplier: " << CostMultiplier << ")"
2930                         << " for unswitch candidate: " << TI << "\n");
2931 } else {
2932       LLVM_DEBUG(dbgs() << "  Computed cost of " << CandidateCost
2933                         << " for unswitch candidate: " << TI << "\n");
2934 }
2935
2936 if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
2937 BestUnswitchTI = &TI;
2938 BestUnswitchCost = CandidateCost;
2939 BestUnswitchInvariants = Invariants;
2940 }
2941 }
2942   assert(BestUnswitchTI && "Failed to find loop unswitch candidate");
2943
2944 if (BestUnswitchCost >= UnswitchThreshold) {
2945     LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "
2946                       << BestUnswitchCost << "\n");
2947 return false;
2948 }
2949
2950 if (BestUnswitchTI != PartialIVCondBranch)
2951 PartialIVInfo.InstToDuplicate.clear();
2952
2953 // If the best candidate is a guard, turn it into a branch.
2954 if (isGuard(BestUnswitchTI))
2955 BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
2956 ExitBlocks, DT, LI, MSSAU);
2957
2958   LLVM_DEBUG(dbgs() << "  Unswitching non-trivial (cost = "
2959                     << BestUnswitchCost << ") terminator: " << *BestUnswitchTI
2960                     << "\n");
2961 unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
2962 ExitBlocks, PartialIVInfo, DT, LI, AC,
2963 UnswitchCB, SE, MSSAU);
2964 return true;
2965}
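
Note on the flagged dereference: the analyzer's path through unswitchBestCondition assumes MSSAU is null at step 7 (so PartialIVInfo keeps its default-constructed null KnownValue), yet assumes InstToDuplicate is non-empty at step 52, and therefore reports a null dereference of PartialIVInfo.KnownValue at line 2876. Whether that path is feasible depends on an invariant the analyzer cannot see; a defensive rewrite of the flagged branch would look like the sketch below (a hypothetical hardening against the code shown above, not the upstream fix):

// Hypothetical hardening of the branch flagged at line 2876: only consult
// KnownValue when it was actually populated alongside InstToDuplicate.
} else if (!PartialIVInfo.InstToDuplicate.empty() && PartialIVInfo.KnownValue) {
  if (PartialIVInfo.KnownValue->isOneValue() && SuccBB == BI.getSuccessor(1))
    continue;
  if (!PartialIVInfo.KnownValue->isOneValue() && SuccBB == BI.getSuccessor(0))
    continue;
}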
2966
2967/// Unswitch control flow predicated on loop invariant conditions.
2968///
2969/// This first hoists all branches or switches which are trivial (IE, do not
2970/// require duplicating any part of the loop) out of the loop body. It then
2971/// looks at other loop invariant control flows and tries to unswitch those as
2972/// well by cloning the loop if the result is small enough.
2973///
2974/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
2975/// also updated based on the unswitch. The `MSSA` analysis is also updated if
2976/// valid (i.e. its use is enabled).
2977///
2978/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
2979/// true, we will attempt to do non-trivial unswitching as well as trivial
2980/// unswitching.
2981///
2982/// The `UnswitchCB` callback provided will be run after unswitching is
2983/// complete, with the first parameter set to `true` if the provided loop
2984/// remains a loop, the second set to `true` if it was unswitched on a
2985/// partially invariant condition, and a list of new sibling loops created.
2985///
2986/// If `SE` is non-null, we will update that analysis based on the unswitching
2987/// done.
2988static bool
2989unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2990 AAResults &AA, TargetTransformInfo &TTI, bool NonTrivial,
2991 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2992 ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
2993   assert(L.isRecursivelyLCSSAForm(DT, LI) &&
2994          "Loops must be in LCSSA form before unswitching.");
2995
2996 // Must be in loop simplified form: we need a preheader and dedicated exits.
2997 if (!L.isLoopSimplifyForm())
2998 return false;
2999
3000  // Try trivial unswitching first, before looping over the other basic blocks in the loop.
3001 if (unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3002 // If we unswitched successfully we will want to clean up the loop before
3003 // processing it further so just mark it as unswitched and return.
3004 UnswitchCB(/*CurrentLoopValid*/ true, false, {});
3005 return true;
3006 }
3007
3008 // Check whether we should continue with non-trivial conditions.
3009 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3010 // unswitching for testing and debugging.
3011 // NonTrivial: Parameter that enables non-trivial unswitching for this
3012 // invocation of the transform. But this should be allowed only
3013 // for targets without branch divergence.
3014 //
3015 // FIXME: If divergence analysis becomes available to a loop
3016 // transform, we should allow unswitching for non-trivial uniform
3017 // branches even on targets that have divergence.
3018 // https://bugs.llvm.org/show_bug.cgi?id=48819
3019 bool ContinueWithNonTrivial =
3020 EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
3021 if (!ContinueWithNonTrivial)
3022 return false;
3023
3024 // Skip non-trivial unswitching for optsize functions.
3025 if (L.getHeader()->getParent()->hasOptSize())
3026 return false;
3027
3028 // Skip non-trivial unswitching for loops that cannot be cloned.
3029 if (!L.isSafeToClone())
3030 return false;
3031
3032 // For non-trivial unswitching, because it often creates new loops, we rely on
3033 // the pass manager to iterate on the loops rather than trying to immediately
3034 // reach a fixed point. There is no substantial advantage to iterating
3035 // internally, and if any of the new loops are simplified enough to contain
3036 // trivial unswitching we want to prefer those.
3037
3038  // Try to unswitch the best invariant condition. We prefer a full unswitch
3039  // to a partial unswitch when both are possible below the threshold.
3040 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU))
3041 return true;
3042
3043 // No other opportunities to unswitch.
3044 return false;
3045}
3046
3047PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
3048 LoopStandardAnalysisResults &AR,
3049 LPMUpdater &U) {
3050 Function &F = *L.getHeader()->getParent();
3051 (void)F;
3052
3053   LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3054                     << "\n");
3055
3056 // Save the current loop name in a variable so that we can report it even
3057 // after it has been deleted.
3058 std::string LoopName = std::string(L.getName());
3059
3060 auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
3061 bool PartiallyInvariant,
3062 ArrayRef<Loop *> NewLoops) {
3063 // If we did a non-trivial unswitch, we have added new (cloned) loops.
3064 if (!NewLoops.empty())
3065 U.addSiblingLoops(NewLoops);
3066
3067 // If the current loop remains valid, we should revisit it to catch any
3068 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
3069 if (CurrentLoopValid) {
3070 if (PartiallyInvariant) {
3071 // Mark the new loop as partially unswitched, to avoid unswitching on
3072 // the same condition again.
3073 auto &Context = L.getHeader()->getContext();
3074 MDNode *DisableUnswitchMD = MDNode::get(
3075 Context,
3076 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
3077 MDNode *NewLoopID = makePostTransformationMetadata(
3078 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
3079 {DisableUnswitchMD});
3080 L.setLoopID(NewLoopID);
3081 } else
3082 U.revisitCurrentLoop();
3083 } else
3084 U.markLoopAsDeleted(L, LoopName);
3085 };
3086
3087 Optional<MemorySSAUpdater> MSSAU;
3088 if (AR.MSSA) {
3089 MSSAU = MemorySSAUpdater(AR.MSSA);
3090 if (VerifyMemorySSA)
3091 AR.MSSA->verifyMemorySSA();
3092 }
3093 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, NonTrivial,
3094 UnswitchCB, &AR.SE,
3095 MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
3096 return PreservedAnalyses::all();
3097
3098 if (AR.MSSA && VerifyMemorySSA)
3099 AR.MSSA->verifyMemorySSA();
3100
3101 // Historically this pass has had issues with the dominator tree so verify it
3102 // in asserts builds.
3103   assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
3104
3105 auto PA = getLoopPassPreservedAnalyses();
3106 if (AR.MSSA)
3107 PA.preserve<MemorySSAAnalysis>();
3108 return PA;
3109}
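
For orientation, here is a minimal sketch of driving this new-PM pass over a single function. The registration boilerplate follows the standard PassBuilder pattern; enabling MemorySSA in the adaptor is what makes the partial-unswitching path (which needs MSSAU) reachable. Header paths and constructor arguments are assumptions based on the code above.

#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
using namespace llvm;

// Sketch: run SimpleLoopUnswitchPass on one function with MemorySSA.
void runSimpleLoopUnswitch(Function &F) {
  PassBuilder PB;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  FunctionPassManager FPM;
  FPM.addPass(createFunctionToLoopPassAdaptor(
      SimpleLoopUnswitchPass(/*NonTrivial=*/true), /*UseMemorySSA=*/true));
  FPM.run(F, FAM);
}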
3110
3111namespace {
3112
3113class SimpleLoopUnswitchLegacyPass : public LoopPass {
3114 bool NonTrivial;
3115
3116public:
3117 static char ID; // Pass ID, replacement for typeid
3118
3119 explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
3120 : LoopPass(ID), NonTrivial(NonTrivial) {
3121 initializeSimpleLoopUnswitchLegacyPassPass(
3122 *PassRegistry::getPassRegistry());
3123 }
3124
3125 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
3126
3127 void getAnalysisUsage(AnalysisUsage &AU) const override {
3128 AU.addRequired<AssumptionCacheTracker>();
3129 AU.addRequired<TargetTransformInfoWrapperPass>();
3130 if (EnableMSSALoopDependency) {
3131 AU.addRequired<MemorySSAWrapperPass>();
3132 AU.addPreserved<MemorySSAWrapperPass>();
3133 }
3134 getLoopAnalysisUsage(AU);
3135 }
3136};
3137
3138} // end anonymous namespace
3139
3140bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
3141 if (skipLoop(L))
3142 return false;
3143
3144 Function &F = *L->getHeader()->getParent();
3145
3146   LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
3147                     << "\n");
3148
3149 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3150 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3151 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3152 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3153 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3154 MemorySSA *MSSA = nullptr;
3155 Optional<MemorySSAUpdater> MSSAU;
3156 if (EnableMSSALoopDependency) {
3157 MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3158 MSSAU = MemorySSAUpdater(MSSA);
3159 }
3160
3161 auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3162 auto *SE = SEWP ? &SEWP->getSE() : nullptr;
3163
3164 auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
3165 ArrayRef<Loop *> NewLoops) {
3166 // If we did a non-trivial unswitch, we have added new (cloned) loops.
3167 for (auto *NewL : NewLoops)
3168 LPM.addLoop(*NewL);
3169
3170 // If the current loop remains valid, re-add it to the queue. This is
3171 // a little wasteful as we'll finish processing the current loop as well,
3172 // but it is the best we can do in the old PM.
3173 if (CurrentLoopValid) {
3174 // If the current loop has been unswitched using a partially invariant
3175 // condition, we should not re-add the current loop to avoid unswitching
3176 // on the same condition again.
3177 if (!PartiallyInvariant)
3178 LPM.addLoop(*L);
3179 } else
3180 LPM.markLoopAsDeleted(*L);
3181 };
3182
3183 if (MSSA && VerifyMemorySSA)
3184 MSSA->verifyMemorySSA();
3185
3186 bool Changed =
3187 unswitchLoop(*L, DT, LI, AC, AA, TTI, NonTrivial, UnswitchCB, SE,
3188 MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
3189
3190 if (MSSA && VerifyMemorySSA)
3191 MSSA->verifyMemorySSA();
3192
3193 // Historically this pass has had issues with the dominator tree so verify it
3194 // in asserts builds.
3195   assert(DT.verify(DominatorTree::VerificationLevel::Fast));
3196
3197 return Changed;
3198}
3199
3200char SimpleLoopUnswitchLegacyPass::ID = 0;
3201INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3202                      "Simple unswitch loops", false, false)
3203INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
3204INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
3205INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
3206INITIALIZE_PASS_DEPENDENCY(LoopPass)
3207INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
3208INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
3209INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
3210                    "Simple unswitch loops", false, false)
3211
3212Pass *llvm::createSimpleLoopUnswitchLegacyPass(bool NonTrivial) {
3213 return new SimpleLoopUnswitchLegacyPass(NonTrivial);
3214}
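
A minimal sketch of scheduling the legacy pass via the factory above; the header path is an assumption, and the required analyses (DomTree, LoopInfo, MemorySSA, ...) are pulled in automatically through getAnalysisUsage.

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" // assumed location

using namespace llvm;

// Add the legacy pass with non-trivial unswitching enabled.
void addUnswitchToLegacyPipeline(legacy::PassManager &PM) {
  PM.add(createSimpleLoopUnswitchLegacyPass(/*NonTrivial=*/true));
}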

/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Transforms/Utils/LoopUtils.h

1//===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines some loop transformation utilities.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
14#define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
15
16#include "llvm/ADT/StringRef.h"
17#include "llvm/Analysis/IVDescriptors.h"
18#include "llvm/Analysis/TargetTransformInfo.h"
19#include "llvm/Transforms/Utils/ValueMapper.h"
20
21namespace llvm {
22
23template <typename T> class DomTreeNodeBase;
24using DomTreeNode = DomTreeNodeBase<BasicBlock>;
25class AAResults;
26class AliasSet;
27class AliasSetTracker;
28class BasicBlock;
29class BlockFrequencyInfo;
30class ICFLoopSafetyInfo;
31class IRBuilderBase;
32class Loop;
33class LoopInfo;
34class MemoryAccess;
35class MemorySSA;
36class MemorySSAUpdater;
37class OptimizationRemarkEmitter;
38class PredIteratorCache;
39class ScalarEvolution;
40class ScalarEvolutionExpander;
41class SCEV;
42class SCEVExpander;
43class TargetLibraryInfo;
44class LPPassManager;
45class Instruction;
46struct RuntimeCheckingPtrGroup;
47typedef std::pair<const RuntimeCheckingPtrGroup *,
48 const RuntimeCheckingPtrGroup *>
49 RuntimePointerCheck;
50
51template <typename T> class Optional;
52template <typename T, unsigned N> class SmallSetVector;
53template <typename T, unsigned N> class SmallVector;
54template <typename T> class SmallVectorImpl;
55template <typename T, unsigned N> class SmallPriorityWorklist;
56
57BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
58 MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
59
60/// Ensure that all exit blocks of the loop are dedicated exits.
61///
62/// For any loop exit block with non-loop predecessors, we split the loop
63/// predecessors to use a dedicated loop exit block. We update the dominator
64/// tree and loop info if provided, and will preserve LCSSA if requested.
65bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
66 MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
67
68/// Ensures LCSSA form for every instruction from the Worklist in the scope of
69/// the innermost containing loop.
70///
71/// For a given instruction which has uses outside of the loop, an LCSSA PHI
72/// node is inserted and the uses outside the loop are rewritten to use this
73/// node.
74///
75/// LoopInfo and DominatorTree are required and, since the routine makes no
76/// changes to CFG, preserved.
77///
78/// Returns true if any modifications are made.
79///
80/// This function may introduce unused PHI nodes. If \p PHIsToRemove is not
81/// nullptr, those are added to it (before removing, the caller has to check if
82/// they still do not have any uses). Otherwise the PHIs are directly removed.
83bool formLCSSAForInstructions(
84 SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT,
85 const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder,
86 SmallVectorImpl<PHINode *> *PHIsToRemove = nullptr);
87
88/// Put loop into LCSSA form.
89///
90/// Looks at all instructions in the loop which have uses outside of the
91/// current loop. For each, an LCSSA PHI node is inserted and the uses outside
92/// the loop are rewritten to use this node. Sub-loops must be in LCSSA form
93/// already.
94///
95/// LoopInfo and DominatorTree are required and preserved.
96///
97/// If ScalarEvolution is passed in, it will be preserved.
98///
99/// Returns true if any modifications are made to the loop.
100bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
101 ScalarEvolution *SE);
102
103/// Put a loop nest into LCSSA form.
104///
105/// This recursively forms LCSSA for a loop nest.
106///
107/// LoopInfo and DominatorTree are required and preserved.
108///
109/// If ScalarEvolution is passed in, it will be preserved.
110///
111/// Returns true if any modifications are made to the loop.
112bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
113 ScalarEvolution *SE);
114
115/// Flags controlling how much is checked when sinking or hoisting
116/// instructions. The number of memory access in the loop (and whether there
117/// are too many) is determined in the constructors when using MemorySSA.
118class SinkAndHoistLICMFlags {
119public:
120 // Explicitly set limits.
121 SinkAndHoistLICMFlags(unsigned LicmMssaOptCap,
122 unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
123 Loop *L = nullptr, MemorySSA *MSSA = nullptr);
124 // Use default limits.
125 SinkAndHoistLICMFlags(bool IsSink, Loop *L = nullptr,
126 MemorySSA *MSSA = nullptr);
127
128 void setIsSink(bool B) { IsSink = B; }
129 bool getIsSink() { return IsSink; }
130 bool tooManyMemoryAccesses() { return NoOfMemAccTooLarge; }
131 bool tooManyClobberingCalls() { return LicmMssaOptCounter >= LicmMssaOptCap; }
132 void incrementClobberingCalls() { ++LicmMssaOptCounter; }
133
134protected:
135 bool NoOfMemAccTooLarge = false;
136 unsigned LicmMssaOptCounter = 0;
137 unsigned LicmMssaOptCap;
138 unsigned LicmMssaNoAccForPromotionCap;
139 bool IsSink;
140};
141
142/// Walk the specified region of the CFG (defined by all blocks
143/// dominated by the specified block, and that are in the current loop) in
144/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
145/// uses before definitions, allowing us to sink a loop body in one pass without
146/// iteration. Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
147/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
148/// instructions of the loop and loop safety information as
149/// arguments. Diagnostics are emitted via \p ORE. It returns the changed status.
150bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
151 BlockFrequencyInfo *, TargetLibraryInfo *,
152 TargetTransformInfo *, Loop *, AliasSetTracker *,
153 MemorySSAUpdater *, ICFLoopSafetyInfo *,
154 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
155
156/// Walk the specified region of the CFG (defined by all blocks
157/// dominated by the specified block, and that are in the current loop) in depth
158/// first order w.r.t the DominatorTree. This allows us to visit definitions
159/// before uses, allowing us to hoist a loop body in one pass without iteration.
160/// Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
161/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
162/// instructions of the loop and loop safety information as arguments.
163/// Diagnostics is emitted via \p ORE. It returns changed status.
164bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
165 BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
166 AliasSetTracker *, MemorySSAUpdater *, ScalarEvolution *,
167 ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
168 OptimizationRemarkEmitter *);
169
170/// This function deletes dead loops. The caller of this function needs to
171/// guarantee that the loop is in fact dead.
172/// The function requires a number of prerequisites to be present:
173/// - The loop needs to be in LCSSA form
174/// - The loop needs to have a Preheader
175/// - A unique dedicated exit block must exist
176///
177/// This also updates the relevant analysis information in \p DT, \p SE, \p LI
178/// and \p MSSA if pointers to those are provided.
179/// It also updates the loop PM if an updater struct is provided.
180
181void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
182 LoopInfo *LI, MemorySSA *MSSA = nullptr);
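A sketch of a caller honoring the prerequisites listed above before invoking deleteDeadLoop; the deadness proof itself is out of scope here and is represented by a flag, so everything below is illustrative only.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;

// Delete L only once the documented preconditions hold; `ProvedDead` stands
// in for whatever analysis established that the loop is dead.
void removeLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
                      LoopInfo &LI, MemorySSA *MSSA, bool ProvedDead) {
  if (!ProvedDead || !L->isLCSSAForm(DT) || !L->getLoopPreheader() ||
      !L->hasDedicatedExits())
    return;
  deleteDeadLoop(L, &DT, &SE, &LI, MSSA);
}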
183
184/// Remove the backedge of the specified loop. Handles loop nests and general
185/// loop structures subject to the precondition that the loop has no parent
186/// loop and has a single latch block. Preserves all listed analyses.
187void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
188 LoopInfo &LI, MemorySSA *MSSA);
189
190/// Try to promote memory values to scalars by sinking stores out of
191/// the loop and moving loads to before the loop. We do this by looping over
192/// the stores in the loop, looking for stores to Must pointers which are
193/// loop invariant. It takes a set of must-alias values, Loop exit blocks
194/// vector, loop exit blocks insertion point vector, PredIteratorCache,
195/// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
196/// of the loop and loop safety information as arguments.
197/// Diagnostics are emitted via \p ORE. It returns the changed status.
198bool promoteLoopAccessesToScalars(
199 const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
200 SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
201 PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
202 Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
203 OptimizationRemarkEmitter *);
204
205/// Does a BFS from a given node to all of its children inside a given loop.
206/// The returned vector of nodes includes the starting point.
207SmallVector<DomTreeNode *, 16> collectChildrenInLoop(DomTreeNode *N,
208 const Loop *CurLoop);
209
210/// Returns the instructions that use values defined in the loop.
211SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L);
212
213/// Find a combination of metadata ("llvm.loop.vectorize.width" and
214/// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct a
215/// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found
216/// then None is returned.
217Optional<ElementCount>
218getOptionalElementCountLoopAttribute(const Loop *TheLoop);
219
220/// Create a new loop identifier for a loop created from a loop transformation.
221///
222/// @param OrigLoopID The loop ID of the loop before the transformation.
223/// @param FollowupAttrs List of attribute names that contain attributes to be
224/// added to the new loop ID.
225/// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited
226/// from the original loop. The following values
227/// are considered:
228/// nullptr : Inherit all attributes from @p OrigLoopID.
229/// "" : Do not inherit any attribute from @p OrigLoopID; only use
230/// those specified by a followup attribute.
231/// "<prefix>": Inherit all attributes except those which start with
232/// <prefix>; commonly used to remove metadata for the
233/// applied transformation.
234/// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return
235/// None.
236///
237/// @return The loop ID for the after-transformation loop. The following values
238/// can be returned:
239/// None : No followup attribute was found; it is up to the
240/// transformation to choose attributes that make sense.
241/// @p OrigLoopID: The original identifier can be reused.
242/// nullptr : The new loop has no attributes.
243/// MDNode* : A new unique loop identifier.
244Optional<MDNode *>
245makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef<StringRef> FollowupAttrs,
246 const char *InheritOptionsAttrsPrefix = "",
247 bool AlwaysNew = false);
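
A sketch of how a transformation might consume this API; the unroller's followup attribute name is used purely for illustration, and the call shape follows the declaration above.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;

// Attach a followup loop ID to a freshly transformed loop, inheriting all
// attributes except the transformation's own "llvm.loop.unroll." ones.
void applyFollowupID(Loop &NewLoop, MDNode *OrigLoopID) {
  Optional<MDNode *> NewID = makeFollowupLoopID(
      OrigLoopID, {"llvm.loop.unroll.followup_unrolled"},
      /*InheritOptionsAttrsPrefix=*/"llvm.loop.unroll.");
  if (NewID.hasValue() && NewID.getValue())
    NewLoop.setLoopID(NewID.getValue());
}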
248
249/// Look for the loop attribute that disables all transformation heuristic.
250bool hasDisableAllTransformsHint(const Loop *L);
251
252/// Look for the loop attribute that disables the LICM transformation heuristics.
253bool hasDisableLICMTransformsHint(const Loop *L);
254
255/// The mode sets how eager a transformation should be applied.
256enum TransformationMode {
257 /// The pass can use heuristics to determine whether a transformation should
258 /// be applied.
259 TM_Unspecified,
260
261 /// The transformation should be applied without considering a cost model.
262 TM_Enable,
263
264 /// The transformation should not be applied.
265 TM_Disable,
266
267 /// Force is a flag and should not be used alone.
268 TM_Force = 0x04,
269
270 /// The transformation was directed by the user, e.g. by a #pragma in
271 /// the source code. If the transformation could not be applied, a
272 /// warning should be emitted.
273 TM_ForcedByUser = TM_Enable | TM_Force,
274
275 /// The transformation must not be applied. For instance, `#pragma clang loop
276 /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike
277 /// general loop metadata, it must not be dropped. Most passes should not
278 /// behave differently under TM_Disable and TM_SuppressedByUser.
279 TM_SuppressedByUser = TM_Disable | TM_Force
280};
281
282/// @{
283/// Get the mode for LLVM's supported loop transformations.
284TransformationMode hasUnrollTransformation(const Loop *L);
285TransformationMode hasUnrollAndJamTransformation(const Loop *L);
286TransformationMode hasVectorizeTransformation(const Loop *L);
287TransformationMode hasDistributeTransformation(const Loop *L);
288TransformationMode hasLICMVersioningTransformation(const Loop *L);
289/// @}
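
A sketch of the intended consumption pattern for these modes, gating a cost model behind the user's explicit directives; `shouldUnroll` and its cost-model flag are hypothetical names for illustration.

#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;

// Honor pragma-driven modes before falling back to heuristics.
bool shouldUnroll(const Loop *L, bool ProfitableByCostModel) {
  switch (hasUnrollTransformation(L)) {
  case TM_SuppressedByUser:
  case TM_Disable:
    return false; // explicitly forbidden
  case TM_ForcedByUser:
  case TM_Enable:
    return true; // apply without consulting the cost model
  default:
    return ProfitableByCostModel; // TM_Unspecified: heuristics decide
  }
}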
290
291/// Set the input string into loop metadata while keeping other values intact.
292/// If the string is already in the loop metadata, update its value if it is
293/// different.
294void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
295 unsigned V = 0);
296
297/// Returns a loop's estimated trip count based on branch weight metadata.
298/// In addition if \p EstimatedLoopInvocationWeight is not null it is
299/// initialized with the weight of the loop's latch leading to the exit.
300/// Returns 0 when the count is estimated to be 0, or None when a meaningful
301/// estimate cannot be made.
302Optional<unsigned>
303getLoopEstimatedTripCount(Loop *L,
304 unsigned *EstimatedLoopInvocationWeight = nullptr);
305
306/// Set a loop's branch weight metadata to reflect that the loop has \p
307/// EstimatedTripCount iterations and \p EstimatedLoopInvocationWeight exits
308/// through the latch. Returns true if the metadata is successfully updated,
309/// false otherwise. Note that the loop must have a latch block which controls
310/// the loop exit in order to succeed.
311bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
312 unsigned EstimatedLoopInvocationWeight);
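
A sketch pairing the two trip-count helpers above, assuming nothing beyond their declarations; the halving transformation is a hypothetical scenario.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;

// After a transformation that halves the iteration count, rewrite the
// estimate while preserving the latch-exit weight.
void halveEstimatedTripCount(Loop *L) {
  unsigned InvocationWeight = 0;
  if (Optional<unsigned> TC = getLoopEstimatedTripCount(L, &InvocationWeight))
    setLoopEstimatedTripCount(L, *TC / 2, InvocationWeight);
}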
313
314/// Check that the inner loop (L) backedge count is known to be invariant on all
315/// iterations of its outer loop. If the loop has no parent, this is trivially
316/// true.
317bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE);
318
319/// Helper to consistently add the set of standard passes to a loop pass's \c
320/// AnalysisUsage.
321///
322/// All loop passes should call this as part of implementing their \c
323/// getAnalysisUsage.
324void getLoopAnalysisUsage(AnalysisUsage &AU);
325
326/// Returns true if it is legal to hoist or sink this instruction disregarding the
327/// possible introduction of faults. Reasoning about potential faulting
328/// instructions is the responsibility of the caller since it is challenging to
329/// do efficiently from within this routine.
330/// \p TargetExecutesOncePerLoop is true only when it is guaranteed that the
331/// target executes at most once per execution of the loop body. This is used
332/// to assess the legality of duplicating atomic loads. Generally, this is
333/// true when moving out of loop and not true when moving into loops.
334/// If \p ORE is set use it to emit optimization remarks.
335bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
336 Loop *CurLoop, AliasSetTracker *CurAST,
337 MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
338 SinkAndHoistLICMFlags *LICMFlags = nullptr,
339 OptimizationRemarkEmitter *ORE = nullptr);
340
341/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
342/// The Builder's fast-math-flags must be set to propagate the expected values.
343Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
344 Value *Right);
345
346/// Generates an ordered vector reduction using extracts to reduce the value.
347Value *getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
348 unsigned Op, RecurKind MinMaxKind = RecurKind::None,
349 ArrayRef<Value *> RedOps = None);
350
351/// Generates a vector reduction using shufflevectors to reduce the value.
352/// Fast-math-flags are propagated using the IRBuilder's setting.
353Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
354 RecurKind MinMaxKind = RecurKind::None,
355 ArrayRef<Value *> RedOps = None);
356
357/// Create a target reduction of the given vector. The reduction operation
358/// is described by the \p Opcode parameter. min/max reductions require
359/// additional information supplied in \p RdxKind.
360/// The target is queried to determine if intrinsics or shuffle sequences are
361/// required to implement the reduction.
362/// Fast-math-flags are propagated using the IRBuilder's setting.
363Value *createSimpleTargetReduction(IRBuilderBase &B,
364 const TargetTransformInfo *TTI, Value *Src,
365 RecurKind RdxKind,
366 ArrayRef<Value *> RedOps = None);
367
368/// Create a generic target reduction using a recurrence descriptor \p Desc
369/// The target is queried to determine if intrinsics or shuffle sequences are
370/// required to implement the reduction.
371/// Fast-math-flags are propagated using the RecurrenceDescriptor.
372Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
373 const RecurrenceDescriptor &Desc, Value *Src);
374
375/// Create an ordered reduction intrinsic using the given recurrence
376/// descriptor \p Desc.
377Value *createOrderedReduction(IRBuilderBase &B,
378 const RecurrenceDescriptor &Desc, Value *Src,
379 Value *Start);
380
381/// Get the intersection (logical and) of all of the potential IR flags
382/// of each scalar operation (VL) that will be converted into a vector (I).
383/// If OpValue is non-null, we only consider operations similar to OpValue
384/// when intersecting.
385/// Flag set: NSW, NUW, exact, and all of fast-math.
386void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr);
387
388/// Returns true if we can prove that \p S is defined and always negative in
389/// loop \p L.
390bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE);
391
392/// Returns true if we can prove that \p S is defined and always non-negative in
393/// loop \p L.
394bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
395 ScalarEvolution &SE);
396
397/// Returns true if \p S is defined and is never equal to the signed/unsigned max.
398bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
399 bool Signed);
400
401/// Returns true if \p S is defined and is never equal to the signed/unsigned min.
402bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
403 bool Signed);
404
405enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
406
407/// If the final value of any expressions that are recurrent in the loop can
408/// be computed, substitute the exit values from the loop into any instructions
409/// outside of the loop that use the final values of the current expressions.
410/// Returns the number of loop exit values that have been replaced; the
411/// corresponding phi nodes are added to DeadInsts.
412int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
413 ScalarEvolution *SE, const TargetTransformInfo *TTI,
414 SCEVExpander &Rewriter, DominatorTree *DT,
415 ReplaceExitVal ReplaceExitValue,
416 SmallVector<WeakTrackingVH, 16> &DeadInsts);
417
418/// Set weights for \p UnrolledLoop and \p RemainderLoop based on the weights
419/// of \p OrigLoop and the following distribution of \p OrigLoop's iterations
420/// between \p UnrolledLoop and \p RemainderLoop: \p UnrolledLoop receives
421/// weights that reflect TC/UF iterations, and \p RemainderLoop receives
422/// weights that reflect the remaining TC%UF iterations.
423///
424/// Note that \p OrigLoop may be equal to either \p UnrolledLoop or \p
425/// RemainderLoop, in which case the weights for \p OrigLoop are updated
426/// accordingly. Note also that behavior is undefined if \p UnrolledLoop and
427/// \p RemainderLoop are equal. \p UF must be greater than zero.
428/// If \p OrigLoop has no associated profile info, nothing happens.
429///
430/// This utility may be useful for optimizations such as the unroller and the
431/// vectorizer, for which this is a typical transformation.
432void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
433 Loop *RemainderLoop, uint64_t UF);
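A worked instance of the distribution described above: with an estimated trip count TC = 10 and unroll factor UF = 4, \p UnrolledLoop receives weights reflecting 10/4 = 2 iterations per entry, and \p RemainderLoop receives weights reflecting the remaining 10%4 = 2 iterations. A hedged call sketch (loop pointers assumed valid and distinct):

  // After unrolling OrigLoop by a factor of 4 into UnrolledLoop plus an
  // epilogue RemainderLoop:
  setProfileInfoAfterUnrolling(OrigLoop, UnrolledLoop, RemainderLoop, /*UF=*/4);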
434
435/// Utility that implements appending of loops onto a worklist given a range.
436/// We want to process loops in postorder, but the worklist is a LIFO data
437/// structure, so we append to it in *reverse* postorder.
438/// For trees, a preorder traversal is a viable reverse postorder, so we
439/// actually append using a preorder walk algorithm.
440template <typename RangeT>
441void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist<Loop *, 4> &);
442/// Utility that implements appending of loops onto a worklist given a range.
443/// It has the same behavior as appendLoopsToWorklist, but assumes the range of
444/// loops has already been reversed, so it processes loops in the given order.
445template <typename RangeT>
446void appendReversedLoopsToWorklist(RangeT &&,
447 SmallPriorityWorklist<Loop *, 4> &);
448
449/// Utility that implements appending of loops onto a worklist given LoopInfo.
450/// Calls the templated utility taking a Range of loops, handing it the Loops
451/// in LoopInfo, iterated in reverse. This is because the loops are stored in
452/// RPO w.r.t. the control flow graph in LoopInfo. For the purpose of unrolling,
453/// loop deletion, and LICM, we largely want to work forward across the CFG so
454/// that we visit defs before uses and can propagate simplifications from one
455/// loop nest into the next. Calls appendReversedLoopsToWorklist with the
456/// already reversed loops in LI.
457/// FIXME: Consider changing the order in LoopInfo.
458void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &);
459
460/// Recursively clone the specified loop and all of its children,
461/// mapping the blocks with the specified map.
462Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
463 LoopInfo *LI, LPPassManager *LPM);
464
465/// Add code that checks at runtime if the accessed arrays in \p PointerChecks
466/// overlap.
467///
468/// Returns a pair of instructions where the first element is the first
469/// instruction generated (possibly the start of a sequence) and the second
470/// element is the final comparator value, or NULL if no check is needed.
471std::pair<Instruction *, Instruction *>
472addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
473 const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
474 SCEVExpander &Expander);
475
476/// Struct to hold information about a partially invariant condition.
477struct IVConditionInfo {
478 /// Instructions that need to be duplicated and checked for the unswitching
479 /// condition.
480 SmallVector<Instruction *> InstToDuplicate;
481
482 /// Constant to indicate for which value the condition is invariant.
483 Constant *KnownValue = nullptr;
    4. Null pointer value stored to 'PartialIVInfo.KnownValue'
484
485 /// True if the partially invariant path is no-op (=does not have any
486 /// side-effects and no loop value is used outside the loop).
487 bool PathIsNoop = true;
488
489 /// If the partially invariant path reaches a single exit block, ExitForPath
490 /// is set to that block. Otherwise it is nullptr.
491 BasicBlock *ExitForPath = nullptr;
492};
493
494/// Check if the loop header has a conditional branch that is not
495/// loop-invariant, because it involves load instructions. If all paths from
496/// either the true or false successor to the header or loop exits do not
497/// modify the memory feeding the condition, perform 'partial unswitching'. That
498/// is, duplicate the instructions feeding the condition in the pre-header. Then
499/// unswitch on the duplicated condition. The condition is now known in the
500/// unswitched version for the 'invariant' path through the original loop.
501///
502/// If the branch condition of the header is partially invariant, return a pair
503/// containing the instructions to duplicate and a boolean Constant to update
504/// the condition in the loops created for the true or false successors.
505Optional<IVConditionInfo> hasPartialIVCondition(Loop &L, unsigned MSSAThreshold,
506 MemorySSA &MSSA, AAResults &AA);
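The analyzer path in this report records a null pointer stored to 'PartialIVInfo.KnownValue' (step 4 above), so callers must not assume the field is set. A hedged caller-side sketch that guards the field before use:

  if (Optional<IVConditionInfo> Info =
          hasPartialIVCondition(L, MSSAThreshold, MSSA, AA)) {
    // KnownValue defaults to nullptr; check it before calling through it.
    if (Constant *KV = Info->KnownValue)
      (void)KV->getType(); // Safe: KV is known non-null here.
  }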
507
508} // end namespace llvm
509
510#endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H

/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/ADT/SmallVector.h

1//===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the SmallVector class.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_ADT_SMALLVECTOR_H
14#define LLVM_ADT_SMALLVECTOR_H
15
16#include "llvm/ADT/iterator_range.h"
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/ErrorHandling.h"
19#include "llvm/Support/MemAlloc.h"
20#include "llvm/Support/type_traits.h"
21#include <algorithm>
22#include <cassert>
23#include <cstddef>
24#include <cstdlib>
25#include <cstring>
26#include <initializer_list>
27#include <iterator>
28#include <limits>
29#include <memory>
30#include <new>
31#include <type_traits>
32#include <utility>
33
34namespace llvm {
35
36/// This is all the stuff common to all SmallVectors.
37///
38/// The template parameter specifies the type which should be used to hold the
39/// Size and Capacity of the SmallVector, so it can be adjusted.
40/// Using 32 bit size is desirable to shrink the size of the SmallVector.
41/// Using 64 bit size is desirable for cases like SmallVector<char>, where a
42/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
43/// buffering bitcode output - which can exceed 4GB.
44template <class Size_T> class SmallVectorBase {
45protected:
46 void *BeginX;
47 Size_T Size = 0, Capacity;
48
49 /// The maximum value of the Size_T used.
50 static constexpr size_t SizeTypeMax() {
51 return std::numeric_limits<Size_T>::max();
52 }
53
54 SmallVectorBase() = delete;
55 SmallVectorBase(void *FirstEl, size_t TotalCapacity)
56 : BeginX(FirstEl), Capacity(TotalCapacity) {}
57
58 /// This is a helper for \a grow() that's out of line to reduce code
59 /// duplication. This function will report a fatal error if it can't grow at
60 /// least to \p MinSize.
61 void *mallocForGrow(size_t MinSize, size_t TSize, size_t &NewCapacity);
62
63 /// This is an implementation of the grow() method which only works
64 /// on POD-like data types and is out of line to reduce code duplication.
65 /// This function will report a fatal error if it cannot increase capacity.
66 void grow_pod(void *FirstEl, size_t MinSize, size_t TSize);
67
68public:
69 size_t size() const { return Size; }
70 size_t capacity() const { return Capacity; }
71
72  LLVM_NODISCARD bool empty() const { return !Size; }
    10. Assuming field 'Size' is not equal to 0
    11. Returning zero, which participates in a condition later
    53. Assuming field 'Size' is not equal to 0, which participates in a condition later
    54. Returning zero, which participates in a condition later
73
74 /// Set the array size to \p N, which the current array must have enough
75 /// capacity for.
76 ///
77 /// This does not construct or destroy any elements in the vector.
78 ///
79 /// Clients can use this in conjunction with capacity() to write past the end
80 /// of the buffer when they know that more elements are available, and only
81 /// update the size later. This avoids the cost of value initializing elements
82 /// which will only be overwritten.
83 void set_size(size_t N) {
84    assert(N <= capacity());
85 Size = N;
86 }
87};
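A hedged sketch of the write-past-end pattern the set_size() comment describes (fillRaw is a hypothetical producer that writes directly into the buffer and returns the number of elements written):

  llvm::SmallVector<char, 64> Buf;
  Buf.reserve(4096); // guarantee capacity up front
  size_t N = fillRaw(Buf.data(), Buf.capacity()); // writes <= capacity() chars
  Buf.set_size(N); // record the size without value-initializing the bytes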
88
89template <class T>
90using SmallVectorSizeType =
91 typename std::conditional<sizeof(T) < 4 && sizeof(void *) >= 8, uint64_t,
92 uint32_t>::type;
93
94/// Figure out the offset of the first element.
95template <class T, typename = void> struct SmallVectorAlignmentAndSize {
96 alignas(SmallVectorBase<SmallVectorSizeType<T>>) char Base[sizeof(
97 SmallVectorBase<SmallVectorSizeType<T>>)];
98 alignas(T) char FirstEl[sizeof(T)];
99};
100
101/// This is the part of SmallVectorTemplateBase which does not depend on whether
102/// the type T is a POD. The extra dummy template argument is used by ArrayRef
103/// to avoid unnecessarily requiring T to be complete.
104template <typename T, typename = void>
105class SmallVectorTemplateCommon
106 : public SmallVectorBase<SmallVectorSizeType<T>> {
107 using Base = SmallVectorBase<SmallVectorSizeType<T>>;
108
109 /// Find the address of the first element. For this pointer math to be valid
110 /// with small-size of 0 for T with lots of alignment, it's important that
111 /// SmallVectorStorage is properly-aligned even for small-size of 0.
112 void *getFirstEl() const {
113 return const_cast<void *>(reinterpret_cast<const void *>(
114 reinterpret_cast<const char *>(this) +
115        offsetof(SmallVectorAlignmentAndSize<T>, FirstEl)));
116 }
117 // Space after 'FirstEl' is clobbered, do not add any instance vars after it.
118
119protected:
120 SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {}
121
122 void grow_pod(size_t MinSize, size_t TSize) {
123 Base::grow_pod(getFirstEl(), MinSize, TSize);
124 }
125
126 /// Return true if this is a smallvector which has not had dynamic
127 /// memory allocated for it.
128 bool isSmall() const { return this->BeginX == getFirstEl(); }
129
130 /// Put this vector in a state of being small.
131 void resetToSmall() {
132 this->BeginX = getFirstEl();
133 this->Size = this->Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
134 }
135
136 /// Return true if V is an internal reference to the given range.
137 bool isReferenceToRange(const void *V, const void *First, const void *Last) const {
138 // Use std::less to avoid UB.
139 std::less<> LessThan;
140 return !LessThan(V, First) && LessThan(V, Last);
141 }
142
143 /// Return true if V is an internal reference to this vector.
144 bool isReferenceToStorage(const void *V) const {
145 return isReferenceToRange(V, this->begin(), this->end());
146 }
147
148 /// Return true if First and Last form a valid (possibly empty) range in this
149 /// vector's storage.
150 bool isRangeInStorage(const void *First, const void *Last) const {
151 // Use std::less to avoid UB.
152 std::less<> LessThan;
153 return !LessThan(First, this->begin()) && !LessThan(Last, First) &&
154 !LessThan(this->end(), Last);
155 }
156
157 /// Return true unless Elt will be invalidated by resizing the vector to
158 /// NewSize.
159 bool isSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
160 // Past the end.
161    if (LLVM_LIKELY(!isReferenceToStorage(Elt)))
162 return true;
163
164 // Return false if Elt will be destroyed by shrinking.
165 if (NewSize <= this->size())
166 return Elt < this->begin() + NewSize;
167
168 // Return false if we need to grow.
169 return NewSize <= this->capacity();
170 }
171
172 /// Check whether Elt will be invalidated by resizing the vector to NewSize.
173 void assertSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
174    assert(isSafeToReferenceAfterResize(Elt, NewSize) &&
175           "Attempting to reference an element of the vector in an operation "
176           "that invalidates it");
177 }
178
179 /// Check whether Elt will be invalidated by increasing the size of the
180 /// vector by N.
181 void assertSafeToAdd(const void *Elt, size_t N = 1) {
182 this->assertSafeToReferenceAfterResize(Elt, this->size() + N);
183 }
184
185 /// Check whether any part of the range will be invalidated by clearing.
186 void assertSafeToReferenceAfterClear(const T *From, const T *To) {
187 if (From == To)
188 return;
189 this->assertSafeToReferenceAfterResize(From, 0);
190 this->assertSafeToReferenceAfterResize(To - 1, 0);
191 }
192 template <
193 class ItTy,
194 std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
195 bool> = false>
196 void assertSafeToReferenceAfterClear(ItTy, ItTy) {}
197
198 /// Check whether any part of the range will be invalidated by growing.
199 void assertSafeToAddRange(const T *From, const T *To) {
200 if (From == To)
201 return;
202 this->assertSafeToAdd(From, To - From);
203 this->assertSafeToAdd(To - 1, To - From);
204 }
205 template <
206 class ItTy,
207 std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
208 bool> = false>
209 void assertSafeToAddRange(ItTy, ItTy) {}
210
211 /// Reserve enough space to add one element, and return the updated element
212 /// pointer in case it was a reference to the storage.
213 template <class U>
214 static const T *reserveForParamAndGetAddressImpl(U *This, const T &Elt,
215 size_t N) {
216 size_t NewSize = This->size() + N;
217    if (LLVM_LIKELY(NewSize <= This->capacity()))
218 return &Elt;
219
220 bool ReferencesStorage = false;
221 int64_t Index = -1;
222 if (!U::TakesParamByValue) {
223      if (LLVM_UNLIKELY(This->isReferenceToStorage(&Elt))) {
224 ReferencesStorage = true;
225 Index = &Elt - This->begin();
226 }
227 }
228 This->grow(NewSize);
229 return ReferencesStorage ? This->begin() + Index : &Elt;
230 }
231
232public:
233 using size_type = size_t;
234 using difference_type = ptrdiff_t;
235 using value_type = T;
236 using iterator = T *;
237 using const_iterator = const T *;
238
239 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
240 using reverse_iterator = std::reverse_iterator<iterator>;
241
242 using reference = T &;
243 using const_reference = const T &;
244 using pointer = T *;
245 using const_pointer = const T *;
246
247 using Base::capacity;
248 using Base::empty;
249 using Base::size;
250
251 // forward iterator creation methods.
252 iterator begin() { return (iterator)this->BeginX; }
253 const_iterator begin() const { return (const_iterator)this->BeginX; }
254 iterator end() { return begin() + size(); }
255 const_iterator end() const { return begin() + size(); }
256
257 // reverse iterator creation methods.
258 reverse_iterator rbegin() { return reverse_iterator(end()); }
259 const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); }
260 reverse_iterator rend() { return reverse_iterator(begin()); }
261 const_reverse_iterator rend() const { return const_reverse_iterator(begin());}
262
263 size_type size_in_bytes() const { return size() * sizeof(T); }
264 size_type max_size() const {
265 return std::min(this->SizeTypeMax(), size_type(-1) / sizeof(T));
266 }
267
268 size_t capacity_in_bytes() const { return capacity() * sizeof(T); }
269
270 /// Return a pointer to the vector's buffer, even if empty().
271 pointer data() { return pointer(begin()); }
272 /// Return a pointer to the vector's buffer, even if empty().
273 const_pointer data() const { return const_pointer(begin()); }
274
275 reference operator[](size_type idx) {
276    assert(idx < size());
277 return begin()[idx];
278 }
279 const_reference operator[](size_type idx) const {
280    assert(idx < size());
281 return begin()[idx];
282 }
283
284 reference front() {
285    assert(!empty());
286 return begin()[0];
287 }
288 const_reference front() const {
289    assert(!empty());
290 return begin()[0];
291 }
292
293 reference back() {
294    assert(!empty());
295 return end()[-1];
296 }
297 const_reference back() const {
298    assert(!empty());
299 return end()[-1];
300 }
301};
302
303/// SmallVectorTemplateBase<TriviallyCopyable = false> - This is where we put
304/// method implementations that are designed to work with non-trivial T's.
305///
306/// We approximate is_trivially_copyable with trivial move/copy construction and
307/// trivial destruction. While the standard doesn't specify that you're allowed
308/// to copy these types with memcpy, there is no way for the type to observe this.
309/// This catches the important case of std::pair<POD, POD>, which is not
310/// trivially assignable.
311template <typename T, bool = (is_trivially_copy_constructible<T>::value) &&
312 (is_trivially_move_constructible<T>::value) &&
313 std::is_trivially_destructible<T>::value>
314class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
315 friend class SmallVectorTemplateCommon<T>;
316
317protected:
318 static constexpr bool TakesParamByValue = false;
319 using ValueParamT = const T &;
320
321 SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
322
323 static void destroy_range(T *S, T *E) {
324 while (S != E) {
325 --E;
326 E->~T();
327 }
328 }
329
330 /// Move the range [I, E) into the uninitialized memory starting with "Dest",
331 /// constructing elements as needed.
332 template<typename It1, typename It2>
333 static void uninitialized_move(It1 I, It1 E, It2 Dest) {
334 std::uninitialized_copy(std::make_move_iterator(I),
335 std::make_move_iterator(E), Dest);
336 }
337
338 /// Copy the range [I, E) onto the uninitialized memory starting with "Dest",
339 /// constructing elements as needed.
340 template<typename It1, typename It2>
341 static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
342 std::uninitialized_copy(I, E, Dest);
343 }
344
345 /// Grow the allocated memory (without initializing new elements), doubling
346 /// the size of the allocated memory. Guarantees space for at least one more
347 /// element, or MinSize more elements if specified.
348 void grow(size_t MinSize = 0);
349
350 /// Create a new allocation big enough for \p MinSize and pass back its size
351 /// in \p NewCapacity. This is the first section of \a grow().
352 T *mallocForGrow(size_t MinSize, size_t &NewCapacity) {
353 return static_cast<T *>(
354 SmallVectorBase<SmallVectorSizeType<T>>::mallocForGrow(
355 MinSize, sizeof(T), NewCapacity));
356 }
357
358 /// Move existing elements over to the new allocation \p NewElts, the middle
359 /// section of \a grow().
360 void moveElementsForGrow(T *NewElts);
361
362 /// Transfer ownership of the allocation, finishing up \a grow().
363 void takeAllocationForGrow(T *NewElts, size_t NewCapacity);
364
365 /// Reserve enough space to add one element, and return the updated element
366 /// pointer in case it was a reference to the storage.
367 const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
368 return this->reserveForParamAndGetAddressImpl(this, Elt, N);
369 }
370
371 /// Reserve enough space to add one element, and return the updated element
372 /// pointer in case it was a reference to the storage.
373 T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
374 return const_cast<T *>(
375 this->reserveForParamAndGetAddressImpl(this, Elt, N));
376 }
377
378 static T &&forward_value_param(T &&V) { return std::move(V); }
379 static const T &forward_value_param(const T &V) { return V; }
380
381 void growAndAssign(size_t NumElts, const T &Elt) {
382 // Grow manually in case Elt is an internal reference.
383 size_t NewCapacity;
384 T *NewElts = mallocForGrow(NumElts, NewCapacity);
385 std::uninitialized_fill_n(NewElts, NumElts, Elt);
386 this->destroy_range(this->begin(), this->end());
387 takeAllocationForGrow(NewElts, NewCapacity);
388 this->set_size(NumElts);
389 }
390
391 template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
392 // Grow manually in case one of Args is an internal reference.
393 size_t NewCapacity;
394 T *NewElts = mallocForGrow(0, NewCapacity);
395 ::new ((void *)(NewElts + this->size())) T(std::forward<ArgTypes>(Args)...);
396 moveElementsForGrow(NewElts);
397 takeAllocationForGrow(NewElts, NewCapacity);
398 this->set_size(this->size() + 1);
399 return this->back();
400 }
401
402public:
403 void push_back(const T &Elt) {
404 const T *EltPtr = reserveForParamAndGetAddress(Elt);
405 ::new ((void *)this->end()) T(*EltPtr);
406 this->set_size(this->size() + 1);
407 }
408
409 void push_back(T &&Elt) {
410 T *EltPtr = reserveForParamAndGetAddress(Elt);
411 ::new ((void *)this->end()) T(::std::move(*EltPtr));
412 this->set_size(this->size() + 1);
413 }
414
415 void pop_back() {
416 this->set_size(this->size() - 1);
417 this->end()->~T();
418 }
419};
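A small example of the reference-invalidation hazard that reserveForParamAndGetAddress() mitigates for these non-trivial T's: pushing an element of the vector onto itself can trigger grow(), which would otherwise leave the argument reference dangling.

  llvm::SmallVector<std::string, 2> V = {"a", "b"}; // inline capacity of 2
  // The third push_back forces a grow while the argument refers into V's own
  // storage; EltPtr is re-derived after the allocation, so this is well-defined.
  V.push_back(V[0]);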
420
421// Define this out-of-line to dissuade the C++ compiler from inlining it.
422template <typename T, bool TriviallyCopyable>
423void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
424 size_t NewCapacity;
425 T *NewElts = mallocForGrow(MinSize, NewCapacity);
426 moveElementsForGrow(NewElts);
427 takeAllocationForGrow(NewElts, NewCapacity);
428}
429
430// Define this out-of-line to dissuade the C++ compiler from inlining it.
431template <typename T, bool TriviallyCopyable>
432void SmallVectorTemplateBase<T, TriviallyCopyable>::moveElementsForGrow(
433 T *NewElts) {
434 // Move the elements over.
435 this->uninitialized_move(this->begin(), this->end(), NewElts);
436
437 // Destroy the original elements.
438 destroy_range(this->begin(), this->end());
439}
440
441// Define this out-of-line to dissuade the C++ compiler from inlining it.
442template <typename T, bool TriviallyCopyable>
443void SmallVectorTemplateBase<T, TriviallyCopyable>::takeAllocationForGrow(
444 T *NewElts, size_t NewCapacity) {
445 // If this wasn't grown from the inline copy, deallocate the old space.
446 if (!this->isSmall())
447 free(this->begin());
448
449 this->BeginX = NewElts;
450 this->Capacity = NewCapacity;
451}
452
453/// SmallVectorTemplateBase<TriviallyCopyable = true> - This is where we put
454/// method implementations that are designed to work with trivially copyable
455/// T's. This allows using memcpy in place of copy/move construction and
456/// skipping destruction.
457template <typename T>
458class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
459 friend class SmallVectorTemplateCommon<T>;
460
461protected:
462 /// True if it's cheap enough to take parameters by value. Doing so avoids
463 /// overhead related to mitigations for reference invalidation.
464 static constexpr bool TakesParamByValue = sizeof(T) <= 2 * sizeof(void *);
465
466 /// Either const T& or T, depending on whether it's cheap enough to take
467 /// parameters by value.
468 using ValueParamT =
469 typename std::conditional<TakesParamByValue, T, const T &>::type;
470
471 SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
472
473 // No need to do a destroy loop for POD's.
474 static void destroy_range(T *, T *) {}
475
476 /// Move the range [I, E) onto the uninitialized memory
477 /// starting with "Dest", constructing elements into it as needed.
478 template<typename It1, typename It2>
479 static void uninitialized_move(It1 I, It1 E, It2 Dest) {
480 // Just do a copy.
481 uninitialized_copy(I, E, Dest);
482 }
483
484 /// Copy the range [I, E) onto the uninitialized memory
485 /// starting with "Dest", constructing elements into it as needed.
486 template<typename It1, typename It2>
487 static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
488 // Arbitrary iterator types; just use the basic implementation.
489 std::uninitialized_copy(I, E, Dest);
490 }
491
492 /// Copy the range [I, E) onto the uninitialized memory
493 /// starting with "Dest", constructing elements into it as needed.
494 template <typename T1, typename T2>
495 static void uninitialized_copy(
496 T1 *I, T1 *E, T2 *Dest,
497 std::enable_if_t<std::is_same<typename std::remove_const<T1>::type,
498 T2>::value> * = nullptr) {
499 // Use memcpy for PODs iterated by pointers (which includes SmallVector
500 // iterators): std::uninitialized_copy optimizes to memmove, but we can
501 // use memcpy here. Note that I and E are iterators and thus might be
502 // invalid for memcpy if they are equal.
503 if (I != E)
504 memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T));
505 }
506
507 /// Double the size of the allocated memory, guaranteeing space for at
508 /// least one more element or MinSize if specified.
509 void grow(size_t MinSize = 0) { this->grow_pod(MinSize, sizeof(T)); }
510
511 /// Reserve enough space to add one element, and return the updated element
512 /// pointer in case it was a reference to the storage.
513 const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
514 return this->reserveForParamAndGetAddressImpl(this, Elt, N);
515 }
516
517 /// Reserve enough space to add one element, and return the updated element
518 /// pointer in case it was a reference to the storage.
519 T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
520 return const_cast<T *>(
521 this->reserveForParamAndGetAddressImpl(this, Elt, N));
522 }
523
524 /// Copy \p V or return a reference, depending on \a ValueParamT.
525 static ValueParamT forward_value_param(ValueParamT V) { return V; }
526
527 void growAndAssign(size_t NumElts, T Elt) {
528 // Elt has been copied in case it's an internal reference, side-stepping
529 // reference invalidation problems without losing the realloc optimization.
530 this->set_size(0);
531 this->grow(NumElts);
532 std::uninitialized_fill_n(this->begin(), NumElts, Elt);
533 this->set_size(NumElts);
534 }
535
536 template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
537 // Use push_back with a copy in case Args has an internal reference,
538 // side-stepping reference invalidation problems without losing the realloc
539 // optimization.
540 push_back(T(std::forward<ArgTypes>(Args)...));
541 return this->back();
542 }
543
544public:
545 void push_back(ValueParamT Elt) {
546 const T *EltPtr = reserveForParamAndGetAddress(Elt);
547 memcpy(reinterpret_cast<void *>(this->end()), EltPtr, sizeof(T));
548 this->set_size(this->size() + 1);
549 }
550
551 void pop_back() { this->set_size(this->size() - 1); }
552};
553
554/// This class consists of common code factored out of the SmallVector class to
555/// reduce code duplication based on the SmallVector 'N' template parameter.
556template <typename T>
557class SmallVectorImpl : public SmallVectorTemplateBase<T> {
558 using SuperClass = SmallVectorTemplateBase<T>;
559
560public:
561 using iterator = typename SuperClass::iterator;
562 using const_iterator = typename SuperClass::const_iterator;
563 using reference = typename SuperClass::reference;
564 using size_type = typename SuperClass::size_type;
565
566protected:
567 using SmallVectorTemplateBase<T>::TakesParamByValue;
568 using ValueParamT = typename SuperClass::ValueParamT;
569
570 // Default ctor - Initialize to empty.
571 explicit SmallVectorImpl(unsigned N)
572 : SmallVectorTemplateBase<T>(N) {}
573
574public:
575 SmallVectorImpl(const SmallVectorImpl &) = delete;
576
577 ~SmallVectorImpl() {
578 // Subclass has already destructed this vector's elements.
579 // If this wasn't grown from the inline copy, deallocate the old space.
580 if (!this->isSmall())
581 free(this->begin());
582 }
583
584 void clear() {
585 this->destroy_range(this->begin(), this->end());
586 this->Size = 0;
587 }
588
589private:
590 template <bool ForOverwrite> void resizeImpl(size_type N) {
591 if (N < this->size()) {
592 this->pop_back_n(this->size() - N);
593 } else if (N > this->size()) {
594 this->reserve(N);
595 for (auto I = this->end(), E = this->begin() + N; I != E; ++I)
596 if (ForOverwrite)
597 new (&*I) T;
598 else
599 new (&*I) T();
600 this->set_size(N);
601 }
602 }
603
604public:
605 void resize(size_type N) { resizeImpl<false>(N); }
606
607  /// Like resize, but since \ref T is POD, the new values won't be initialized.
608 void resize_for_overwrite(size_type N) { resizeImpl<true>(N); }
609
610 void resize(size_type N, ValueParamT NV) {
611 if (N == this->size())
612 return;
613
614 if (N < this->size()) {
615 this->pop_back_n(this->size() - N);
616 return;
617 }
618
619 // N > this->size(). Defer to append.
620 this->append(N - this->size(), NV);
621 }
622
623 void reserve(size_type N) {
624 if (this->capacity() < N)
625 this->grow(N);
626 }
627
628 void pop_back_n(size_type NumItems) {
629    assert(this->size() >= NumItems);
630 this->destroy_range(this->end() - NumItems, this->end());
631 this->set_size(this->size() - NumItems);
632 }
633
634  LLVM_NODISCARD T pop_back_val() {
635 T Result = ::std::move(this->back());
636 this->pop_back();
637 return Result;
638 }
639
640 void swap(SmallVectorImpl &RHS);
641
642 /// Add the specified range to the end of the SmallVector.
643 template <typename in_iter,
644 typename = std::enable_if_t<std::is_convertible<
645 typename std::iterator_traits<in_iter>::iterator_category,
646 std::input_iterator_tag>::value>>
647 void append(in_iter in_start, in_iter in_end) {
648 this->assertSafeToAddRange(in_start, in_end);
649 size_type NumInputs = std::distance(in_start, in_end);
650 this->reserve(this->size() + NumInputs);
651 this->uninitialized_copy(in_start, in_end, this->end());
652 this->set_size(this->size() + NumInputs);
653 }
654
655 /// Append \p NumInputs copies of \p Elt to the end.
656 void append(size_type NumInputs, ValueParamT Elt) {
657 const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumInputs);
658 std::uninitialized_fill_n(this->end(), NumInputs, *EltPtr);
659 this->set_size(this->size() + NumInputs);
660 }
661
662 void append(std::initializer_list<T> IL) {
663 append(IL.begin(), IL.end());
664 }
665
666 void append(const SmallVectorImpl &RHS) { append(RHS.begin(), RHS.end()); }
667
668 void assign(size_type NumElts, ValueParamT Elt) {
669 // Note that Elt could be an internal reference.
670 if (NumElts > this->capacity()) {
671 this->growAndAssign(NumElts, Elt);
672 return;
673 }
674
675 // Assign over existing elements.
676 std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt);
677 if (NumElts > this->size())
678 std::uninitialized_fill_n(this->end(), NumElts - this->size(), Elt);
679 else if (NumElts < this->size())
680 this->destroy_range(this->begin() + NumElts, this->end());
681 this->set_size(NumElts);
682 }
683
684 // FIXME: Consider assigning over existing elements, rather than clearing &
685 // re-initializing them - for all assign(...) variants.
686
687 template <typename in_iter,
688 typename = std::enable_if_t<std::is_convertible<
689 typename std::iterator_traits<in_iter>::iterator_category,
690 std::input_iterator_tag>::value>>
691 void assign(in_iter in_start, in_iter in_end) {
692 this->assertSafeToReferenceAfterClear(in_start, in_end);
693 clear();
694 append(in_start, in_end);
695 }
696
697 void assign(std::initializer_list<T> IL) {
698 clear();
699 append(IL);
700 }
701
702 void assign(const SmallVectorImpl &RHS) { assign(RHS.begin(), RHS.end()); }
703
704 iterator erase(const_iterator CI) {
705 // Just cast away constness because this is a non-const member function.
706 iterator I = const_cast<iterator>(CI);
707
708    assert(this->isReferenceToStorage(CI) && "Iterator to erase is out of bounds.");
709
710 iterator N = I;
711 // Shift all elts down one.
712 std::move(I+1, this->end(), I);
713 // Drop the last elt.
714 this->pop_back();
715 return(N);
716 }
717
718 iterator erase(const_iterator CS, const_iterator CE) {
719 // Just cast away constness because this is a non-const member function.
720 iterator S = const_cast<iterator>(CS);
721 iterator E = const_cast<iterator>(CE);
722
723    assert(this->isRangeInStorage(S, E) && "Range to erase is out of bounds.");
724
725 iterator N = S;
726 // Shift all elts down.
727 iterator I = std::move(E, this->end(), S);
728 // Drop the last elts.
729 this->destroy_range(I, this->end());
730 this->set_size(I - this->begin());
731 return(N);
732 }
733
734private:
735 template <class ArgType> iterator insert_one_impl(iterator I, ArgType &&Elt) {
736 // Callers ensure that ArgType is derived from T.
737 static_assert(
738 std::is_same<std::remove_const_t<std::remove_reference_t<ArgType>>,
739 T>::value,
740 "ArgType must be derived from T!");
741
742 if (I == this->end()) { // Important special case for empty vector.
743 this->push_back(::std::forward<ArgType>(Elt));
744 return this->end()-1;
745 }
746
747    assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.");
748
749 // Grow if necessary.
750 size_t Index = I - this->begin();
751 std::remove_reference_t<ArgType> *EltPtr =
752 this->reserveForParamAndGetAddress(Elt);
753 I = this->begin() + Index;
754
755 ::new ((void*) this->end()) T(::std::move(this->back()));
756 // Push everything else over.
757 std::move_backward(I, this->end()-1, this->end());
758 this->set_size(this->size() + 1);
759
760 // If we just moved the element we're inserting, be sure to update
761 // the reference (never happens if TakesParamByValue).
762 static_assert(!TakesParamByValue || std::is_same<ArgType, T>::value,
763 "ArgType must be 'T' when taking by value!");
764 if (!TakesParamByValue && this->isReferenceToRange(EltPtr, I, this->end()))
765 ++EltPtr;
766
767 *I = ::std::forward<ArgType>(*EltPtr);
768 return I;
769 }
770
771public:
772 iterator insert(iterator I, T &&Elt) {
773 return insert_one_impl(I, this->forward_value_param(std::move(Elt)));
774 }
775
776 iterator insert(iterator I, const T &Elt) {
777 return insert_one_impl(I, this->forward_value_param(Elt));
778 }
779
780 iterator insert(iterator I, size_type NumToInsert, ValueParamT Elt) {
781 // Convert iterator to elt# to avoid invalidating iterator when we reserve()
782 size_t InsertElt = I - this->begin();
783
784 if (I == this->end()) { // Important special case for empty vector.
785 append(NumToInsert, Elt);
786 return this->begin()+InsertElt;
787 }
788
789    assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.");
790
791 // Ensure there is enough space, and get the (maybe updated) address of
792 // Elt.
793 const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumToInsert);
794
795 // Uninvalidate the iterator.
796 I = this->begin()+InsertElt;
797
798 // If there are more elements between the insertion point and the end of the
799 // range than there are being inserted, we can use a simple approach to
800 // insertion. Since we already reserved space, we know that this won't
801 // reallocate the vector.
802 if (size_t(this->end()-I) >= NumToInsert) {
803 T *OldEnd = this->end();
804 append(std::move_iterator<iterator>(this->end() - NumToInsert),
805 std::move_iterator<iterator>(this->end()));
806
807 // Copy the existing elements that get replaced.
808 std::move_backward(I, OldEnd-NumToInsert, OldEnd);
809
810 // If we just moved the element we're inserting, be sure to update
811 // the reference (never happens if TakesParamByValue).
812 if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
813 EltPtr += NumToInsert;
814
815 std::fill_n(I, NumToInsert, *EltPtr);
816 return I;
817 }
818
819 // Otherwise, we're inserting more elements than exist already, and we're
820 // not inserting at the end.
821
822 // Move over the elements that we're about to overwrite.
823 T *OldEnd = this->end();
824 this->set_size(this->size() + NumToInsert);
825 size_t NumOverwritten = OldEnd-I;
826 this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);
827
828 // If we just moved the element we're inserting, be sure to update
829 // the reference (never happens if TakesParamByValue).
830 if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
831 EltPtr += NumToInsert;
832
833 // Replace the overwritten part.
834 std::fill_n(I, NumOverwritten, *EltPtr);
835
836 // Insert the non-overwritten middle part.
837 std::uninitialized_fill_n(OldEnd, NumToInsert - NumOverwritten, *EltPtr);
838 return I;
839 }
840
841 template <typename ItTy,
842 typename = std::enable_if_t<std::is_convertible<
843 typename std::iterator_traits<ItTy>::iterator_category,
844 std::input_iterator_tag>::value>>
845 iterator insert(iterator I, ItTy From, ItTy To) {
846 // Convert iterator to elt# to avoid invalidating iterator when we reserve()
847 size_t InsertElt = I - this->begin();
848
849 if (I == this->end()) { // Important special case for empty vector.
850 append(From, To);
851 return this->begin()+InsertElt;
852 }
853
854    assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.");
855
856 // Check that the reserve that follows doesn't invalidate the iterators.
857 this->assertSafeToAddRange(From, To);
858
859 size_t NumToInsert = std::distance(From, To);
860
861 // Ensure there is enough space.
862 reserve(this->size() + NumToInsert);
863
864 // Uninvalidate the iterator.
865 I = this->begin()+InsertElt;
866
867 // If there are more elements between the insertion point and the end of the
868 // range than there are being inserted, we can use a simple approach to
869 // insertion. Since we already reserved space, we know that this won't
870 // reallocate the vector.
871 if (size_t(this->end()-I) >= NumToInsert) {
872 T *OldEnd = this->end();
873 append(std::move_iterator<iterator>(this->end() - NumToInsert),
874 std::move_iterator<iterator>(this->end()));
875
876 // Copy the existing elements that get replaced.
877 std::move_backward(I, OldEnd-NumToInsert, OldEnd);
878
879 std::copy(From, To, I);
880 return I;
881 }
882
883 // Otherwise, we're inserting more elements than exist already, and we're
884 // not inserting at the end.
885
886 // Move over the elements that we're about to overwrite.
887 T *OldEnd = this->end();
888 this->set_size(this->size() + NumToInsert);
889 size_t NumOverwritten = OldEnd-I;
890 this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);
891
892 // Replace the overwritten part.
893 for (T *J = I; NumOverwritten > 0; --NumOverwritten) {
894 *J = *From;
895 ++J; ++From;
896 }
897
898 // Insert the non-overwritten middle part.
899 this->uninitialized_copy(From, To, OldEnd);
900 return I;
901 }
902
903 void insert(iterator I, std::initializer_list<T> IL) {
904 insert(I, IL.begin(), IL.end());
905 }
906
907 template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) {
908    if (LLVM_UNLIKELY(this->size() >= this->capacity()))
909 return this->growAndEmplaceBack(std::forward<ArgTypes>(Args)...);
910
911 ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...);
912 this->set_size(this->size() + 1);
913 return this->back();
914 }
915
916 SmallVectorImpl &operator=(const SmallVectorImpl &RHS);
917
918 SmallVectorImpl &operator=(SmallVectorImpl &&RHS);
919
920 bool operator==(const SmallVectorImpl &RHS) const {
921 if (this->size() != RHS.size()) return false;
922 return std::equal(this->begin(), this->end(), RHS.begin());
923 }
924 bool operator!=(const SmallVectorImpl &RHS) const {
925 return !(*this == RHS);
926 }
927
928 bool operator<(const SmallVectorImpl &RHS) const {
929 return std::lexicographical_compare(this->begin(), this->end(),
930 RHS.begin(), RHS.end());
931 }
932};
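A brief usage sketch of the insert/erase members above (the values in the comments follow from the semantics defined in this header):

  llvm::SmallVector<int, 4> V;
  V.assign({1, 2, 3, 4});
  V.insert(V.begin() + 1, /*NumToInsert=*/3, /*Elt=*/9); // {1, 9, 9, 9, 2, 3, 4}
  V.erase(V.begin(), V.begin() + 2);                     // {9, 9, 2, 3, 4}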
933
934template <typename T>
935void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) {
936 if (this == &RHS) return;
937
938 // We can only avoid copying elements if neither vector is small.
939 if (!this->isSmall() && !RHS.isSmall()) {
940 std::swap(this->BeginX, RHS.BeginX);
941 std::swap(this->Size, RHS.Size);
942 std::swap(this->Capacity, RHS.Capacity);
943 return;
944 }
945 this->reserve(RHS.size());
946 RHS.reserve(this->size());
947
948 // Swap the shared elements.
949 size_t NumShared = this->size();
950 if (NumShared > RHS.size()) NumShared = RHS.size();
951 for (size_type i = 0; i != NumShared; ++i)
952 std::swap((*this)[i], RHS[i]);
953
954 // Copy over the extra elts.
955 if (this->size() > RHS.size()) {
956 size_t EltDiff = this->size() - RHS.size();
957 this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end());
958 RHS.set_size(RHS.size() + EltDiff);
959 this->destroy_range(this->begin()+NumShared, this->end());
960 this->set_size(NumShared);
961 } else if (RHS.size() > this->size()) {
962 size_t EltDiff = RHS.size() - this->size();
963 this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end());
964 this->set_size(this->size() + EltDiff);
965 this->destroy_range(RHS.begin()+NumShared, RHS.end());
966 RHS.set_size(NumShared);
967 }
968}
969
970template <typename T>
971SmallVectorImpl<T> &SmallVectorImpl<T>::
972 operator=(const SmallVectorImpl<T> &RHS) {
973 // Avoid self-assignment.
974 if (this == &RHS) return *this;
975
976 // If we already have sufficient space, assign the common elements, then
977 // destroy any excess.
978 size_t RHSSize = RHS.size();
979 size_t CurSize = this->size();
980 if (CurSize >= RHSSize) {
981 // Assign common elements.
982 iterator NewEnd;
983 if (RHSSize)
984 NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin());
985 else
986 NewEnd = this->begin();
987
988 // Destroy excess elements.
989 this->destroy_range(NewEnd, this->end());
990
991 // Trim.
992 this->set_size(RHSSize);
993 return *this;
994 }
995
996 // If we have to grow to have enough elements, destroy the current elements.
997 // This allows us to avoid copying them during the grow.
998 // FIXME: don't do this if they're efficiently moveable.
999 if (this->capacity() < RHSSize) {
1000 // Destroy current elements.
1001 this->clear();
1002 CurSize = 0;
1003 this->grow(RHSSize);
1004 } else if (CurSize) {
1005 // Otherwise, use assignment for the already-constructed elements.
1006 std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin());
1007 }
1008
1009 // Copy construct the new elements in place.
1010 this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(),
1011 this->begin()+CurSize);
1012
1013 // Set end.
1014 this->set_size(RHSSize);
1015 return *this;
1016}
1017
1018template <typename T>
1019SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
1020 // Avoid self-assignment.
1021 if (this == &RHS) return *this;
1022
1023 // If the RHS isn't small, clear this vector and then steal its buffer.
1024 if (!RHS.isSmall()) {
1025 this->destroy_range(this->begin(), this->end());
1026 if (!this->isSmall()) free(this->begin());
1027 this->BeginX = RHS.BeginX;
1028 this->Size = RHS.Size;
1029 this->Capacity = RHS.Capacity;
1030 RHS.resetToSmall();
1031 return *this;
1032 }
1033
1034 // If we already have sufficient space, assign the common elements, then
1035 // destroy any excess.
1036 size_t RHSSize = RHS.size();
1037 size_t CurSize = this->size();
1038 if (CurSize >= RHSSize) {
1039 // Assign common elements.
1040 iterator NewEnd = this->begin();
1041 if (RHSSize)
1042 NewEnd = std::move(RHS.begin(), RHS.end(), NewEnd);
1043
1044 // Destroy excess elements and trim the bounds.
1045 this->destroy_range(NewEnd, this->end());
1046 this->set_size(RHSSize);
1047
1048 // Clear the RHS.
1049 RHS.clear();
1050
1051 return *this;
1052 }
1053
1054 // If we have to grow to have enough elements, destroy the current elements.
1055 // This allows us to avoid copying them during the grow.
1056 // FIXME: this may not actually make any sense if we can efficiently move
1057 // elements.
1058 if (this->capacity() < RHSSize) {
1059 // Destroy current elements.
1060 this->clear();
1061 CurSize = 0;
1062 this->grow(RHSSize);
1063 } else if (CurSize) {
1064 // Otherwise, use assignment for the already-constructed elements.
1065 std::move(RHS.begin(), RHS.begin()+CurSize, this->begin());
1066 }
1067
1068 // Move-construct the new elements in place.
1069 this->uninitialized_move(RHS.begin()+CurSize, RHS.end(),
1070 this->begin()+CurSize);
1071
1072 // Set end.
1073 this->set_size(RHSSize);
1074
1075 RHS.clear();
1076 return *this;
1077}
1078
1079/// Storage for the SmallVector elements. This is specialized for the N=0 case
1080/// to avoid allocating unnecessary storage.
1081template <typename T, unsigned N>
1082struct SmallVectorStorage {
1083 alignas(T) char InlineElts[N * sizeof(T)];
1084};
1085
1086/// We need the storage to be properly aligned even for small-size of 0 so that
1087/// the pointer math in \a SmallVectorTemplateCommon::getFirstEl() is
1088/// well-defined.
1089template <typename T> struct alignas(T) SmallVectorStorage<T, 0> {};
1090
1091/// Forward declaration of SmallVector so that
1092/// calculateSmallVectorDefaultInlinedElements can reference
1093/// `sizeof(SmallVector<T, 0>)`.
1094template <typename T, unsigned N> class LLVM_GSL_OWNER SmallVector;
1095
1096/// Helper class for calculating the default number of inline elements for
1097/// `SmallVector<T>`.
1098///
1099/// This should be migrated to a constexpr function when our minimum
1100/// compiler support is enough for multi-statement constexpr functions.
1101template <typename T> struct CalculateSmallVectorDefaultInlinedElements {
1102 // Parameter controlling the default number of inlined elements
1103 // for `SmallVector<T>`.
1104 //
1105 // The default number of inlined elements ensures that
1106 // 1. There is at least one inlined element.
1107 // 2. `sizeof(SmallVector<T>) <= kPreferredSmallVectorSizeof` unless
1108 // it contradicts 1.
1109 static constexpr size_t kPreferredSmallVectorSizeof = 64;
1110
1111 // static_assert that sizeof(T) is not "too big".
1112 //
1113 // Because our policy guarantees at least one inlined element, it is possible
1114 // for an arbitrarily large inlined element to allocate an arbitrarily large
1115 // amount of inline storage. We generally consider it an antipattern for a
1116 // SmallVector to allocate an excessive amount of inline storage, so we want
1117 // to call attention to these cases and make sure that users are making an
1118 // intentional decision if they request a lot of inline storage.
1119 //
1120 // We want this assertion to trigger in pathological cases, but otherwise
1121 // not be too easy to hit. To accomplish that, the cutoff is actually somewhat
1122 // larger than kPreferredSmallVectorSizeof (otherwise,
1123 // `SmallVector<SmallVector<T>>` would be one easy way to trip it, and that
1124 // pattern seems useful in practice).
1125 //
1126 // One wrinkle is that this assertion is in theory non-portable, since
1127 // sizeof(T) is in general platform-dependent. However, we don't expect this
1128 // to be much of an issue, because most LLVM development happens on 64-bit
1129 // hosts, and therefore sizeof(T) is expected to *decrease* when compiled for
1130 // 32-bit hosts, dodging the issue. The reverse situation, where development
1131 // happens on a 32-bit host and then fails due to sizeof(T) *increasing* on a
1132 // 64-bit host, is expected to be very rare.
1133 static_assert(
1134 sizeof(T) <= 256,
1135 "You are trying to use a default number of inlined elements for "
1136 "`SmallVector<T>` but `sizeof(T)` is really big! Please use an "
1137 "explicit number of inlined elements with `SmallVector<T, N>` to make "
1138 "sure you really want that much inline storage.");
1139
1140 // Discount the size of the header itself when calculating the maximum inline
1141 // bytes.
1142 static constexpr size_t PreferredInlineBytes =
1143 kPreferredSmallVectorSizeof - sizeof(SmallVector<T, 0>);
1144 static constexpr size_t NumElementsThatFit = PreferredInlineBytes / sizeof(T);
1145 static constexpr size_t value =
1146 NumElementsThatFit == 0 ? 1 : NumElementsThatFit;
1147};
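A worked instance of this policy, assuming a 64-bit host where the SmallVector header (a pointer plus two 32-bit counters) is 16 bytes: PreferredInlineBytes = 64 - 16 = 48, so SmallVector<int> defaults to 48 / 4 = 12 inline elements. A hedged compile-time check:

  // Assumes sizeof(llvm::SmallVector<int, 0>) == 16 on this platform.
  static_assert(llvm::CalculateSmallVectorDefaultInlinedElements<int>::value ==
                    (64 - sizeof(llvm::SmallVector<int, 0>)) / sizeof(int),
                "default N should fill the preferred 64-byte footprint");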
1148
1149/// This is a 'vector' (really, a variable-sized array), optimized
1150/// for the case when the array is small. It contains some number of elements
1151/// in-place, which allows it to avoid heap allocation when the actual number of
1152/// elements is below that threshold. This allows normal "small" cases to be
1153/// fast without losing generality for large inputs.
1154///
1155/// \note
1156/// In the absence of a well-motivated choice for the number of inlined
1157/// elements \p N, it is recommended to use \c SmallVector<T> (that is,
1158/// omitting the \p N). This will choose a default number of inlined elements
1159/// reasonable for allocation on the stack (for example, trying to keep \c
1160/// sizeof(SmallVector<T>) around 64 bytes).
1161///
1162/// \warning This does not attempt to be exception safe.
1163///
1164/// \see https://llvm.org/docs/ProgrammersManual.html#llvm-adt-smallvector-h
1165template <typename T,
1166 unsigned N = CalculateSmallVectorDefaultInlinedElements<T>::value>
1167class LLVM_GSL_OWNER SmallVector : public SmallVectorImpl<T>,
1168 SmallVectorStorage<T, N> {
1169public:
1170 SmallVector() : SmallVectorImpl<T>(N) {}
1171
1172 ~SmallVector() {
1173 // Destroy the constructed elements in the vector.
1174 this->destroy_range(this->begin(), this->end());
1175 }
1176
1177 explicit SmallVector(size_t Size, const T &Value = T())
1178 : SmallVectorImpl<T>(N) {
1179 this->assign(Size, Value);
1180 }
1181
1182 template <typename ItTy,
1183 typename = std::enable_if_t<std::is_convertible<
1184 typename std::iterator_traits<ItTy>::iterator_category,
1185 std::input_iterator_tag>::value>>
1186 SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(N) {
1187 this->append(S, E);
1188 }
1189
1190 template <typename RangeTy>
1191 explicit SmallVector(const iterator_range<RangeTy> &R)
1192 : SmallVectorImpl<T>(N) {
1193 this->append(R.begin(), R.end());
1194 }
1195
1196 SmallVector(std::initializer_list<T> IL) : SmallVectorImpl<T>(N) {
1197 this->assign(IL);
1198 }
1199
1200 SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(N) {
1201 if (!RHS.empty())
1202 SmallVectorImpl<T>::operator=(RHS);
1203 }
1204
1205 SmallVector &operator=(const SmallVector &RHS) {
1206 SmallVectorImpl<T>::operator=(RHS);
1207 return *this;
1208 }
1209
1210 SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(N) {
1211 if (!RHS.empty())
1212 SmallVectorImpl<T>::operator=(::std::move(RHS));
1213 }
1214
1215 SmallVector(SmallVectorImpl<T> &&RHS) : SmallVectorImpl<T>(N) {
1216 if (!RHS.empty())
1217 SmallVectorImpl<T>::operator=(::std::move(RHS));
1218 }
1219
1220 SmallVector &operator=(SmallVector &&RHS) {
1221 SmallVectorImpl<T>::operator=(::std::move(RHS));
1222 return *this;
1223 }
1224
1225 SmallVector &operator=(SmallVectorImpl<T> &&RHS) {
1226 SmallVectorImpl<T>::operator=(::std::move(RHS));
1227 return *this;
1228 }
1229
1230 SmallVector &operator=(std::initializer_list<T> IL) {
1231 this->assign(IL);
1232 return *this;
1233 }
1234};
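// A minimal usage sketch (element type and count are hypothetical); the
// point is that small cases stay in inline storage:
//
//   llvm::SmallVector<unsigned, 8> Elems;  // inline storage for 8 elements
//   Elems.push_back(42);                   // no heap allocation yet
//   llvm::SmallVector<unsigned> Auto;      // N chosen by the policy above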
1235
1236template <typename T, unsigned N>
1237inline size_t capacity_in_bytes(const SmallVector<T, N> &X) {
1238 return X.capacity_in_bytes();
1239}
1240
1241/// Given a range of type R, iterate the entire range and return a
1242/// SmallVector containing the range's elements. This is useful, for example,
1243/// when you want to iterate a range and then sort the results.
1244template <unsigned Size, typename R>
1245SmallVector<typename std::remove_const<typename std::remove_reference<
1246 decltype(*std::begin(std::declval<R &>()))>::type>::type,
1247 Size>
1248to_vector(R &&Range) {
1249 return {std::begin(Range), std::end(Range)};
1250}
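// A typical use, sketched under the assumption that llvm/ADT/STLExtras.h
// (for llvm::sort) is available; SomeRange is a hypothetical name:
//
//   auto Vec = llvm::to_vector<8>(SomeRange); // materialize the range
//   llvm::sort(Vec);                          // now safe to sort in place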
1251
1252} // end namespace llvm
1253
1254namespace std {
1255
1256 /// Implement std::swap in terms of SmallVector swap.
1257 template<typename T>
1258 inline void
1259 swap(llvm::SmallVectorImpl<T> &LHS, llvm::SmallVectorImpl<T> &RHS) {
1260 LHS.swap(RHS);
1261 }
1262
1263 /// Implement std::swap in terms of SmallVector swap.
1264 template<typename T, unsigned N>
1265 inline void
1266 swap(llvm::SmallVector<T, N> &LHS, llvm::SmallVector<T, N> &RHS) {
1267 LHS.swap(RHS);
1268 }
1269
1270} // end namespace std
1271
1272#endif // LLVM_ADT_SMALLVECTOR_H

/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/Analysis/CFG.h

1//===-- Analysis/CFG.h - BasicBlock Analyses --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This family of functions performs analyses on basic blocks, and instructions
10// contained within basic blocks.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_CFG_H
15#define LLVM_ANALYSIS_CFG_H
16
17#include "llvm/ADT/GraphTraits.h"
18#include "llvm/ADT/SmallPtrSet.h"
19#include <utility>
20
21namespace llvm {
22
23class BasicBlock;
24class DominatorTree;
25class Function;
26class Instruction;
27class LoopInfo;
28template <typename T> class SmallVectorImpl;
29
30/// Analyze the specified function to find all of the loop backedges in the
31/// function and return them. This is a relatively cheap (compared to
32/// computing dominators and loop info) analysis.
33///
34/// The output is added to Result, as pairs of <from,to> edge info.
35void FindFunctionBackedges(
36 const Function &F,
37 SmallVectorImpl<std::pair<const BasicBlock *, const BasicBlock *> > &
38 Result);
39
40/// Search for the specified successor of basic block BB and return its position
41/// in the terminator instruction's list of successors. It is an error to call
42/// this with a block that is not a successor.
43unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ);
44
45/// Return true if the specified edge is a critical edge. Critical edges are
46/// edges from a block with multiple successors to a block with multiple
47/// predecessors.
48///
49bool isCriticalEdge(const Instruction *TI, unsigned SuccNum,
50 bool AllowIdenticalEdges = false);
51bool isCriticalEdge(const Instruction *TI, const BasicBlock *Succ,
52 bool AllowIdenticalEdges = false);
53
54/// Determine whether instruction 'To' is reachable from 'From', without passing
55/// through any blocks in ExclusionSet, returning true if uncertain.
56///
57/// Determine whether there is a path from From to To within a single function.
58/// Returns false only if we can prove that once 'From' has been executed then
59/// 'To' can not be executed. Conservatively returns true.
60///
61/// This function is linear with respect to the number of blocks in the CFG,
62/// walking down successors from From to reach To, with a fixed threshold.
63/// Using DT or LI allows us to answer more quickly. LI reduces the cost of
64/// an entire loop of any number of blocks to be the same as the cost of a
65/// single block. DT reduces the cost by allowing the search to terminate when
66/// we find a block that dominates the block containing 'To'. DT is most useful
67/// on branchy code but not loops, and LI is most useful on code with loops but
68/// does not help on branchy code outside loops.
69bool isPotentiallyReachable(
70 const Instruction *From, const Instruction *To,
71 const SmallPtrSetImpl<BasicBlock *> *ExclusionSet = nullptr,
72 const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
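// A usage sketch; FromI, ToI, DT, and LI are hypothetical names. A 'true'
// result only means a path could not be ruled out:
//
//   if (!isPotentiallyReachable(FromI, ToI, /*ExclusionSet=*/nullptr, &DT, &LI))
//     ;  // proven unreachable: safe to exploit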
73
74/// Determine whether block 'To' is reachable from 'From', returning
75/// true if uncertain.
76///
77/// Determine whether there is a path from From to To within a single function.
78/// Returns false only if we can prove that once 'From' has been reached then
79/// 'To' can not be executed. Conservatively returns true.
80bool isPotentiallyReachable(
81 const BasicBlock *From, const BasicBlock *To,
82 const SmallPtrSetImpl<BasicBlock *> *ExclusionSet = nullptr,
83 const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
84
85/// Determine whether there is at least one path from a block in
86/// 'Worklist' to 'StopBB', returning true if uncertain.
87///
88/// Determine whether there is a path from at least one block in Worklist to
89/// StopBB within a single function. Returns false only if we can prove that
90/// once any block in 'Worklist' has been reached then 'StopBB' can not be
91/// executed. Conservatively returns true.
92bool isPotentiallyReachableFromMany(SmallVectorImpl<BasicBlock *> &Worklist,
93 BasicBlock *StopBB,
94 const DominatorTree *DT = nullptr,
95 const LoopInfo *LI = nullptr);
96
97/// Determine whether there is at least one path from a block in
98/// 'Worklist' to 'StopBB' without passing through any blocks in
99/// 'ExclusionSet', returning true if uncertain.
100///
101/// Determine whether there is a path from at least one block in Worklist to
102/// StopBB within a single function without passing through any of the blocks
103/// in 'ExclusionSet'. Returns false only if we can prove that once any block
104/// in 'Worklist' has been reached then 'StopBB' can not be executed.
105/// Conservatively returns true.
106bool isPotentiallyReachableFromMany(
107 SmallVectorImpl<BasicBlock *> &Worklist, BasicBlock *StopBB,
108 const SmallPtrSetImpl<BasicBlock *> *ExclusionSet,
109 const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
110
111/// Return true if the control flow in \p RPOTraversal is irreducible.
112///
113/// This is a generic implementation to detect CFG irreducibility based on loop
114/// info analysis. It can be used for any kind of CFG (Loop, MachineLoop,
115/// Function, MachineFunction, etc.) by providing an RPO traversal (\p
116/// RPOTraversal) and the loop info analysis (\p LI) of the CFG. This utility
117/// function is only recommended when loop info analysis is available. If loop
118/// info analysis isn't available, please don't compute it explicitly for this
119/// purpose. There are more efficient ways to detect CFG irreducibility that
120/// don't require recomputing loop info analysis (e.g., T1/T2 or Tarjan's
121/// algorithm).
122///
123/// Requirements:
124/// 1) GraphTraits must be implemented for NodeT type. It is used to access
125/// NodeT successors.
126/// 2) \p RPOTraversal must be a valid reverse post-order traversal of the
127/// target CFG with begin()/end() iterator interfaces.
128/// 3) \p LI must be a valid LoopInfoBase that contains up-to-date loop
129/// analysis information of the CFG.
130///
131/// This algorithm uses the information about reducible loop back-edges already
132/// computed in \p LI. When a back-edge is found during the RPO traversal, the
133/// algorithm checks whether the back-edge is one of the reducible back-edges in
134/// loop info. If it isn't, the CFG is irreducible. For example, for the CFG
135/// below (canonical irreducible graph) loop info won't contain any loop, so the
136/// algorithm will return that the CFG is irreducible when checking the
137/// B <-> C back-edge.
138///
139/// (A->B, A->C, B->C, C->B, C->D)
140///       A
141///      / \
142///     B<- ->C
143///           |
144///           D
145///
146template <class NodeT, class RPOTraversalT, class LoopInfoT,
147 class GT = GraphTraits<NodeT>>
148bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI) {
149 /// Check whether the edge (\p Src, \p Dst) is a reducible loop backedge
150 /// according to LI. I.e., check if there exists a loop that contains Src and
151 /// where Dst is the loop header.
152 auto isProperBackedge = [&](NodeT Src, NodeT Dst) {
153 for (const auto *Lp = LI.getLoopFor(Src); Lp; Lp = Lp->getParentLoop()) {
154 if (Lp->getHeader() == Dst)
155 return true;
156 }
157 return false;
158 };
159
160 SmallPtrSet<NodeT, 32> Visited;
161 for (NodeT Node : RPOTraversal) {
162 Visited.insert(Node);
163 for (NodeT Succ : make_range(GT::child_begin(Node), GT::child_end(Node))) {
164 // Succ hasn't been visited yet
165 if (!Visited.count(Succ))
166 continue;
167 // We already visited Succ, thus Node->Succ must be a backedge. Check that
168 // the head matches what we have in the loop information. Otherwise, we
169 // have an irreducible graph.
170 if (!isProperBackedge(Node, Succ))
171 return true;
172 }
173 }
174
175 return false;
15
Returning zero, which participates in a condition later
176}
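// A sketch of invoking this over a Function's CFG, assuming
// llvm/ADT/PostOrderIterator.h for the RPO traversal; F and LI are
// hypothetical names:
//
//   ReversePostOrderTraversal<const Function *> RPOT(&F);
//   bool HasIrreducible = containsIrreducibleCFG<const BasicBlock *>(RPOT, LI);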
177} // End llvm namespace
178
179#endif

/build/llvm-toolchain-snapshot-13~++20210621111111+acefe0eaaf82/llvm/include/llvm/IR/PatternMatch.h

1//===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides a simple and efficient mechanism for performing general
10// tree-based pattern matches on the LLVM IR. The power of these routines is
11// that it allows you to write concise patterns that are expressive and easy to
12// understand. The other major advantage of this is that it allows you to
13// trivially capture/bind elements in the pattern to variables. For example,
14// you can do something like this:
15//
16// Value *Exp = ...
17// Value *X, *Y; ConstantInt *C1, *C2; // (X & C1) | (Y & C2)
18// if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)),
19// m_And(m_Value(Y), m_ConstantInt(C2))))) {
20// ... Pattern is matched and variables are bound ...
21// }
22//
23// This is primarily useful to things like the instruction combiner, but can
24// also be useful for static analysis tools or code generators.
25//
26//===----------------------------------------------------------------------===//
27
28#ifndef LLVM_IR_PATTERNMATCH_H
29#define LLVM_IR_PATTERNMATCH_H
30
31#include "llvm/ADT/APFloat.h"
32#include "llvm/ADT/APInt.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/InstrTypes.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/IntrinsicInst.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Operator.h"
42#include "llvm/IR/Value.h"
43#include "llvm/Support/Casting.h"
44#include <cstdint>
45
46namespace llvm {
47namespace PatternMatch {
48
49template <typename Val, typename Pattern> bool match(Val *V, const Pattern &P) {
50 return const_cast<Pattern &>(P).match(V);
35
Calling 'LogicalOp_match::match'
39
Returning from 'LogicalOp_match::match'
40
Returning zero, which participates in a condition later
44
Calling 'LogicalOp_match::match'
48
Returning from 'LogicalOp_match::match'
49
Returning zero, which participates in a condition later
51}
52
53template <typename Pattern> bool match(ArrayRef<int> Mask, const Pattern &P) {
54 return const_cast<Pattern &>(P).match(Mask);
55}
56
57template <typename SubPattern_t> struct OneUse_match {
58 SubPattern_t SubPattern;
59
60 OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}
61
62 template <typename OpTy> bool match(OpTy *V) {
63 return V->hasOneUse() && SubPattern.match(V);
64 }
65};
66
67template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
68 return SubPattern;
69}
70
71template <typename Class> struct class_match {
72 template <typename ITy> bool match(ITy *V) { return isa<Class>(V); }
73};
74
75/// Match an arbitrary value and ignore it.
76inline class_match<Value> m_Value() { return class_match<Value>(); }
77
78/// Match an arbitrary unary operation and ignore it.
79inline class_match<UnaryOperator> m_UnOp() {
80 return class_match<UnaryOperator>();
81}
82
83/// Match an arbitrary binary operation and ignore it.
84inline class_match<BinaryOperator> m_BinOp() {
85 return class_match<BinaryOperator>();
86}
87
88/// Match any compare instruction and ignore it.
89inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); }
90
91struct undef_match {
92 static bool check(const Value *V) {
93 if (isa<UndefValue>(V))
94 return true;
95
96 const auto *CA = dyn_cast<ConstantAggregate>(V);
97 if (!CA)
98 return false;
99
100 SmallPtrSet<const ConstantAggregate *, 8> Seen;
101 SmallVector<const ConstantAggregate *, 8> Worklist;
102
103 // Either UndefValue, PoisonValue, or an aggregate that only contains
104 // these is accepted by the matcher.
105 // CheckValue returns false if CA cannot satisfy this constraint.
106 auto CheckValue = [&](const ConstantAggregate *CA) {
107 for (const Value *Op : CA->operand_values()) {
108 if (isa<UndefValue>(Op))
109 continue;
110
111 const auto *CA = dyn_cast<ConstantAggregate>(Op);
112 if (!CA)
113 return false;
114 if (Seen.insert(CA).second)
115 Worklist.emplace_back(CA);
116 }
117
118 return true;
119 };
120
121 if (!CheckValue(CA))
122 return false;
123
124 while (!Worklist.empty()) {
125 if (!CheckValue(Worklist.pop_back_val()))
126 return false;
127 }
128 return true;
129 }
130 template <typename ITy> bool match(ITy *V) { return check(V); }
131};
132
133/// Match an arbitrary undef constant. This matches poison as well.
134/// If this is an aggregate and contains a non-aggregate element that is
135/// neither undef nor poison, the aggregate is not matched.
136inline auto m_Undef() { return undef_match(); }
137
138/// Match an arbitrary poison constant.
139inline class_match<PoisonValue> m_Poison() { return class_match<PoisonValue>(); }
140
141/// Match an arbitrary Constant and ignore it.
142inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
143
144/// Match an arbitrary ConstantInt and ignore it.
145inline class_match<ConstantInt> m_ConstantInt() {
146 return class_match<ConstantInt>();
147}
148
149/// Match an arbitrary ConstantFP and ignore it.
150inline class_match<ConstantFP> m_ConstantFP() {
151 return class_match<ConstantFP>();
152}
153
154/// Match an arbitrary ConstantExpr and ignore it.
155inline class_match<ConstantExpr> m_ConstantExpr() {
156 return class_match<ConstantExpr>();
157}
158
159/// Match an arbitrary basic block value and ignore it.
160inline class_match<BasicBlock> m_BasicBlock() {
161 return class_match<BasicBlock>();
162}
163
164/// Inverting matcher
165template <typename Ty> struct match_unless {
166 Ty M;
167
168 match_unless(const Ty &Matcher) : M(Matcher) {}
169
170 template <typename ITy> bool match(ITy *V) { return !M.match(V); }
171};
172
173/// Match if the inner matcher does *NOT* match.
174template <typename Ty> inline match_unless<Ty> m_Unless(const Ty &M) {
175 return match_unless<Ty>(M);
176}
177
178/// Matching combinators
179template <typename LTy, typename RTy> struct match_combine_or {
180 LTy L;
181 RTy R;
182
183 match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}
184
185 template <typename ITy> bool match(ITy *V) {
186 if (L.match(V))
187 return true;
188 if (R.match(V))
189 return true;
190 return false;
191 }
192};
193
194template <typename LTy, typename RTy> struct match_combine_and {
195 LTy L;
196 RTy R;
197
198 match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}
199
200 template <typename ITy> bool match(ITy *V) {
201 if (L.match(V))
202 if (R.match(V))
203 return true;
204 return false;
205 }
206};
207
208/// Combine two pattern matchers matching L || R
209template <typename LTy, typename RTy>
210inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) {
211 return match_combine_or<LTy, RTy>(L, R);
212}
213
214/// Combine two pattern matchers matching L && R
215template <typename LTy, typename RTy>
216inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
217 return match_combine_and<LTy, RTy>(L, R);
218}
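// A small combinator sketch; V and X are hypothetical names. This matches
// either "add X, C" or "sub X, C", binding X in both cases:
//
//   Value *X;
//   if (match(V, m_CombineOr(m_Add(m_Value(X), m_ConstantInt()),
//                            m_Sub(m_Value(X), m_ConstantInt()))))
//     ;  // X is bound to the non-constant operand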
219
220struct apint_match {
221 const APInt *&Res;
222 bool AllowUndef;
223
224 apint_match(const APInt *&Res, bool AllowUndef)
225 : Res(Res), AllowUndef(AllowUndef) {}
226
227 template <typename ITy> bool match(ITy *V) {
228 if (auto *CI = dyn_cast<ConstantInt>(V)) {
229 Res = &CI->getValue();
230 return true;
231 }
232 if (V->getType()->isVectorTy())
233 if (const auto *C = dyn_cast<Constant>(V))
234 if (auto *CI = dyn_cast_or_null<ConstantInt>(
235 C->getSplatValue(AllowUndef))) {
236 Res = &CI->getValue();
237 return true;
238 }
239 return false;
240 }
241};
242// Either constexpr if or renaming ConstantFP::getValueAPF to
243// ConstantFP::getValue is needed to handle both APInt and APFloat via a
244// single template function.
245struct apfloat_match {
246 const APFloat *&Res;
247 bool AllowUndef;
248
249 apfloat_match(const APFloat *&Res, bool AllowUndef)
250 : Res(Res), AllowUndef(AllowUndef) {}
251
252 template <typename ITy> bool match(ITy *V) {
253 if (auto *CI = dyn_cast<ConstantFP>(V)) {
254 Res = &CI->getValueAPF();
255 return true;
256 }
257 if (V->getType()->isVectorTy())
258 if (const auto *C = dyn_cast<Constant>(V))
259 if (auto *CI = dyn_cast_or_null<ConstantFP>(
260 C->getSplatValue(AllowUndef))) {
261 Res = &CI->getValueAPF();
262 return true;
263 }
264 return false;
265 }
266};
267
268/// Match a ConstantInt or splatted ConstantVector, binding the
269/// specified pointer to the contained APInt.
270inline apint_match m_APInt(const APInt *&Res) {
271 // Forbid undefs by default to maintain previous behavior.
272 return apint_match(Res, /* AllowUndef */ false);
273}
274
275/// Match APInt while allowing undefs in splat vector constants.
276inline apint_match m_APIntAllowUndef(const APInt *&Res) {
277 return apint_match(Res, /* AllowUndef */ true);
278}
279
280/// Match APInt while forbidding undefs in splat vector constants.
281inline apint_match m_APIntForbidUndef(const APInt *&Res) {
282 return apint_match(Res, /* AllowUndef */ false);
283}
284
285/// Match a ConstantFP or splatted ConstantVector, binding the
286/// specified pointer to the contained APFloat.
287inline apfloat_match m_APFloat(const APFloat *&Res) {
288 // Forbid undefs by default to maintain previous behavior.
289 return apfloat_match(Res, /* AllowUndef */ false);
290}
291
292/// Match APFloat while allowing undefs in splat vector constants.
293inline apfloat_match m_APFloatAllowUndef(const APFloat *&Res) {
294 return apfloat_match(Res, /* AllowUndef */ true);
295}
296
297/// Match APFloat while forbidding undefs in splat vector constants.
298inline apfloat_match m_APFloatForbidUndef(const APFloat *&Res) {
299 return apfloat_match(Res, /* AllowUndef */ false);
300}
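// A usage sketch for these matchers; V is a hypothetical name. The bound
// pointer aliases the constant's own storage, so no copy is made:
//
//   const APInt *C;
//   if (match(V, m_APInt(C)) && C->isNegative())
//     ;  // V is a negative integer constant (or splat thereof)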
301
302template <int64_t Val> struct constantint_match {
303 template <typename ITy> bool match(ITy *V) {
304 if (const auto *CI = dyn_cast<ConstantInt>(V)) {
305 const APInt &CIV = CI->getValue();
306 if (Val >= 0)
307 return CIV == static_cast<uint64_t>(Val);
308 // If Val is negative, and CI is shorter than it, truncate to the right
309 // number of bits. If it is larger, then we have to sign extend. Just
310 // compare their negated values.
311 return -CIV == -Val;
312 }
313 return false;
314 }
315};
316
317/// Match a ConstantInt with a specific value.
318template <int64_t Val> inline constantint_match<Val> m_ConstantInt() {
319 return constantint_match<Val>();
320}
321
322/// This helper class is used to match constant scalars, vector splats,
323/// and fixed width vectors that satisfy a specified predicate.
324/// For fixed width vector constants, undefined elements are ignored.
325template <typename Predicate, typename ConstantVal>
326struct cstval_pred_ty : public Predicate {
327 template <typename ITy> bool match(ITy *V) {
328 if (const auto *CV = dyn_cast<ConstantVal>(V))
329 return this->isValue(CV->getValue());
330 if (const auto *VTy = dyn_cast<VectorType>(V->getType())) {
331 if (const auto *C = dyn_cast<Constant>(V)) {
332 if (const auto *CV = dyn_cast_or_null<ConstantVal>(C->getSplatValue()))
333 return this->isValue(CV->getValue());
334
335 // The number of elements of a scalable vector is unknown at compile time.
336 auto *FVTy = dyn_cast<FixedVectorType>(VTy);
337 if (!FVTy)
338 return false;
339
340 // Non-splat vector constant: check each element for a match.
341 unsigned NumElts = FVTy->getNumElements();
342 assert(NumElts != 0 && "Constant vector with no elements?");
343 bool HasNonUndefElements = false;
344 for (unsigned i = 0; i != NumElts; ++i) {
345 Constant *Elt = C->getAggregateElement(i);
346 if (!Elt)
347 return false;
348 if (isa<UndefValue>(Elt))
349 continue;
350 auto *CV = dyn_cast<ConstantVal>(Elt);
351 if (!CV || !this->isValue(CV->getValue()))
352 return false;
353 HasNonUndefElements = true;
354 }
355 return HasNonUndefElements;
356 }
357 }
358 return false;
359 }
360};
361
362/// specialization of cstval_pred_ty for ConstantInt
363template <typename Predicate>
364using cst_pred_ty = cstval_pred_ty<Predicate, ConstantInt>;
365
366/// specialization of cstval_pred_ty for ConstantFP
367template <typename Predicate>
368using cstfp_pred_ty = cstval_pred_ty<Predicate, ConstantFP>;
369
370/// This helper class is used to match scalar and vector constants that
371/// satisfy a specified predicate, and bind them to an APInt.
372template <typename Predicate> struct api_pred_ty : public Predicate {
373 const APInt *&Res;
374
375 api_pred_ty(const APInt *&R) : Res(R) {}
376
377 template <typename ITy> bool match(ITy *V) {
378 if (const auto *CI = dyn_cast<ConstantInt>(V))
379 if (this->isValue(CI->getValue())) {
380 Res = &CI->getValue();
381 return true;
382 }
383 if (V->getType()->isVectorTy())
384 if (const auto *C = dyn_cast<Constant>(V))
385 if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()))
386 if (this->isValue(CI->getValue())) {
387 Res = &CI->getValue();
388 return true;
389 }
390
391 return false;
392 }
393};
394
395/// This helper class is used to match scalar and vector constants that
396/// satisfy a specified predicate, and bind them to an APFloat.
397/// Undefs are allowed in splat vector constants.
398template <typename Predicate> struct apf_pred_ty : public Predicate {
399 const APFloat *&Res;
400
401 apf_pred_ty(const APFloat *&R) : Res(R) {}
402
403 template <typename ITy> bool match(ITy *V) {
404 if (const auto *CI = dyn_cast<ConstantFP>(V))
405 if (this->isValue(CI->getValue())) {
406 Res = &CI->getValue();
407 return true;
408 }
409 if (V->getType()->isVectorTy())
410 if (const auto *C = dyn_cast<Constant>(V))
411 if (auto *CI = dyn_cast_or_null<ConstantFP>(
412 C->getSplatValue(/* AllowUndef */ true)))
413 if (this->isValue(CI->getValue())) {
414 Res = &CI->getValue();
415 return true;
416 }
417
418 return false;
419 }
420};
421
422///////////////////////////////////////////////////////////////////////////////
423//
424// Encapsulate constant value queries for use in templated predicate matchers.
425// This allows checking if constants match using compound predicates and works
426// with vector constants, possibly with relaxed constraints (for example,
427// ignoring undef values).
428//
429///////////////////////////////////////////////////////////////////////////////
430
431struct is_any_apint {
432 bool isValue(const APInt &C) { return true; }
433};
434/// Match an integer or vector with any integral constant.
435/// For vectors, this includes constants with undefined elements.
436inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() {
437 return cst_pred_ty<is_any_apint>();
438}
439
440struct is_all_ones {
441 bool isValue(const APInt &C) { return C.isAllOnesValue(); }
442};
443/// Match an integer or vector with all bits set.
444/// For vectors, this includes constants with undefined elements.
445inline cst_pred_ty<is_all_ones> m_AllOnes() {
446 return cst_pred_ty<is_all_ones>();
447}
448
449struct is_maxsignedvalue {
450 bool isValue(const APInt &C) { return C.isMaxSignedValue(); }
451};
452/// Match an integer or vector with values having all bits except for the high
453/// bit set (0x7f...).
454/// For vectors, this includes constants with undefined elements.
455inline cst_pred_ty<is_maxsignedvalue> m_MaxSignedValue() {
456 return cst_pred_ty<is_maxsignedvalue>();
457}
458inline api_pred_ty<is_maxsignedvalue> m_MaxSignedValue(const APInt *&V) {
459 return V;
460}
461
462struct is_negative {
463 bool isValue(const APInt &C) { return C.isNegative(); }
464};
465/// Match an integer or vector of negative values.
466/// For vectors, this includes constants with undefined elements.
467inline cst_pred_ty<is_negative> m_Negative() {
468 return cst_pred_ty<is_negative>();
469}
470inline api_pred_ty<is_negative> m_Negative(const APInt *&V) {
471 return V;
472}
473
474struct is_nonnegative {
475 bool isValue(const APInt &C) { return C.isNonNegative(); }
476};
477/// Match an integer or vector of non-negative values.
478/// For vectors, this includes constants with undefined elements.
479inline cst_pred_ty<is_nonnegative> m_NonNegative() {
480 return cst_pred_ty<is_nonnegative>();
481}
482inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) {
483 return V;
484}
485
486struct is_strictlypositive {
487 bool isValue(const APInt &C) { return C.isStrictlyPositive(); }
488};
489/// Match an integer or vector of strictly positive values.
490/// For vectors, this includes constants with undefined elements.
491inline cst_pred_ty<is_strictlypositive> m_StrictlyPositive() {
492 return cst_pred_ty<is_strictlypositive>();
493}
494inline api_pred_ty<is_strictlypositive> m_StrictlyPositive(const APInt *&V) {
495 return V;
496}
497
498struct is_nonpositive {
499 bool isValue(const APInt &C) { return C.isNonPositive(); }
500};
501/// Match an integer or vector of non-positive values.
502/// For vectors, this includes constants with undefined elements.
503inline cst_pred_ty<is_nonpositive> m_NonPositive() {
504 return cst_pred_ty<is_nonpositive>();
505}
506inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; }
507
508struct is_one {
509 bool isValue(const APInt &C) { return C.isOneValue(); }
510};
511/// Match an integer 1 or a vector with all elements equal to 1.
512/// For vectors, this includes constants with undefined elements.
513inline cst_pred_ty<is_one> m_One() {
514 return cst_pred_ty<is_one>();
515}
516
517struct is_zero_int {
518 bool isValue(const APInt &C) { return C.isNullValue(); }
519};
520/// Match an integer 0 or a vector with all elements equal to 0.
521/// For vectors, this includes constants with undefined elements.
522inline cst_pred_ty<is_zero_int> m_ZeroInt() {
523 return cst_pred_ty<is_zero_int>();
524}
525
526struct is_zero {
527 template <typename ITy> bool match(ITy *V) {
528 auto *C = dyn_cast<Constant>(V);
529 // FIXME: this should be able to do something for scalable vectors
530 return C && (C->isNullValue() || cst_pred_ty<is_zero_int>().match(C));
531 }
532};
533/// Match any null constant or a vector with all elements equal to 0.
534/// For vectors, this includes constants with undefined elements.
535inline is_zero m_Zero() {
536 return is_zero();
537}
538
539struct is_power2 {
540 bool isValue(const APInt &C) { return C.isPowerOf2(); }
541};
542/// Match an integer or vector power-of-2.
543/// For vectors, this includes constants with undefined elements.
544inline cst_pred_ty<is_power2> m_Power2() {
545 return cst_pred_ty<is_power2>();
546}
547inline api_pred_ty<is_power2> m_Power2(const APInt *&V) {
548 return V;
549}
550
551struct is_negated_power2 {
552 bool isValue(const APInt &C) { return (-C).isPowerOf2(); }
553};
554/// Match an integer or vector negated power-of-2.
555/// For vectors, this includes constants with undefined elements.
556inline cst_pred_ty<is_negated_power2> m_NegatedPower2() {
557 return cst_pred_ty<is_negated_power2>();
558}
559inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) {
560 return V;
561}
562
563struct is_power2_or_zero {
564 bool isValue(const APInt &C) { return !C || C.isPowerOf2(); }
565};
566/// Match an integer or vector of 0 or power-of-2 values.
567/// For vectors, this includes constants with undefined elements.
568inline cst_pred_ty<is_power2_or_zero> m_Power2OrZero() {
569 return cst_pred_ty<is_power2_or_zero>();
570}
571inline api_pred_ty<is_power2_or_zero> m_Power2OrZero(const APInt *&V) {
572 return V;
573}
574
575struct is_sign_mask {
576 bool isValue(const APInt &C) { return C.isSignMask(); }
577};
578/// Match an integer or vector with only the sign bit(s) set.
579/// For vectors, this includes constants with undefined elements.
580inline cst_pred_ty<is_sign_mask> m_SignMask() {
581 return cst_pred_ty<is_sign_mask>();
582}
583
584struct is_lowbit_mask {
585 bool isValue(const APInt &C) { return C.isMask(); }
586};
587/// Match an integer or vector with only the low bit(s) set.
588/// For vectors, this includes constants with undefined elements.
589inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
590 return cst_pred_ty<is_lowbit_mask>();
591}
592
593struct icmp_pred_with_threshold {
594 ICmpInst::Predicate Pred;
595 const APInt *Thr;
596 bool isValue(const APInt &C) {
597 switch (Pred) {
598 case ICmpInst::Predicate::ICMP_EQ:
599 return C.eq(*Thr);
600 case ICmpInst::Predicate::ICMP_NE:
601 return C.ne(*Thr);
602 case ICmpInst::Predicate::ICMP_UGT:
603 return C.ugt(*Thr);
604 case ICmpInst::Predicate::ICMP_UGE:
605 return C.uge(*Thr);
606 case ICmpInst::Predicate::ICMP_ULT:
607 return C.ult(*Thr);
608 case ICmpInst::Predicate::ICMP_ULE:
609 return C.ule(*Thr);
610 case ICmpInst::Predicate::ICMP_SGT:
611 return C.sgt(*Thr);
612 case ICmpInst::Predicate::ICMP_SGE:
613 return C.sge(*Thr);
614 case ICmpInst::Predicate::ICMP_SLT:
615 return C.slt(*Thr);
616 case ICmpInst::Predicate::ICMP_SLE:
617 return C.sle(*Thr);
618 default:
619 llvm_unreachable("Unhandled ICmp predicate");
620 }
621 }
622};
623/// Match an integer or vector with every element comparing 'pred' (eq/ne/...)
624/// to Threshold. For vectors, this includes constants with undefined elements.
625inline cst_pred_ty<icmp_pred_with_threshold>
626m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) {
627 cst_pred_ty<icmp_pred_with_threshold> P;
628 P.Pred = Predicate;
629 P.Thr = &Threshold;
630 return P;
631}
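// A sketch, with a hypothetical value V and bit width. This matches
// constants (or splats) whose every element is unsigned-less-than 8:
//
//   if (match(V, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(32, 8))))
//     ;  // all elements proven < 8u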
632
633struct is_nan {
634 bool isValue(const APFloat &C) { return C.isNaN(); }
635};
636/// Match an arbitrary NaN constant. This includes quiet and signaling NaNs.
637/// For vectors, this includes constants with undefined elements.
638inline cstfp_pred_ty<is_nan> m_NaN() {
639 return cstfp_pred_ty<is_nan>();
640}
641
642struct is_nonnan {
643 bool isValue(const APFloat &C) { return !C.isNaN(); }
644};
645/// Match a non-NaN FP constant.
646/// For vectors, this includes constants with undefined elements.
647inline cstfp_pred_ty<is_nonnan> m_NonNaN() {
648 return cstfp_pred_ty<is_nonnan>();
649}
650
651struct is_inf {
652 bool isValue(const APFloat &C) { return C.isInfinity(); }
653};
654/// Match a positive or negative infinity FP constant.
655/// For vectors, this includes constants with undefined elements.
656inline cstfp_pred_ty<is_inf> m_Inf() {
657 return cstfp_pred_ty<is_inf>();
658}
659
660struct is_noninf {
661 bool isValue(const APFloat &C) { return !C.isInfinity(); }
662};
663/// Match a non-infinity FP constant, i.e. finite or NaN.
664/// For vectors, this includes constants with undefined elements.
665inline cstfp_pred_ty<is_noninf> m_NonInf() {
666 return cstfp_pred_ty<is_noninf>();
667}
668
669struct is_finite {
670 bool isValue(const APFloat &C) { return C.isFinite(); }
671};
672/// Match a finite FP constant, i.e. not infinity or NaN.
673/// For vectors, this includes constants with undefined elements.
674inline cstfp_pred_ty<is_finite> m_Finite() {
675 return cstfp_pred_ty<is_finite>();
676}
677inline apf_pred_ty<is_finite> m_Finite(const APFloat *&V) { return V; }
678
679struct is_finitenonzero {
680 bool isValue(const APFloat &C) { return C.isFiniteNonZero(); }
681};
682/// Match a finite non-zero FP constant.
683/// For vectors, this includes constants with undefined elements.
684inline cstfp_pred_ty<is_finitenonzero> m_FiniteNonZero() {
685 return cstfp_pred_ty<is_finitenonzero>();
686}
687inline apf_pred_ty<is_finitenonzero> m_FiniteNonZero(const APFloat *&V) {
688 return V;
689}
690
691struct is_any_zero_fp {
692 bool isValue(const APFloat &C) { return C.isZero(); }
693};
694/// Match a floating-point negative zero or positive zero.
695/// For vectors, this includes constants with undefined elements.
696inline cstfp_pred_ty<is_any_zero_fp> m_AnyZeroFP() {
697 return cstfp_pred_ty<is_any_zero_fp>();
698}
699
700struct is_pos_zero_fp {
701 bool isValue(const APFloat &C) { return C.isPosZero(); }
702};
703/// Match a floating-point positive zero.
704/// For vectors, this includes constants with undefined elements.
705inline cstfp_pred_ty<is_pos_zero_fp> m_PosZeroFP() {
706 return cstfp_pred_ty<is_pos_zero_fp>();
707}
708
709struct is_neg_zero_fp {
710 bool isValue(const APFloat &C) { return C.isNegZero(); }
711};
712/// Match a floating-point negative zero.
713/// For vectors, this includes constants with undefined elements.
714inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() {
715 return cstfp_pred_ty<is_neg_zero_fp>();
716}
717
718struct is_non_zero_fp {
719 bool isValue(const APFloat &C) { return C.isNonZero(); }
720};
721/// Match a floating-point non-zero.
722/// For vectors, this includes constants with undefined elements.
723inline cstfp_pred_ty<is_non_zero_fp> m_NonZeroFP() {
724 return cstfp_pred_ty<is_non_zero_fp>();
725}
726
727///////////////////////////////////////////////////////////////////////////////
728
729template <typename Class> struct bind_ty {
730 Class *&VR;
731
732 bind_ty(Class *&V) : VR(V) {}
733
734 template <typename ITy> bool match(ITy *V) {
735 if (auto *CV = dyn_cast<Class>(V)) {
736 VR = CV;
737 return true;
738 }
739 return false;
740 }
741};
742
743/// Match a value, capturing it if we match.
744inline bind_ty<Value> m_Value(Value *&V) { return V; }
745inline bind_ty<const Value> m_Value(const Value *&V) { return V; }
746
747/// Match an instruction, capturing it if we match.
748inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
749/// Match a unary operator, capturing it if we match.
750inline bind_ty<UnaryOperator> m_UnOp(UnaryOperator *&I) { return I; }
751/// Match a binary operator, capturing it if we match.
752inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
753/// Match a with overflow intrinsic, capturing it if we match.
754inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; }
755inline bind_ty<const WithOverflowInst>
756m_WithOverflowInst(const WithOverflowInst *&I) {
757 return I;
758}
759
760/// Match a Constant, capturing the value if we match.
761inline bind_ty<Constant> m_Constant(Constant *&C) { return C; }
762
763/// Match a ConstantInt, capturing the value if we match.
764inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; }
765
766/// Match a ConstantFP, capturing the value if we match.
767inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; }
768
769/// Match a ConstantExpr, capturing the value if we match.
770inline bind_ty<ConstantExpr> m_ConstantExpr(ConstantExpr *&C) { return C; }
771
772/// Match a basic block value, capturing it if we match.
773inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; }
774inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) {
775 return V;
776}
777
778/// Match an arbitrary immediate Constant and ignore it.
779inline match_combine_and<class_match<Constant>,
780 match_unless<class_match<ConstantExpr>>>
781m_ImmConstant() {
782 return m_CombineAnd(m_Constant(), m_Unless(m_ConstantExpr()));
783}
784
785/// Match an immediate Constant, capturing the value if we match.
786inline match_combine_and<bind_ty<Constant>,
787 match_unless<class_match<ConstantExpr>>>
788m_ImmConstant(Constant *&C) {
789 return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr()));
790}
791
792/// Match a specified Value*.
793struct specificval_ty {
794 const Value *Val;
795
796 specificval_ty(const Value *V) : Val(V) {}
797
798 template <typename ITy> bool match(ITy *V) { return V == Val; }
799};
800
801/// Match if we have a specific specified value.
802inline specificval_ty m_Specific(const Value *V) { return V; }
803
804/// Stores a reference to the Value *, not the Value * itself,
805/// and thus can be used in commutative matchers.
806template <typename Class> struct deferredval_ty {
807 Class *const &Val;
808
809 deferredval_ty(Class *const &V) : Val(V) {}
810
811 template <typename ITy> bool match(ITy *const V) { return V == Val; }
812};
813
814/// Like m_Specific(), but works if the specific value to match is determined
815/// as part of the same match() expression. For example:
816/// m_Add(m_Value(X), m_Specific(X)) is incorrect, because m_Specific() will
817/// bind X before the pattern match starts.
818/// m_Add(m_Value(X), m_Deferred(X)) is correct, and will check against
819/// whichever value m_Value(X) populated.
820inline deferredval_ty<Value> m_Deferred(Value *const &V) { return V; }
821inline deferredval_ty<const Value> m_Deferred(const Value *const &V) {
822 return V;
823}
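// The rule above, as a compilable sketch; V is a hypothetical name:
//
//   Value *X;
//   if (match(V, m_Add(m_Value(X), m_Deferred(X))))
//     ;  // V is "add X, X"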
824
825/// Match a specified floating point value or a vector with all elements
826/// equal to that value.
827struct specific_fpval {
828 double Val;
829
830 specific_fpval(double V) : Val(V) {}
831
832 template <typename ITy> bool match(ITy *V) {
833 if (const auto *CFP = dyn_cast<ConstantFP>(V))
834 return CFP->isExactlyValue(Val);
835 if (V->getType()->isVectorTy())
836 if (const auto *C = dyn_cast<Constant>(V))
837 if (auto *CFP = dyn_cast_or_null<ConstantFP>(C->getSplatValue()))
838 return CFP->isExactlyValue(Val);
839 return false;
840 }
841};
842
843/// Match a specific floating point value or vector with all elements
844/// equal to the value.
845inline specific_fpval m_SpecificFP(double V) { return specific_fpval(V); }
846
847/// Match a float 1.0 or vector with all elements equal to 1.0.
848inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); }
849
850struct bind_const_intval_ty {
851 uint64_t &VR;
852
853 bind_const_intval_ty(uint64_t &V) : VR(V) {}
854
855 template <typename ITy> bool match(ITy *V) {
856 if (const auto *CV = dyn_cast<ConstantInt>(V))
857 if (CV->getValue().ule(UINT64_MAX)) {
858 VR = CV->getZExtValue();
859 return true;
860 }
861 return false;
862 }
863};
864
865/// Match a specified integer value or a vector with all elements equal
866/// to that value.
867template <bool AllowUndefs>
868struct specific_intval {
869 APInt Val;
870
871 specific_intval(APInt V) : Val(std::move(V)) {}
872
873 template <typename ITy> bool match(ITy *V) {
874 const auto *CI = dyn_cast<ConstantInt>(V);
875 if (!CI && V->getType()->isVectorTy())
876 if (const auto *C = dyn_cast<Constant>(V))
877 CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue(AllowUndefs));
878
879 return CI && APInt::isSameValue(CI->getValue(), Val);
880 }
881};
882
883/// Match a specific integer value or vector with all elements equal to
884/// the value.
885inline specific_intval<false> m_SpecificInt(APInt V) {
886 return specific_intval<false>(std::move(V));
887}
888
889inline specific_intval<false> m_SpecificInt(uint64_t V) {
890 return m_SpecificInt(APInt(64, V));
891}
892
893inline specific_intval<true> m_SpecificIntAllowUndef(APInt V) {
894 return specific_intval<true>(std::move(V));
895}
896
897inline specific_intval<true> m_SpecificIntAllowUndef(uint64_t V) {
898 return m_SpecificIntAllowUndef(APInt(64, V));
899}
900
901/// Match a ConstantInt and bind to its value. This does not match
902/// ConstantInts wider than 64-bits.
903inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; }
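// A sketch contrasting the two forms; V is a hypothetical name:
//
//   uint64_t C;
//   if (match(V, m_ConstantInt(C)))   // binds the value; <= 64 bits wide
//     ;
//   if (match(V, m_SpecificInt(42)))  // matches only the constant 42
//     ;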
904
905/// Match a specified basic block value.
906struct specific_bbval {
907 BasicBlock *Val;
908
909 specific_bbval(BasicBlock *Val) : Val(Val) {}
910
911 template <typename ITy> bool match(ITy *V) {
912 const auto *BB = dyn_cast<BasicBlock>(V);
913 return BB && BB == Val;
914 }
915};
916
917/// Match a specific basic block value.
918inline specific_bbval m_SpecificBB(BasicBlock *BB) {
919 return specific_bbval(BB);
920}
921
922/// A commutative-friendly version of m_Specific().
923inline deferredval_ty<BasicBlock> m_Deferred(BasicBlock *const &BB) {
924 return BB;
925}
926inline deferredval_ty<const BasicBlock>
927m_Deferred(const BasicBlock *const &BB) {
928 return BB;
929}
930
931//===----------------------------------------------------------------------===//
932// Matcher for any binary operator.
933//
934template <typename LHS_t, typename RHS_t, bool Commutable = false>
935struct AnyBinaryOp_match {
936 LHS_t L;
937 RHS_t R;
938
939 // The evaluation order is always stable, regardless of Commutability.
940 // The LHS is always matched first.
941 AnyBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
942
943 template <typename OpTy> bool match(OpTy *V) {
944 if (auto *I = dyn_cast<BinaryOperator>(V))
945 return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
946 (Commutable && L.match(I->getOperand(1)) &&
947 R.match(I->getOperand(0)));
948 return false;
949 }
950};
951
952template <typename LHS, typename RHS>
953inline AnyBinaryOp_match<LHS, RHS> m_BinOp(const LHS &L, const RHS &R) {
954 return AnyBinaryOp_match<LHS, RHS>(L, R);
955}
956
957//===----------------------------------------------------------------------===//
958// Matcher for any unary operator.
959// TODO: fuse the unary and binary matchers into an n-ary matcher.
960//
961template <typename OP_t> struct AnyUnaryOp_match {
962 OP_t X;
963
964 AnyUnaryOp_match(const OP_t &X) : X(X) {}
965
966 template <typename OpTy> bool match(OpTy *V) {
967 if (auto *I = dyn_cast<UnaryOperator>(V))
968 return X.match(I->getOperand(0));
969 return false;
970 }
971};
972
973template <typename OP_t> inline AnyUnaryOp_match<OP_t> m_UnOp(const OP_t &X) {
974 return AnyUnaryOp_match<OP_t>(X);
975}
976
977//===----------------------------------------------------------------------===//
978// Matchers for specific binary operators.
979//
980
981template <typename LHS_t, typename RHS_t, unsigned Opcode,
982 bool Commutable = false>
983struct BinaryOp_match {
984 LHS_t L;
985 RHS_t R;
986
987 // The evaluation order is always stable, regardless of Commutability.
988 // The LHS is always matched first.
989 BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
990
991 template <typename OpTy> bool match(OpTy *V) {
992 if (V->getValueID() == Value::InstructionVal + Opcode) {
993 auto *I = cast<BinaryOperator>(V);
994 return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
995 (Commutable && L.match(I->getOperand(1)) &&
996 R.match(I->getOperand(0)));
997 }
998 if (auto *CE = dyn_cast<ConstantExpr>(V))
999 return CE->getOpcode() == Opcode &&
1000 ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) ||
1001 (Commutable && L.match(CE->getOperand(1)) &&
1002 R.match(CE->getOperand(0))));
1003 return false;
1004 }
1005};
1006
1007template <typename LHS, typename RHS>
1008inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L,
1009 const RHS &R) {
1010 return BinaryOp_match<LHS, RHS, Instruction::Add>(L, R);
1011}
1012
1013template <typename LHS, typename RHS>
1014inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L,
1015 const RHS &R) {
1016 return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R);
1017}
1018
1019template <typename LHS, typename RHS>
1020inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L,
1021 const RHS &R) {
1022 return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R);
1023}
1024
1025template <typename LHS, typename RHS>
1026inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L,
1027 const RHS &R) {
1028 return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R);
1029}
1030
1031template <typename Op_t> struct FNeg_match {
1032 Op_t X;
1033
1034 FNeg_match(const Op_t &Op) : X(Op) {}
1035 template <typename OpTy> bool match(OpTy *V) {
1036 auto *FPMO = dyn_cast<FPMathOperator>(V);
1037 if (!FPMO) return false;
1038
1039 if (FPMO->getOpcode() == Instruction::FNeg)
1040 return X.match(FPMO->getOperand(0));
1041
1042 if (FPMO->getOpcode() == Instruction::FSub) {
1043 if (FPMO->hasNoSignedZeros()) {
1044 // With 'nsz', any zero goes.
1045 if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0)))
1046 return false;
1047 } else {
1048 // Without 'nsz', we need fsub -0.0, X exactly.
1049 if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0)))
1050 return false;
1051 }
1052
1053 return X.match(FPMO->getOperand(1));
1054 }
1055
1056 return false;
1057 }
1058};
1059
1060/// Match 'fneg X' as 'fsub -0.0, X'.
1061template <typename OpTy>
1062inline FNeg_match<OpTy>
1063m_FNeg(const OpTy &X) {
1064 return FNeg_match<OpTy>(X);
1065}
1066
1067/// Match 'fneg X' as 'fsub +-0.0, X'.
1068template <typename RHS>
1069inline BinaryOp_match<cstfp_pred_ty<is_any_zero_fp>, RHS, Instruction::FSub>
1070m_FNegNSZ(const RHS &X) {
1071 return m_FSub(m_AnyZeroFP(), X);
1072}
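// A sketch; V is a hypothetical name. Per the matcher above, m_FNeg also
// accepts the "fsub -0.0, X" idiom:
//
//   Value *X;
//   if (match(V, m_FNeg(m_Value(X))))
//     ;  // V computes -X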
1073
1074template <typename LHS, typename RHS>
1075inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L,
1076 const RHS &R) {
1077 return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R);
1078}
1079
1080template <typename LHS, typename RHS>
1081inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L,
1082 const RHS &R) {
1083 return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R);
1084}
1085
1086template <typename LHS, typename RHS>
1087inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L,
1088 const RHS &R) {
1089 return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R);
1090}
1091
1092template <typename LHS, typename RHS>
1093inline BinaryOp_match<LHS, RHS, Instruction::SDiv> m_SDiv(const LHS &L,
1094 const RHS &R) {
1095 return BinaryOp_match<LHS, RHS, Instruction::SDiv>(L, R);
1096}
1097
1098template <typename LHS, typename RHS>
1099inline BinaryOp_match<LHS, RHS, Instruction::FDiv> m_FDiv(const LHS &L,
1100 const RHS &R) {
1101 return BinaryOp_match<LHS, RHS, Instruction::FDiv>(L, R);
1102}
1103
1104template <typename LHS, typename RHS>
1105inline BinaryOp_match<LHS, RHS, Instruction::URem> m_URem(const LHS &L,
1106 const RHS &R) {
1107 return BinaryOp_match<LHS, RHS, Instruction::URem>(L, R);
1108}
1109
1110template <typename LHS, typename RHS>
1111inline BinaryOp_match<LHS, RHS, Instruction::SRem> m_SRem(const LHS &L,
1112 const RHS &R) {
1113 return BinaryOp_match<LHS, RHS, Instruction::SRem>(L, R);
1114}
1115
1116template <typename LHS, typename RHS>
1117inline BinaryOp_match<LHS, RHS, Instruction::FRem> m_FRem(const LHS &L,
1118 const RHS &R) {
1119 return BinaryOp_match<LHS, RHS, Instruction::FRem>(L, R);
1120}
1121
1122template <typename LHS, typename RHS>
1123inline BinaryOp_match<LHS, RHS, Instruction::And> m_And(const LHS &L,
1124 const RHS &R) {
1125 return BinaryOp_match<LHS, RHS, Instruction::And>(L, R);
1126}
1127
1128template <typename LHS, typename RHS>
1129inline BinaryOp_match<LHS, RHS, Instruction::Or> m_Or(const LHS &L,
1130 const RHS &R) {
1131 return BinaryOp_match<LHS, RHS, Instruction::Or>(L, R);
1132}
1133
1134template <typename LHS, typename RHS>
1135inline BinaryOp_match<LHS, RHS, Instruction::Xor> m_Xor(const LHS &L,
1136 const RHS &R) {
1137 return BinaryOp_match<LHS, RHS, Instruction::Xor>(L, R);
1138}
1139
1140template <typename LHS, typename RHS>
1141inline BinaryOp_match<LHS, RHS, Instruction::Shl> m_Shl(const LHS &L,
1142 const RHS &R) {
1143 return BinaryOp_match<LHS, RHS, Instruction::Shl>(L, R);
1144}
1145
1146template <typename LHS, typename RHS>
1147inline BinaryOp_match<LHS, RHS, Instruction::LShr> m_LShr(const LHS &L,
1148 const RHS &R) {
1149 return BinaryOp_match<LHS, RHS, Instruction::LShr>(L, R);
1150}
1151
1152template <typename LHS, typename RHS>
1153inline BinaryOp_match<LHS, RHS, Instruction::AShr> m_AShr(const LHS &L,
1154 const RHS &R) {
1155 return BinaryOp_match<LHS, RHS, Instruction::AShr>(L, R);
1156}
1157
1158template <typename LHS_t, typename RHS_t, unsigned Opcode,
1159 unsigned WrapFlags = 0>
1160struct OverflowingBinaryOp_match {
1161 LHS_t L;
1162 RHS_t R;
1163
1164 OverflowingBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS)
1165 : L(LHS), R(RHS) {}
1166
1167 template <typename OpTy> bool match(OpTy *V) {
1168 if (auto *Op = dyn_cast<OverflowingBinaryOperator>(V)) {
1169 if (Op->getOpcode() != Opcode)
1170 return false;
1171 if ((WrapFlags & OverflowingBinaryOperator::NoUnsignedWrap) &&
1172 !Op->hasNoUnsignedWrap())
1173 return false;
1174 if ((WrapFlags & OverflowingBinaryOperator::NoSignedWrap) &&
1175 !Op->hasNoSignedWrap())
1176 return false;
1177 return L.match(Op->getOperand(0)) && R.match(Op->getOperand(1));
1178 }
1179 return false;
1180 }
1181};
1182
1183template <typename LHS, typename RHS>
1184inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1185 OverflowingBinaryOperator::NoSignedWrap>
1186m_NSWAdd(const LHS &L, const RHS &R) {
1187 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1188 OverflowingBinaryOperator::NoSignedWrap>(
1189 L, R);
1190}
1191template <typename LHS, typename RHS>
1192inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1193 OverflowingBinaryOperator::NoSignedWrap>
1194m_NSWSub(const LHS &L, const RHS &R) {
1195 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1196 OverflowingBinaryOperator::NoSignedWrap>(
1197 L, R);
1198}
1199template <typename LHS, typename RHS>
1200inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1201 OverflowingBinaryOperator::NoSignedWrap>
1202m_NSWMul(const LHS &L, const RHS &R) {
1203 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1204 OverflowingBinaryOperator::NoSignedWrap>(
1205 L, R);
1206}
1207template <typename LHS, typename RHS>
1208inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1209 OverflowingBinaryOperator::NoSignedWrap>
1210m_NSWShl(const LHS &L, const RHS &R) {
1211 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1212 OverflowingBinaryOperator::NoSignedWrap>(
1213 L, R);
1214}
1215
1216template <typename LHS, typename RHS>
1217inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1218 OverflowingBinaryOperator::NoUnsignedWrap>
1219m_NUWAdd(const LHS &L, const RHS &R) {
1220 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1221 OverflowingBinaryOperator::NoUnsignedWrap>(
1222 L, R);
1223}
1224template <typename LHS, typename RHS>
1225inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1226 OverflowingBinaryOperator::NoUnsignedWrap>
1227m_NUWSub(const LHS &L, const RHS &R) {
1228 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1229 OverflowingBinaryOperator::NoUnsignedWrap>(
1230 L, R);
1231}
1232template <typename LHS, typename RHS>
1233inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1234 OverflowingBinaryOperator::NoUnsignedWrap>
1235m_NUWMul(const LHS &L, const RHS &R) {
1236 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1237 OverflowingBinaryOperator::NoUnsignedWrap>(
1238 L, R);
1239}
1240template <typename LHS, typename RHS>
1241inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1242 OverflowingBinaryOperator::NoUnsignedWrap>
1243m_NUWShl(const LHS &L, const RHS &R) {
1244 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1245 OverflowingBinaryOperator::NoUnsignedWrap>(
1246 L, R);
1247}
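// A sketch; V is a hypothetical name. The flag-qualified matchers only fire
// when the corresponding nsw/nuw flag is present on the operator:
//
//   Value *X, *Y;
//   if (match(V, m_NSWAdd(m_Value(X), m_Value(Y))))
//     ;  // V is "add nsw X, Y"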
1248
1249//===----------------------------------------------------------------------===//
1250// Class that matches a group of binary opcodes.
1251//
1252template <typename LHS_t, typename RHS_t, typename Predicate>
1253struct BinOpPred_match : Predicate {
1254 LHS_t L;
1255 RHS_t R;
1256
1257 BinOpPred_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
1258
1259 template <typename OpTy> bool match(OpTy *V) {
1260 if (auto *I = dyn_cast<Instruction>(V))
1261 return this->isOpType(I->getOpcode()) && L.match(I->getOperand(0)) &&
1262 R.match(I->getOperand(1));
1263 if (auto *CE = dyn_cast<ConstantExpr>(V))
1264 return this->isOpType(CE->getOpcode()) && L.match(CE->getOperand(0)) &&
1265 R.match(CE->getOperand(1));
1266 return false;
1267 }
1268};
1269
1270struct is_shift_op {
1271 bool isOpType(unsigned Opcode) { return Instruction::isShift(Opcode); }
1272};
1273
1274struct is_right_shift_op {
1275 bool isOpType(unsigned Opcode) {
1276 return Opcode == Instruction::LShr || Opcode == Instruction::AShr;
1277 }
1278};
1279
1280struct is_logical_shift_op {
1281 bool isOpType(unsigned Opcode) {
1282 return Opcode == Instruction::LShr || Opcode == Instruction::Shl;
1283 }
1284};
1285
1286struct is_bitwiselogic_op {
1287 bool isOpType(unsigned Opcode) {
1288 return Instruction::isBitwiseLogicOp(Opcode);
1289 }
1290};
1291
1292struct is_idiv_op {
1293 bool isOpType(unsigned Opcode) {
1294 return Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
1295 }
1296};
1297
1298struct is_irem_op {
1299 bool isOpType(unsigned Opcode) {
1300 return Opcode == Instruction::SRem || Opcode == Instruction::URem;
1301 }
1302};
1303
1304/// Matches shift operations.
1305template <typename LHS, typename RHS>
1306inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L,
1307 const RHS &R) {
1308 return BinOpPred_match<LHS, RHS, is_shift_op>(L, R);
1309}
1310
1311/// Matches right shift operations (lshr or ashr).
1312template <typename LHS, typename RHS>
1313inline BinOpPred_match<LHS, RHS, is_right_shift_op> m_Shr(const LHS &L,
1314 const RHS &R) {
1315 return BinOpPred_match<LHS, RHS, is_right_shift_op>(L, R);
1316}
1317
1318/// Matches logical shift operations.
1319template <typename LHS, typename RHS>
1320inline BinOpPred_match<LHS, RHS, is_logical_shift_op>
1321m_LogicalShift(const LHS &L, const RHS &R) {
1322 return BinOpPred_match<LHS, RHS, is_logical_shift_op>(L, R);
1323}
1324
1325/// Matches bitwise logic operations.
1326template <typename LHS, typename RHS>
1327inline BinOpPred_match<LHS, RHS, is_bitwiselogic_op>
1328m_BitwiseLogic(const LHS &L, const RHS &R) {
1329 return BinOpPred_match<LHS, RHS, is_bitwiselogic_op>(L, R);
1330}
1331
1332/// Matches integer division operations.
1333template <typename LHS, typename RHS>
1334inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L,
1335 const RHS &R) {
1336 return BinOpPred_match<LHS, RHS, is_idiv_op>(L, R);
1337}
1338
1339/// Matches integer remainder operations.
1340template <typename LHS, typename RHS>
1341inline BinOpPred_match<LHS, RHS, is_irem_op> m_IRem(const LHS &L,
1342 const RHS &R) {
1343 return BinOpPred_match<LHS, RHS, is_irem_op>(L, R);
1344}
1345
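Usage sketch (illustrative helper, not from this header): the opcode-group matchers combine with value and constant binders.
  // True iff V is shl/lshr/ashr of X by a constant amount (the matcher also
  // accepts the equivalent constant expressions).
  static bool matchShiftByConst(llvm::Value *V, llvm::Value *&X,
                                llvm::ConstantInt *&Amt) {
    using namespace llvm::PatternMatch;
    return match(V, m_Shift(m_Value(X), m_ConstantInt(Amt)));
  }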
1346//===----------------------------------------------------------------------===//
1347// Class that matches exact binary ops.
1348//
1349template <typename SubPattern_t> struct Exact_match {
1350 SubPattern_t SubPattern;
1351
1352 Exact_match(const SubPattern_t &SP) : SubPattern(SP) {}
1353
1354 template <typename OpTy> bool match(OpTy *V) {
1355 if (auto *PEO = dyn_cast<PossiblyExactOperator>(V))
1356 return PEO->isExact() && SubPattern.match(V);
1357 return false;
1358 }
1359};
1360
1361template <typename T> inline Exact_match<T> m_Exact(const T &SubPattern) {
1362  return Exact_match<T>(SubPattern);
1363}
1364
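Usage sketch (illustrative, not from this header): m_Exact wraps another pattern and additionally requires the 'exact' flag on the matched operator.
  // True iff V is 'sdiv exact X, Y'.
  static bool matchExactSDiv(llvm::Value *V, llvm::Value *&X, llvm::Value *&Y) {
    using namespace llvm::PatternMatch;
    return match(V, m_Exact(m_SDiv(m_Value(X), m_Value(Y))));
  }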
1365//===----------------------------------------------------------------------===//
1366// Matchers for CmpInst classes
1367//
1368
1369template <typename LHS_t, typename RHS_t, typename Class, typename PredicateTy,
1370 bool Commutable = false>
1371struct CmpClass_match {
1372 PredicateTy &Predicate;
1373 LHS_t L;
1374 RHS_t R;
1375
1376 // The evaluation order is always stable, regardless of Commutability.
1377 // The LHS is always matched first.
1378 CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS)
1379 : Predicate(Pred), L(LHS), R(RHS) {}
1380
1381 template <typename OpTy> bool match(OpTy *V) {
1382 if (auto *I = dyn_cast<Class>(V)) {
1383 if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) {
1384 Predicate = I->getPredicate();
1385 return true;
1386 } else if (Commutable && L.match(I->getOperand(1)) &&
1387 R.match(I->getOperand(0))) {
1388 Predicate = I->getSwappedPredicate();
1389 return true;
1390 }
1391 }
1392 return false;
1393 }
1394};
1395
1396template <typename LHS, typename RHS>
1397inline CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>
1398m_Cmp(CmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1399 return CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>(Pred, L, R);
1400}
1401
1402template <typename LHS, typename RHS>
1403inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>
1404m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1405 return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>(Pred, L, R);
1406}
1407
1408template <typename LHS, typename RHS>
1409inline CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>
1410m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1411 return CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>(Pred, L, R);
1412}
1413
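Usage sketch (illustrative, not from this header): the predicate reference is written through on a successful match.
  // True iff V is any integer compare; binds the predicate and both operands.
  static bool matchAnyICmp(llvm::Value *V, llvm::ICmpInst::Predicate &Pred,
                           llvm::Value *&A, llvm::Value *&B) {
    using namespace llvm::PatternMatch;
    return match(V, m_ICmp(Pred, m_Value(A), m_Value(B)));
  }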
1414//===----------------------------------------------------------------------===//
1415// Matchers for instructions with a given opcode and number of operands.
1416//
1417
1418/// Matches instructions with Opcode and one operand.
1419template <typename T0, unsigned Opcode> struct OneOps_match {
1420 T0 Op1;
1421
1422 OneOps_match(const T0 &Op1) : Op1(Op1) {}
1423
1424 template <typename OpTy> bool match(OpTy *V) {
1425 if (V->getValueID() == Value::InstructionVal + Opcode) {
1426 auto *I = cast<Instruction>(V);
1427 return Op1.match(I->getOperand(0));
1428 }
1429 return false;
1430 }
1431};
1432
1433/// Matches instructions with Opcode and two operands.
1434template <typename T0, typename T1, unsigned Opcode> struct TwoOps_match {
1435 T0 Op1;
1436 T1 Op2;
1437
1438 TwoOps_match(const T0 &Op1, const T1 &Op2) : Op1(Op1), Op2(Op2) {}
1439
1440 template <typename OpTy> bool match(OpTy *V) {
1441 if (V->getValueID() == Value::InstructionVal + Opcode) {
1442 auto *I = cast<Instruction>(V);
1443 return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1));
1444 }
1445 return false;
1446 }
1447};
1448
1449/// Matches instructions with Opcode and three operands.
1450template <typename T0, typename T1, typename T2, unsigned Opcode>
1451struct ThreeOps_match {
1452 T0 Op1;
1453 T1 Op2;
1454 T2 Op3;
1455
1456 ThreeOps_match(const T0 &Op1, const T1 &Op2, const T2 &Op3)
1457 : Op1(Op1), Op2(Op2), Op3(Op3) {}
1458
1459 template <typename OpTy> bool match(OpTy *V) {
1460 if (V->getValueID() == Value::InstructionVal + Opcode) {
1461 auto *I = cast<Instruction>(V);
1462 return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) &&
1463 Op3.match(I->getOperand(2));
1464 }
1465 return false;
1466 }
1467};
1468
1469/// Matches SelectInst.
1470template <typename Cond, typename LHS, typename RHS>
1471inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select>
1472m_Select(const Cond &C, const LHS &L, const RHS &R) {
1473 return ThreeOps_match<Cond, LHS, RHS, Instruction::Select>(C, L, R);
1474}
1475
1476/// This matches a select of two constants, e.g.:
1477/// m_SelectCst<-1, 0>(m_Value(V))
1478template <int64_t L, int64_t R, typename Cond>
1479inline ThreeOps_match<Cond, constantint_match<L>, constantint_match<R>,
1480 Instruction::Select>
1481m_SelectCst(const Cond &C) {
1482 return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>());
1483}
1484
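Usage sketch (illustrative, not from this header): decomposing a select into its three operands.
  // True iff V is 'select C, X, Y'; binds all three operands.
  static bool matchSelectParts(llvm::Value *V, llvm::Value *&C,
                               llvm::Value *&X, llvm::Value *&Y) {
    using namespace llvm::PatternMatch;
    return match(V, m_Select(m_Value(C), m_Value(X), m_Value(Y)));
  }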
1485/// Matches FreezeInst.
1486template <typename OpTy>
1487inline OneOps_match<OpTy, Instruction::Freeze> m_Freeze(const OpTy &Op) {
1488 return OneOps_match<OpTy, Instruction::Freeze>(Op);
1489}
1490
1491/// Matches InsertElementInst.
1492template <typename Val_t, typename Elt_t, typename Idx_t>
1493inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>
1494m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) {
1495 return ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>(
1496 Val, Elt, Idx);
1497}
1498
1499/// Matches ExtractElementInst.
1500template <typename Val_t, typename Idx_t>
1501inline TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>
1502m_ExtractElt(const Val_t &Val, const Idx_t &Idx) {
1503 return TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>(Val, Idx);
1504}
1505
1506/// Matches shuffle.
1507template <typename T0, typename T1, typename T2> struct Shuffle_match {
1508 T0 Op1;
1509 T1 Op2;
1510 T2 Mask;
1511
1512 Shuffle_match(const T0 &Op1, const T1 &Op2, const T2 &Mask)
1513 : Op1(Op1), Op2(Op2), Mask(Mask) {}
1514
1515 template <typename OpTy> bool match(OpTy *V) {
1516 if (auto *I = dyn_cast<ShuffleVectorInst>(V)) {
1517 return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) &&
1518 Mask.match(I->getShuffleMask());
1519 }
1520 return false;
1521 }
1522};
1523
1524struct m_Mask {
1525 ArrayRef<int> &MaskRef;
1526 m_Mask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {}
1527 bool match(ArrayRef<int> Mask) {
1528 MaskRef = Mask;
1529 return true;
1530 }
1531};
1532
1533struct m_ZeroMask {
1534 bool match(ArrayRef<int> Mask) {
1535 return all_of(Mask, [](int Elem) { return Elem == 0 || Elem == -1; });
1536 }
1537};
1538
1539struct m_SpecificMask {
1540 ArrayRef<int> &MaskRef;
1541 m_SpecificMask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {}
1542 bool match(ArrayRef<int> Mask) { return MaskRef == Mask; }
1543};
1544
1545struct m_SplatOrUndefMask {
1546 int &SplatIndex;
1547 m_SplatOrUndefMask(int &SplatIndex) : SplatIndex(SplatIndex) {}
1548 bool match(ArrayRef<int> Mask) {
1549 auto First = find_if(Mask, [](int Elem) { return Elem != -1; });
1550 if (First == Mask.end())
1551 return false;
1552 SplatIndex = *First;
1553 return all_of(Mask,
1554 [First](int Elem) { return Elem == *First || Elem == -1; });
1555 }
1556};
1557
1558/// Matches ShuffleVectorInst independently of mask value.
1559template <typename V1_t, typename V2_t>
1560inline TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector>
1561m_Shuffle(const V1_t &v1, const V2_t &v2) {
1562 return TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector>(v1, v2);
1563}
1564
1565template <typename V1_t, typename V2_t, typename Mask_t>
1566inline Shuffle_match<V1_t, V2_t, Mask_t>
1567m_Shuffle(const V1_t &v1, const V2_t &v2, const Mask_t &mask) {
1568 return Shuffle_match<V1_t, V2_t, Mask_t>(v1, v2, mask);
1569}
1570
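Usage sketch (illustrative, not from this header): combining the operand matchers with one of the mask matchers above.
  // True iff V is a shuffle that broadcasts lane 0 of its first operand
  // (every mask element is 0 or undef).
  static bool matchLaneZeroSplat(llvm::Value *V, llvm::Value *&Src) {
    using namespace llvm::PatternMatch;
    return match(V, m_Shuffle(m_Value(Src), m_Value(), m_ZeroMask()));
  }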
1571/// Matches LoadInst.
1572template <typename OpTy>
1573inline OneOps_match<OpTy, Instruction::Load> m_Load(const OpTy &Op) {
1574 return OneOps_match<OpTy, Instruction::Load>(Op);
1575}
1576
1577/// Matches StoreInst.
1578template <typename ValueOpTy, typename PointerOpTy>
1579inline TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>
1580m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) {
1581 return TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>(ValueOp,
1582 PointerOp);
1583}
1584
1585//===----------------------------------------------------------------------===//
1586// Matchers for CastInst classes
1587//
1588
1589template <typename Op_t, unsigned Opcode> struct CastClass_match {
1590 Op_t Op;
1591
1592 CastClass_match(const Op_t &OpMatch) : Op(OpMatch) {}
1593
1594 template <typename OpTy> bool match(OpTy *V) {
1595 if (auto *O = dyn_cast<Operator>(V))
1596 return O->getOpcode() == Opcode && Op.match(O->getOperand(0));
1597 return false;
1598 }
1599};
1600
1601/// Matches BitCast.
1602template <typename OpTy>
1603inline CastClass_match<OpTy, Instruction::BitCast> m_BitCast(const OpTy &Op) {
1604 return CastClass_match<OpTy, Instruction::BitCast>(Op);
1605}
1606
1607/// Matches PtrToInt.
1608template <typename OpTy>
1609inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) {
1610 return CastClass_match<OpTy, Instruction::PtrToInt>(Op);
1611}
1612
1613/// Matches IntToPtr.
1614template <typename OpTy>
1615inline CastClass_match<OpTy, Instruction::IntToPtr> m_IntToPtr(const OpTy &Op) {
1616 return CastClass_match<OpTy, Instruction::IntToPtr>(Op);
1617}
1618
1619/// Matches Trunc.
1620template <typename OpTy>
1621inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) {
1622 return CastClass_match<OpTy, Instruction::Trunc>(Op);
1623}
1624
1625template <typename OpTy>
1626inline match_combine_or<CastClass_match<OpTy, Instruction::Trunc>, OpTy>
1627m_TruncOrSelf(const OpTy &Op) {
1628 return m_CombineOr(m_Trunc(Op), Op);
1629}
1630
1631/// Matches SExt.
1632template <typename OpTy>
1633inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) {
1634 return CastClass_match<OpTy, Instruction::SExt>(Op);
1635}
1636
1637/// Matches ZExt.
1638template <typename OpTy>
1639inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) {
1640 return CastClass_match<OpTy, Instruction::ZExt>(Op);
1641}
1642
1643template <typename OpTy>
1644inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy>
1645m_ZExtOrSelf(const OpTy &Op) {
1646 return m_CombineOr(m_ZExt(Op), Op);
1647}
1648
1649template <typename OpTy>
1650inline match_combine_or<CastClass_match<OpTy, Instruction::SExt>, OpTy>
1651m_SExtOrSelf(const OpTy &Op) {
1652 return m_CombineOr(m_SExt(Op), Op);
1653}
1654
1655template <typename OpTy>
1656inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
1657 CastClass_match<OpTy, Instruction::SExt>>
1658m_ZExtOrSExt(const OpTy &Op) {
1659 return m_CombineOr(m_ZExt(Op), m_SExt(Op));
1660}
1661
1662template <typename OpTy>
1663inline match_combine_or<
1664 match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
1665 CastClass_match<OpTy, Instruction::SExt>>,
1666 OpTy>
1667m_ZExtOrSExtOrSelf(const OpTy &Op) {
1668 return m_CombineOr(m_ZExtOrSExt(Op), Op);
1669}
1670
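Usage sketch (illustrative, not from this header): the combined ext matchers avoid writing two separate patterns.
  // True iff V is a zext or sext; binds Src to the narrower source value.
  static bool matchIntExt(llvm::Value *V, llvm::Value *&Src) {
    using namespace llvm::PatternMatch;
    return match(V, m_ZExtOrSExt(m_Value(Src)));
  }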
1671template <typename OpTy>
1672inline CastClass_match<OpTy, Instruction::UIToFP> m_UIToFP(const OpTy &Op) {
1673 return CastClass_match<OpTy, Instruction::UIToFP>(Op);
1674}
1675
1676template <typename OpTy>
1677inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) {
1678 return CastClass_match<OpTy, Instruction::SIToFP>(Op);
1679}
1680
1681template <typename OpTy>
1682inline CastClass_match<OpTy, Instruction::FPToUI> m_FPToUI(const OpTy &Op) {
1683 return CastClass_match<OpTy, Instruction::FPToUI>(Op);
1684}
1685
1686template <typename OpTy>
1687inline CastClass_match<OpTy, Instruction::FPToSI> m_FPToSI(const OpTy &Op) {
1688 return CastClass_match<OpTy, Instruction::FPToSI>(Op);
1689}
1690
1691template <typename OpTy>
1692inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) {
1693 return CastClass_match<OpTy, Instruction::FPTrunc>(Op);
1694}
1695
1696template <typename OpTy>
1697inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) {
1698 return CastClass_match<OpTy, Instruction::FPExt>(Op);
1699}
1700
1701//===----------------------------------------------------------------------===//
1702// Matchers for control flow.
1703//
1704
1705struct br_match {
1706 BasicBlock *&Succ;
1707
1708 br_match(BasicBlock *&Succ) : Succ(Succ) {}
1709
1710 template <typename OpTy> bool match(OpTy *V) {
1711 if (auto *BI = dyn_cast<BranchInst>(V))
1712 if (BI->isUnconditional()) {
1713 Succ = BI->getSuccessor(0);
1714 return true;
1715 }
1716 return false;
1717 }
1718};
1719
1720inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); }
1721
1722template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t>
1723struct brc_match {
1724 Cond_t Cond;
1725 TrueBlock_t T;
1726 FalseBlock_t F;
1727
1728 brc_match(const Cond_t &C, const TrueBlock_t &t, const FalseBlock_t &f)
1729 : Cond(C), T(t), F(f) {}
1730
1731 template <typename OpTy> bool match(OpTy *V) {
1732 if (auto *BI = dyn_cast<BranchInst>(V))
1733 if (BI->isConditional() && Cond.match(BI->getCondition()))
1734 return T.match(BI->getSuccessor(0)) && F.match(BI->getSuccessor(1));
1735 return false;
1736 }
1737};
1738
1739template <typename Cond_t>
1740inline brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>
1741m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) {
1742 return brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>(
1743 C, m_BasicBlock(T), m_BasicBlock(F));
1744}
1745
1746template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t>
1747inline brc_match<Cond_t, TrueBlock_t, FalseBlock_t>
1748m_Br(const Cond_t &C, const TrueBlock_t &T, const FalseBlock_t &F) {
1749 return brc_match<Cond_t, TrueBlock_t, FalseBlock_t>(C, T, F);
1750}
1751
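Usage sketch (illustrative, not from this header): nesting a compare matcher inside the branch matcher.
  // Decomposes 'br (icmp Pred A, B), TBB, FBB' into its pieces.
  static bool matchCmpBranch(llvm::Value *V, llvm::ICmpInst::Predicate &Pred,
                             llvm::Value *&A, llvm::Value *&B,
                             llvm::BasicBlock *&TBB, llvm::BasicBlock *&FBB) {
    using namespace llvm::PatternMatch;
    return match(V, m_Br(m_ICmp(Pred, m_Value(A), m_Value(B)), TBB, FBB));
  }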
1752//===----------------------------------------------------------------------===//
1753// Matchers for max/min idioms, e.g., "select (sgt x, y), x, y" -> smax(x, y).
1754//
1755
1756template <typename CmpInst_t, typename LHS_t, typename RHS_t, typename Pred_t,
1757 bool Commutable = false>
1758struct MaxMin_match {
1759 using PredType = Pred_t;
1760 LHS_t L;
1761 RHS_t R;
1762
1763 // The evaluation order is always stable, regardless of Commutability.
1764 // The LHS is always matched first.
1765 MaxMin_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
1766
1767 template <typename OpTy> bool match(OpTy *V) {
1768 if (auto *II = dyn_cast<IntrinsicInst>(V)) {
1769 Intrinsic::ID IID = II->getIntrinsicID();
1770 if ((IID == Intrinsic::smax && Pred_t::match(ICmpInst::ICMP_SGT)) ||
1771 (IID == Intrinsic::smin && Pred_t::match(ICmpInst::ICMP_SLT)) ||
1772 (IID == Intrinsic::umax && Pred_t::match(ICmpInst::ICMP_UGT)) ||
1773 (IID == Intrinsic::umin && Pred_t::match(ICmpInst::ICMP_ULT))) {
1774 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1775 return (L.match(LHS) && R.match(RHS)) ||
1776 (Commutable && L.match(RHS) && R.match(LHS));
1777 }
1778 }
1779 // Look for "(x pred y) ? x : y" or "(x pred y) ? y : x".
1780 auto *SI = dyn_cast<SelectInst>(V);
1781 if (!SI)
1782 return false;
1783 auto *Cmp = dyn_cast<CmpInst_t>(SI->getCondition());
1784 if (!Cmp)
1785 return false;
1786 // At this point we have a select conditioned on a comparison. Check that
1787 // it is the values returned by the select that are being compared.
1788 auto *TrueVal = SI->getTrueValue();
1789 auto *FalseVal = SI->getFalseValue();
1790 auto *LHS = Cmp->getOperand(0);
1791 auto *RHS = Cmp->getOperand(1);
1792 if ((TrueVal != LHS || FalseVal != RHS) &&
1793 (TrueVal != RHS || FalseVal != LHS))
1794 return false;
1795 typename CmpInst_t::Predicate Pred =
1796 LHS == TrueVal ? Cmp->getPredicate() : Cmp->getInversePredicate();
1797 // Does "(x pred y) ? x : y" represent the desired max/min operation?
1798 if (!Pred_t::match(Pred))
1799 return false;
1800 // It does! Bind the operands.
1801 return (L.match(LHS) && R.match(RHS)) ||
1802 (Commutable && L.match(RHS) && R.match(LHS));
1803 }
1804};
1805
1806/// Helper class for identifying signed max predicates.
1807struct smax_pred_ty {
1808 static bool match(ICmpInst::Predicate Pred) {
1809 return Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE;
1810 }
1811};
1812
1813/// Helper class for identifying signed min predicates.
1814struct smin_pred_ty {
1815 static bool match(ICmpInst::Predicate Pred) {
1816 return Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE;
1817 }
1818};
1819
1820/// Helper class for identifying unsigned max predicates.
1821struct umax_pred_ty {
1822 static bool match(ICmpInst::Predicate Pred) {
1823 return Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE;
1824 }
1825};
1826
1827/// Helper class for identifying unsigned min predicates.
1828struct umin_pred_ty {
1829 static bool match(ICmpInst::Predicate Pred) {
1830 return Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE;
1831 }
1832};
1833
1834/// Helper class for identifying ordered max predicates.
1835struct ofmax_pred_ty {
1836 static bool match(FCmpInst::Predicate Pred) {
1837 return Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_OGE;
1838 }
1839};
1840
1841/// Helper class for identifying ordered min predicates.
1842struct ofmin_pred_ty {
1843 static bool match(FCmpInst::Predicate Pred) {
1844 return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE;
1845 }
1846};
1847
1848/// Helper class for identifying unordered max predicates.
1849struct ufmax_pred_ty {
1850 static bool match(FCmpInst::Predicate Pred) {
1851 return Pred == CmpInst::FCMP_UGT || Pred == CmpInst::FCMP_UGE;
1852 }
1853};
1854
1855/// Helper class for identifying unordered min predicates.
1856struct ufmin_pred_ty {
1857 static bool match(FCmpInst::Predicate Pred) {
1858 return Pred == CmpInst::FCMP_ULT || Pred == CmpInst::FCMP_ULE;
1859 }
1860};
1861
1862template <typename LHS, typename RHS>
1863inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty> m_SMax(const LHS &L,
1864 const RHS &R) {
1865 return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>(L, R);
1866}
1867
1868template <typename LHS, typename RHS>
1869inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty> m_SMin(const LHS &L,
1870 const RHS &R) {
1871 return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>(L, R);
1872}
1873
1874template <typename LHS, typename RHS>
1875inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty> m_UMax(const LHS &L,
1876 const RHS &R) {
1877 return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>(L, R);
1878}
1879
1880template <typename LHS, typename RHS>
1881inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty> m_UMin(const LHS &L,
1882 const RHS &R) {
1883 return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R);
1884}
1885
1886template <typename LHS, typename RHS>
1887inline match_combine_or<
1888 match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>,
1889 MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>>,
1890 match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>,
1891 MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>>>
1892m_MaxOrMin(const LHS &L, const RHS &R) {
1893 return m_CombineOr(m_CombineOr(m_SMax(L, R), m_SMin(L, R)),
1894 m_CombineOr(m_UMax(L, R), m_UMin(L, R)));
1895}
1896
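Usage sketch (illustrative, not from this header): MaxMin_match recognizes both the intrinsic and the select-of-compare idiom.
  // True iff V is a signed max: the smax intrinsic, or the canonical
  // 'select (sgt/sge A, B), A, B' idiom.
  static bool matchSignedMax(llvm::Value *V, llvm::Value *&A, llvm::Value *&B) {
    using namespace llvm::PatternMatch;
    return match(V, m_SMax(m_Value(A), m_Value(B)));
  }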
1897/// Match an 'ordered' floating point maximum function.
1898/// Floating point has one special value, 'NaN', so there is no total
1899/// order. However, if we can ignore NaN (for example, because of a
1900/// 'no-nans-fp-math' flag), a combination of an fcmp and a select has
1901/// 'maximum' semantics. In the presence of NaN we have to preserve the
1902/// original select(fcmp(ogt/oge, L, R), L, R) semantics matched by this predicate.
1903///
1904/// m_OrdFMax(L, R) = max(L, R)  iff L and R are not NaN
1905/// m_OrdFMax(L, R) = R          iff L or R is NaN
1906template <typename LHS, typename RHS>
1907inline MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty> m_OrdFMax(const LHS &L,
1908 const RHS &R) {
1909 return MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty>(L, R);
1910}
1911
1912/// Match an 'ordered' floating point minimum function.
1913/// Floating point has one special value, 'NaN', so there is no total
1914/// order. However, if we can ignore NaN (for example, because of a
1915/// 'no-nans-fp-math' flag), a combination of an fcmp and a select has
1916/// 'minimum' semantics. In the presence of NaN we have to preserve the
1917/// original select(fcmp(olt/ole, L, R), L, R) semantics matched by this predicate.
1918///
1919/// m_OrdFMin(L, R) = min(L, R)  iff L and R are not NaN
1920/// m_OrdFMin(L, R) = R          iff L or R is NaN
1921template <typename LHS, typename RHS>
1922inline MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty> m_OrdFMin(const LHS &L,
1923 const RHS &R) {
1924 return MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty>(L, R);
1925}
1926
1927/// Match an 'unordered' floating point maximum function.
1928/// Floating point has one special value, 'NaN', so there is no total
1929/// order. However, if we can ignore NaN (for example, because of a
1930/// 'no-nans-fp-math' flag), a combination of an fcmp and a select has
1931/// 'maximum' semantics. In the presence of NaN we have to preserve the
1932/// original select(fcmp(ugt/uge, L, R), L, R) semantics matched by this predicate.
1933///
1934/// m_UnordFMax(L, R) = max(L, R)  iff L and R are not NaN
1935/// m_UnordFMax(L, R) = L          iff L or R is NaN
1936template <typename LHS, typename RHS>
1937inline MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>
1938m_UnordFMax(const LHS &L, const RHS &R) {
1939 return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
1940}
1941
1942/// Match an 'unordered' floating point minimum function.
1943/// Floating point has one special value, 'NaN', so there is no total
1944/// order. However, if we can ignore NaN (for example, because of a
1945/// 'no-nans-fp-math' flag), a combination of an fcmp and a select has
1946/// 'minimum' semantics. In the presence of NaN we have to preserve the
1947/// original select(fcmp(ult/ule, L, R), L, R) semantics matched by this predicate.
1948///
1949/// m_UnordFMin(L, R) = min(L, R)  iff L and R are not NaN
1950/// m_UnordFMin(L, R) = L          iff L or R is NaN
1951template <typename LHS, typename RHS>
1952inline MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>
1953m_UnordFMin(const LHS &L, const RHS &R) {
1954 return MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>(L, R);
1955}
1956
1957//===----------------------------------------------------------------------===//
1958// Matchers for overflow check patterns: e.g. (a + b) u< a, (a ^ -1) u< b
1959// Note that S might be matched to instructions other than AddInst.
1960//
1961
1962template <typename LHS_t, typename RHS_t, typename Sum_t>
1963struct UAddWithOverflow_match {
1964 LHS_t L;
1965 RHS_t R;
1966 Sum_t S;
1967
1968 UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S)
1969 : L(L), R(R), S(S) {}
1970
1971 template <typename OpTy> bool match(OpTy *V) {
1972 Value *ICmpLHS, *ICmpRHS;
1973 ICmpInst::Predicate Pred;
1974 if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V))
1975 return false;
1976
1977 Value *AddLHS, *AddRHS;
1978 auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS));
1979
1980 // (a + b) u< a, (a + b) u< b
1981 if (Pred == ICmpInst::ICMP_ULT)
1982 if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS))
1983 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
1984
1985 // a >u (a + b), b >u (a + b)
1986 if (Pred == ICmpInst::ICMP_UGT)
1987 if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
1988 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
1989
1990 Value *Op1;
1991 auto XorExpr = m_OneUse(m_Xor(m_Value(Op1), m_AllOnes()));
1992 // (a ^ -1) <u b
1993 if (Pred == ICmpInst::ICMP_ULT) {
1994 if (XorExpr.match(ICmpLHS))
1995 return L.match(Op1) && R.match(ICmpRHS) && S.match(ICmpLHS);
1996 }
1997    // b >u (a ^ -1)
1998 if (Pred == ICmpInst::ICMP_UGT) {
1999 if (XorExpr.match(ICmpRHS))
2000 return L.match(Op1) && R.match(ICmpLHS) && S.match(ICmpRHS);
2001 }
2002
2003 // Match special-case for increment-by-1.
2004 if (Pred == ICmpInst::ICMP_EQ) {
2005 // (a + 1) == 0
2006 // (1 + a) == 0
2007 if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) &&
2008 (m_One().match(AddLHS) || m_One().match(AddRHS)))
2009 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
2010 // 0 == (a + 1)
2011 // 0 == (1 + a)
2012 if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) &&
2013 (m_One().match(AddLHS) || m_One().match(AddRHS)))
2014 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
2015 }
2016
2017 return false;
2018 }
2019};
2020
2021/// Match an icmp instruction checking for unsigned overflow on addition.
2022///
2023/// S is matched to the addition whose result is being checked for overflow, and
2024/// L and R are matched to the LHS and RHS of S.
2025template <typename LHS_t, typename RHS_t, typename Sum_t>
2026UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>
2027m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
2028 return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S);
2029}
2030
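Usage sketch (illustrative, not from this header): note that S can bind to a non-add instruction in the xor form, as the comment above warns.
  // True iff V is an icmp that checks unsigned-add overflow; binds the
  // addends and the expression whose overflow is tested.
  static bool matchUAddOverflowCheck(llvm::Value *V, llvm::Value *&A,
                                     llvm::Value *&B, llvm::Value *&Sum) {
    using namespace llvm::PatternMatch;
    return match(V, m_UAddWithOverflow(m_Value(A), m_Value(B), m_Value(Sum)));
  }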
2031template <typename Opnd_t> struct Argument_match {
2032 unsigned OpI;
2033 Opnd_t Val;
2034
2035 Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {}
2036
2037 template <typename OpTy> bool match(OpTy *V) {
2038 // FIXME: Should likely be switched to use `CallBase`.
2039 if (const auto *CI = dyn_cast<CallInst>(V))
2040 return Val.match(CI->getArgOperand(OpI));
2041 return false;
2042 }
2043};
2044
2045/// Match an argument.
2046template <unsigned OpI, typename Opnd_t>
2047inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) {
2048 return Argument_match<Opnd_t>(OpI, Op);
2049}
2050
2051/// Intrinsic matchers.
2052struct IntrinsicID_match {
2053 unsigned ID;
2054
2055 IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {}
2056
2057 template <typename OpTy> bool match(OpTy *V) {
2058 if (const auto *CI = dyn_cast<CallInst>(V))
2059 if (const auto *F = CI->getCalledFunction())
2060 return F->getIntrinsicID() == ID;
2061 return false;
2062 }
2063};
2064
2065/// Intrinsic matchers are combinations of ID matchers and argument
2066/// matchers. Higher-arity matchers are defined recursively in terms of
2067/// and-ing them with lower-arity matchers. Here are some convenient typedefs
2068/// for up to several arguments; more can be added as needed.
2069template <typename T0 = void, typename T1 = void, typename T2 = void,
2070 typename T3 = void, typename T4 = void, typename T5 = void,
2071 typename T6 = void, typename T7 = void, typename T8 = void,
2072 typename T9 = void, typename T10 = void>
2073struct m_Intrinsic_Ty;
2074template <typename T0> struct m_Intrinsic_Ty<T0> {
2075 using Ty = match_combine_and<IntrinsicID_match, Argument_match<T0>>;
2076};
2077template <typename T0, typename T1> struct m_Intrinsic_Ty<T0, T1> {
2078 using Ty =
2079 match_combine_and<typename m_Intrinsic_Ty<T0>::Ty, Argument_match<T1>>;
2080};
2081template <typename T0, typename T1, typename T2>
2082struct m_Intrinsic_Ty<T0, T1, T2> {
2083 using Ty =
2084 match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty,
2085 Argument_match<T2>>;
2086};
2087template <typename T0, typename T1, typename T2, typename T3>
2088struct m_Intrinsic_Ty<T0, T1, T2, T3> {
2089 using Ty =
2090 match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty,
2091 Argument_match<T3>>;
2092};
2093
2094template <typename T0, typename T1, typename T2, typename T3, typename T4>
2095struct m_Intrinsic_Ty<T0, T1, T2, T3, T4> {
2096 using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty,
2097 Argument_match<T4>>;
2098};
2099
2100template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
2101struct m_Intrinsic_Ty<T0, T1, T2, T3, T4, T5> {
2102 using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty,
2103 Argument_match<T5>>;
2104};
2105
2106/// Match intrinsic calls like this:
2107/// m_Intrinsic<Intrinsic::fabs>(m_Value(X))
2108template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() {
2109 return IntrinsicID_match(IntrID);
2110}
2111
2112template <Intrinsic::ID IntrID, typename T0>
2113inline typename m_Intrinsic_Ty<T0>::Ty m_Intrinsic(const T0 &Op0) {
2114 return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0));
2115}
2116
2117template <Intrinsic::ID IntrID, typename T0, typename T1>
2118inline typename m_Intrinsic_Ty<T0, T1>::Ty m_Intrinsic(const T0 &Op0,
2119 const T1 &Op1) {
2120 return m_CombineAnd(m_Intrinsic<IntrID>(Op0), m_Argument<1>(Op1));
2121}
2122
2123template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2>
2124inline typename m_Intrinsic_Ty<T0, T1, T2>::Ty
2125m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2) {
2126 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1), m_Argument<2>(Op2));
2127}
2128
2129template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
2130 typename T3>
2131inline typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty
2132m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
2133 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
2134}
2135
2136template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
2137 typename T3, typename T4>
2138inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty
2139m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3,
2140 const T4 &Op4) {
2141 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3),
2142 m_Argument<4>(Op4));
2143}
2144
2145template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
2146 typename T3, typename T4, typename T5>
2147inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4, T5>::Ty
2148m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3,
2149 const T4 &Op4, const T5 &Op5) {
2150 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3, Op4),
2151 m_Argument<5>(Op5));
2152}
2153
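Usage sketch (illustrative, not from this header): the variadic m_Intrinsic overloads AND the ID matcher with one Argument_match per operand.
  // True iff V is 'call @llvm.fshl(A, B, Sh)' (left funnel shift); the
  // m_FShl helper below is shorthand for the same pattern.
  static bool matchFunnelShl(llvm::Value *V, llvm::Value *&A, llvm::Value *&B,
                             llvm::Value *&Sh) {
    using namespace llvm::PatternMatch;
    return match(V, m_Intrinsic<llvm::Intrinsic::fshl>(m_Value(A), m_Value(B),
                                                       m_Value(Sh)));
  }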
2154// Helper intrinsic matching specializations.
2155template <typename Opnd0>
2156inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BitReverse(const Opnd0 &Op0) {
2157 return m_Intrinsic<Intrinsic::bitreverse>(Op0);
2158}
2159
2160template <typename Opnd0>
2161inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) {
2162 return m_Intrinsic<Intrinsic::bswap>(Op0);
2163}
2164
2165template <typename Opnd0>
2166inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) {
2167 return m_Intrinsic<Intrinsic::fabs>(Op0);
2168}
2169
2170template <typename Opnd0>
2171inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FCanonicalize(const Opnd0 &Op0) {
2172 return m_Intrinsic<Intrinsic::canonicalize>(Op0);
2173}
2174
2175template <typename Opnd0, typename Opnd1>
2176inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMin(const Opnd0 &Op0,
2177 const Opnd1 &Op1) {
2178 return m_Intrinsic<Intrinsic::minnum>(Op0, Op1);
2179}
2180
2181template <typename Opnd0, typename Opnd1>
2182inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0,
2183 const Opnd1 &Op1) {
2184 return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1);
2185}
2186
2187template <typename Opnd0, typename Opnd1, typename Opnd2>
2188inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty
2189m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) {
2190 return m_Intrinsic<Intrinsic::fshl>(Op0, Op1, Op2);
2191}
2192
2193template <typename Opnd0, typename Opnd1, typename Opnd2>
2194inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty
2195m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) {
2196 return m_Intrinsic<Intrinsic::fshr>(Op0, Op1, Op2);
2197}
2198
2199//===----------------------------------------------------------------------===//
2200// Matchers for two-operands operators with the operators in either order
2201//
2202
2203/// Matches a BinaryOperator with LHS and RHS in either order.
2204template <typename LHS, typename RHS>
2205inline AnyBinaryOp_match<LHS, RHS, true> m_c_BinOp(const LHS &L, const RHS &R) {
2206 return AnyBinaryOp_match<LHS, RHS, true>(L, R);
2207}
2208
2209/// Matches an ICmp with a predicate over LHS and RHS in either order.
2210/// Swaps the predicate if operands are commuted.
2211template <typename LHS, typename RHS>
2212inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>
2213m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
2214 return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>(Pred, L,
2215 R);
2216}
2217
2218/// Matches an Add with LHS and RHS in either order.
2219template <typename LHS, typename RHS>
2220inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L,
2221 const RHS &R) {
2222 return BinaryOp_match<LHS, RHS, Instruction::Add, true>(L, R);
2223}
2224
2225/// Matches a Mul with LHS and RHS in either order.
2226template <typename LHS, typename RHS>
2227inline BinaryOp_match<LHS, RHS, Instruction::Mul, true> m_c_Mul(const LHS &L,
2228 const RHS &R) {
2229 return BinaryOp_match<LHS, RHS, Instruction::Mul, true>(L, R);
2230}
2231
2232/// Matches an And with LHS and RHS in either order.
2233template <typename LHS, typename RHS>
2234inline BinaryOp_match<LHS, RHS, Instruction::And, true> m_c_And(const LHS &L,
2235 const RHS &R) {
2236 return BinaryOp_match<LHS, RHS, Instruction::And, true>(L, R);
2237}
2238
2239/// Matches an Or with LHS and RHS in either order.
2240template <typename LHS, typename RHS>
2241inline BinaryOp_match<LHS, RHS, Instruction::Or, true> m_c_Or(const LHS &L,
2242 const RHS &R) {
2243 return BinaryOp_match<LHS, RHS, Instruction::Or, true>(L, R);
2244}
2245
2246/// Matches an Xor with LHS and RHS in either order.
2247template <typename LHS, typename RHS>
2248inline BinaryOp_match<LHS, RHS, Instruction::Xor, true> m_c_Xor(const LHS &L,
2249 const RHS &R) {
2250 return BinaryOp_match<LHS, RHS, Instruction::Xor, true>(L, R);
2251}
2252
2253/// Matches a 'Neg' as 'sub 0, V'.
2254template <typename ValTy>
2255inline BinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, Instruction::Sub>
2256m_Neg(const ValTy &V) {
2257 return m_Sub(m_ZeroInt(), V);
2258}
2259
2260/// Matches a 'Neg' as 'sub nsw 0, V'.
2261template <typename ValTy>
2262inline OverflowingBinaryOp_match<cst_pred_ty<is_zero_int>, ValTy,
2263 Instruction::Sub,
2264 OverflowingBinaryOperator::NoSignedWrap>
2265m_NSWNeg(const ValTy &V) {
2266 return m_NSWSub(m_ZeroInt(), V);
2267}
2268
2269/// Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
2270template <typename ValTy>
2271inline BinaryOp_match<ValTy, cst_pred_ty<is_all_ones>, Instruction::Xor, true>
2272m_Not(const ValTy &V) {
2273 return m_c_Xor(V, m_AllOnes());
2274}
2275
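Usage sketch (illustrative, not from this header): the commutative m_c_* matchers pair naturally with m_Specific when one operand is already known.
  // True iff V is 'add X, Y' or 'add Y, X' for the already-known X.
  static bool matchAddOfKnown(llvm::Value *V, llvm::Value *X, llvm::Value *&Y) {
    using namespace llvm::PatternMatch;
    return match(V, m_c_Add(m_Specific(X), m_Value(Y)));
  }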
2276/// Matches an SMin with LHS and RHS in either order.
2277template <typename LHS, typename RHS>
2278inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>
2279m_c_SMin(const LHS &L, const RHS &R) {
2280 return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>(L, R);
2281}
2282/// Matches an SMax with LHS and RHS in either order.
2283template <typename LHS, typename RHS>
2284inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>
2285m_c_SMax(const LHS &L, const RHS &R) {
2286 return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>(L, R);
2287}
2288/// Matches a UMin with LHS and RHS in either order.
2289template <typename LHS, typename RHS>
2290inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>
2291m_c_UMin(const LHS &L, const RHS &R) {
2292 return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>(L, R);
2293}
2294/// Matches a UMax with LHS and RHS in either order.
2295template <typename LHS, typename RHS>
2296inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>
2297m_c_UMax(const LHS &L, const RHS &R) {
2298 return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>(L, R);
2299}
2300
2301template <typename LHS, typename RHS>
2302inline match_combine_or<
2303 match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>,
2304 MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>>,
2305 match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>,
2306 MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>>>
2307m_c_MaxOrMin(const LHS &L, const RHS &R) {
2308 return m_CombineOr(m_CombineOr(m_c_SMax(L, R), m_c_SMin(L, R)),
2309 m_CombineOr(m_c_UMax(L, R), m_c_UMin(L, R)));
2310}
2311
2312/// Matches FAdd with LHS and RHS in either order.
2313template <typename LHS, typename RHS>
2314inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true>
2315m_c_FAdd(const LHS &L, const RHS &R) {
2316 return BinaryOp_match<LHS, RHS, Instruction::FAdd, true>(L, R);
2317}
2318
2319/// Matches FMul with LHS and RHS in either order.
2320template <typename LHS, typename RHS>
2321inline BinaryOp_match<LHS, RHS, Instruction::FMul, true>
2322m_c_FMul(const LHS &L, const RHS &R) {
2323 return BinaryOp_match<LHS, RHS, Instruction::FMul, true>(L, R);
2324}
2325
2326template <typename Opnd_t> struct Signum_match {
2327 Opnd_t Val;
2328 Signum_match(const Opnd_t &V) : Val(V) {}
2329
2330 template <typename OpTy> bool match(OpTy *V) {
2331 unsigned TypeSize = V->getType()->getScalarSizeInBits();
2332 if (TypeSize == 0)
2333 return false;
2334
2335 unsigned ShiftWidth = TypeSize - 1;
2336 Value *OpL = nullptr, *OpR = nullptr;
2337
2338 // This is the representation of signum we match:
2339 //
2340 // signum(x) == (x >> 63) | (-x >>u 63)
2341 //
2342 // An i1 value is its own signum, so it's correct to match
2343 //
2344 // signum(x) == (x >> 0) | (-x >>u 0)
2345 //
2346 // for i1 values.
2347
2348 auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth));
2349 auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth));
2350 auto Signum = m_Or(LHS, RHS);
2351
2352 return Signum.match(V) && OpL == OpR && Val.match(OpL);
2353 }
2354};
2355
2356/// Matches a signum pattern.
2357///
2358/// signum(x) =
2359/// x > 0 -> 1
2360/// x == 0 -> 0
2361/// x < 0 -> -1
2362template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) {
2363 return Signum_match<Val_t>(V);
2364}
2365
2366template <int Ind, typename Opnd_t> struct ExtractValue_match {
2367 Opnd_t Val;
2368 ExtractValue_match(const Opnd_t &V) : Val(V) {}
2369
2370 template <typename OpTy> bool match(OpTy *V) {
2371 if (auto *I = dyn_cast<ExtractValueInst>(V)) {
2372 // If Ind is -1, don't inspect indices
2373 if (Ind != -1 &&
2374 !(I->getNumIndices() == 1 && I->getIndices()[0] == (unsigned)Ind))
2375 return false;
2376 return Val.match(I->getAggregateOperand());
2377 }
2378 return false;
2379 }
2380};
2381
2382/// Match a single index ExtractValue instruction.
2383/// For example m_ExtractValue<1>(...)
2384template <int Ind, typename Val_t>
2385inline ExtractValue_match<Ind, Val_t> m_ExtractValue(const Val_t &V) {
2386 return ExtractValue_match<Ind, Val_t>(V);
2387}
2388
2389/// Match an ExtractValue instruction with any index.
2390/// For example m_ExtractValue(...)
2391template <typename Val_t>
2392inline ExtractValue_match<-1, Val_t> m_ExtractValue(const Val_t &V) {
2393 return ExtractValue_match<-1, Val_t>(V);
2394}
2395
2396/// Matcher for a single index InsertValue instruction.
2397template <int Ind, typename T0, typename T1> struct InsertValue_match {
2398 T0 Op0;
2399 T1 Op1;
2400
2401 InsertValue_match(const T0 &Op0, const T1 &Op1) : Op0(Op0), Op1(Op1) {}
2402
2403 template <typename OpTy> bool match(OpTy *V) {
2404 if (auto *I = dyn_cast<InsertValueInst>(V)) {
2405 return Op0.match(I->getOperand(0)) && Op1.match(I->getOperand(1)) &&
2406 I->getNumIndices() == 1 && Ind == I->getIndices()[0];
2407 }
2408 return false;
2409 }
2410};
2411
2412/// Matches a single index InsertValue instruction.
2413template <int Ind, typename Val_t, typename Elt_t>
2414inline InsertValue_match<Ind, Val_t, Elt_t> m_InsertValue(const Val_t &Val,
2415 const Elt_t &Elt) {
2416 return InsertValue_match<Ind, Val_t, Elt_t>(Val, Elt);
2417}
2418
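Usage sketch (illustrative, not from this header): composing m_ExtractValue with an intrinsic matcher.
  // True iff V extracts the overflow flag (index 1) from
  // 'llvm.sadd.with.overflow(A, B)'.
  static bool matchSAddOverflowFlag(llvm::Value *V, llvm::Value *&A,
                                    llvm::Value *&B) {
    using namespace llvm::PatternMatch;
    return match(V, m_ExtractValue<1>(
                        m_Intrinsic<llvm::Intrinsic::sadd_with_overflow>(
                            m_Value(A), m_Value(B))));
  }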
2419/// Matches patterns for `vscale`. This can either be a call to `llvm.vscale` or
2420/// the constant expression
2421/// `ptrtoint(gep <vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 1)`
2422/// under the right conditions determined by DataLayout.
2423struct VScaleVal_match {
2424private:
2425 template <typename Base, typename Offset>
2426 inline BinaryOp_match<Base, Offset, Instruction::GetElementPtr>
2427 m_OffsetGep(const Base &B, const Offset &O) {
2428 return BinaryOp_match<Base, Offset, Instruction::GetElementPtr>(B, O);
2429 }
2430
2431public:
2432 const DataLayout &DL;
2433 VScaleVal_match(const DataLayout &DL) : DL(DL) {}
2434
2435 template <typename ITy> bool match(ITy *V) {
2436 if (m_Intrinsic<Intrinsic::vscale>().match(V))
2437 return true;
2438
2439 if (m_PtrToInt(m_OffsetGep(m_Zero(), m_SpecificInt(1))).match(V)) {
2440 Type *PtrTy = cast<Operator>(V)->getOperand(0)->getType();
2441 auto *DerefTy = PtrTy->getPointerElementType();
2442 if (isa<ScalableVectorType>(DerefTy) &&
2443 DL.getTypeAllocSizeInBits(DerefTy).getKnownMinSize() == 8)
2444 return true;
2445 }
2446
2447 return false;
2448 }
2449};
2450
2451inline VScaleVal_match m_VScale(const DataLayout &DL) {
2452 return VScaleVal_match(DL);
2453}
2454
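Usage sketch (illustrative, not from this header): m_VScale needs the DataLayout to validate the constant-expression form.
  // True iff V computes vscale, via either form recognized above.
  static bool isVScaleQuantity(llvm::Value *V, const llvm::DataLayout &DL) {
    using namespace llvm::PatternMatch;
    return match(V, m_VScale(DL));
  }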
2455template <typename LHS, typename RHS, unsigned Opcode>
2456struct LogicalOp_match {
2457 LHS L;
2458 RHS R;
2459
2460 LogicalOp_match(const LHS &L, const RHS &R) : L(L), R(R) {}
2461
2462 template <typename T> bool match(T *V) {
2463    if (auto *I = dyn_cast<Instruction>(V)) {
36
Assuming 'V' is not a 'Instruction'
36.1
'I' is null
37
Taking false branch
45
'V' is not a 'Instruction'
45.1
'I' is null
46
Taking false branch
2464 if (!I->getType()->isIntOrIntVectorTy(1))
2465 return false;
2466
2467 if (I->getOpcode() == Opcode && L.match(I->getOperand(0)) &&
2468 R.match(I->getOperand(1)))
2469 return true;
2470
2471 if (auto *SI = dyn_cast<SelectInst>(I)) {
2472 if (Opcode == Instruction::And) {
2473 if (const auto *C = dyn_cast<Constant>(SI->getFalseValue()))
2474 if (C->isNullValue() && L.match(SI->getCondition()) &&
2475 R.match(SI->getTrueValue()))
2476 return true;
2477 } else {
2478          assert(Opcode == Instruction::Or);
2479 if (const auto *C = dyn_cast<Constant>(SI->getTrueValue()))
2480 if (C->isOneValue() && L.match(SI->getCondition()) &&
2481 R.match(SI->getFalseValue()))
2482 return true;
2483 }
2484 }
2485 }
2486
2487 return false;
38
Returning zero, which participates in a condition later
47
Returning zero, which participates in a condition later
2488 }
2489};
2490
2491/// Matches L && R either in the form of L & R or L ? R : false.
2492/// Note that the latter form is poison-blocking.
2493template <typename LHS, typename RHS>
2494inline LogicalOp_match<LHS, RHS, Instruction::And>
2495m_LogicalAnd(const LHS &L, const RHS &R) {
2496 return LogicalOp_match<LHS, RHS, Instruction::And>(L, R);
2497}
2498
2499/// Matches L && R where L and R are arbitrary values.
2500inline auto m_LogicalAnd() { return m_LogicalAnd(m_Value(), m_Value()); }
2501
2502/// Matches L || R either in the form of L | R or L ? true : R.
2503/// Note that the latter form is poison-blocking.
2504template <typename LHS, typename RHS>
2505inline LogicalOp_match<LHS, RHS, Instruction::Or>
2506m_LogicalOr(const LHS &L, const RHS &R) {
2507 return LogicalOp_match<LHS, RHS, Instruction::Or>(L, R);
2508}
2509
2510/// Matches L || R where L and R are arbitrary values.
2511inline auto m_LogicalOr() {
2512 return m_LogicalOr(m_Value(), m_Value());
2513}
2514
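Usage sketch (illustrative, not from this header): one pattern covers both the bitwise and the select-based form.
  // True iff V is 'and A, B' on i1 (or a vector of i1), or the
  // poison-blocking form 'select A, B, false'; binds both operands.
  static bool matchLogicalAndParts(llvm::Value *V, llvm::Value *&A,
                                   llvm::Value *&B) {
    using namespace llvm::PatternMatch;
    return match(V, m_LogicalAnd(m_Value(A), m_Value(B)));
  }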
2515} // end namespace PatternMatch
2516} // end namespace llvm
2517
2518#endif // LLVM_IR_PATTERNMATCH_H