Bug Summary

File: llvm/lib/Transforms/Scalar/LICM.cpp
Warning: line 1104, column 11
Called C++ object pointer is null

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name LICM.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/include -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Transforms/Scalar -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-01-13-084841-49055-1 -x c++ /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Transforms/Scalar/LICM.cpp

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Transforms/Scalar/LICM.cpp

1//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs loop invariant code motion, attempting to remove as much
10// code from the body of a loop as possible. It does this by either hoisting
11// code into the preheader block, or by sinking code to the exit blocks if it is
12// safe. This pass also promotes must-aliased memory locations in the loop to
13// live in registers, thus hoisting and sinking "invariant" loads and stores.
14//
15// This pass uses alias analysis for two purposes:
16//
17// 1. Moving loop invariant loads and calls out of loops. If we can determine
18// that a load or call inside of a loop never aliases anything stored to,
19// we can hoist it or sink it like any other instruction.
20// 2. Scalar Promotion of Memory - If there is a store instruction inside of
21// the loop, we try to move the store to happen AFTER the loop instead of
22// inside of the loop. This can only happen if a few conditions are true:
23// A. The pointer stored through is loop invariant
24// B. There are no stores or loads in the loop which _may_ alias the
25// pointer. There are no calls in the loop which mod/ref the pointer.
26// If these conditions are true, we can promote the loads and stores in the
27// loop of the pointer to use a temporary alloca'd variable. We then use
28// the SSAUpdater to construct the appropriate SSA form for the value.
29//
30//===----------------------------------------------------------------------===//
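For intuition, the effect described above can be sketched at the source level. The snippet below is purely illustrative (hypothetical function and variable names, and it assumes sum does not alias a and that the pass's legality checks all succeed); LICM itself operates on LLVM IR, not C++:

    // Before LICM: t is recomputed every iteration and *sum is re-read/re-written.
    void before(const int *a, int n, int x, int y, int *sum) {
      for (int i = 0; i < n; ++i) {
        int t = x * y;      // loop invariant: can be hoisted to the preheader
        *sum += a[i] * t;   // loop-invariant, must-aliased pointer: promotable
      }
    }

    // Conceptually after LICM: the invariant computation is hoisted, the memory
    // location is promoted to a register, and the store is sunk past the loop.
    void after(const int *a, int n, int x, int y, int *sum) {
      int t = x * y;        // hoisted once
      int s = *sum;         // scalar promotion: lives in a register inside the loop
      for (int i = 0; i < n; ++i)
        s += a[i] * t;
      *sum = s;             // single store after the loop
    }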
31
32#include "llvm/Transforms/Scalar/LICM.h"
33#include "llvm/ADT/SetOperations.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/Analysis/AliasAnalysis.h"
36#include "llvm/Analysis/AliasSetTracker.h"
37#include "llvm/Analysis/BasicAliasAnalysis.h"
38#include "llvm/Analysis/CaptureTracking.h"
39#include "llvm/Analysis/ConstantFolding.h"
40#include "llvm/Analysis/GlobalsModRef.h"
41#include "llvm/Analysis/GuardUtils.h"
42#include "llvm/Analysis/Loads.h"
43#include "llvm/Analysis/LoopInfo.h"
44#include "llvm/Analysis/LoopIterator.h"
45#include "llvm/Analysis/LoopPass.h"
46#include "llvm/Analysis/MemoryBuiltins.h"
47#include "llvm/Analysis/MemorySSA.h"
48#include "llvm/Analysis/MemorySSAUpdater.h"
49#include "llvm/Analysis/OptimizationRemarkEmitter.h"
50#include "llvm/Analysis/ScalarEvolution.h"
51#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
52#include "llvm/Analysis/TargetLibraryInfo.h"
53#include "llvm/Analysis/ValueTracking.h"
54#include "llvm/IR/CFG.h"
55#include "llvm/IR/Constants.h"
56#include "llvm/IR/DataLayout.h"
57#include "llvm/IR/DebugInfoMetadata.h"
58#include "llvm/IR/DerivedTypes.h"
59#include "llvm/IR/Dominators.h"
60#include "llvm/IR/Instructions.h"
61#include "llvm/IR/IntrinsicInst.h"
62#include "llvm/IR/LLVMContext.h"
63#include "llvm/IR/Metadata.h"
64#include "llvm/IR/PatternMatch.h"
65#include "llvm/IR/PredIteratorCache.h"
66#include "llvm/InitializePasses.h"
67#include "llvm/Support/CommandLine.h"
68#include "llvm/Support/Debug.h"
69#include "llvm/Support/raw_ostream.h"
70#include "llvm/Transforms/Scalar.h"
71#include "llvm/Transforms/Scalar/LoopPassManager.h"
72#include "llvm/Transforms/Utils/BasicBlockUtils.h"
73#include "llvm/Transforms/Utils/Local.h"
74#include "llvm/Transforms/Utils/LoopUtils.h"
75#include "llvm/Transforms/Utils/SSAUpdater.h"
76#include <algorithm>
77#include <utility>
78using namespace llvm;
79
80#define DEBUG_TYPE "licm"
81
82STATISTIC(NumCreatedBlocks, "Number of blocks created");
83STATISTIC(NumClonedBranches, "Number of branches cloned");
84STATISTIC(NumSunk, "Number of instructions sunk out of loop");
85STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
86STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
87STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
88STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
89
90/// Memory promotion is enabled by default.
91static cl::opt<bool>
92 DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
93 cl::desc("Disable memory promotion in LICM pass"));
94
95static cl::opt<bool> ControlFlowHoisting(
96 "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
97 cl::desc("Enable control flow (and PHI) hoisting in LICM"));
98
99static cl::opt<uint32_t> MaxNumUsesTraversed(
100 "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
101 cl::desc("Max num uses visited for identifying load "
102 "invariance in loop using invariant start (default = 8)"));
103
104// Default value of zero implies we use the regular alias set tracker mechanism
105// instead of the cross product using AA to identify aliasing of the memory
106// location we are interested in.
107static cl::opt<int>
108LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
109 cl::desc("How many instruction to cross product using AA"));
110
111// Experimental option to allow imprecision in LICM in pathological cases, in
112// exchange for faster compile. This is to be removed if MemorySSA starts to
113// address the same issue. This flag applies only when LICM uses MemorySSA
114// instead of AliasSetTracker. LICM calls MemorySSAWalker's
115// getClobberingMemoryAccess, up to the value of the Cap, getting perfect
116// accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
117// which may not be precise, since optimizeUses is capped. The result is
118// correct, but we may not get as "far up" as possible to get which access is
119// clobbering the one queried.
120cl::opt<unsigned> llvm::SetLicmMssaOptCap(
121 "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
122 cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
123 "for faster compile. Caps the MemorySSA clobbering calls."));
124
125// Experimentally, memory promotion carries less importance than sinking and
126// hoisting. Limit when we do promotion when using MemorySSA, in order to save
127// compile time.
128cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
129 "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
130 cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
131 "effect. When MSSA in LICM is enabled, then this is the maximum "
132 "number of accesses allowed to be present in a loop in order to "
133 "enable memory promotion."));
134
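All of the options above are ordinary cl::opt flags, so they can be set on the command line of any tool that links this pass. A hypothetical invocation through the opt driver (the exact driver spelling and values here are assumptions, and the MemorySSA-specific caps only matter when MemorySSA is enabled in LICM, as the descriptions note) might look like:

    opt -licm -licm-mssa-optimization-cap=200 -licm-mssa-max-acc-promotion=500 input.ll -S -o output.ll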
135static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
136static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
137 const LoopSafetyInfo *SafetyInfo,
138 TargetTransformInfo *TTI, bool &FreeInLoop);
139static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
140 BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
141 MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
142 OptimizationRemarkEmitter *ORE);
143static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
144 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
145 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
146static bool isSafeToExecuteUnconditionally(Instruction &Inst,
147 const DominatorTree *DT,
148 const Loop *CurLoop,
149 const LoopSafetyInfo *SafetyInfo,
150 OptimizationRemarkEmitter *ORE,
151 const Instruction *CtxI = nullptr);
152static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
153 AliasSetTracker *CurAST, Loop *CurLoop,
154 AliasAnalysis *AA);
155static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
156 Loop *CurLoop,
157 SinkAndHoistLICMFlags &Flags);
158static Instruction *CloneInstructionInExitBlock(
159 Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
160 const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
161
162static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
163 AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
164
165static void moveInstructionBefore(Instruction &I, Instruction &Dest,
166 ICFLoopSafetyInfo &SafetyInfo,
167 MemorySSAUpdater *MSSAU, ScalarEvolution *SE);
168
169namespace {
170struct LoopInvariantCodeMotion {
171 using ASTrackerMapTy = DenseMap<Loop *, std::unique_ptr<AliasSetTracker>>;
172 bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
173 TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
174 ScalarEvolution *SE, MemorySSA *MSSA,
175 OptimizationRemarkEmitter *ORE, bool DeleteAST);
176
177 ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
178 LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
179 unsigned LicmMssaNoAccForPromotionCap)
180 : LicmMssaOptCap(LicmMssaOptCap),
181 LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
182
183private:
184 ASTrackerMapTy LoopToAliasSetMap;
185 unsigned LicmMssaOptCap;
186 unsigned LicmMssaNoAccForPromotionCap;
187
188 std::unique_ptr<AliasSetTracker>
189 collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
190 std::unique_ptr<AliasSetTracker>
191 collectAliasInfoForLoopWithMSSA(Loop *L, AliasAnalysis *AA,
192 MemorySSAUpdater *MSSAU);
193};
194
195struct LegacyLICMPass : public LoopPass {
196 static char ID; // Pass identification, replacement for typeid
197 LegacyLICMPass(
198 unsigned LicmMssaOptCap = SetLicmMssaOptCap,
199 unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
200 : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
201 initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
202 }
203
204 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
205 if (skipLoop(L)) {
206 // If we have run LICM on a previous loop but now we are skipping
207 // (because we've hit the opt-bisect limit), we need to clear the
208 // loop alias information.
209 LICM.getLoopToAliasSetMap().clear();
210 return false;
211 }
212
213 auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
214 MemorySSA *MSSA = EnableMSSALoopDependency
215 ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
216 : nullptr;
217 // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
218 // pass. Function analyses need to be preserved across loop transformations
219 // but ORE cannot be preserved (see comment before the pass definition).
220 OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
221 return LICM.runOnLoop(L,
222 &getAnalysis<AAResultsWrapperPass>().getAAResults(),
223 &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
224 &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
225 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
226 *L->getHeader()->getParent()),
227 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
228 *L->getHeader()->getParent()),
229 SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
230 }
231
232 /// This transformation requires natural loop information & requires that
233 /// loop preheaders be inserted into the CFG...
234 ///
235 void getAnalysisUsage(AnalysisUsage &AU) const override {
236 AU.addPreserved<DominatorTreeWrapperPass>();
237 AU.addPreserved<LoopInfoWrapperPass>();
238 AU.addRequired<TargetLibraryInfoWrapperPass>();
239 if (EnableMSSALoopDependency) {
240 AU.addRequired<MemorySSAWrapperPass>();
241 AU.addPreserved<MemorySSAWrapperPass>();
242 }
243 AU.addRequired<TargetTransformInfoWrapperPass>();
244 getLoopAnalysisUsage(AU);
245 }
246
247 using llvm::Pass::doFinalization;
248
249 bool doFinalization() override {
250 auto &AliasSetMap = LICM.getLoopToAliasSetMap();
251 // All loops in the AliasSetMap should be cleaned up already. The only case
252 // where we fail to do so is if an outer loop gets deleted before LICM
253 // visits it.
254 assert(all_of(AliasSetMap,
255 [](LoopInvariantCodeMotion::ASTrackerMapTy::value_type &KV) {
256 return !KV.first->getParentLoop();
257 }) &&
258 "Didn't free loop alias sets");
259 AliasSetMap.clear();
260 return false;
261 }
262
263private:
264 LoopInvariantCodeMotion LICM;
265
266 /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
267 void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
268 Loop *L) override;
269
270 /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
271 /// set.
272 void deleteAnalysisValue(Value *V, Loop *L) override;
273
274 /// Simple Analysis hook. Delete loop L from alias set map.
275 void deleteAnalysisLoop(Loop *L) override;
276};
277} // namespace
278
279PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
280 LoopStandardAnalysisResults &AR, LPMUpdater &) {
281 const auto &FAM =
282 AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
283 Function *F = L.getHeader()->getParent();
284
285 auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
286 // FIXME: This should probably be optional rather than required.
287 if (!ORE)
288 report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
289 "cached at a higher level");
290
291 LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
292 if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
293 AR.MSSA, ORE, true))
294 return PreservedAnalyses::all();
295
296 auto PA = getLoopPassPreservedAnalyses();
297
298 PA.preserve<DominatorTreeAnalysis>();
299 PA.preserve<LoopAnalysis>();
300 if (AR.MSSA)
301 PA.preserve<MemorySSAAnalysis>();
302
303 return PA;
304}
305
306char LegacyLICMPass::ID = 0;
307INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
308 false, false)
309INITIALIZE_PASS_DEPENDENCY(LoopPass)
310INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
311INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
312INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
313INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
314 false)
315
316Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
317Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
318 unsigned LicmMssaNoAccForPromotionCap) {
319 return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
320}
321
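As a minimal sketch of how a client could use the factory functions above with the legacy pass manager (the driver function runLICMOnModule and its setup are hypothetical, not part of this file):

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Scalar.h"

    // Run LICM (with the default MemorySSA caps) over every loop in module M.
    // The legacy pass manager schedules the required analyses automatically.
    void runLICMOnModule(llvm::Module &M) {
      llvm::legacy::PassManager PM;
      PM.add(llvm::createLICMPass());
      PM.run(M);
    }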
322/// Hoist expressions out of the specified loop. Note, alias info for inner
323/// loop is not preserved so it is not a good idea to run LICM multiple
324/// times on one loop.
325/// We should delete AST for inner loops in the new pass manager to avoid
326/// memory leak.
327///
328bool LoopInvariantCodeMotion::runOnLoop(
329 Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
330 TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE,
331 MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
332 bool Changed = false;
333
334 assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
335
336 // If this loop has metadata indicating that LICM is not to be performed then
337 // just exit.
338 if (hasDisableLICMTransformsHint(L)) {
339 return false;
340 }
341
342 std::unique_ptr<AliasSetTracker> CurAST;
343 std::unique_ptr<MemorySSAUpdater> MSSAU;
344 bool NoOfMemAccTooLarge = false;
345 unsigned LicmMssaOptCounter = 0;
346
347 if (!MSSA) {
348 LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
349 CurAST = collectAliasInfoForLoop(L, LI, AA);
350 } else {
351 LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
352 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
353
354 unsigned AccessCapCount = 0;
355 for (auto *BB : L->getBlocks()) {
356 if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
357 for (const auto &MA : *Accesses) {
358 (void)MA;
359 AccessCapCount++;
360 if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
361 NoOfMemAccTooLarge = true;
362 break;
363 }
364 }
365 }
366 if (NoOfMemAccTooLarge)
367 break;
368 }
369 }
370
371 // Get the preheader block to move instructions into...
372 BasicBlock *Preheader = L->getLoopPreheader();
373
374 // Compute loop safety information.
375 ICFLoopSafetyInfo SafetyInfo(DT);
376 SafetyInfo.computeLoopSafetyInfo(L);
377
378 // We want to visit all of the instructions in this loop... that are not parts
379 // of our subloops (they have already had their invariants hoisted out of
380 // their loop, into this loop, so there is no need to process the BODIES of
381 // the subloops).
382 //
383 // Traverse the body of the loop in depth first order on the dominator tree so
384 // that we are guaranteed to see definitions before we see uses. This allows
385 // us to sink instructions in one pass, without iteration. After sinking
386 // instructions, we perform another pass to hoist them out of the loop.
387 SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter,
388 LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
389 /*IsSink=*/true};
390 if (L->hasDedicatedExits())
391 Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
392 CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
393 Flags.IsSink = false;
394 if (Preheader)
395 Changed |=
396 hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
397 CurAST.get(), MSSAU.get(), SE, &SafetyInfo, Flags, ORE);
398
399 // Now that all loop invariants have been removed from the loop, promote any
400 // memory references to scalars that we can.
401 // Don't sink stores from loops without dedicated block exits. Exits
402 // containing indirect branches are not transformed by loop simplify,
403 // make sure we catch that. An additional load may be generated in the
404 // preheader for SSA updater, so also avoid sinking when no preheader
405 // is available.
406 if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
407 !NoOfMemAccTooLarge) {
408 // Figure out the loop exits and their insertion points
409 SmallVector<BasicBlock *, 8> ExitBlocks;
410 L->getUniqueExitBlocks(ExitBlocks);
411
412 // We can't insert into a catchswitch.
413 bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
414 return isa<CatchSwitchInst>(Exit->getTerminator());
415 });
416
417 if (!HasCatchSwitch) {
418 SmallVector<Instruction *, 8> InsertPts;
419 SmallVector<MemoryAccess *, 8> MSSAInsertPts;
420 InsertPts.reserve(ExitBlocks.size());
421 if (MSSAU)
422 MSSAInsertPts.reserve(ExitBlocks.size());
423 for (BasicBlock *ExitBlock : ExitBlocks) {
424 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
425 if (MSSAU)
426 MSSAInsertPts.push_back(nullptr);
427 }
428
429 PredIteratorCache PIC;
430
431 bool Promoted = false;
432
433 // Build an AST using MSSA.
434 if (!CurAST.get())
435 CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get());
436
437 // Loop over all of the alias sets in the tracker object.
438 for (AliasSet &AS : *CurAST) {
439 // We can promote this alias set if it has a store, if it is a "Must"
440 // alias set, if the pointer is loop invariant, and if we are not
441 // eliminating any volatile loads or stores.
442 if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
443 !L->isLoopInvariant(AS.begin()->getValue()))
444 continue;
445
446 assert(
447 !AS.empty() &&
448 "Must alias set should have at least one pointer element in it!");
449
450 SmallSetVector<Value *, 8> PointerMustAliases;
451 for (const auto &ASI : AS)
452 PointerMustAliases.insert(ASI.getValue());
453
454 Promoted |= promoteLoopAccessesToScalars(
455 PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
456 DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
457 }
458
459 // Once we have promoted values across the loop body we have to
460 // recursively reform LCSSA as any nested loop may now have values defined
461 // within the loop used in the outer loop.
462 // FIXME: This is really heavy handed. It would be a bit better to use an
463 // SSAUpdater strategy during promotion that was LCSSA aware and reformed
464 // it as it went.
465 if (Promoted)
466 formLCSSARecursively(*L, *DT, LI, SE);
467
468 Changed |= Promoted;
469 }
470 }
471
472 // Check that neither this loop nor its parent have had LCSSA broken. LICM is
473 // specifically moving instructions across the loop boundary and so it is
474 // especially in need of sanity checking here.
475 assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
476 assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
477 "Parent loop not left in LCSSA form after LICM!");
478
479 // If this loop is nested inside of another one, save the alias information
480 // for when we process the outer loop.
481 if (!MSSAU.get() && CurAST.get() && L->getParentLoop() && !DeleteAST)
482 LoopToAliasSetMap[L] = std::move(CurAST);
483
484 if (MSSAU.get() && VerifyMemorySSA)
485 MSSAU->getMemorySSA()->verifyMemorySSA();
486
487 if (Changed && SE)
488 SE->forgetLoopDispositions(L);
489 return Changed;
490}
491
492/// Walk the specified region of the CFG (defined by all blocks dominated by
493/// the specified block, and that are in the current loop) in reverse depth
494/// first order w.r.t the DominatorTree. This allows us to visit uses before
495/// definitions, allowing us to sink a loop body in one pass without iteration.
496///
497bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
498 DominatorTree *DT, TargetLibraryInfo *TLI,
499 TargetTransformInfo *TTI, Loop *CurLoop,
500 AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
501 ICFLoopSafetyInfo *SafetyInfo,
502 SinkAndHoistLICMFlags &Flags,
503 OptimizationRemarkEmitter *ORE) {
504
505 // Verify inputs.
506 assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
507 CurLoop != nullptr && SafetyInfo != nullptr &&
508 "Unexpected input to sinkRegion.");
509 assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
510 "Either AliasSetTracker or MemorySSA should be initialized.");
511
512 // We want to visit children before parents. We will enqueue all the parents
513 // before their children in the worklist and process the worklist in reverse
514 // order.
515 SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
516
517 bool Changed = false;
518 for (DomTreeNode *DTN : reverse(Worklist)) {
519 BasicBlock *BB = DTN->getBlock();
520 // Only need to process the contents of this block if it is not part of a
521 // subloop (which would already have been processed).
522 if (inSubLoop(BB, CurLoop, LI))
523 continue;
524
525 for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
526 Instruction &I = *--II;
527
528 // If the instruction is dead, we would try to sink it because it isn't
529 // used in the loop, instead, just delete it.
530 if (isInstructionTriviallyDead(&I, TLI)) {
531 LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
532 salvageDebugInfo(I);
533 ++II;
534 eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
535 Changed = true;
536 continue;
537 }
538
539 // Check to see if we can sink this instruction to the exit blocks
540 // of the loop. We can do this if all users of the instruction are
541 // outside of the loop. In this case, it doesn't even matter if the
542 // operands of the instruction are loop invariant.
543 //
544 bool FreeInLoop = false;
545 if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
546 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
547 ORE) &&
548 !I.mayHaveSideEffects()) {
549 if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
550 if (!FreeInLoop) {
551 ++II;
552 eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
553 }
554 Changed = true;
555 }
556 }
557 }
558 }
559 if (MSSAU && VerifyMemorySSA)
560 MSSAU->getMemorySSA()->verifyMemorySSA();
561 return Changed;
562}
563
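As a rough source-level picture of what sinkRegion accomplishes (illustrative only; names are hypothetical and the rewrite assumes the usual legality checks succeed): a value computed inside the loop whose only users are outside the loop can be moved to the loop's exit:

    // Before: t is assigned on every iteration but only read after the loop.
    int sink_before(const int *a, int n, int x) {
      int t = 0, s = 0;
      for (int i = 0; i < n; ++i) {
        s += a[i];
        t = x * 2;            // no users inside the loop
      }
      return s + t;
    }

    // After sinking: the computation runs once, on the path that leaves the loop.
    int sink_after(const int *a, int n, int x) {
      int t = 0, s = 0;
      for (int i = 0; i < n; ++i)
        s += a[i];
      if (n > 0)
        t = x * 2;            // sunk into the exit block
      return s + t;
    }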
564namespace {
565// This is a helper class for hoistRegion to make it able to hoist control flow
566// in order to be able to hoist phis. The way this works is that we initially
567// start hoisting to the loop preheader, and when we see a loop invariant branch
568// we make note of this. When we then come to hoist an instruction that's
569// conditional on such a branch we duplicate the branch and the relevant control
570// flow, then hoist the instruction into the block corresponding to its original
571// block in the duplicated control flow.
572class ControlFlowHoister {
573private:
574 // Information about the loop we are hoisting from
575 LoopInfo *LI;
576 DominatorTree *DT;
577 Loop *CurLoop;
578 MemorySSAUpdater *MSSAU;
579
580 // A map of blocks in the loop to the block their instructions will be hoisted
581 // to.
582 DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
583
584 // The branches that we can hoist, mapped to the block that marks a
585 // convergence point of their control flow.
586 DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
587
588public:
589 ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
590 MemorySSAUpdater *MSSAU)
591 : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
592
593 void registerPossiblyHoistableBranch(BranchInst *BI) {
594 // We can only hoist conditional branches with loop invariant operands.
595 if (!ControlFlowHoisting || !BI->isConditional() ||
596 !CurLoop->hasLoopInvariantOperands(BI))
597 return;
598
599 // The branch destinations need to be in the loop, and we don't gain
600 // anything by duplicating conditional branches with duplicate successors,
601 // as it's essentially the same as an unconditional branch.
602 BasicBlock *TrueDest = BI->getSuccessor(0);
603 BasicBlock *FalseDest = BI->getSuccessor(1);
604 if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
605 TrueDest == FalseDest)
606 return;
607
608 // We can hoist BI if one branch destination is the successor of the other,
609 // or both have common successor which we check by seeing if the
610 // intersection of their successors is non-empty.
611 // TODO: This could be expanded to allowing branches where both ends
612 // eventually converge to a single block.
613 SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
614 TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
615 FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
616 BasicBlock *CommonSucc = nullptr;
617 if (TrueDestSucc.count(FalseDest)) {
618 CommonSucc = FalseDest;
619 } else if (FalseDestSucc.count(TrueDest)) {
620 CommonSucc = TrueDest;
621 } else {
622 set_intersect(TrueDestSucc, FalseDestSucc);
623 // If there's one common successor use that.
624 if (TrueDestSucc.size() == 1)
625 CommonSucc = *TrueDestSucc.begin();
626 // If there's more than one pick whichever appears first in the block list
627 // (we can't use the value returned by TrueDestSucc.begin() as it's
628 // unpredictable which element gets returned).
629 else if (!TrueDestSucc.empty()) {
630 Function *F = TrueDest->getParent();
631 auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
632 auto It = std::find_if(F->begin(), F->end(), IsSucc);
633 assert(It != F->end() && "Could not find successor in function");
634 CommonSucc = &*It;
635 }
636 }
637 // The common successor has to be dominated by the branch, as otherwise
638 // there will be some other path to the successor that will not be
639 // controlled by this branch so any phi we hoist would be controlled by the
640 // wrong condition. This also takes care of avoiding hoisting of loop back
641 // edges.
642 // TODO: In some cases this could be relaxed if the successor is dominated
643 // by another block that's been hoisted and we can guarantee that the
644 // control flow has been replicated exactly.
645 if (CommonSucc && DT->dominates(BI, CommonSucc))
646 HoistableBranches[BI] = CommonSucc;
647 }
648
649 bool canHoistPHI(PHINode *PN) {
650 // The phi must have loop invariant operands.
651 if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
652 return false;
653 // We can hoist phis if the block they are in is the target of hoistable
654 // branches which cover all of the predecessors of the block.
655 SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
656 BasicBlock *BB = PN->getParent();
657 for (BasicBlock *PredBB : predecessors(BB))
658 PredecessorBlocks.insert(PredBB);
659 // If we have less predecessor blocks than predecessors then the phi will
660 // have more than one incoming value for the same block which we can't
661 // handle.
662 // TODO: This could be handled be erasing some of the duplicate incoming
663 // values.
664 if (PredecessorBlocks.size() != pred_size(BB))
665 return false;
666 for (auto &Pair : HoistableBranches) {
667 if (Pair.second == BB) {
668 // Which blocks are predecessors via this branch depends on if the
669 // branch is triangle-like or diamond-like.
670 if (Pair.first->getSuccessor(0) == BB) {
671 PredecessorBlocks.erase(Pair.first->getParent());
672 PredecessorBlocks.erase(Pair.first->getSuccessor(1));
673 } else if (Pair.first->getSuccessor(1) == BB) {
674 PredecessorBlocks.erase(Pair.first->getParent());
675 PredecessorBlocks.erase(Pair.first->getSuccessor(0));
676 } else {
677 PredecessorBlocks.erase(Pair.first->getSuccessor(0));
678 PredecessorBlocks.erase(Pair.first->getSuccessor(1));
679 }
680 }
681 }
682 // PredecessorBlocks will now be empty if for every predecessor of BB we
683 // found a hoistable branch source.
684 return PredecessorBlocks.empty();
685 }
686
687 BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
688 if (!ControlFlowHoisting)
689 return CurLoop->getLoopPreheader();
690 // If BB has already been hoisted, return that
691 if (HoistDestinationMap.count(BB))
692 return HoistDestinationMap[BB];
693
694 // Check if this block is conditional based on a pending branch
695 auto HasBBAsSuccessor =
696 [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
697 return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
698 Pair.first->getSuccessor(1) == BB);
699 };
700 auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(),
701 HasBBAsSuccessor);
702
703 // If not involved in a pending branch, hoist to preheader
704 BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
705 if (It == HoistableBranches.end()) {
706 LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()
707 << " as hoist destination for " << BB->getName()
708 << "\n");
709 HoistDestinationMap[BB] = InitialPreheader;
710 return InitialPreheader;
711 }
712 BranchInst *BI = It->first;
713 assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
714 HoistableBranches.end() &&
715 "BB is expected to be the target of at most one branch");
716
717 LLVMContext &C = BB->getContext();
718 BasicBlock *TrueDest = BI->getSuccessor(0);
719 BasicBlock *FalseDest = BI->getSuccessor(1);
720 BasicBlock *CommonSucc = HoistableBranches[BI];
721 BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
722
723 // Create hoisted versions of blocks that currently don't have them
724 auto CreateHoistedBlock = [&](BasicBlock *Orig) {
725 if (HoistDestinationMap.count(Orig))
726 return HoistDestinationMap[Orig];
727 BasicBlock *New =
728 BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
729 HoistDestinationMap[Orig] = New;
730 DT->addNewBlock(New, HoistTarget);
731 if (CurLoop->getParentLoop())
732 CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
733 ++NumCreatedBlocks;
734 LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
735 << " as hoist destination for " << Orig->getName()
736 << "\n");
737 return New;
738 };
739 BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
740 BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
741 BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
742
743 // Link up these blocks with branches.
744 if (!HoistCommonSucc->getTerminator()) {
745 // The new common successor we've generated will branch to whatever that
746 // hoist target branched to.
747 BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
748 assert(TargetSucc && "Expected hoist target to have a single successor");
749 HoistCommonSucc->moveBefore(TargetSucc);
750 BranchInst::Create(TargetSucc, HoistCommonSucc);
751 }
752 if (!HoistTrueDest->getTerminator()) {
753 HoistTrueDest->moveBefore(HoistCommonSucc);
754 BranchInst::Create(HoistCommonSucc, HoistTrueDest);
755 }
756 if (!HoistFalseDest->getTerminator()) {
757 HoistFalseDest->moveBefore(HoistCommonSucc);
758 BranchInst::Create(HoistCommonSucc, HoistFalseDest);
759 }
760
761 // If BI is being cloned to what was originally the preheader then
762 // HoistCommonSucc will now be the new preheader.
763 if (HoistTarget == InitialPreheader) {
764 // Phis in the loop header now need to use the new preheader.
765 InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
766 if (MSSAU)
767 MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
768 HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
769 // The new preheader dominates the loop header.
770 DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
771 DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
772 DT->changeImmediateDominator(HeaderNode, PreheaderNode);
773 // The preheader hoist destination is now the new preheader, with the
774 // exception of the hoist destination of this branch.
775 for (auto &Pair : HoistDestinationMap)
776 if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
777 Pair.second = HoistCommonSucc;
778 }
779
780 // Now finally clone BI.
781 ReplaceInstWithInst(
782 HoistTarget->getTerminator(),
783 BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
784 ++NumClonedBranches;
785
786 assert(CurLoop->getLoopPreheader() &&
787 "Hoisting blocks should not have destroyed preheader");
788 return HoistDestinationMap[BB];
789 }
790};
791} // namespace
792
793
794/// Return true if we know how to rewrite all uses of the given alloca after
795/// hoisting it out of the loop. The main concerns are a) potential captures
796/// and b) invariant.start markers which don't capture, but are no longer
797/// valid w/o a corresponding invariant.end.
798static bool canRewriteUsesOfAlloca(AllocaInst &AI) {
799 // TODO: This looks a lot like capture tracking, but we need to remove any
800 // invariant starts if we extend the lifetime of the alloca by hoisting it.
801 // We should probably refactor capture tracking into a form which allows us
802 // to reuse the relevant bits and remove the duplicated logic here.
803
804 SmallVector<Use *, 16> Worklist;
805 for (Use &U : AI.uses())
806 Worklist.push_back(&U);
807
808 unsigned NumUsesExplored = 0;
809 while (!Worklist.empty()) {
810 Use *U = Worklist.pop_back_val();
811 Instruction *I = cast<Instruction>(U->getUser());
812 NumUsesExplored++;
813 if (NumUsesExplored > DefaultMaxUsesToExplore)
814 return false;
815 // Non capturing, terminating uses
816 if (isa<LoadInst>(I) ||
817 (isa<StoreInst>(I) && U->getOperandNo() == 1))
818 continue;
819 // Non capturing, non-terminating
820 if (!isa<BitCastInst>(I) && !isa<GetElementPtrInst>(I))
821 return false;
822 for (Use &U : I->uses())
823 Worklist.push_back(&U);
824 }
825 return true;
826}
827
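For intuition (an illustrative sketch, not code from this file): at the source level, a local whose address is only used by loads, stores, and address arithmetic is the kind of alloca this predicate accepts, while one whose address escapes through a call is rejected:

    void consume(int *p) { *p = 1; }   // hypothetical helper

    void example() {
      int x = 0;               // only loaded and stored: canRewriteUsesOfAlloca -> true
      x += 1;

      int y = 0;
      consume(&y);             // address escapes (potential capture): -> false
      (void)y;
    }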
828/// Walk the specified region of the CFG (defined by all blocks dominated by
829/// the specified block, and that are in the current loop) in depth first
830/// order w.r.t the DominatorTree. This allows us to visit definitions before
831/// uses, allowing us to hoist a loop body in one pass without iteration.
832///
833bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
834 DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
835 AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
836 ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo,
837 SinkAndHoistLICMFlags &Flags,
838 OptimizationRemarkEmitter *ORE) {
839 // Verify inputs.
840 assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
841 CurLoop != nullptr && SafetyInfo != nullptr &&
842 "Unexpected input to hoistRegion.");
843 assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
844 "Either AliasSetTracker or MemorySSA should be initialized.");
845
846 ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
847
848 // Keep track of instructions that have been hoisted, as they may need to be
849 // re-hoisted if they end up not dominating all of their uses.
850 SmallVector<Instruction *, 16> HoistedInstructions;
851
852 // For PHI hoisting to work we need to hoist blocks before their successors.
853 // We can do this by iterating through the blocks in the loop in reverse
854 // post-order.
855 LoopBlocksRPO Worklist(CurLoop);
856 Worklist.perform(LI);
857 bool Changed = false;
858 for (BasicBlock *BB : Worklist) {
859 // Only need to process the contents of this block if it is not part of a
860 // subloop (which would already have been processed).
861 if (inSubLoop(BB, CurLoop, LI))
862 continue;
863
864 for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
865 Instruction &I = *II++;
866 // Try constant folding this instruction. If all the operands are
867 // constants, it is technically hoistable, but it would be better to
868 // just fold it.
869 if (Constant *C = ConstantFoldInstruction(
870 &I, I.getModule()->getDataLayout(), TLI)) {
871 LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
872 << '\n');
873 if (CurAST)
874 CurAST->copyValue(&I, C);
875 // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
876 I.replaceAllUsesWith(C);
877 if (isInstructionTriviallyDead(&I, TLI))
878 eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
879 Changed = true;
880 continue;
881 }
882
883 // Try hoisting the instruction out to the preheader. We can only do
884 // this if all of the operands of the instruction are loop invariant and
885 // if it is safe to hoist the instruction.
886 // TODO: It may be safe to hoist if we are hoisting to a conditional block
887 // and we have accurately duplicated the control flow from the loop header
888 // to that block.
889 if (CurLoop->hasLoopInvariantOperands(&I) &&
890 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
891 ORE) &&
892 isSafeToExecuteUnconditionally(
893 I, DT, CurLoop, SafetyInfo, ORE,
894 CurLoop->getLoopPreheader()->getTerminator())) {
895 hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
896 MSSAU, SE, ORE);
897 HoistedInstructions.push_back(&I);
898 Changed = true;
899 continue;
900 }
901
902 // Attempt to remove floating point division out of the loop by
903 // converting it to a reciprocal multiplication.
904 if (I.getOpcode() == Instruction::FDiv &&
905 CurLoop->isLoopInvariant(I.getOperand(1)) &&
906 I.hasAllowReciprocal()) {
907 auto Divisor = I.getOperand(1);
908 auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
909 auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
910 ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
911 SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
912 ReciprocalDivisor->insertBefore(&I);
913
914 auto Product =
915 BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
916 Product->setFastMathFlags(I.getFastMathFlags());
917 SafetyInfo->insertInstructionTo(Product, I.getParent());
918 Product->insertAfter(&I);
919 I.replaceAllUsesWith(Product);
920 eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
921
922 hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
923 SafetyInfo, MSSAU, SE, ORE);
924 HoistedInstructions.push_back(ReciprocalDivisor);
925 Changed = true;
926 continue;
927 }
928
929 auto IsInvariantStart = [&](Instruction &I) {
930 using namespace PatternMatch;
931 return I.use_empty() &&
932 match(&I, m_Intrinsic<Intrinsic::invariant_start>());
933 };
934 auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
935 return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
936 SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
937 };
938 if ((IsInvariantStart(I) || isGuard(&I)) &&
939 CurLoop->hasLoopInvariantOperands(&I) &&
940 MustExecuteWithoutWritesBefore(I)) {
941 hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
942 MSSAU, SE, ORE);
943 HoistedInstructions.push_back(&I);
944 Changed = true;
945 continue;
946 }
947
948 if (isa<AllocaInst>(&I) &&
949 SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
950 canRewriteUsesOfAlloca(cast<AllocaInst>(I))) {
951 hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
952 MSSAU, SE, ORE);
953 HoistedInstructions.push_back(&I);
954 Changed = true;
955 continue;
956 }
957
958 if (PHINode *PN = dyn_cast<PHINode>(&I)) {
959 if (CFH.canHoistPHI(PN)) {
960 // Redirect incoming blocks first to ensure that we create hoisted
961 // versions of those blocks before we hoist the phi.
962 for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
963 PN->setIncomingBlock(
964 i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
965 hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
966 MSSAU, SE, ORE);
967 assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
968 Changed = true;
969 continue;
970 }
971 }
972
973 // Remember possibly hoistable branches so we can actually hoist them
974 // later if needed.
975 if (BranchInst *BI = dyn_cast<BranchInst>(&I))
976 CFH.registerPossiblyHoistableBranch(BI);
977 }
978 }
979
980 // If we hoisted instructions to a conditional block they may not dominate
981 // their uses that weren't hoisted (such as phis where some operands are not
982 // loop invariant). If so make them unconditional by moving them to their
983 // immediate dominator. We iterate through the instructions in reverse order
984 // which ensures that when we rehoist an instruction we rehoist its operands,
985 // and also keep track of where in the block we are rehoisting to to make sure
986 // that we rehoist instructions before the instructions that use them.
987 Instruction *HoistPoint = nullptr;
988 if (ControlFlowHoisting) {
989 for (Instruction *I : reverse(HoistedInstructions)) {
990 if (!llvm::all_of(I->uses(),
991 [&](Use &U) { return DT->dominates(I, U); })) {
992 BasicBlock *Dominator =
993 DT->getNode(I->getParent())->getIDom()->getBlock();
994 if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
995 if (HoistPoint)
996 assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
997 "New hoist point expected to dominate old hoist point");
998 HoistPoint = Dominator->getTerminator();
999 }
1000 LLVM_DEBUG(dbgs() << "LICM rehoisting to "
1001 << HoistPoint->getParent()->getName()
1002 << ": " << *I << "\n");
1003 moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
1004 HoistPoint = I;
1005 Changed = true;
1006 }
1007 }
1008 }
1009 if (MSSAU && VerifyMemorySSA)
1010 MSSAU->getMemorySSA()->verifyMemorySSA();
1011
1012 // Now that we've finished hoisting make sure that LI and DT are still
1013 // valid.
1014#ifdef EXPENSIVE_CHECKS
1015 if (Changed) {
1016 assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
1017 "Dominator tree verification failed");
1018 LI->verify(*DT);
1019 }
1020#endif
1021
1022 return Changed;
1023}
1024
1025// Return true if LI is invariant within scope of the loop. LI is invariant if
1026// CurLoop is dominated by an invariant.start representing the same memory
1027// location and size as the memory location LI loads from, and also the
1028// invariant.start has no uses.
1029static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
1030 Loop *CurLoop) {
1031 Value *Addr = LI->getOperand(0);
1032 const DataLayout &DL = LI->getModule()->getDataLayout();
1033 const uint32_t LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
1034
1035 // if the type is i8 addrspace(x)*, we know this is the type of
1036 // llvm.invariant.start operand
1037 auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
1038 LI->getPointerAddressSpace());
1039 unsigned BitcastsVisited = 0;
1040 // Look through bitcasts until we reach the i8* type (this is invariant.start
1041 // operand type).
1042 while (Addr->getType() != PtrInt8Ty) {
1043 auto *BC = dyn_cast<BitCastInst>(Addr);
1044 // Avoid traversing high number of bitcast uses.
1045 if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
1046 return false;
1047 Addr = BC->getOperand(0);
1048 }
1049
1050 unsigned UsesVisited = 0;
1051 // Traverse all uses of the load operand value, to see if invariant.start is
1052 // one of the uses, and whether it dominates the load instruction.
1053 for (auto *U : Addr->users()) {
1054 // Avoid traversing for Load operand with high number of users.
1055 if (++UsesVisited > MaxNumUsesTraversed)
1056 return false;
1057 IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
1058 // If there are escaping uses of the invariant.start instruction, the load may be
1059 // non-invariant.
1060 if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
1061 !II->use_empty())
1062 continue;
1063 unsigned InvariantSizeInBits =
1064 cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
1065 // Confirm the invariant.start location size contains the load operand size
1066 // in bits. Also, the invariant.start should dominate the load, and we
1067 // should not hoist the load out of a loop that contains this dominating
1068 // invariant.start.
1069 if (LocSizeInBits <= InvariantSizeInBits &&
1070 DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
1071 return true;
1072 }
1073
1074 return false;
1075}
1076
1077namespace {
1078/// Return true if-and-only-if we know how to (mechanically) both hoist and
1079/// sink a given instruction out of a loop. Does not address legality
1080/// concerns such as aliasing or speculation safety.
1081bool isHoistableAndSinkableInst(Instruction &I) {
1082 // Only these instructions are hoistable/sinkable.
1083 return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
(2) Assuming 'I' is not a 'LoadInst'
(3) Assuming 'I' is not a 'StoreInst'
(4) Assuming 'I' is not a 'CallInst'
(16) Returning the value 1, which participates in a condition later
1084 isa<FenceInst>(I) || isa<CastInst>(I) ||
(5) Assuming 'I' is not a 'FenceInst'
(6) Assuming 'I' is not a 'CastInst'
1085 isa<UnaryOperator>(I) || isa<BinaryOperator>(I) ||
(7) Assuming 'I' is not a 'UnaryOperator'
(8) Assuming 'I' is not a 'BinaryOperator'
1086 isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
(9) Assuming 'I' is not a 'SelectInst'
(10) Assuming 'I' is not a 'GetElementPtrInst'
(11) Assuming 'I' is not a 'CmpInst'
1087 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
(12) Assuming 'I' is not a 'InsertElementInst'
(13) Assuming 'I' is not a 'ExtractElementInst'
1088 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
(14) Assuming 'I' is not a 'ShuffleVectorInst'
(15) Assuming 'I' is a 'ExtractValueInst'
1089 isa<InsertValueInst>(I));
1090}
1091/// Return true if all of the alias sets within this AST are known not to
1092/// contain a Mod, or if MSSA knows there are no MemoryDefs in the loop.
1093bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
1094 const Loop *L) {
1095 if (CurAST) {
(62) Assuming 'CurAST' is null
(63) Taking false branch
1096 for (AliasSet &AS : *CurAST) {
1097 if (!AS.isForwardingAliasSet() && AS.isMod()) {
1098 return false;
1099 }
1100 }
1101 return true;
1102 } else { /*MSSAU*/
1103 for (auto *BB : L->getBlocks())
(64) Assuming '__begin2' is not equal to '__end2'
1104 if (MSSAU->getMemorySSA()->getBlockDefs(BB))
(65) Called C++ object pointer is null
1105 return false;
1106 return true;
1107 }
1108}
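The analyzer's path ends inside isReadOnly: the function is entered (step 61, shown later in canSinkOrHoistInst) with both CurAST and MSSAU assumed null, so the unconditional MSSAU->getMemorySSA() call at line 1104 is reported as a null dereference (step 65). In-tree callers pass exactly one of the two analyses (the XOR assert at lines 843-844), a precondition the analyzer cannot see across the call. Below is a minimal sketch, assuming the same headers and helpers as the surrounding LICM.cpp, of how that precondition could be restated locally; the isReadOnlyChecked name is hypothetical and this is an illustration only, not the upstream fix.

// Sketch only: make the caller precondition (exactly one of CurAST / MSSAU
// is non-null) visible inside the callee so the "both null" path is pruned.
static bool isReadOnlyChecked(AliasSetTracker *CurAST,
                              const MemorySSAUpdater *MSSAU, const Loop *L) {
  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
         "Either AliasSetTracker or MemorySSA should be initialized.");
  if (CurAST) {
    // Read-only if no alias set in the tracker may be modified in the loop.
    for (AliasSet &AS : *CurAST)
      if (!AS.isForwardingAliasSet() && AS.isMod())
        return false;
    return true;
  }
  // MSSAU is known non-null on this branch: the loop is read-only if no
  // block contains MemoryDefs.
  for (auto *BB : L->getBlocks())
    if (MSSAU->getMemorySSA()->getBlockDefs(BB))
      return false;
  return true;
}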
1109
1110/// Return true if I is the only Instruction with a MemoryAccess in L.
1111bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1112 const MemorySSAUpdater *MSSAU) {
1113 for (auto *BB : L->getBlocks())
1114 if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
1115 int NotAPhi = 0;
1116 for (const auto &Acc : *Accs) {
1117 if (isa<MemoryPhi>(&Acc))
1118 continue;
1119 const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1120 if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1121 return false;
1122 }
1123 }
1124 return true;
1125}
1126}
1127
1128bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
1129 Loop *CurLoop, AliasSetTracker *CurAST,
1130 MemorySSAUpdater *MSSAU,
1131 bool TargetExecutesOncePerLoop,
1132 SinkAndHoistLICMFlags *Flags,
1133 OptimizationRemarkEmitter *ORE) {
1134 // If we don't understand the instruction, bail early.
1135 if (!isHoistableAndSinkableInst(I))
(1) Calling 'isHoistableAndSinkableInst'
(17) Returning from 'isHoistableAndSinkableInst'
(18) Taking false branch
1136 return false;
1137
1138 MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
(19) Assuming 'MSSAU' is null
(20) '?' condition is false
1139 if (MSSA)
(20.1) 'MSSA' is null
(21) Taking false branch
1140 assert(Flags != nullptr && "Flags cannot be null.");
1141
1142 // Loads have extra constraints we have to verify before we can hoist them.
1143 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
(22) Assuming the object is not a 'LoadInst'
(22.1) 'LI' is null
(23) Taking false branch
1144 if (!LI->isUnordered())
1145 return false; // Don't sink/hoist volatile or ordered atomic loads!
1146
1147 // Loads from constant memory are always safe to move, even if they end up
1148 // in the same alias set as something that ends up being modified.
1149 if (AA->pointsToConstantMemory(LI->getOperand(0)))
1150 return true;
1151 if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1152 return true;
1153
1154 if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1155 return false; // Don't risk duplicating unordered loads
1156
1157 // This checks for an invariant.start dominating the load.
1158 if (isLoadInvariantInLoop(LI, DT, CurLoop))
1159 return true;
1160
1161 bool Invalidated;
1162 if (CurAST)
1163 Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
1164 CurLoop, AA);
1165 else
1166 Invalidated = pointerInvalidatedByLoopWithMSSA(
1167 MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, *Flags);
1168 // Check loop-invariant address because this may also be a sinkable load
1169 // whose address is not necessarily loop-invariant.
1170 if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1171 ORE->emit([&]() {
1172 return OptimizationRemarkMissed(
1173 DEBUG_TYPE"licm", "LoadWithLoopInvariantAddressInvalidated", LI)
1174 << "failed to move load with loop-invariant address "
1175 "because the loop may invalidate its value";
1176 });
1177
1178 return !Invalidated;
1179 } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
(24) Assuming the object is a 'CallInst'
(24.1) 'CI' is non-null
(25) Taking true branch
1180 // Don't sink or hoist dbg info; it's legal, but not useful.
1181 if (isa<DbgInfoIntrinsic>(I))
(26) Assuming 'I' is not a 'DbgInfoIntrinsic'
(27) Taking false branch
1182 return false;
1183
1184 // Don't sink calls which can throw.
1185 if (CI->mayThrow())
(28) Assuming the condition is false
(29) Taking false branch
1186 return false;
1187
1188 using namespace PatternMatch;
1189 if (match(CI, m_Intrinsic<Intrinsic::assume>()))
(30) Calling 'match<llvm::CallInst, llvm::PatternMatch::IntrinsicID_match>'
(37) Returning from 'match<llvm::CallInst, llvm::PatternMatch::IntrinsicID_match>'
(38) Taking false branch
1190 // Assumes don't actually alias anything or throw
1191 return true;
1192
1193 if (match(CI, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
(39) Calling 'match<llvm::CallInst, llvm::PatternMatch::IntrinsicID_match>'
(46) Returning from 'match<llvm::CallInst, llvm::PatternMatch::IntrinsicID_match>'
(47) Taking false branch
1194 // Widenable conditions don't actually alias anything or throw
1195 return true;
1196
1197 // Handle simple cases by querying alias analysis.
1198 FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
1199 if (Behavior == FMRB_DoesNotAccessMemory)
(48) Assuming 'Behavior' is not equal to FMRB_DoesNotAccessMemory
(49) Taking false branch
1200 return true;
1201 if (AliasAnalysis::onlyReadsMemory(Behavior)) {
(50) Calling 'AAResults::onlyReadsMemory'
(53) Returning from 'AAResults::onlyReadsMemory'
(54) Taking true branch
1202 // A readonly argmemonly function only reads from memory pointed to by
1203 // its arguments with arbitrary offsets. If we can prove there are no
1204 // writes to this memory in the loop, we can hoist or sink.
1205 if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
(55) Calling 'AAResults::onlyAccessesArgPointees'
(58) Returning from 'AAResults::onlyAccessesArgPointees'
(59) Taking false branch
1206 // TODO: expand to writeable arguments
1207 for (Value *Op : CI->arg_operands())
1208 if (Op->getType()->isPointerTy()) {
1209 bool Invalidated;
1210 if (CurAST)
1211 Invalidated = pointerInvalidatedByLoop(
1212 MemoryLocation(Op, LocationSize::unknown(), AAMDNodes()),
1213 CurAST, CurLoop, AA);
1214 else
1215 Invalidated = pointerInvalidatedByLoopWithMSSA(
1216 MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop,
1217 *Flags);
1218 if (Invalidated)
1219 return false;
1220 }
1221 return true;
1222 }
1223
1224 // If this call only reads from memory and there are no writes to memory
1225 // in the loop, we can hoist or sink the call as appropriate.
1226 if (isReadOnly(CurAST, MSSAU, CurLoop))
(60) Passing null pointer value via 2nd parameter 'MSSAU'
(61) Calling 'isReadOnly'
1227 return true;
1228 }
1229
1230 // FIXME: This should use mod/ref information to see if we can hoist or
1231 // sink the call.
1232
1233 return false;
1234 } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
1235 // Fences alias (most) everything to provide ordering. For the moment,
1236 // just give up if there are any other memory operations in the loop.
1237 if (CurAST) {
1238 auto Begin = CurAST->begin();
1239 assert(Begin != CurAST->end() && "must contain FI");
1240 if (std::next(Begin) != CurAST->end())
1241 // constant memory for instance, TODO: handle better
1242 return false;
1243 auto *UniqueI = Begin->getUniqueInstruction();
1244 if (!UniqueI)
1245 // other memory op, give up
1246 return false;
1247 (void)FI; // suppress unused variable warning
1248 assert(UniqueI == FI && "AS must contain FI");
1249 return true;
1250 } else // MSSAU
1251 return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
1252 } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1253 if (!SI->isUnordered())
1254 return false; // Don't sink/hoist volatile or ordered atomic store!
1255
1256 // We can only hoist a store that we can prove writes a value which is not
1257 // read or overwritten within the loop. For those cases, we fallback to
1258 // load store promotion instead. TODO: We can extend this to cases where
1259 // there is exactly one write to the location and that write dominates an
1260 // arbitrary number of reads in the loop.
1261 if (CurAST) {
1262 auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
1263
1264 if (AS.isRef() || !AS.isMustAlias())
1265 // Quick exit test, handled by the full path below as well.
1266 return false;
1267 auto *UniqueI = AS.getUniqueInstruction();
1268 if (!UniqueI)
1269 // other memory op, give up
1270 return false;
1271 assert(UniqueI == SI && "AS must contain SI");
1272 return true;
1273 } else { // MSSAU
1274 if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
1275 return true;
1276 // If there are more accesses than the Promotion cap, give up, we're not
1277 // walking a list that long.
1278 if (Flags->NoOfMemAccTooLarge)
1279 return false;
1280 // Check store only if there's still "quota" to check clobber.
1281 if (Flags->LicmMssaOptCounter >= Flags->LicmMssaOptCap)
1282 return false;
1283 // If there are interfering Uses (i.e. their defining access is in the
1284 // loop), or ordered loads (stored as Defs!), don't move this store.
1285 // Could do better here, but this is conservatively correct.
1286 // TODO: Cache set of Uses on the first walk in runOnLoop, update when
1287 // moving accesses. Can also extend to dominating uses.
1288 auto *SIMD = MSSA->getMemoryAccess(SI);
1289 for (auto *BB : CurLoop->getBlocks())
1290 if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
1291 for (const auto &MA : *Accesses)
1292 if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
1293 auto *MD = MU->getDefiningAccess();
1294 if (!MSSA->isLiveOnEntryDef(MD) &&
1295 CurLoop->contains(MD->getBlock()))
1296 return false;
1297 // Disable hoisting past potentially interfering loads. Optimized
1298 // Uses may point to an access outside the loop, as getClobbering
1299 // checks the previous iteration when walking the backedge.
1300 // FIXME: More precise: no Uses that alias SI.
1301 if (!Flags->IsSink && !MSSA->dominates(SIMD, MU))
1302 return false;
1303 } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
1304 if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
1305 (void)LI; // Silence warning.
1306 assert(!LI->isUnordered() && "Expected unordered load");
1307 return false;
1308 }
1309 // Any call, while it may not be clobbering SI, it may be a use.
1310 if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
1311 // Check if the call may read from the memory location written
1312 // to by SI. Check CI's attributes and arguments; the number of
1313 // such checks performed is limited above by NoOfMemAccTooLarge.
1314 ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI));
1315 if (isModOrRefSet(MRI))
1316 return false;
1317 }
1318 }
1319 }
1320
1321 auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
1322 Flags->LicmMssaOptCounter++;
1323 // If there are no clobbering Defs in the loop, store is safe to hoist.
1324 return MSSA->isLiveOnEntryDef(Source) ||
1325 !CurLoop->contains(Source->getBlock());
1326 }
1327 }
1328
1329 assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1330
1331 // We've established mechanical ability and aliasing, it's up to the caller
1332 // to check fault safety
1333 return true;
1334}
1335
1336 /// Returns true if a PHINode is trivially replaceable with an
1337/// Instruction.
1338/// This is true when all incoming values are that instruction.
1339/// This pattern occurs most often with LCSSA PHI nodes.
1340///
1341static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1342 for (const Value *IncValue : PN.incoming_values())
1343 if (IncValue != &I)
1344 return false;
1345
1346 return true;
1347}
1348
1349/// Return true if the instruction is free in the loop.
1350static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
1351 const TargetTransformInfo *TTI) {
1352
1353 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1354 if (TTI->getUserCost(GEP) != TargetTransformInfo::TCC_Free)
1355 return false;
1356 // For a GEP, we cannot simply use getUserCost because currently it
1357 // optimistically assumes that a GEP will fold into addressing mode
1358 // regardless of its users.
1359 const BasicBlock *BB = GEP->getParent();
1360 for (const User *U : GEP->users()) {
1361 const Instruction *UI = cast<Instruction>(U);
1362 if (CurLoop->contains(UI) &&
1363 (BB != UI->getParent() ||
1364 (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1365 return false;
1366 }
1367 return true;
1368 } else
1369 return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
1370}
1371
1372/// Return true if the only users of this instruction are outside of
1373/// the loop. If this is true, we can sink the instruction to the exit
1374/// blocks of the loop.
1375///
1376/// We also return true if the instruction could be folded away in lowering.
1377/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
1378static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
1379 const LoopSafetyInfo *SafetyInfo,
1380 TargetTransformInfo *TTI, bool &FreeInLoop) {
1381 const auto &BlockColors = SafetyInfo->getBlockColors();
1382 bool IsFree = isFreeInLoop(I, CurLoop, TTI);
1383 for (const User *U : I.users()) {
1384 const Instruction *UI = cast<Instruction>(U);
1385 if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1386 const BasicBlock *BB = PN->getParent();
1387 // We cannot sink uses in catchswitches.
1388 if (isa<CatchSwitchInst>(BB->getTerminator()))
1389 return false;
1390
1391 // We need to sink a callsite to a unique funclet. Avoid sinking if the
1392 // phi use is too muddled.
1393 if (isa<CallInst>(I))
1394 if (!BlockColors.empty() &&
1395 BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1396 return false;
1397 }
1398
1399 if (CurLoop->contains(UI)) {
1400 if (IsFree) {
1401 FreeInLoop = true;
1402 continue;
1403 }
1404 return false;
1405 }
1406 }
1407 return true;
1408}
1409
1410static Instruction *CloneInstructionInExitBlock(
1411 Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1412 const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
1413 Instruction *New;
1414 if (auto *CI = dyn_cast<CallInst>(&I)) {
1415 const auto &BlockColors = SafetyInfo->getBlockColors();
1416
1417 // Sinking call-sites need to be handled differently from other
1418 // instructions. The cloned call-site needs a funclet bundle operand
1419 // appropriate for its location in the CFG.
1420 SmallVector<OperandBundleDef, 1> OpBundles;
1421 for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1422 BundleIdx != BundleEnd; ++BundleIdx) {
1423 OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
1424 if (Bundle.getTagID() == LLVMContext::OB_funclet)
1425 continue;
1426
1427 OpBundles.emplace_back(Bundle);
1428 }
1429
1430 if (!BlockColors.empty()) {
1431 const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
1432 assert(CV.size() == 1 && "non-unique color for exit block!")((CV.size() == 1 && "non-unique color for exit block!"
) ? static_cast<void> (0) : __assert_fail ("CV.size() == 1 && \"non-unique color for exit block!\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Transforms/Scalar/LICM.cpp"
, 1432, __PRETTY_FUNCTION__))
;
1433 BasicBlock *BBColor = CV.front();
1434 Instruction *EHPad = BBColor->getFirstNonPHI();
1435 if (EHPad->isEHPad())
1436 OpBundles.emplace_back("funclet", EHPad);
1437 }
1438
1439 New = CallInst::Create(CI, OpBundles);
1440 } else {
1441 New = I.clone();
1442 }
1443
1444 ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
1445 if (!I.getName().empty())
1446 New->setName(I.getName() + ".le");
1447
1448 if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
1449 // Create a new MemoryAccess and let MemorySSA set its defining access.
1450 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1451 New, nullptr, New->getParent(), MemorySSA::Beginning);
1452 if (NewMemAcc) {
1453 if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
1454 MSSAU->insertDef(MemDef, /*RenameUses=*/true);
1455 else {
1456 auto *MemUse = cast<MemoryUse>(NewMemAcc);
1457 MSSAU->insertUse(MemUse, /*RenameUses=*/true);
1458 }
1459 }
1460 }
1461
1462 // Build LCSSA PHI nodes for any in-loop operands. Note that this is
1463 // particularly cheap because we can rip off the PHI node that we're
1464 // replacing for the number and blocks of the predecessors.
1465 // OPT: If this shows up in a profile, we can instead finish sinking all
1466 // invariant instructions, and then walk their operands to re-establish
1467 // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1468 // sinking bottom-up.
1469 for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
1470 ++OI)
1471 if (Instruction *OInst = dyn_cast<Instruction>(*OI))
1472 if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
1473 if (!OLoop->contains(&PN)) {
1474 PHINode *OpPN =
1475 PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1476 OInst->getName() + ".lcssa", &ExitBlock.front());
1477 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1478 OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1479 *OI = OpPN;
1480 }
1481 return New;
1482}
1483
1484static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
1485 AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
1486 if (AST)
1487 AST->deleteValue(&I);
1488 if (MSSAU)
1489 MSSAU->removeMemoryAccess(&I);
1490 SafetyInfo.removeInstruction(&I);
1491 I.eraseFromParent();
1492}
1493
1494static void moveInstructionBefore(Instruction &I, Instruction &Dest,
1495 ICFLoopSafetyInfo &SafetyInfo,
1496 MemorySSAUpdater *MSSAU,
1497 ScalarEvolution *SE) {
1498 SafetyInfo.removeInstruction(&I);
1499 SafetyInfo.insertInstructionTo(&I, Dest.getParent());
1500 I.moveBefore(&Dest);
1501 if (MSSAU)
1502 if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1503 MSSAU->getMemorySSA()->getMemoryAccess(&I)))
1504 MSSAU->moveToPlace(OldMemAcc, Dest.getParent(),
1505 MemorySSA::BeforeTerminator);
1506 if (SE)
1507 SE->forgetValue(&I);
1508}
1509
1510static Instruction *sinkThroughTriviallyReplaceablePHI(
1511 PHINode *TPN, Instruction *I, LoopInfo *LI,
1512 SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
1513 const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1514 MemorySSAUpdater *MSSAU) {
1515 assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1516 "Expect only trivially replaceable PHI");
1517 BasicBlock *ExitBlock = TPN->getParent();
1518 Instruction *New;
1519 auto It = SunkCopies.find(ExitBlock);
1520 if (It != SunkCopies.end())
1521 New = It->second;
1522 else
1523 New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(
1524 *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1525 return New;
1526}
1527
1528static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1529 BasicBlock *BB = PN->getParent();
1530 if (!BB->canSplitPredecessors())
1531 return false;
1532 // It's not impossible to split EHPad blocks, but if BlockColors already exist
1533 // it require updating BlockColors for all offspring blocks accordingly. By
1534 // skipping such corner case, we can make updating BlockColors after splitting
1535 // predecessor fairly simple.
1536 if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1537 return false;
1538 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
1539 BasicBlock *BBPred = *PI;
1540 if (isa<IndirectBrInst>(BBPred->getTerminator()))
1541 return false;
1542 }
1543 return true;
1544}
1545
1546static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
1547 LoopInfo *LI, const Loop *CurLoop,
1548 LoopSafetyInfo *SafetyInfo,
1549 MemorySSAUpdater *MSSAU) {
1550#ifndef NDEBUG
1551 SmallVector<BasicBlock *, 32> ExitBlocks;
1552 CurLoop->getUniqueExitBlocks(ExitBlocks);
1553 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1554 ExitBlocks.end());
1555#endif
1556 BasicBlock *ExitBB = PN->getParent();
1557 assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1558
1559 // Split predecessors of the loop exit so that instructions in the loop are
1560 // exposed to exit blocks through trivially replaceable PHIs while keeping the
1561 // loop in the canonical form where each predecessor of each exit block should
1562 // be contained within the loop. For example, this will convert the loop below
1563 // from
1564 //
1565 // LB1:
1566 // %v1 =
1567 // br %LE, %LB2
1568 // LB2:
1569 // %v2 =
1570 // br %LE, %LB1
1571 // LE:
1572 // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1573 //
1574 // to
1575 //
1576 // LB1:
1577 // %v1 =
1578 // br %LE.split, %LB2
1579 // LB2:
1580 // %v2 =
1581 // br %LE.split2, %LB1
1582 // LE.split:
1583 // %p1 = phi [%v1, %LB1] <-- trivially replaceable
1584 // br %LE
1585 // LE.split2:
1586 // %p2 = phi [%v2, %LB2] <-- trivially replaceable
1587 // br %LE
1588 // LE:
1589 // %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1590 //
1591 const auto &BlockColors = SafetyInfo->getBlockColors();
1592 SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1593 while (!PredBBs.empty()) {
1594 BasicBlock *PredBB = *PredBBs.begin();
1595 assert(CurLoop->contains(PredBB) &&
1596 "Expect all predecessors are in the loop");
1597 if (PN->getBasicBlockIndex(PredBB) >= 0) {
1598 BasicBlock *NewPred = SplitBlockPredecessors(
1599 ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
1600 // Since we do not allow splitting EH-block with BlockColors in
1601 // canSplitPredecessors(), we can simply assign predecessor's color to
1602 // the new block.
1603 if (!BlockColors.empty())
1604 // Grab a reference to the ColorVector to be inserted before getting the
1605 // reference to the vector we are copying because inserting the new
1606 // element in BlockColors might cause the map to be reallocated.
1607 SafetyInfo->copyColors(NewPred, PredBB);
1608 }
1609 PredBBs.remove(PredBB);
1610 }
1611}
1612
1613/// When an instruction is found to only be used outside of the loop, this
1614/// function moves it to the exit blocks and patches up SSA form as needed.
1615/// This method is guaranteed to remove the original instruction from its
1616/// position, and may either delete it or move it to outside of the loop.
1617///
1618static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1619 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
1620 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
1621 LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1622 ORE->emit([&]() {
1623 return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1624 << "sinking " << ore::NV("Inst", &I);
1625 });
1626 bool Changed = false;
1627 if (isa<LoadInst>(I))
1628 ++NumMovedLoads;
1629 else if (isa<CallInst>(I))
1630 ++NumMovedCalls;
1631 ++NumSunk;
1632
1633 // Iterate over users to be ready for actual sinking. Replace users via
1634 // unreachable blocks with undef and make all user PHIs trivially replaceable.
1635 SmallPtrSet<Instruction *, 8> VisitedUsers;
1636 for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1637 auto *User = cast<Instruction>(*UI);
1638 Use &U = UI.getUse();
1639 ++UI;
1640
1641 if (VisitedUsers.count(User) || CurLoop->contains(User))
1642 continue;
1643
1644 if (!DT->isReachableFromEntry(User->getParent())) {
1645 U = UndefValue::get(I.getType());
1646 Changed = true;
1647 continue;
1648 }
1649
1650 // The user must be a PHI node.
1651 PHINode *PN = cast<PHINode>(User);
1652
1653 // Surprisingly, instructions can be used outside of loops without any
1654 // exits. This can only happen in PHI nodes if the incoming block is
1655 // unreachable.
1656 BasicBlock *BB = PN->getIncomingBlock(U);
1657 if (!DT->isReachableFromEntry(BB)) {
1658 U = UndefValue::get(I.getType());
1659 Changed = true;
1660 continue;
1661 }
1662
1663 VisitedUsers.insert(PN);
1664 if (isTriviallyReplaceablePHI(*PN, I))
1665 continue;
1666
1667 if (!canSplitPredecessors(PN, SafetyInfo))
1668 return Changed;
1669
1670 // Split predecessors of the PHI so that we can make users trivially
1671 // replaceable.
1672 splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
1673
1674 // Should rebuild the iterators, as they may be invalidated by
1675 // splitPredecessorsOfLoopExit().
1676 UI = I.user_begin();
1677 UE = I.user_end();
1678 }
1679
1680 if (VisitedUsers.empty())
1681 return Changed;
1682
1683#ifndef NDEBUG
1684 SmallVector<BasicBlock *, 32> ExitBlocks;
1685 CurLoop->getUniqueExitBlocks(ExitBlocks);
1686 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1687 ExitBlocks.end());
1688#endif
1689
1690 // Clones of this instruction. Don't create more than one per exit block!
1691 SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
1692
1693 // If this instruction is only used outside of the loop, then all users are
1694 // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1695 // the instruction.
1696 SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
1697 for (auto *UI : Users) {
1698 auto *User = cast<Instruction>(UI);
1699
1700 if (CurLoop->contains(User))
1701 continue;
1702
1703 PHINode *PN = cast<PHINode>(User);
1704 assert(ExitBlockSet.count(PN->getParent()) &&
1705 "The LCSSA PHI is not in an exit block!");
1706 // The PHI must be trivially replaceable.
1707 Instruction *New = sinkThroughTriviallyReplaceablePHI(
1708 PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1709 PN->replaceAllUsesWith(New);
1710 eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
1711 Changed = true;
1712 }
1713 return Changed;
1714}
1715
1716 /// When an instruction is found to only use loop invariant operands and
1717 /// is safe to hoist, this function is called to do the dirty work.
1718///
1719static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1720 BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1721 MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
1722 OptimizationRemarkEmitter *ORE) {
1723 LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
1724 << "\n");
1725 ORE->emit([&]() {
1726 return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1727 << ore::NV("Inst", &I);
1728 });
1729
1730 // Metadata can be dependent on conditions we are hoisting above.
1731 // Conservatively strip all metadata on the instruction unless we were
1732 // guaranteed to execute I if we entered the loop, in which case the metadata
1733 // is valid in the loop preheader.
1734 if (I.hasMetadataOtherThanDebugLoc() &&
1735 // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1736 // time in isGuaranteedToExecute if we don't actually have anything to
1737 // drop. It is a compile time optimization, not required for correctness.
1738 !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1739 I.dropUnknownNonDebugMetadata();
1740
1741 if (isa<PHINode>(I))
1742 // Move the new node to the end of the phi list in the destination block.
1743 moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE);
1744 else
1745 // Move the new node to the destination block, before its terminator.
1746 moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE);
1747
1748 // Apply line 0 debug locations when we are moving instructions to different
1749 // basic blocks because we want to avoid jumpy line tables.
1750 if (const DebugLoc &DL = I.getDebugLoc())
1751 I.setDebugLoc(DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
1752
1753 if (isa<LoadInst>(I))
1754 ++NumMovedLoads;
1755 else if (isa<CallInst>(I))
1756 ++NumMovedCalls;
1757 ++NumHoisted;
1758}
1759
1760/// Only sink or hoist an instruction if it is not a trapping instruction,
1761 /// or if the instruction is known not to trap when moved to the preheader,
1762/// or if it is a trapping instruction and is guaranteed to execute.
1763static bool isSafeToExecuteUnconditionally(Instruction &Inst,
1764 const DominatorTree *DT,
1765 const Loop *CurLoop,
1766 const LoopSafetyInfo *SafetyInfo,
1767 OptimizationRemarkEmitter *ORE,
1768 const Instruction *CtxI) {
1769 if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT))
1770 return true;
1771
1772 bool GuaranteedToExecute =
1773 SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1774
1775 if (!GuaranteedToExecute) {
1776 auto *LI = dyn_cast<LoadInst>(&Inst);
1777 if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1778 ORE->emit([&]() {
1779 return OptimizationRemarkMissed(
1780 DEBUG_TYPE"licm", "LoadWithLoopInvariantAddressCondExecuted", LI)
1781 << "failed to hoist load with loop-invariant address "
1782 "because load is conditionally executed";
1783 });
1784 }
1785
1786 return GuaranteedToExecute;
1787}
1788
1789namespace {
1790class LoopPromoter : public LoadAndStorePromoter {
1791 Value *SomePtr; // Designated pointer to store to.
1792 const SmallSetVector<Value *, 8> &PointerMustAliases;
1793 SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1794 SmallVectorImpl<Instruction *> &LoopInsertPts;
1795 SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1796 PredIteratorCache &PredCache;
1797 AliasSetTracker &AST;
1798 MemorySSAUpdater *MSSAU;
1799 LoopInfo &LI;
1800 DebugLoc DL;
1801 int Alignment;
1802 bool UnorderedAtomic;
1803 AAMDNodes AATags;
1804 ICFLoopSafetyInfo &SafetyInfo;
1805
1806 Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1807 if (Instruction *I = dyn_cast<Instruction>(V))
1808 if (Loop *L = LI.getLoopFor(I->getParent()))
1809 if (!L->contains(BB)) {
1810 // We need to create an LCSSA PHI node for the incoming value and
1811 // store that.
1812 PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1813 I->getName() + ".lcssa", &BB->front());
1814 for (BasicBlock *Pred : PredCache.get(BB))
1815 PN->addIncoming(I, Pred);
1816 return PN;
1817 }
1818 return V;
1819 }
1820
1821public:
1822 LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1823 const SmallSetVector<Value *, 8> &PMA,
1824 SmallVectorImpl<BasicBlock *> &LEB,
1825 SmallVectorImpl<Instruction *> &LIP,
1826 SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
1827 AliasSetTracker &ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
1828 DebugLoc dl, int alignment, bool UnorderedAtomic,
1829 const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
1830 : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
1831 LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
1832 PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
1833 Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1834 SafetyInfo(SafetyInfo) {}
1835
1836 bool isInstInList(Instruction *I,
1837 const SmallVectorImpl<Instruction *> &) const override {
1838 Value *Ptr;
1839 if (LoadInst *LI = dyn_cast<LoadInst>(I))
1840 Ptr = LI->getOperand(0);
1841 else
1842 Ptr = cast<StoreInst>(I)->getPointerOperand();
1843 return PointerMustAliases.count(Ptr);
1844 }
1845
1846 void doExtraRewritesBeforeFinalDeletion() override {
1847 // Insert stores in the loop exit blocks. Each exit block gets a
1848 // store of the live-out values that feed them. Since we've already told
1849 // the SSA updater about the defs in the loop and the preheader
1850 // definition, it is all set and we can start using it.
1851 for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1852 BasicBlock *ExitBlock = LoopExitBlocks[i];
1853 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1854 LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1855 Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1856 Instruction *InsertPos = LoopInsertPts[i];
1857 StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1858 if (UnorderedAtomic)
1859 NewSI->setOrdering(AtomicOrdering::Unordered);
1860 NewSI->setAlignment(MaybeAlign(Alignment));
1861 NewSI->setDebugLoc(DL);
1862 if (AATags)
1863 NewSI->setAAMetadata(AATags);
1864
1865 if (MSSAU) {
1866 MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1867 MemoryAccess *NewMemAcc;
1868 if (!MSSAInsertPoint) {
1869 NewMemAcc = MSSAU->createMemoryAccessInBB(
1870 NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1871 } else {
1872 NewMemAcc =
1873 MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1874 }
1875 MSSAInsertPts[i] = NewMemAcc;
1876 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1877 // FIXME: true for safety, false may still be correct.
1878 }
1879 }
1880 }
1881
1882 void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
1883 // Update alias analysis.
1884 AST.copyValue(LI, V);
1885 }
1886 void instructionDeleted(Instruction *I) const override {
1887 SafetyInfo.removeInstruction(I);
1888 AST.deleteValue(I);
1889 if (MSSAU)
1890 MSSAU->removeMemoryAccess(I);
1891 }
1892};
1893
1894
1895/// Return true iff we can prove that a caller of this function can not inspect
1896/// the contents of the provided object in a well defined program.
1897bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
1898 if (isa<AllocaInst>(Object))
1899 // Since the alloca goes out of scope, we know the caller can't retain a
1900 // reference to it and be well defined. Thus, we don't need to check for
1901 // capture.
1902 return true;
1903
1904 // For all other objects we need to know that the caller can't possibly
1905 // have gotten a reference to the object. There are two components of
1906 // that:
1907 // 1) Object can't be escaped by this function. This is what
1908 // PointerMayBeCaptured checks.
1909 // 2) Object can't have been captured at definition site. For this, we
1910 // need to know the return value is noalias. At the moment, we use a
1911 // weaker condition and handle only AllocLikeFunctions (which are
1912 // known to be noalias). TODO
1913 return isAllocLikeFn(Object, TLI) &&
1914 !PointerMayBeCaptured(Object, true, true);
1915}
1916
1917} // namespace
1918
1919/// Try to promote memory values to scalars by sinking stores out of the
1920/// loop and moving loads to before the loop. We do this by looping over
1921/// the stores in the loop, looking for stores to Must pointers which are
1922/// loop invariant.
1923///
1924bool llvm::promoteLoopAccessesToScalars(
1925 const SmallSetVector<Value *, 8> &PointerMustAliases,
1926 SmallVectorImpl<BasicBlock *> &ExitBlocks,
1927 SmallVectorImpl<Instruction *> &InsertPts,
1928 SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
1929 LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
1930 Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
1931 ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
1932 // Verify inputs.
1933 assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1934 CurAST != nullptr && SafetyInfo != nullptr &&
1935 "Unexpected Input to promoteLoopAccessesToScalars");
1936
1937 Value *SomePtr = *PointerMustAliases.begin();
1938 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1939
1940 // It is not safe to promote a load/store from the loop if the load/store is
1941 // conditional. For example, turning:
1942 //
1943 // for () { if (c) *P += 1; }
1944 //
1945 // into:
1946 //
1947 // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
1948 //
1949 // is not safe, because *P may only be valid to access if 'c' is true.
1950 //
1951 // The safety property divides into two parts:
1952 // p1) The memory may not be dereferenceable on entry to the loop. In this
1953 // case, we can't insert the required load in the preheader.
1954 // p2) The memory model does not allow us to insert a store along any dynamic
1955 // path which did not originally have one.
1956 //
1957 // If at least one store is guaranteed to execute, both properties are
1958 // satisfied, and promotion is legal.
1959 //
1960 // This, however, is not a necessary condition. Even if no store/load is
1961 // guaranteed to execute, we can still establish these properties.
1962 // We can establish (p1) by proving that hoisting the load into the preheader
1963 // is safe (i.e. proving dereferenceability on all paths through the loop). We
1964 // can use any access within the alias set to prove dereferenceability,
1965 // since they're all must alias.
1966 //
1967 // There are two ways to establish (p2):
1968 // a) Prove the location is thread-local. In this case the memory model
1969 // requirement does not apply, and stores are safe to insert.
1970 // b) Prove a store dominates every exit block. In this case, if an exit
1971 // block is reached, the original dynamic path would have taken us through
1972 // the store, so inserting a store into the exit block is safe. Note that this
1973 // is different from the store being guaranteed to execute. For instance,
1974 // if an exception is thrown on the first iteration of the loop, the original
1975 // store is never executed, but the exit blocks are not executed either.
1976
1977 bool DereferenceableInPH = false;
1978 bool SafeToInsertStore = false;
1979
1980 SmallVector<Instruction *, 64> LoopUses;
1981
1982 // We start with an alignment of one and try to find instructions that allow
1983 // us to prove better alignment.
1984 unsigned Alignment = 1;
1985 // Keep track of which types of access we see
1986 bool SawUnorderedAtomic = false;
1987 bool SawNotAtomic = false;
1988 AAMDNodes AATags;
1989
1990 const DataLayout &MDL = Preheader->getModule()->getDataLayout();
1991
1992 bool IsKnownThreadLocalObject = false;
1993 if (SafetyInfo->anyBlockMayThrow()) {
1994 // If a loop can throw, we have to insert a store along each unwind edge.
1995 // That said, we can't actually make the unwind edge explicit. Therefore,
1996 // we have to prove that the store is dead along the unwind edge. We do
1997 // this by proving that the caller can't have a reference to the object
1998 // after return and thus can't possibly load from the object.
1999 Value *Object = GetUnderlyingObject(SomePtr, MDL);
2000 if (!isKnownNonEscaping(Object, TLI))
2001 return false;
2002 // Subtlety: Alloca's aren't visible to callers, but *are* potentially
2003 // visible to other threads if captured and used during their lifetimes.
2004 IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
2005 }
2006
2007 // Check that all of the pointers in the alias set have the same type. We
2008 // cannot (yet) promote a memory location that is loaded and stored in
2009 // different sizes. While we are at it, collect alignment and AA info.
2010 for (Value *ASIV : PointerMustAliases) {
2011 // Check that all of the pointers in the alias set have the same type. We
2012 // cannot (yet) promote a memory location that is loaded and stored in
2013 // different sizes.
2014 if (SomePtr->getType() != ASIV->getType())
2015 return false;
2016
2017 for (User *U : ASIV->users()) {
2018 // Ignore instructions that are outside the loop.
2019 Instruction *UI = dyn_cast<Instruction>(U);
2020 if (!UI || !CurLoop->contains(UI))
2021 continue;
2022
2023 // If there is a non-load/store instruction in the loop, we can't promote
2024 // it.
2025 if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
2026 if (!Load->isUnordered())
2027 return false;
2028
2029 SawUnorderedAtomic |= Load->isAtomic();
2030 SawNotAtomic |= !Load->isAtomic();
2031
2032 unsigned InstAlignment = Load->getAlignment();
2033 if (!InstAlignment)
2034 InstAlignment =
2035 MDL.getABITypeAlignment(Load->getType());
2036
2037 // Note that proving a load safe to speculate requires proving
2038 // sufficient alignment at the target location. Proving it guaranteed
2039 // to execute does as well. Thus we can increase our guaranteed
2040 // alignment as well.
2041 if (!DereferenceableInPH || (InstAlignment > Alignment))
2042 if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo,
2043 ORE, Preheader->getTerminator())) {
2044 DereferenceableInPH = true;
2045 Alignment = std::max(Alignment, InstAlignment);
2046 }
2047 } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
2048 // Stores *of* the pointer are not interesting, only stores *to* the
2049 // pointer.
2050 if (UI->getOperand(1) != ASIV)
2051 continue;
2052 if (!Store->isUnordered())
2053 return false;
2054
2055 SawUnorderedAtomic |= Store->isAtomic();
2056 SawNotAtomic |= !Store->isAtomic();
2057
2058 // If the store is guaranteed to execute, both properties are satisfied.
2059 // We may want to check if a store is guaranteed to execute even if we
2060 // already know that promotion is safe, since it may have higher
2061 // alignment than any other guaranteed stores, in which case we can
2062 // raise the alignment on the promoted store.
2063 unsigned InstAlignment = Store->getAlignment();
2064 if (!InstAlignment)
2065 InstAlignment =
2066 MDL.getABITypeAlignment(Store->getValueOperand()->getType());
2067
2068 if (!DereferenceableInPH || !SafeToInsertStore ||
2069 (InstAlignment > Alignment)) {
2070 if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
2071 DereferenceableInPH = true;
2072 SafeToInsertStore = true;
2073 Alignment = std::max(Alignment, InstAlignment);
2074 }
2075 }
2076
2077 // If a store dominates all exit blocks, it is safe to sink.
2078 // As explained above, if an exit block was executed, a dominating
2079 // store must have been executed at least once, so we are not
2080 // introducing stores on paths that did not have them.
2081 // Note that this only looks at explicit exit blocks. If we ever
2082 // start sinking stores into unwind edges (see above), this will break.
2083 if (!SafeToInsertStore)
2084 SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
2085 return DT->dominates(Store->getParent(), Exit);
2086 });
2087
2088 // If the store is not guaranteed to execute, we may still get
2089 // deref info through it.
2090 if (!DereferenceableInPH) {
2091 DereferenceableInPH = isDereferenceableAndAlignedPointer(
2092 Store->getPointerOperand(), Store->getValueOperand()->getType(),
2093 MaybeAlign(Store->getAlignment()), MDL,
2094 Preheader->getTerminator(), DT);
2095 }
2096 } else
2097 return false; // Not a load or store.
2098
2099 // Merge the AA tags.
2100 if (LoopUses.empty()) {
2101 // On the first load/store, just take its AA tags.
2102 UI->getAAMetadata(AATags);
2103 } else if (AATags) {
2104 UI->getAAMetadata(AATags, /* Merge = */ true);
2105 }
2106
2107 LoopUses.push_back(UI);
2108 }
2109 }
2110
2111 // If we found both an unordered atomic instruction and a non-atomic memory
2112 // access, bail. We can't blindly promote non-atomic to atomic since we
2113 // might not be able to lower the result. We can't downgrade since that
2114 // would violate the memory model. Also, align 0 is an error for atomics.
2115 if (SawUnorderedAtomic && SawNotAtomic)
2116 return false;
2117
2118 // If we're inserting an atomic load in the preheader, we must be able to
2119 // lower it. We're only guaranteed to be able to lower naturally aligned
2120 // atomics.
2121 auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
2122 if (SawUnorderedAtomic &&
2123 Alignment < MDL.getTypeStoreSize(SomePtrElemType))
2124 return false;
2125
2126 // If we couldn't prove we can hoist the load, bail.
2127 if (!DereferenceableInPH)
2128 return false;
2129
2130 // We know we can hoist the load, but don't have a guaranteed store.
2131 // Check whether the location is thread-local. If it is, then we can insert
2132 // stores along paths which originally didn't have them without violating the
2133 // memory model.
2134 if (!SafeToInsertStore) {
2135 if (IsKnownThreadLocalObject)
2136 SafeToInsertStore = true;
2137 else {
2138 Value *Object = GetUnderlyingObject(SomePtr, MDL);
2139 SafeToInsertStore =
2140 (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
2141 !PointerMayBeCaptured(Object, true, true);
2142 }
2143 }
2144
2145 // If we've still failed to prove we can sink the store, give up.
2146 if (!SafeToInsertStore)
2147 return false;
2148
2149 // Otherwise, this is safe to promote, lets do it!
2150 LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
2151                   << '\n');
2152 ORE->emit([&]() {
2153 return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2154 LoopUses[0])
2155 << "Moving accesses to memory location out of the loop";
2156 });
2157 ++NumPromoted;
2158
2159 // Grab a debug location for the inserted loads/stores; given that the
2160 // inserted loads/stores have little relation to the original loads/stores,
2161 // this code just arbitrarily picks a location from one, since any debug
2162 // location is better than none.
2163 DebugLoc DL = LoopUses[0]->getDebugLoc();
2164
2165 // We use the SSAUpdater interface to insert phi nodes as required.
2166 SmallVector<PHINode *, 16> NewPHIs;
2167 SSAUpdater SSA(&NewPHIs);
2168 LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
2169 InsertPts, MSSAInsertPts, PIC, *CurAST, MSSAU, *LI, DL,
2170 Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
2171
2172 // Set up the preheader to have a definition of the value. It is the live-out
2173 // value from the preheader that uses in the loop will use.
2174 LoadInst *PreheaderLoad = new LoadInst(
2175 SomePtr->getType()->getPointerElementType(), SomePtr,
2176 SomePtr->getName() + ".promoted", Preheader->getTerminator());
2177 if (SawUnorderedAtomic)
2178 PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2179 PreheaderLoad->setAlignment(MaybeAlign(Alignment));
2180 PreheaderLoad->setDebugLoc(DL);
2181 if (AATags)
2182 PreheaderLoad->setAAMetadata(AATags);
2183 SSA.AddAvailableValue(Preheader, PreheaderLoad);
2184
2185 if (MSSAU) {
2186 MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
2187 PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2188 MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2189 MSSAU->insertUse(NewMemUse, /*RenameUses=*/true);
2190 }
2191
2192 if (MSSAU && VerifyMemorySSA)
2193 MSSAU->getMemorySSA()->verifyMemorySSA();
2194 // Rewrite all the loads in the loop and remember all the definitions from
2195 // stores in the loop.
2196 Promoter.run(LoopUses);
2197
2198 if (MSSAU && VerifyMemorySSA)
2199 MSSAU->getMemorySSA()->verifyMemorySSA();
2200 // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2201 if (PreheaderLoad->use_empty())
2202 eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);
2203
2204 return true;
2205}
2206
2207/// Returns an owning pointer to an alias set which incorporates aliasing info
2208/// from L and all subloops of L.
2209/// FIXME: In new pass manager, there is no helper function to handle loop
2210/// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
2211/// from scratch for every loop. Hook up with the helper functions when
2212/// available in the new pass manager to avoid redundant computation.
2213std::unique_ptr<AliasSetTracker>
2214LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
2215 AliasAnalysis *AA) {
2216 std::unique_ptr<AliasSetTracker> CurAST;
2217 SmallVector<Loop *, 4> RecomputeLoops;
2218 for (Loop *InnerL : L->getSubLoops()) {
2219 auto MapI = LoopToAliasSetMap.find(InnerL);
2220 // If the AST for this inner loop is missing it may have been merged into
2221 // some other loop's AST and then that loop unrolled, and so we need to
2222 // recompute it.
2223 if (MapI == LoopToAliasSetMap.end()) {
2224 RecomputeLoops.push_back(InnerL);
2225 continue;
2226 }
2227 std::unique_ptr<AliasSetTracker> InnerAST = std::move(MapI->second);
2228
2229 if (CurAST) {
2230 // What if InnerLoop was modified by other passes?
2231 // Once we've incorporated the inner loop's AST into ours, we don't need
2232 // the subloop's anymore.
2233 CurAST->add(*InnerAST);
2234 } else {
2235 CurAST = std::move(InnerAST);
2236 }
2237 LoopToAliasSetMap.erase(MapI);
2238 }
2239 if (!CurAST)
2240 CurAST = std::make_unique<AliasSetTracker>(*AA);
2241
2242 // Add everything from the sub loops that are no longer directly available.
2243 for (Loop *InnerL : RecomputeLoops)
2244 for (BasicBlock *BB : InnerL->blocks())
2245 CurAST->add(*BB);
2246
2247 // And merge in this loop (without anything from inner loops).
2248 for (BasicBlock *BB : L->blocks())
2249 if (LI->getLoopFor(BB) == L)
2250 CurAST->add(*BB);
2251
2252 return CurAST;
2253}
2254
2255std::unique_ptr<AliasSetTracker>
2256LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA(
2257 Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) {
2258 auto *MSSA = MSSAU->getMemorySSA();
2259 auto CurAST = std::make_unique<AliasSetTracker>(*AA, MSSA, L);
2260 CurAST->addAllInstructionsInLoopUsingMSSA();
2261 return CurAST;
2262}
2263
2264/// Simple analysis hook. Clone alias set info.
2265///
2266void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
2267 Loop *L) {
2268 auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2269 if (ASTIt == LICM.getLoopToAliasSetMap().end())
2270 return;
2271
2272 ASTIt->second->copyValue(From, To);
2273}
2274
2275/// Simple Analysis hook. Delete value V from alias set
2276///
2277void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) {
2278 auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2279 if (ASTIt == LICM.getLoopToAliasSetMap().end())
2280 return;
2281
2282 ASTIt->second->deleteValue(V);
2283}
2284
2285/// Simple Analysis hook. Delete value L from alias set map.
2286///
2287void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
2288 if (!LICM.getLoopToAliasSetMap().count(L))
2289 return;
2290
2291 LICM.getLoopToAliasSetMap().erase(L);
2292}
2293
2294static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
2295 AliasSetTracker *CurAST, Loop *CurLoop,
2296 AliasAnalysis *AA) {
2297 // First check to see if any of the basic blocks in CurLoop invalidate *V.
2298 bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();
2299
2300 if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
2301 return isInvalidatedAccordingToAST;
2302
2303 // Check with a diagnostic analysis if we can refine the information above.
2304 // This is to identify the limitations of using the AST.
2305 // The alias set mechanism used by LICM has a major weakness in that it
2306 // combines all things which may alias into a single set *before* asking
2307 // modref questions. As a result, a single readonly call within a loop will
2308 // collapse all loads and stores into a single alias set and report
2309 // invalidation if the loop contains any store. For example, readonly calls
2310 // with deopt states have this form and create a general alias set with all
2311 // loads and stores. In order to get any LICM in loops containing possible
2312 // deopt states we need a more precise invalidation of checking the mod ref
2313 // info of each instruction within the loop and LI. This has a complexity of
2314 // O(N^2), so currently, it is used only as a diagnostic tool since the
2315 // default value of LICMN2Threshold is zero.
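// Illustrative sketch of the weakness described above (hypothetical loop,
// not taken from this pass): given
//
//   for (...) {
//     v = *p;            // load from p
//     readonly_call();   // e.g. a readonly call carrying a deopt state
//     *q = v;            // store to an unrelated location q
//   }
//
// the tracker merges p, q and the call into one alias set before any modref
// query is made, so the load from p is reported as invalidated by the store
// to q even though q never aliases p.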
2316
2317 // Don't look at nested loops.
2318 if (CurLoop->begin() != CurLoop->end())
2319 return true;
2320
2321 int N = 0;
2322 for (BasicBlock *BB : CurLoop->getBlocks())
2323 for (Instruction &I : *BB) {
2324 if (N >= LICMN2Theshold) {
2325        LLVM_DEBUG(dbgs() << "Aliasing N2 threshold exhausted for "
2326                          << *(MemLoc.Ptr) << "\n");
2327 return true;
2328 }
2329 N++;
2330 auto Res = AA->getModRefInfo(&I, MemLoc);
2331 if (isModSet(Res)) {
2332        LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
2333                          << *(MemLoc.Ptr) << "\n");
2334 return true;
2335 }
2336 }
2337  LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
2338 return false;
2339}
2340
2341static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
2342 Loop *CurLoop,
2343 SinkAndHoistLICMFlags &Flags) {
2344 // For hoisting, use the walker to determine safety
2345 if (!Flags.IsSink) {
2346 MemoryAccess *Source;
2347 // See declaration of SetLicmMssaOptCap for usage details.
2348 if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
2349 Source = MU->getDefiningAccess();
2350 else {
2351 Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
2352 Flags.LicmMssaOptCounter++;
2353 }
2354 return !MSSA->isLiveOnEntryDef(Source) &&
2355 CurLoop->contains(Source->getBlock());
2356 }
2357
2358 // For sinking, we'd need to check all Defs below this use. The getClobbering
2359 // call will look on the backedge of the loop, but will check aliasing with
2360 // the instructions on the previous iteration.
2361 // For example:
2362 // for (i ... )
2363 // load a[i] ( Use (LoE)
2364 // store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
2365 // i++;
2366 // The load sees no clobbering inside the loop, as the backedge alias check
2367 // does phi translation, and will check aliasing against store a[i-1].
2368 // However sinking the load outside the loop, below the store is incorrect.
2369
2370 // For now, only sink if there are no Defs in the loop, and the existing ones
2371 // precede the use and are in the same block.
2372 // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2373 // needs PostDominatorTreeAnalysis.
2374 // FIXME: More precise: no Defs that alias this Use.
2375 if (Flags.NoOfMemAccTooLarge)
2376 return true;
2377 for (auto *BB : CurLoop->getBlocks())
2378 if (auto *Accesses = MSSA->getBlockDefs(BB))
2379 for (const auto &MA : *Accesses)
2380 if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2381 if (MU->getBlock() != MD->getBlock() ||
2382 !MSSA->locallyDominates(MD, MU))
2383 return true;
2384 return false;
2385}
2386
2387/// Little predicate that returns true if the specified basic block is in
2388/// a subloop of the current one, not the current one itself.
2389///
2390static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2391  assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2392 return LI->getLoopFor(BB) != CurLoop;
2393}

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/IR/PatternMatch.h

1//===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides a simple and efficient mechanism for performing general
10// tree-based pattern matches on the LLVM IR. The power of these routines is
11// that it allows you to write concise patterns that are expressive and easy to
12// understand. The other major advantage of this is that it allows you to
13// trivially capture/bind elements in the pattern to variables. For example,
14// you can do something like this:
15//
16// Value *Exp = ...
17// Value *X, *Y; ConstantInt *C1, *C2; // (X & C1) | (Y & C2)
18// if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)),
19// m_And(m_Value(Y), m_ConstantInt(C2))))) {
20// ... Pattern is matched and variables are bound ...
21// }
22//
23// This is primarily useful to things like the instruction combiner, but can
24// also be useful for static analysis tools or code generators.
25//
26//===----------------------------------------------------------------------===//
27
28#ifndef LLVM_IR_PATTERNMATCH_H
29#define LLVM_IR_PATTERNMATCH_H
30
31#include "llvm/ADT/APFloat.h"
32#include "llvm/ADT/APInt.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/InstrTypes.h"
36#include "llvm/IR/Instruction.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/IntrinsicInst.h"
39#include "llvm/IR/Intrinsics.h"
40#include "llvm/IR/Operator.h"
41#include "llvm/IR/Value.h"
42#include "llvm/Support/Casting.h"
43#include <cstdint>
44
45namespace llvm {
46namespace PatternMatch {
47
48template <typename Val, typename Pattern> bool match(Val *V, const Pattern &P) {
49 return const_cast<Pattern &>(P).match(V);
31. Calling 'IntrinsicID_match::match'
35. Returning from 'IntrinsicID_match::match'
36. Returning zero, which participates in a condition later
40. Calling 'IntrinsicID_match::match'
44. Returning from 'IntrinsicID_match::match'
45. Returning zero, which participates in a condition later
50}
51
52template <typename SubPattern_t> struct OneUse_match {
53 SubPattern_t SubPattern;
54
55 OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}
56
57 template <typename OpTy> bool match(OpTy *V) {
58 return V->hasOneUse() && SubPattern.match(V);
59 }
60};
61
62template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
63 return SubPattern;
64}
65
66template <typename Class> struct class_match {
67 template <typename ITy> bool match(ITy *V) { return isa<Class>(V); }
68};
69
70/// Match an arbitrary value and ignore it.
71inline class_match<Value> m_Value() { return class_match<Value>(); }
72
73/// Match an arbitrary binary operation and ignore it.
74inline class_match<BinaryOperator> m_BinOp() {
75 return class_match<BinaryOperator>();
76}
77
78/// Matches any compare instruction and ignore it.
79inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); }
80
81/// Match an arbitrary ConstantInt and ignore it.
82inline class_match<ConstantInt> m_ConstantInt() {
83 return class_match<ConstantInt>();
84}
85
86/// Match an arbitrary undef constant.
87inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); }
88
89/// Match an arbitrary Constant and ignore it.
90inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
91
92/// Match an arbitrary basic block value and ignore it.
93inline class_match<BasicBlock> m_BasicBlock() {
94 return class_match<BasicBlock>();
95}
96
97/// Inverting matcher
98template <typename Ty> struct match_unless {
99 Ty M;
100
101 match_unless(const Ty &Matcher) : M(Matcher) {}
102
103 template <typename ITy> bool match(ITy *V) { return !M.match(V); }
104};
105
106/// Match if the inner matcher does *NOT* match.
107template <typename Ty> inline match_unless<Ty> m_Unless(const Ty &M) {
108 return match_unless<Ty>(M);
109}
110
111/// Matching combinators
112template <typename LTy, typename RTy> struct match_combine_or {
113 LTy L;
114 RTy R;
115
116 match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}
117
118 template <typename ITy> bool match(ITy *V) {
119 if (L.match(V))
120 return true;
121 if (R.match(V))
122 return true;
123 return false;
124 }
125};
126
127template <typename LTy, typename RTy> struct match_combine_and {
128 LTy L;
129 RTy R;
130
131 match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}
132
133 template <typename ITy> bool match(ITy *V) {
134 if (L.match(V))
135 if (R.match(V))
136 return true;
137 return false;
138 }
139};
140
141/// Combine two pattern matchers matching L || R
142template <typename LTy, typename RTy>
143inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) {
144 return match_combine_or<LTy, RTy>(L, R);
145}
146
147/// Combine two pattern matchers matching L && R
148template <typename LTy, typename RTy>
149inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
150 return match_combine_and<LTy, RTy>(L, R);
151}
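// Usage sketch for the combinators above (illustrative only; 'V' is an
// assumed Value* supplied by the caller):
//
//   // Accept either an integer constant or an undef value:
//   if (match(V, m_CombineOr(m_ConstantInt(), m_Undef()))) { ... }
//
//   // Require a single-use value that is also a binary operator:
//   if (match(V, m_CombineAnd(m_OneUse(m_Value()), m_BinOp()))) { ... }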
152
153struct apint_match {
154 const APInt *&Res;
155
156 apint_match(const APInt *&R) : Res(R) {}
157
158 template <typename ITy> bool match(ITy *V) {
159 if (auto *CI = dyn_cast<ConstantInt>(V)) {
160 Res = &CI->getValue();
161 return true;
162 }
163 if (V->getType()->isVectorTy())
164 if (const auto *C = dyn_cast<Constant>(V))
165 if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
166 Res = &CI->getValue();
167 return true;
168 }
169 return false;
170 }
171};
172// Either constexpr if or renaming ConstantFP::getValueAPF to
173// ConstantFP::getValue is needed to do it via single template
174// function for both apint/apfloat.
175struct apfloat_match {
176 const APFloat *&Res;
177 apfloat_match(const APFloat *&R) : Res(R) {}
178 template <typename ITy> bool match(ITy *V) {
179 if (auto *CI = dyn_cast<ConstantFP>(V)) {
180 Res = &CI->getValueAPF();
181 return true;
182 }
183 if (V->getType()->isVectorTy())
184 if (const auto *C = dyn_cast<Constant>(V))
185 if (auto *CI = dyn_cast_or_null<ConstantFP>(C->getSplatValue())) {
186 Res = &CI->getValueAPF();
187 return true;
188 }
189 return false;
190 }
191};
192
193/// Match a ConstantInt or splatted ConstantVector, binding the
194/// specified pointer to the contained APInt.
195inline apint_match m_APInt(const APInt *&Res) { return Res; }
196
197/// Match a ConstantFP or splatted ConstantVector, binding the
198/// specified pointer to the contained APFloat.
199inline apfloat_match m_APFloat(const APFloat *&Res) { return Res; }
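// Usage sketch (illustrative; 'V' is an assumed Value*): bind the constant's
// value whether V is a scalar ConstantInt or a splatted constant vector:
//
//   const APInt *C;
//   if (match(V, m_APInt(C))) {
//     // *C holds the matched integer value
//   }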
200
201template <int64_t Val> struct constantint_match {
202 template <typename ITy> bool match(ITy *V) {
203 if (const auto *CI = dyn_cast<ConstantInt>(V)) {
204 const APInt &CIV = CI->getValue();
205 if (Val >= 0)
206 return CIV == static_cast<uint64_t>(Val);
207 // If Val is negative, and CI is shorter than it, truncate to the right
208 // number of bits. If it is larger, then we have to sign extend. Just
209 // compare their negated values.
210 return -CIV == -Val;
211 }
212 return false;
213 }
214};
215
216/// Match a ConstantInt with a specific value.
217template <int64_t Val> inline constantint_match<Val> m_ConstantInt() {
218 return constantint_match<Val>();
219}
220
221/// This helper class is used to match scalar and vector integer constants that
222/// satisfy a specified predicate.
223/// For vector constants, undefined elements are ignored.
224template <typename Predicate> struct cst_pred_ty : public Predicate {
225 template <typename ITy> bool match(ITy *V) {
226 if (const auto *CI = dyn_cast<ConstantInt>(V))
227 return this->isValue(CI->getValue());
228 if (V->getType()->isVectorTy()) {
229 if (const auto *C = dyn_cast<Constant>(V)) {
230 if (const auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()))
231 return this->isValue(CI->getValue());
232
233 // Non-splat vector constant: check each element for a match.
234 unsigned NumElts = V->getType()->getVectorNumElements();
235        assert(NumElts != 0 && "Constant vector with no elements?");
236 bool HasNonUndefElements = false;
237 for (unsigned i = 0; i != NumElts; ++i) {
238 Constant *Elt = C->getAggregateElement(i);
239 if (!Elt)
240 return false;
241 if (isa<UndefValue>(Elt))
242 continue;
243 auto *CI = dyn_cast<ConstantInt>(Elt);
244 if (!CI || !this->isValue(CI->getValue()))
245 return false;
246 HasNonUndefElements = true;
247 }
248 return HasNonUndefElements;
249 }
250 }
251 return false;
252 }
253};
254
255/// This helper class is used to match scalar and vector constants that
256/// satisfy a specified predicate, and bind them to an APInt.
257template <typename Predicate> struct api_pred_ty : public Predicate {
258 const APInt *&Res;
259
260 api_pred_ty(const APInt *&R) : Res(R) {}
261
262 template <typename ITy> bool match(ITy *V) {
263 if (const auto *CI = dyn_cast<ConstantInt>(V))
264 if (this->isValue(CI->getValue())) {
265 Res = &CI->getValue();
266 return true;
267 }
268 if (V->getType()->isVectorTy())
269 if (const auto *C = dyn_cast<Constant>(V))
270 if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()))
271 if (this->isValue(CI->getValue())) {
272 Res = &CI->getValue();
273 return true;
274 }
275
276 return false;
277 }
278};
279
280/// This helper class is used to match scalar and vector floating-point
281/// constants that satisfy a specified predicate.
282/// For vector constants, undefined elements are ignored.
283template <typename Predicate> struct cstfp_pred_ty : public Predicate {
284 template <typename ITy> bool match(ITy *V) {
285 if (const auto *CF = dyn_cast<ConstantFP>(V))
286 return this->isValue(CF->getValueAPF());
287 if (V->getType()->isVectorTy()) {
288 if (const auto *C = dyn_cast<Constant>(V)) {
289 if (const auto *CF = dyn_cast_or_null<ConstantFP>(C->getSplatValue()))
290 return this->isValue(CF->getValueAPF());
291
292 // Non-splat vector constant: check each element for a match.
293 unsigned NumElts = V->getType()->getVectorNumElements();
294        assert(NumElts != 0 && "Constant vector with no elements?");
295 bool HasNonUndefElements = false;
296 for (unsigned i = 0; i != NumElts; ++i) {
297 Constant *Elt = C->getAggregateElement(i);
298 if (!Elt)
299 return false;
300 if (isa<UndefValue>(Elt))
301 continue;
302 auto *CF = dyn_cast<ConstantFP>(Elt);
303 if (!CF || !this->isValue(CF->getValueAPF()))
304 return false;
305 HasNonUndefElements = true;
306 }
307 return HasNonUndefElements;
308 }
309 }
310 return false;
311 }
312};
313
314///////////////////////////////////////////////////////////////////////////////
315//
316// Encapsulate constant value queries for use in templated predicate matchers.
317// This allows checking if constants match using compound predicates and works
318// with vector constants, possibly with relaxed constraints. For example, ignore
319// undef values.
320//
321///////////////////////////////////////////////////////////////////////////////
322
323struct is_any_apint {
324 bool isValue(const APInt &C) { return true; }
325};
326/// Match an integer or vector with any integral constant.
327/// For vectors, this includes constants with undefined elements.
328inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() {
329 return cst_pred_ty<is_any_apint>();
330}
331
332struct is_all_ones {
333 bool isValue(const APInt &C) { return C.isAllOnesValue(); }
334};
335/// Match an integer or vector with all bits set.
336/// For vectors, this includes constants with undefined elements.
337inline cst_pred_ty<is_all_ones> m_AllOnes() {
338 return cst_pred_ty<is_all_ones>();
339}
340
341struct is_maxsignedvalue {
342 bool isValue(const APInt &C) { return C.isMaxSignedValue(); }
343};
344/// Match an integer or vector with values having all bits except for the high
345/// bit set (0x7f...).
346/// For vectors, this includes constants with undefined elements.
347inline cst_pred_ty<is_maxsignedvalue> m_MaxSignedValue() {
348 return cst_pred_ty<is_maxsignedvalue>();
349}
350inline api_pred_ty<is_maxsignedvalue> m_MaxSignedValue(const APInt *&V) {
351 return V;
352}
353
354struct is_negative {
355 bool isValue(const APInt &C) { return C.isNegative(); }
356};
357/// Match an integer or vector of negative values.
358/// For vectors, this includes constants with undefined elements.
359inline cst_pred_ty<is_negative> m_Negative() {
360 return cst_pred_ty<is_negative>();
361}
362inline api_pred_ty<is_negative> m_Negative(const APInt *&V) {
363 return V;
364}
365
366struct is_nonnegative {
367 bool isValue(const APInt &C) { return C.isNonNegative(); }
368};
369/// Match an integer or vector of non-negative values.
370/// For vectors, this includes constants with undefined elements.
371inline cst_pred_ty<is_nonnegative> m_NonNegative() {
372 return cst_pred_ty<is_nonnegative>();
373}
374inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) {
375 return V;
376}
377
378struct is_strictlypositive {
379 bool isValue(const APInt &C) { return C.isStrictlyPositive(); }
380};
381/// Match an integer or vector of strictly positive values.
382/// For vectors, this includes constants with undefined elements.
383inline cst_pred_ty<is_strictlypositive> m_StrictlyPositive() {
384 return cst_pred_ty<is_strictlypositive>();
385}
386inline api_pred_ty<is_strictlypositive> m_StrictlyPositive(const APInt *&V) {
387 return V;
388}
389
390struct is_nonpositive {
391 bool isValue(const APInt &C) { return C.isNonPositive(); }
392};
393/// Match an integer or vector of non-positive values.
394/// For vectors, this includes constants with undefined elements.
395inline cst_pred_ty<is_nonpositive> m_NonPositive() {
396 return cst_pred_ty<is_nonpositive>();
397}
398inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; }
399
400struct is_one {
401 bool isValue(const APInt &C) { return C.isOneValue(); }
402};
403/// Match an integer 1 or a vector with all elements equal to 1.
404/// For vectors, this includes constants with undefined elements.
405inline cst_pred_ty<is_one> m_One() {
406 return cst_pred_ty<is_one>();
407}
408
409struct is_zero_int {
410 bool isValue(const APInt &C) { return C.isNullValue(); }
411};
412/// Match an integer 0 or a vector with all elements equal to 0.
413/// For vectors, this includes constants with undefined elements.
414inline cst_pred_ty<is_zero_int> m_ZeroInt() {
415 return cst_pred_ty<is_zero_int>();
416}
417
418struct is_zero {
419 template <typename ITy> bool match(ITy *V) {
420 auto *C = dyn_cast<Constant>(V);
421 return C && (C->isNullValue() || cst_pred_ty<is_zero_int>().match(C));
422 }
423};
424/// Match any null constant or a vector with all elements equal to 0.
425/// For vectors, this includes constants with undefined elements.
426inline is_zero m_Zero() {
427 return is_zero();
428}
429
430struct is_power2 {
431 bool isValue(const APInt &C) { return C.isPowerOf2(); }
432};
433/// Match an integer or vector power-of-2.
434/// For vectors, this includes constants with undefined elements.
435inline cst_pred_ty<is_power2> m_Power2() {
436 return cst_pred_ty<is_power2>();
437}
438inline api_pred_ty<is_power2> m_Power2(const APInt *&V) {
439 return V;
440}
441
442struct is_negated_power2 {
443 bool isValue(const APInt &C) { return (-C).isPowerOf2(); }
444};
445/// Match an integer or vector negated power-of-2.
446/// For vectors, this includes constants with undefined elements.
447inline cst_pred_ty<is_negated_power2> m_NegatedPower2() {
448 return cst_pred_ty<is_negated_power2>();
449}
450inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) {
451 return V;
452}
453
454struct is_power2_or_zero {
455 bool isValue(const APInt &C) { return !C || C.isPowerOf2(); }
456};
457/// Match an integer or vector of 0 or power-of-2 values.
458/// For vectors, this includes constants with undefined elements.
459inline cst_pred_ty<is_power2_or_zero> m_Power2OrZero() {
460 return cst_pred_ty<is_power2_or_zero>();
461}
462inline api_pred_ty<is_power2_or_zero> m_Power2OrZero(const APInt *&V) {
463 return V;
464}
465
466struct is_sign_mask {
467 bool isValue(const APInt &C) { return C.isSignMask(); }
468};
469/// Match an integer or vector with only the sign bit(s) set.
470/// For vectors, this includes constants with undefined elements.
471inline cst_pred_ty<is_sign_mask> m_SignMask() {
472 return cst_pred_ty<is_sign_mask>();
473}
474
475struct is_lowbit_mask {
476 bool isValue(const APInt &C) { return C.isMask(); }
477};
478/// Match an integer or vector with only the low bit(s) set.
479/// For vectors, this includes constants with undefined elements.
480inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
481 return cst_pred_ty<is_lowbit_mask>();
482}
483
484struct icmp_pred_with_threshold {
485 ICmpInst::Predicate Pred;
486 const APInt *Thr;
487 bool isValue(const APInt &C) {
488 switch (Pred) {
489 case ICmpInst::Predicate::ICMP_EQ:
490 return C.eq(*Thr);
491 case ICmpInst::Predicate::ICMP_NE:
492 return C.ne(*Thr);
493 case ICmpInst::Predicate::ICMP_UGT:
494 return C.ugt(*Thr);
495 case ICmpInst::Predicate::ICMP_UGE:
496 return C.uge(*Thr);
497 case ICmpInst::Predicate::ICMP_ULT:
498 return C.ult(*Thr);
499 case ICmpInst::Predicate::ICMP_ULE:
500 return C.ule(*Thr);
501 case ICmpInst::Predicate::ICMP_SGT:
502 return C.sgt(*Thr);
503 case ICmpInst::Predicate::ICMP_SGE:
504 return C.sge(*Thr);
505 case ICmpInst::Predicate::ICMP_SLT:
506 return C.slt(*Thr);
507 case ICmpInst::Predicate::ICMP_SLE:
508 return C.sle(*Thr);
509 default:
510      llvm_unreachable("Unhandled ICmp predicate");
511 }
512 }
513};
514/// Match an integer or vector with every element comparing 'pred' (eq/ne/...)
515/// to Threshold. For vectors, this includes constants with undefined elements.
516inline cst_pred_ty<icmp_pred_with_threshold>
517m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) {
518 cst_pred_ty<icmp_pred_with_threshold> P;
519 P.Pred = Predicate;
520 P.Thr = &Threshold;
521 return P;
522}
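// Usage sketch (illustrative; 'V' is an assumed Value* whose integer type
// matches the 32-bit threshold below, since the comparison requires equal
// bit widths):
//
//   if (match(V, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(32, 8)))) {
//     // every defined element of V is unsigned-less-than 8
//   }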
523
524struct is_nan {
525 bool isValue(const APFloat &C) { return C.isNaN(); }
526};
527/// Match an arbitrary NaN constant. This includes quiet and signalling nans.
528/// For vectors, this includes constants with undefined elements.
529inline cstfp_pred_ty<is_nan> m_NaN() {
530 return cstfp_pred_ty<is_nan>();
531}
532
533struct is_any_zero_fp {
534 bool isValue(const APFloat &C) { return C.isZero(); }
535};
536/// Match a floating-point negative zero or positive zero.
537/// For vectors, this includes constants with undefined elements.
538inline cstfp_pred_ty<is_any_zero_fp> m_AnyZeroFP() {
539 return cstfp_pred_ty<is_any_zero_fp>();
540}
541
542struct is_pos_zero_fp {
543 bool isValue(const APFloat &C) { return C.isPosZero(); }
544};
545/// Match a floating-point positive zero.
546/// For vectors, this includes constants with undefined elements.
547inline cstfp_pred_ty<is_pos_zero_fp> m_PosZeroFP() {
548 return cstfp_pred_ty<is_pos_zero_fp>();
549}
550
551struct is_neg_zero_fp {
552 bool isValue(const APFloat &C) { return C.isNegZero(); }
553};
554/// Match a floating-point negative zero.
555/// For vectors, this includes constants with undefined elements.
556inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() {
557 return cstfp_pred_ty<is_neg_zero_fp>();
558}
559
560///////////////////////////////////////////////////////////////////////////////
561
562template <typename Class> struct bind_ty {
563 Class *&VR;
564
565 bind_ty(Class *&V) : VR(V) {}
566
567 template <typename ITy> bool match(ITy *V) {
568 if (auto *CV = dyn_cast<Class>(V)) {
569 VR = CV;
570 return true;
571 }
572 return false;
573 }
574};
575
576/// Match a value, capturing it if we match.
577inline bind_ty<Value> m_Value(Value *&V) { return V; }
578inline bind_ty<const Value> m_Value(const Value *&V) { return V; }
579
580/// Match an instruction, capturing it if we match.
581inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
582/// Match a binary operator, capturing it if we match.
583inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
584/// Match a with overflow intrinsic, capturing it if we match.
585inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; }
586
587/// Match a ConstantInt, capturing the value if we match.
588inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; }
589
590/// Match a Constant, capturing the value if we match.
591inline bind_ty<Constant> m_Constant(Constant *&C) { return C; }
592
593/// Match a ConstantFP, capturing the value if we match.
594inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; }
595
596/// Match a basic block value, capturing it if we match.
597inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; }
598inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) {
599 return V;
600}
601
602/// Match a specified Value*.
603struct specificval_ty {
604 const Value *Val;
605
606 specificval_ty(const Value *V) : Val(V) {}
607
608 template <typename ITy> bool match(ITy *V) { return V == Val; }
609};
610
611/// Match if we have a specific specified value.
612inline specificval_ty m_Specific(const Value *V) { return V; }
613
614/// Stores a reference to the Value *, not the Value * itself,
615/// thus can be used in commutative matchers.
616template <typename Class> struct deferredval_ty {
617 Class *const &Val;
618
619 deferredval_ty(Class *const &V) : Val(V) {}
620
621 template <typename ITy> bool match(ITy *const V) { return V == Val; }
622};
623
624/// A commutative-friendly version of m_Specific().
625inline deferredval_ty<Value> m_Deferred(Value *const &V) { return V; }
626inline deferredval_ty<const Value> m_Deferred(const Value *const &V) {
627 return V;
628}
629
630/// Match a specified floating point value or vector of all elements of
631/// that value.
632struct specific_fpval {
633 double Val;
634
635 specific_fpval(double V) : Val(V) {}
636
637 template <typename ITy> bool match(ITy *V) {
638 if (const auto *CFP = dyn_cast<ConstantFP>(V))
639 return CFP->isExactlyValue(Val);
640 if (V->getType()->isVectorTy())
641 if (const auto *C = dyn_cast<Constant>(V))
642 if (auto *CFP = dyn_cast_or_null<ConstantFP>(C->getSplatValue()))
643 return CFP->isExactlyValue(Val);
644 return false;
645 }
646};
647
648/// Match a specific floating point value or vector with all elements
649/// equal to the value.
650inline specific_fpval m_SpecificFP(double V) { return specific_fpval(V); }
651
652/// Match a float 1.0 or vector with all elements equal to 1.0.
653inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); }
654
655struct bind_const_intval_ty {
656 uint64_t &VR;
657
658 bind_const_intval_ty(uint64_t &V) : VR(V) {}
659
660 template <typename ITy> bool match(ITy *V) {
661 if (const auto *CV = dyn_cast<ConstantInt>(V))
662      if (CV->getValue().ule(UINT64_MAX)) {
663 VR = CV->getZExtValue();
664 return true;
665 }
666 return false;
667 }
668};
669
670/// Match a specified integer value or vector of all elements of that
671/// value.
672struct specific_intval {
673 APInt Val;
674
675 specific_intval(APInt V) : Val(std::move(V)) {}
676
677 template <typename ITy> bool match(ITy *V) {
678 const auto *CI = dyn_cast<ConstantInt>(V);
679 if (!CI && V->getType()->isVectorTy())
680 if (const auto *C = dyn_cast<Constant>(V))
681 CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue());
682
683 return CI && APInt::isSameValue(CI->getValue(), Val);
684 }
685};
686
687/// Match a specific integer value or vector with all elements equal to
688/// the value.
689inline specific_intval m_SpecificInt(APInt V) {
690 return specific_intval(std::move(V));
691}
692
693inline specific_intval m_SpecificInt(uint64_t V) {
694 return m_SpecificInt(APInt(64, V));
695}
696
697/// Match a ConstantInt and bind to its value. This does not match
698/// ConstantInts wider than 64-bits.
699inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; }
700
701/// Match a specified basic block value.
702struct specific_bbval {
703 BasicBlock *Val;
704
705 specific_bbval(BasicBlock *Val) : Val(Val) {}
706
707 template <typename ITy> bool match(ITy *V) {
708 const auto *BB = dyn_cast<BasicBlock>(V);
709 return BB && BB == Val;
710 }
711};
712
713/// Match a specific basic block value.
714inline specific_bbval m_SpecificBB(BasicBlock *BB) {
715 return specific_bbval(BB);
716}
717
718/// A commutative-friendly version of m_Specific().
719inline deferredval_ty<BasicBlock> m_Deferred(BasicBlock *const &BB) {
720 return BB;
721}
722inline deferredval_ty<const BasicBlock>
723m_Deferred(const BasicBlock *const &BB) {
724 return BB;
725}
726
727//===----------------------------------------------------------------------===//
728// Matcher for any binary operator.
729//
730template <typename LHS_t, typename RHS_t, bool Commutable = false>
731struct AnyBinaryOp_match {
732 LHS_t L;
733 RHS_t R;
734
735 // The evaluation order is always stable, regardless of Commutability.
736 // The LHS is always matched first.
737 AnyBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
738
739 template <typename OpTy> bool match(OpTy *V) {
740 if (auto *I = dyn_cast<BinaryOperator>(V))
741 return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
742 (Commutable && L.match(I->getOperand(1)) &&
743 R.match(I->getOperand(0)));
744 return false;
745 }
746};
747
748template <typename LHS, typename RHS>
749inline AnyBinaryOp_match<LHS, RHS> m_BinOp(const LHS &L, const RHS &R) {
750 return AnyBinaryOp_match<LHS, RHS>(L, R);
751}
752
753//===----------------------------------------------------------------------===//
754// Matchers for specific binary operators.
755//
756
757template <typename LHS_t, typename RHS_t, unsigned Opcode,
758 bool Commutable = false>
759struct BinaryOp_match {
760 LHS_t L;
761 RHS_t R;
762
763 // The evaluation order is always stable, regardless of Commutability.
764 // The LHS is always matched first.
765 BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
766
767 template <typename OpTy> bool match(OpTy *V) {
768 if (V->getValueID() == Value::InstructionVal + Opcode) {
769 auto *I = cast<BinaryOperator>(V);
770 return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
771 (Commutable && L.match(I->getOperand(1)) &&
772 R.match(I->getOperand(0)));
773 }
774 if (auto *CE = dyn_cast<ConstantExpr>(V))
775 return CE->getOpcode() == Opcode &&
776 ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) ||
777 (Commutable && L.match(CE->getOperand(1)) &&
778 R.match(CE->getOperand(0))));
779 return false;
780 }
781};
782
783template <typename LHS, typename RHS>
784inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L,
785 const RHS &R) {
786 return BinaryOp_match<LHS, RHS, Instruction::Add>(L, R);
787}
788
789template <typename LHS, typename RHS>
790inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L,
791 const RHS &R) {
792 return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R);
793}
794
795template <typename LHS, typename RHS>
796inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L,
797 const RHS &R) {
798 return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R);
799}
800
801template <typename LHS, typename RHS>
802inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L,
803 const RHS &R) {
804 return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R);
805}
806
807template <typename Op_t> struct FNeg_match {
808 Op_t X;
809
810 FNeg_match(const Op_t &Op) : X(Op) {}
811 template <typename OpTy> bool match(OpTy *V) {
812 auto *FPMO = dyn_cast<FPMathOperator>(V);
813 if (!FPMO) return false;
814
815 if (FPMO->getOpcode() == Instruction::FNeg)
816 return X.match(FPMO->getOperand(0));
817
818 if (FPMO->getOpcode() == Instruction::FSub) {
819 if (FPMO->hasNoSignedZeros()) {
820 // With 'nsz', any zero goes.
821 if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0)))
822 return false;
823 } else {
824 // Without 'nsz', we need fsub -0.0, X exactly.
825 if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0)))
826 return false;
827 }
828
829 return X.match(FPMO->getOperand(1));
830 }
831
832 return false;
833 }
834};
835
836/// Match 'fneg X' as 'fsub -0.0, X'.
837template <typename OpTy>
838inline FNeg_match<OpTy>
839m_FNeg(const OpTy &X) {
840 return FNeg_match<OpTy>(X);
841}
842
843/// Match 'fneg X' as 'fsub +-0.0, X'.
844template <typename RHS>
845inline BinaryOp_match<cstfp_pred_ty<is_any_zero_fp>, RHS, Instruction::FSub>
846m_FNegNSZ(const RHS &X) {
847 return m_FSub(m_AnyZeroFP(), X);
848}
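// Usage sketch (illustrative; 'V' is an assumed Value*): capture the negated
// operand from either an 'fneg X' instruction or the 'fsub -0.0, X' idiom:
//
//   Value *X;
//   if (match(V, m_FNeg(m_Value(X)))) {
//     // X is bound to the operand being negated
//   }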
849
850template <typename LHS, typename RHS>
851inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L,
852 const RHS &R) {
853 return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R);
854}
855
856template <typename LHS, typename RHS>
857inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L,
858 const RHS &R) {
859 return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R);
860}
861
862template <typename LHS, typename RHS>
863inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L,
864 const RHS &R) {
865 return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R);
866}
867
868template <typename LHS, typename RHS>
869inline BinaryOp_match<LHS, RHS, Instruction::SDiv> m_SDiv(const LHS &L,
870 const RHS &R) {
871 return BinaryOp_match<LHS, RHS, Instruction::SDiv>(L, R);
872}
873
874template <typename LHS, typename RHS>
875inline BinaryOp_match<LHS, RHS, Instruction::FDiv> m_FDiv(const LHS &L,
876 const RHS &R) {
877 return BinaryOp_match<LHS, RHS, Instruction::FDiv>(L, R);
878}
879
880template <typename LHS, typename RHS>
881inline BinaryOp_match<LHS, RHS, Instruction::URem> m_URem(const LHS &L,
882 const RHS &R) {
883 return BinaryOp_match<LHS, RHS, Instruction::URem>(L, R);
884}
885
886template <typename LHS, typename RHS>
887inline BinaryOp_match<LHS, RHS, Instruction::SRem> m_SRem(const LHS &L,
888 const RHS &R) {
889 return BinaryOp_match<LHS, RHS, Instruction::SRem>(L, R);
890}
891
892template <typename LHS, typename RHS>
893inline BinaryOp_match<LHS, RHS, Instruction::FRem> m_FRem(const LHS &L,
894 const RHS &R) {
895 return BinaryOp_match<LHS, RHS, Instruction::FRem>(L, R);
896}
897
898template <typename LHS, typename RHS>
899inline BinaryOp_match<LHS, RHS, Instruction::And> m_And(const LHS &L,
900 const RHS &R) {
901 return BinaryOp_match<LHS, RHS, Instruction::And>(L, R);
902}
903
904template <typename LHS, typename RHS>
905inline BinaryOp_match<LHS, RHS, Instruction::Or> m_Or(const LHS &L,
906 const RHS &R) {
907 return BinaryOp_match<LHS, RHS, Instruction::Or>(L, R);
908}
909
910template <typename LHS, typename RHS>
911inline BinaryOp_match<LHS, RHS, Instruction::Xor> m_Xor(const LHS &L,
912 const RHS &R) {
913 return BinaryOp_match<LHS, RHS, Instruction::Xor>(L, R);
914}
915
916template <typename LHS, typename RHS>
917inline BinaryOp_match<LHS, RHS, Instruction::Shl> m_Shl(const LHS &L,
918 const RHS &R) {
919 return BinaryOp_match<LHS, RHS, Instruction::Shl>(L, R);
920}
921
922template <typename LHS, typename RHS>
923inline BinaryOp_match<LHS, RHS, Instruction::LShr> m_LShr(const LHS &L,
924 const RHS &R) {
925 return BinaryOp_match<LHS, RHS, Instruction::LShr>(L, R);
926}
927
928template <typename LHS, typename RHS>
929inline BinaryOp_match<LHS, RHS, Instruction::AShr> m_AShr(const LHS &L,
930 const RHS &R) {
931 return BinaryOp_match<LHS, RHS, Instruction::AShr>(L, R);
932}
933
934template <typename LHS_t, typename RHS_t, unsigned Opcode,
935 unsigned WrapFlags = 0>
936struct OverflowingBinaryOp_match {
937 LHS_t L;
938 RHS_t R;
939
940 OverflowingBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS)
941 : L(LHS), R(RHS) {}
942
943 template <typename OpTy> bool match(OpTy *V) {
944 if (auto *Op = dyn_cast<OverflowingBinaryOperator>(V)) {
945 if (Op->getOpcode() != Opcode)
946 return false;
947 if (WrapFlags & OverflowingBinaryOperator::NoUnsignedWrap &&
948 !Op->hasNoUnsignedWrap())
949 return false;
950 if (WrapFlags & OverflowingBinaryOperator::NoSignedWrap &&
951 !Op->hasNoSignedWrap())
952 return false;
953 return L.match(Op->getOperand(0)) && R.match(Op->getOperand(1));
954 }
955 return false;
956 }
957};
958
959template <typename LHS, typename RHS>
960inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
961 OverflowingBinaryOperator::NoSignedWrap>
962m_NSWAdd(const LHS &L, const RHS &R) {
963 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
964 OverflowingBinaryOperator::NoSignedWrap>(
965 L, R);
966}
967template <typename LHS, typename RHS>
968inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
969 OverflowingBinaryOperator::NoSignedWrap>
970m_NSWSub(const LHS &L, const RHS &R) {
971 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
972 OverflowingBinaryOperator::NoSignedWrap>(
973 L, R);
974}
975template <typename LHS, typename RHS>
976inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
977 OverflowingBinaryOperator::NoSignedWrap>
978m_NSWMul(const LHS &L, const RHS &R) {
979 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
980 OverflowingBinaryOperator::NoSignedWrap>(
981 L, R);
982}
983template <typename LHS, typename RHS>
984inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
985 OverflowingBinaryOperator::NoSignedWrap>
986m_NSWShl(const LHS &L, const RHS &R) {
987 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
988 OverflowingBinaryOperator::NoSignedWrap>(
989 L, R);
990}
991
992template <typename LHS, typename RHS>
993inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
994 OverflowingBinaryOperator::NoUnsignedWrap>
995m_NUWAdd(const LHS &L, const RHS &R) {
996 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
997 OverflowingBinaryOperator::NoUnsignedWrap>(
998 L, R);
999}
1000template <typename LHS, typename RHS>
1001inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1002 OverflowingBinaryOperator::NoUnsignedWrap>
1003m_NUWSub(const LHS &L, const RHS &R) {
1004 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1005 OverflowingBinaryOperator::NoUnsignedWrap>(
1006 L, R);
1007}
1008template <typename LHS, typename RHS>
1009inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1010 OverflowingBinaryOperator::NoUnsignedWrap>
1011m_NUWMul(const LHS &L, const RHS &R) {
1012 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1013 OverflowingBinaryOperator::NoUnsignedWrap>(
1014 L, R);
1015}
1016template <typename LHS, typename RHS>
1017inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1018 OverflowingBinaryOperator::NoUnsignedWrap>
1019m_NUWShl(const LHS &L, const RHS &R) {
1020 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1021 OverflowingBinaryOperator::NoUnsignedWrap>(
1022 L, R);
1023}
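// Usage sketch (illustrative; 'V' is an assumed Value*): the flag-checking
// matchers above only succeed when the corresponding wrap flag is present:
//
//   Value *X, *Y;
//   if (match(V, m_NSWAdd(m_Value(X), m_Value(Y)))) {
//     // V is an 'add nsw X, Y'
//   }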
1024
1025//===----------------------------------------------------------------------===//
1026// Class that matches a group of binary opcodes.
1027//
1028template <typename LHS_t, typename RHS_t, typename Predicate>
1029struct BinOpPred_match : Predicate {
1030 LHS_t L;
1031 RHS_t R;
1032
1033 BinOpPred_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
1034
1035 template <typename OpTy> bool match(OpTy *V) {
1036 if (auto *I = dyn_cast<Instruction>(V))
1037 return this->isOpType(I->getOpcode()) && L.match(I->getOperand(0)) &&
1038 R.match(I->getOperand(1));
1039 if (auto *CE = dyn_cast<ConstantExpr>(V))
1040 return this->isOpType(CE->getOpcode()) && L.match(CE->getOperand(0)) &&
1041 R.match(CE->getOperand(1));
1042 return false;
1043 }
1044};
1045
1046struct is_shift_op {
1047 bool isOpType(unsigned Opcode) { return Instruction::isShift(Opcode); }
1048};
1049
1050struct is_right_shift_op {
1051 bool isOpType(unsigned Opcode) {
1052 return Opcode == Instruction::LShr || Opcode == Instruction::AShr;
1053 }
1054};
1055
1056struct is_logical_shift_op {
1057 bool isOpType(unsigned Opcode) {
1058 return Opcode == Instruction::LShr || Opcode == Instruction::Shl;
1059 }
1060};
1061
1062struct is_bitwiselogic_op {
1063 bool isOpType(unsigned Opcode) {
1064 return Instruction::isBitwiseLogicOp(Opcode);
1065 }
1066};
1067
1068struct is_idiv_op {
1069 bool isOpType(unsigned Opcode) {
1070 return Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
1071 }
1072};
1073
1074struct is_irem_op {
1075 bool isOpType(unsigned Opcode) {
1076 return Opcode == Instruction::SRem || Opcode == Instruction::URem;
1077 }
1078};
1079
1080/// Matches shift operations.
1081template <typename LHS, typename RHS>
1082inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L,
1083 const RHS &R) {
1084 return BinOpPred_match<LHS, RHS, is_shift_op>(L, R);
1085}
1086
1087/// Matches right shift operations.
1088template <typename LHS, typename RHS>
1089inline BinOpPred_match<LHS, RHS, is_right_shift_op> m_Shr(const LHS &L,
1090 const RHS &R) {
1091 return BinOpPred_match<LHS, RHS, is_right_shift_op>(L, R);
1092}
1093
1094/// Matches logical shift operations.
1095template <typename LHS, typename RHS>
1096inline BinOpPred_match<LHS, RHS, is_logical_shift_op>
1097m_LogicalShift(const LHS &L, const RHS &R) {
1098 return BinOpPred_match<LHS, RHS, is_logical_shift_op>(L, R);
1099}
1100
1101/// Matches bitwise logic operations.
1102template <typename LHS, typename RHS>
1103inline BinOpPred_match<LHS, RHS, is_bitwiselogic_op>
1104m_BitwiseLogic(const LHS &L, const RHS &R) {
1105 return BinOpPred_match<LHS, RHS, is_bitwiselogic_op>(L, R);
1106}
1107
1108/// Matches integer division operations.
1109template <typename LHS, typename RHS>
1110inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L,
1111 const RHS &R) {
1112 return BinOpPred_match<LHS, RHS, is_idiv_op>(L, R);
1113}
1114
1115/// Matches integer remainder operations.
1116template <typename LHS, typename RHS>
1117inline BinOpPred_match<LHS, RHS, is_irem_op> m_IRem(const LHS &L,
1118 const RHS &R) {
1119 return BinOpPred_match<LHS, RHS, is_irem_op>(L, R);
1120}
1121
1122//===----------------------------------------------------------------------===//
1123// Class that matches exact binary ops.
1124//
1125template <typename SubPattern_t> struct Exact_match {
1126 SubPattern_t SubPattern;
1127
1128 Exact_match(const SubPattern_t &SP) : SubPattern(SP) {}
1129
1130 template <typename OpTy> bool match(OpTy *V) {
1131 if (auto *PEO = dyn_cast<PossiblyExactOperator>(V))
1132 return PEO->isExact() && SubPattern.match(V);
1133 return false;
1134 }
1135};
1136
1137template <typename T> inline Exact_match<T> m_Exact(const T &SubPattern) {
1138 return SubPattern;
1139}
1140
1141//===----------------------------------------------------------------------===//
1142// Matchers for CmpInst classes
1143//
1144
1145template <typename LHS_t, typename RHS_t, typename Class, typename PredicateTy,
1146 bool Commutable = false>
1147struct CmpClass_match {
1148 PredicateTy &Predicate;
1149 LHS_t L;
1150 RHS_t R;
1151
1152 // The evaluation order is always stable, regardless of Commutability.
1153 // The LHS is always matched first.
1154 CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS)
1155 : Predicate(Pred), L(LHS), R(RHS) {}
1156
1157 template <typename OpTy> bool match(OpTy *V) {
1158 if (auto *I = dyn_cast<Class>(V))
1159 if ((L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
1160 (Commutable && L.match(I->getOperand(1)) &&
1161 R.match(I->getOperand(0)))) {
1162 Predicate = I->getPredicate();
1163 return true;
1164 }
1165 return false;
1166 }
1167};
1168
1169template <typename LHS, typename RHS>
1170inline CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>
1171m_Cmp(CmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1172 return CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>(Pred, L, R);
1173}
1174
1175template <typename LHS, typename RHS>
1176inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>
1177m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1178 return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>(Pred, L, R);
1179}
1180
1181template <typename LHS, typename RHS>
1182inline CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>
1183m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1184 return CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>(Pred, L, R);
1185}
1186
1187//===----------------------------------------------------------------------===//
1188// Matchers for instructions with a given opcode and number of operands.
1189//
1190
1191/// Matches instructions with Opcode and one operand.
1192template <typename T0, unsigned Opcode> struct OneOps_match {
1193 T0 Op1;
1194
1195 OneOps_match(const T0 &Op1) : Op1(Op1) {}
1196
1197 template <typename OpTy> bool match(OpTy *V) {
1198 if (V->getValueID() == Value::InstructionVal + Opcode) {
1199 auto *I = cast<Instruction>(V);
1200 return Op1.match(I->getOperand(0));
1201 }
1202 return false;
1203 }
1204};
1205
1206/// Matches instructions with Opcode and two operands.
1207template <typename T0, typename T1, unsigned Opcode> struct TwoOps_match {
1208 T0 Op1;
1209 T1 Op2;
1210
1211 TwoOps_match(const T0 &Op1, const T1 &Op2) : Op1(Op1), Op2(Op2) {}
1212
1213 template <typename OpTy> bool match(OpTy *V) {
1214 if (V->getValueID() == Value::InstructionVal + Opcode) {
1215 auto *I = cast<Instruction>(V);
1216 return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1));
1217 }
1218 return false;
1219 }
1220};
1221
1222/// Matches instructions with Opcode and three operands.
1223template <typename T0, typename T1, typename T2, unsigned Opcode>
1224struct ThreeOps_match {
1225 T0 Op1;
1226 T1 Op2;
1227 T2 Op3;
1228
1229 ThreeOps_match(const T0 &Op1, const T1 &Op2, const T2 &Op3)
1230 : Op1(Op1), Op2(Op2), Op3(Op3) {}
1231
1232 template <typename OpTy> bool match(OpTy *V) {
1233 if (V->getValueID() == Value::InstructionVal + Opcode) {
1234 auto *I = cast<Instruction>(V);
1235 return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) &&
1236 Op3.match(I->getOperand(2));
1237 }
1238 return false;
1239 }
1240};
1241
1242/// Matches SelectInst.
1243template <typename Cond, typename LHS, typename RHS>
1244inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select>
1245m_Select(const Cond &C, const LHS &L, const RHS &R) {
1246 return ThreeOps_match<Cond, LHS, RHS, Instruction::Select>(C, L, R);
1247}
1248
1249/// This matches a select of two constants, e.g.:
1250/// m_SelectCst<-1, 0>(m_Value(V))
1251template <int64_t L, int64_t R, typename Cond>
1252inline ThreeOps_match<Cond, constantint_match<L>, constantint_match<R>,
1253 Instruction::Select>
1254m_SelectCst(const Cond &C) {
1255 return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>());
1256}
1257
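Sketch of typical m_Select and m_SelectCst use (illustrative names only; V is a placeholder Value*):

  Value *Cond, *TV, *FV;
  if (match(V, m_Select(m_Value(Cond), m_Value(TV), m_Value(FV)))) {
    // V is "select Cond, TV, FV".
  }
  Value *C;
  if (match(V, m_SelectCst<-1, 0>(m_Value(C)))) {
    // V is "select C, -1, 0", a common sign-mask idiom.
  }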
1258/// Matches FreezeInst.
1259template <typename OpTy>
1260inline OneOps_match<OpTy, Instruction::Freeze> m_Freeze(const OpTy &Op) {
1261 return OneOps_match<OpTy, Instruction::Freeze>(Op);
1262}
1263
1264/// Matches InsertElementInst.
1265template <typename Val_t, typename Elt_t, typename Idx_t>
1266inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>
1267m_InsertElement(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) {
1268 return ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>(
1269 Val, Elt, Idx);
1270}
1271
1272/// Matches ExtractElementInst.
1273template <typename Val_t, typename Idx_t>
1274inline TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>
1275m_ExtractElement(const Val_t &Val, const Idx_t &Idx) {
1276 return TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>(Val, Idx);
1277}
1278
1279/// Matches ShuffleVectorInst.
1280template <typename V1_t, typename V2_t, typename Mask_t>
1281inline ThreeOps_match<V1_t, V2_t, Mask_t, Instruction::ShuffleVector>
1282m_ShuffleVector(const V1_t &v1, const V2_t &v2, const Mask_t &m) {
1283 return ThreeOps_match<V1_t, V2_t, Mask_t, Instruction::ShuffleVector>(v1, v2,
1284 m);
1285}
1286
1287/// Matches LoadInst.
1288template <typename OpTy>
1289inline OneOps_match<OpTy, Instruction::Load> m_Load(const OpTy &Op) {
1290 return OneOps_match<OpTy, Instruction::Load>(Op);
1291}
1292
1293/// Matches StoreInst.
1294template <typename ValueOpTy, typename PointerOpTy>
1295inline TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>
1296m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) {
1297 return TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>(ValueOp,
1298 PointerOp);
1299}
1300
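Sketch combining the load and store matchers (illustrative; I stands for some Instruction under inspection):

  Value *SrcPtr, *DstPtr;
  // Matches "store (load SrcPtr), DstPtr", i.e. a simple memory-to-memory copy.
  if (match(&I, m_Store(m_Load(m_Value(SrcPtr)), m_Value(DstPtr)))) {
    // SrcPtr and DstPtr are bound to the two pointer operands.
  }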
1301//===----------------------------------------------------------------------===//
1302// Matchers for CastInst classes
1303//
1304
1305template <typename Op_t, unsigned Opcode> struct CastClass_match {
1306 Op_t Op;
1307
1308 CastClass_match(const Op_t &OpMatch) : Op(OpMatch) {}
1309
1310 template <typename OpTy> bool match(OpTy *V) {
1311 if (auto *O = dyn_cast<Operator>(V))
1312 return O->getOpcode() == Opcode && Op.match(O->getOperand(0));
1313 return false;
1314 }
1315};
1316
1317/// Matches BitCast.
1318template <typename OpTy>
1319inline CastClass_match<OpTy, Instruction::BitCast> m_BitCast(const OpTy &Op) {
1320 return CastClass_match<OpTy, Instruction::BitCast>(Op);
1321}
1322
1323/// Matches PtrToInt.
1324template <typename OpTy>
1325inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) {
1326 return CastClass_match<OpTy, Instruction::PtrToInt>(Op);
1327}
1328
1329/// Matches Trunc.
1330template <typename OpTy>
1331inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) {
1332 return CastClass_match<OpTy, Instruction::Trunc>(Op);
1333}
1334
1335template <typename OpTy>
1336inline match_combine_or<CastClass_match<OpTy, Instruction::Trunc>, OpTy>
1337m_TruncOrSelf(const OpTy &Op) {
1338 return m_CombineOr(m_Trunc(Op), Op);
1339}
1340
1341/// Matches SExt.
1342template <typename OpTy>
1343inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) {
1344 return CastClass_match<OpTy, Instruction::SExt>(Op);
1345}
1346
1347/// Matches ZExt.
1348template <typename OpTy>
1349inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) {
1350 return CastClass_match<OpTy, Instruction::ZExt>(Op);
1351}
1352
1353template <typename OpTy>
1354inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy>
1355m_ZExtOrSelf(const OpTy &Op) {
1356 return m_CombineOr(m_ZExt(Op), Op);
1357}
1358
1359template <typename OpTy>
1360inline match_combine_or<CastClass_match<OpTy, Instruction::SExt>, OpTy>
1361m_SExtOrSelf(const OpTy &Op) {
1362 return m_CombineOr(m_SExt(Op), Op);
1363}
1364
1365template <typename OpTy>
1366inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
1367 CastClass_match<OpTy, Instruction::SExt>>
1368m_ZExtOrSExt(const OpTy &Op) {
1369 return m_CombineOr(m_ZExt(Op), m_SExt(Op));
1370}
1371
1372template <typename OpTy>
1373inline match_combine_or<
1374 match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
1375 CastClass_match<OpTy, Instruction::SExt>>,
1376 OpTy>
1377m_ZExtOrSExtOrSelf(const OpTy &Op) {
1378 return m_CombineOr(m_ZExtOrSExt(Op), Op);
1379}
1380
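Sketch of the "or self" cast matchers, which look through an optional extension (illustrative names; V is a placeholder Value*):

  Value *X;
  // Binds X to the operand of a zext/sext, or to V itself when no such cast is present.
  if (match(V, m_ZExtOrSExtOrSelf(m_Value(X)))) {
    // ... work on the possibly narrower value X ...
  }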
1381/// Matches UIToFP.
1382template <typename OpTy>
1383inline CastClass_match<OpTy, Instruction::UIToFP> m_UIToFP(const OpTy &Op) {
1384 return CastClass_match<OpTy, Instruction::UIToFP>(Op);
1385}
1386
1387/// Matches SIToFP.
1388template <typename OpTy>
1389inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) {
1390 return CastClass_match<OpTy, Instruction::SIToFP>(Op);
1391}
1392
1393/// Matches FPTrunc
1394template <typename OpTy>
1395inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) {
1396 return CastClass_match<OpTy, Instruction::FPTrunc>(Op);
1397}
1398
1399/// Matches FPExt
1400template <typename OpTy>
1401inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) {
1402 return CastClass_match<OpTy, Instruction::FPExt>(Op);
1403}
1404
1405//===----------------------------------------------------------------------===//
1406// Matchers for control flow.
1407//
1408
1409struct br_match {
1410 BasicBlock *&Succ;
1411
1412 br_match(BasicBlock *&Succ) : Succ(Succ) {}
1413
1414 template <typename OpTy> bool match(OpTy *V) {
1415 if (auto *BI = dyn_cast<BranchInst>(V))
1416 if (BI->isUnconditional()) {
1417 Succ = BI->getSuccessor(0);
1418 return true;
1419 }
1420 return false;
1421 }
1422};
1423
1424inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); }
1425
1426template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t>
1427struct brc_match {
1428 Cond_t Cond;
1429 TrueBlock_t T;
1430 FalseBlock_t F;
1431
1432 brc_match(const Cond_t &C, const TrueBlock_t &t, const FalseBlock_t &f)
1433 : Cond(C), T(t), F(f) {}
1434
1435 template <typename OpTy> bool match(OpTy *V) {
1436 if (auto *BI = dyn_cast<BranchInst>(V))
1437 if (BI->isConditional() && Cond.match(BI->getCondition()))
1438 return T.match(BI->getSuccessor(0)) && F.match(BI->getSuccessor(1));
1439 return false;
1440 }
1441};
1442
1443template <typename Cond_t>
1444inline brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>
1445m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) {
1446 return brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>(
1447 C, m_BasicBlock(T), m_BasicBlock(F));
1448}
1449
1450template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t>
1451inline brc_match<Cond_t, TrueBlock_t, FalseBlock_t>
1452m_Br(const Cond_t &C, const TrueBlock_t &T, const FalseBlock_t &F) {
1453 return brc_match<Cond_t, TrueBlock_t, FalseBlock_t>(C, T, F);
1454}
1455
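Sketch for the conditional-branch matcher (illustrative; BB is a placeholder BasicBlock reference):

  BasicBlock *TrueBB = nullptr, *FalseBB = nullptr;
  ICmpInst::Predicate Pred;
  Value *A, *B;
  if (match(BB.getTerminator(),
            m_Br(m_ICmp(Pred, m_Value(A), m_Value(B)), TrueBB, FalseBB))) {
    // BB ends in "br (icmp Pred A, B), TrueBB, FalseBB".
  }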
1456//===----------------------------------------------------------------------===//
1457// Matchers for max/min idioms, e.g.: "select (sgt x, y), x, y" -> smax(x,y).
1458//
1459
1460template <typename CmpInst_t, typename LHS_t, typename RHS_t, typename Pred_t,
1461 bool Commutable = false>
1462struct MaxMin_match {
1463 LHS_t L;
1464 RHS_t R;
1465
1466 // The evaluation order is always stable, regardless of Commutability.
1467 // The LHS is always matched first.
1468 MaxMin_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
1469
1470 template <typename OpTy> bool match(OpTy *V) {
1471 // Look for "(x pred y) ? x : y" or "(x pred y) ? y : x".
1472 auto *SI = dyn_cast<SelectInst>(V);
1473 if (!SI)
1474 return false;
1475 auto *Cmp = dyn_cast<CmpInst_t>(SI->getCondition());
1476 if (!Cmp)
1477 return false;
1478 // At this point we have a select conditioned on a comparison. Check that
1479 // it is the values returned by the select that are being compared.
1480 Value *TrueVal = SI->getTrueValue();
1481 Value *FalseVal = SI->getFalseValue();
1482 Value *LHS = Cmp->getOperand(0);
1483 Value *RHS = Cmp->getOperand(1);
1484 if ((TrueVal != LHS || FalseVal != RHS) &&
1485 (TrueVal != RHS || FalseVal != LHS))
1486 return false;
1487 typename CmpInst_t::Predicate Pred =
1488 LHS == TrueVal ? Cmp->getPredicate() : Cmp->getInversePredicate();
1489 // Does "(x pred y) ? x : y" represent the desired max/min operation?
1490 if (!Pred_t::match(Pred))
1491 return false;
1492 // It does! Bind the operands.
1493 return (L.match(LHS) && R.match(RHS)) ||
1494 (Commutable && L.match(RHS) && R.match(LHS));
1495 }
1496};
1497
1498/// Helper class for identifying signed max predicates.
1499struct smax_pred_ty {
1500 static bool match(ICmpInst::Predicate Pred) {
1501 return Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE;
1502 }
1503};
1504
1505/// Helper class for identifying signed min predicates.
1506struct smin_pred_ty {
1507 static bool match(ICmpInst::Predicate Pred) {
1508 return Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE;
1509 }
1510};
1511
1512/// Helper class for identifying unsigned max predicates.
1513struct umax_pred_ty {
1514 static bool match(ICmpInst::Predicate Pred) {
1515 return Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE;
1516 }
1517};
1518
1519/// Helper class for identifying unsigned min predicates.
1520struct umin_pred_ty {
1521 static bool match(ICmpInst::Predicate Pred) {
1522 return Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE;
1523 }
1524};
1525
1526/// Helper class for identifying ordered max predicates.
1527struct ofmax_pred_ty {
1528 static bool match(FCmpInst::Predicate Pred) {
1529 return Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_OGE;
1530 }
1531};
1532
1533/// Helper class for identifying ordered min predicates.
1534struct ofmin_pred_ty {
1535 static bool match(FCmpInst::Predicate Pred) {
1536 return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE;
1537 }
1538};
1539
1540/// Helper class for identifying unordered max predicates.
1541struct ufmax_pred_ty {
1542 static bool match(FCmpInst::Predicate Pred) {
1543 return Pred == CmpInst::FCMP_UGT || Pred == CmpInst::FCMP_UGE;
1544 }
1545};
1546
1547/// Helper class for identifying unordered min predicates.
1548struct ufmin_pred_ty {
1549 static bool match(FCmpInst::Predicate Pred) {
1550 return Pred == CmpInst::FCMP_ULT || Pred == CmpInst::FCMP_ULE;
1551 }
1552};
1553
1554template <typename LHS, typename RHS>
1555inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty> m_SMax(const LHS &L,
1556 const RHS &R) {
1557 return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>(L, R);
1558}
1559
1560template <typename LHS, typename RHS>
1561inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty> m_SMin(const LHS &L,
1562 const RHS &R) {
1563 return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>(L, R);
1564}
1565
1566template <typename LHS, typename RHS>
1567inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty> m_UMax(const LHS &L,
1568 const RHS &R) {
1569 return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>(L, R);
1570}
1571
1572template <typename LHS, typename RHS>
1573inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty> m_UMin(const LHS &L,
1574 const RHS &R) {
1575 return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R);
1576}
1577
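Sketch for the integer min/max matchers (illustrative names; V is a placeholder Value*):

  Value *A, *B;
  // Matches "select (icmp sgt/sge A, B), A, B" and the equivalent inverted form.
  if (match(V, m_SMax(m_Value(A), m_Value(B)))) {
    // V computes smax(A, B).
  }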
1578/// Match an 'ordered' floating point maximum function.
1579/// Floating point has one special value 'NaN'. Therefore, there is no total
1580/// order. However, if we can ignore the 'NaN' value (for example, because of a
1581/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum'
1582/// semantics. In the presence of 'NaN' we have to preserve the original
1583/// select(fcmp(ogt/ge, L, R), L, R) semantics matched by this predicate.
1584///
1585/// max(L, R) iff L and R are not NaN
1586/// m_OrdFMax(L, R) = R iff L or R are NaN
1587template <typename LHS, typename RHS>
1588inline MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty> m_OrdFMax(const LHS &L,
1589 const RHS &R) {
1590 return MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty>(L, R);
1591}
1592
1593/// Match an 'ordered' floating point minimum function.
1594/// Floating point has one special value 'NaN'. Therefore, there is no total
1595/// order. However, if we can ignore the 'NaN' value (for example, because of a
1596/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum'
1597/// semantics. In the presence of 'NaN' we have to preserve the original
1598/// select(fcmp(olt/le, L, R), L, R) semantics matched by this predicate.
1599///
1600/// min(L, R) iff L and R are not NaN
1601/// m_OrdFMin(L, R) = R iff L or R are NaN
1602template <typename LHS, typename RHS>
1603inline MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty> m_OrdFMin(const LHS &L,
1604 const RHS &R) {
1605 return MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty>(L, R);
1606}
1607
1608/// Match an 'unordered' floating point maximum function.
1609/// Floating point has one special value 'NaN'. Therefore, there is no total
1610/// order. However, if we can ignore the 'NaN' value (for example, because of a
1611/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum'
1612/// semantics. In the presence of 'NaN' we have to preserve the original
1613/// select(fcmp(ugt/ge, L, R), L, R) semantics matched by this predicate.
1614///
1615/// max(L, R) iff L and R are not NaN
1616/// m_UnordFMax(L, R) = L iff L or R are NaN
1617template <typename LHS, typename RHS>
1618inline MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>
1619m_UnordFMax(const LHS &L, const RHS &R) {
1620 return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
1621}
1622
1623/// Match an 'unordered' floating point minimum function.
1624/// Floating point has one special value 'NaN'. Therefore, there is no total
1625/// order. However, if we can ignore the 'NaN' value (for example, because of a
1626/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum'
1627/// semantics. In the presence of 'NaN' we have to preserve the original
1628/// select(fcmp(ult/le, L, R), L, R) semantics matched by this predicate.
1629///
1630/// min(L, R) iff L and R are not NaN
1631/// m_UnordFMin(L, R) = L iff L or R are NaN
1632template <typename LHS, typename RHS>
1633inline MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>
1634m_UnordFMin(const LHS &L, const RHS &R) {
1635 return MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>(L, R);
1636}
1637
1638//===----------------------------------------------------------------------===//
1639// Matchers for overflow check patterns: e.g. (a + b) u< a
1640//
1641
1642template <typename LHS_t, typename RHS_t, typename Sum_t>
1643struct UAddWithOverflow_match {
1644 LHS_t L;
1645 RHS_t R;
1646 Sum_t S;
1647
1648 UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S)
1649 : L(L), R(R), S(S) {}
1650
1651 template <typename OpTy> bool match(OpTy *V) {
1652 Value *ICmpLHS, *ICmpRHS;
1653 ICmpInst::Predicate Pred;
1654 if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V))
1655 return false;
1656
1657 Value *AddLHS, *AddRHS;
1658 auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS));
1659
1660 // (a + b) u< a, (a + b) u< b
1661 if (Pred == ICmpInst::ICMP_ULT)
1662 if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS))
1663 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
1664
1665 // a >u (a + b), b >u (a + b)
1666 if (Pred == ICmpInst::ICMP_UGT)
1667 if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
1668 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
1669
1670 // Match special-case for increment-by-1.
1671 if (Pred == ICmpInst::ICMP_EQ) {
1672 // (a + 1) == 0
1673 // (1 + a) == 0
1674 if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) &&
1675 (m_One().match(AddLHS) || m_One().match(AddRHS)))
1676 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
1677 // 0 == (a + 1)
1678 // 0 == (1 + a)
1679 if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) &&
1680 (m_One().match(AddLHS) || m_One().match(AddRHS)))
1681 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
1682 }
1683
1684 return false;
1685 }
1686};
1687
1688/// Match an icmp instruction checking for unsigned overflow on addition.
1689///
1690/// S is matched to the addition whose result is being checked for overflow, and
1691/// L and R are matched to the LHS and RHS of S.
1692template <typename LHS_t, typename RHS_t, typename Sum_t>
1693UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>
1694m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
1695 return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S);
1696}
1697
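Sketch for the unsigned-add overflow matcher (illustrative names; V is a placeholder Value*):

  Value *A, *B, *Sum;
  // Matches idioms such as "(A + B) u< A" or "A >u (A + B)" that test for unsigned wrap.
  if (match(V, m_UAddWithOverflow(m_Value(A), m_Value(B), m_Value(Sum)))) {
    // Sum is bound to the add whose overflow is being checked; A and B are its operands.
  }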
1698template <typename Opnd_t> struct Argument_match {
1699 unsigned OpI;
1700 Opnd_t Val;
1701
1702 Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {}
1703
1704 template <typename OpTy> bool match(OpTy *V) {
1705 // FIXME: Should likely be switched to use `CallBase`.
1706 if (const auto *CI = dyn_cast<CallInst>(V))
1707 return Val.match(CI->getArgOperand(OpI));
1708 return false;
1709 }
1710};
1711
1712/// Match an argument.
1713template <unsigned OpI, typename Opnd_t>
1714inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) {
1715 return Argument_match<Opnd_t>(OpI, Op);
1716}
1717
1718/// Intrinsic matchers.
1719struct IntrinsicID_match {
1720 unsigned ID;
1721
1722 IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {}
1723
1724 template <typename OpTy> bool match(OpTy *V) {
1725 if (const auto *CI
32.1
'CI' is null
41.1
'CI' is null
= dyn_cast<CallInst>(V))
32
Assuming 'V' is not a 'CallInst'
33
Taking false branch
41
'V' is not a 'CallInst'
42
Taking false branch
1726 if (const auto *F = CI->getCalledFunction())
1727 return F->getIntrinsicID() == ID;
1728 return false;
34
Returning zero, which participates in a condition later
43
Returning zero, which participates in a condition later
1729 }
1730};
1731
1732/// Intrinsic matches are combinations of ID matchers, and argument
1733/// matchers. Higher arity matchers are defined recursively in terms of and-ing
1734/// them with lower arity matchers. Here are some convenient typedefs for up to
1735/// several arguments, and more can be added as needed.
1736template <typename T0 = void, typename T1 = void, typename T2 = void,
1737 typename T3 = void, typename T4 = void, typename T5 = void,
1738 typename T6 = void, typename T7 = void, typename T8 = void,
1739 typename T9 = void, typename T10 = void>
1740struct m_Intrinsic_Ty;
1741template <typename T0> struct m_Intrinsic_Ty<T0> {
1742 using Ty = match_combine_and<IntrinsicID_match, Argument_match<T0>>;
1743};
1744template <typename T0, typename T1> struct m_Intrinsic_Ty<T0, T1> {
1745 using Ty =
1746 match_combine_and<typename m_Intrinsic_Ty<T0>::Ty, Argument_match<T1>>;
1747};
1748template <typename T0, typename T1, typename T2>
1749struct m_Intrinsic_Ty<T0, T1, T2> {
1750 using Ty =
1751 match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty,
1752 Argument_match<T2>>;
1753};
1754template <typename T0, typename T1, typename T2, typename T3>
1755struct m_Intrinsic_Ty<T0, T1, T2, T3> {
1756 using Ty =
1757 match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty,
1758 Argument_match<T3>>;
1759};
1760
1761template <typename T0, typename T1, typename T2, typename T3, typename T4>
1762struct m_Intrinsic_Ty<T0, T1, T2, T3, T4> {
1763 using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty,
1764 Argument_match<T4>>;
1765};
1766
1767/// Match intrinsic calls like this:
1768/// m_Intrinsic<Intrinsic::fabs>(m_Value(X))
1769template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() {
1770 return IntrinsicID_match(IntrID);
1771}
1772
1773template <Intrinsic::ID IntrID, typename T0>
1774inline typename m_Intrinsic_Ty<T0>::Ty m_Intrinsic(const T0 &Op0) {
1775 return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0));
1776}
1777
1778template <Intrinsic::ID IntrID, typename T0, typename T1>
1779inline typename m_Intrinsic_Ty<T0, T1>::Ty m_Intrinsic(const T0 &Op0,
1780 const T1 &Op1) {
1781 return m_CombineAnd(m_Intrinsic<IntrID>(Op0), m_Argument<1>(Op1));
1782}
1783
1784template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2>
1785inline typename m_Intrinsic_Ty<T0, T1, T2>::Ty
1786m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2) {
1787 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1), m_Argument<2>(Op2));
1788}
1789
1790template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
1791 typename T3>
1792inline typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty
1793m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
1794 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
1795}
1796
1797template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
1798 typename T3, typename T4>
1799inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty
1800m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3,
1801 const T4 &Op4) {
1802 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3),
1803 m_Argument<4>(Op4));
1804}
1805
1806// Helper intrinsic matching specializations.
1807template <typename Opnd0>
1808inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BitReverse(const Opnd0 &Op0) {
1809 return m_Intrinsic<Intrinsic::bitreverse>(Op0);
1810}
1811
1812template <typename Opnd0>
1813inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) {
1814 return m_Intrinsic<Intrinsic::bswap>(Op0);
1815}
1816
1817template <typename Opnd0>
1818inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) {
1819 return m_Intrinsic<Intrinsic::fabs>(Op0);
1820}
1821
1822template <typename Opnd0>
1823inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FCanonicalize(const Opnd0 &Op0) {
1824 return m_Intrinsic<Intrinsic::canonicalize>(Op0);
1825}
1826
1827template <typename Opnd0, typename Opnd1>
1828inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMin(const Opnd0 &Op0,
1829 const Opnd1 &Op1) {
1830 return m_Intrinsic<Intrinsic::minnum>(Op0, Op1);
1831}
1832
1833template <typename Opnd0, typename Opnd1>
1834inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0,
1835 const Opnd1 &Op1) {
1836 return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1);
1837}
1838
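Sketch for the intrinsic matchers (illustrative names; V is a placeholder Value*):

  Value *X;
  if (match(V, m_Intrinsic<Intrinsic::fabs>(m_Value(X)))) {
    // V is "call @llvm.fabs(X)"; the helper m_FAbs(m_Value(X)) is equivalent.
  }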
1839//===----------------------------------------------------------------------===//
1840// Matchers for two-operands operators with the operators in either order
1841//
1842
1843/// Matches a BinaryOperator with LHS and RHS in either order.
1844template <typename LHS, typename RHS>
1845inline AnyBinaryOp_match<LHS, RHS, true> m_c_BinOp(const LHS &L, const RHS &R) {
1846 return AnyBinaryOp_match<LHS, RHS, true>(L, R);
1847}
1848
1849/// Matches an ICmp with a predicate over LHS and RHS in either order.
1850/// Does not swap the predicate.
1851template <typename LHS, typename RHS>
1852inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>
1853m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1854 return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>(Pred, L,
1855 R);
1856}
1857
1858/// Matches an Add with LHS and RHS in either order.
1859template <typename LHS, typename RHS>
1860inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L,
1861 const RHS &R) {
1862 return BinaryOp_match<LHS, RHS, Instruction::Add, true>(L, R);
1863}
1864
1865/// Matches a Mul with LHS and RHS in either order.
1866template <typename LHS, typename RHS>
1867inline BinaryOp_match<LHS, RHS, Instruction::Mul, true> m_c_Mul(const LHS &L,
1868 const RHS &R) {
1869 return BinaryOp_match<LHS, RHS, Instruction::Mul, true>(L, R);
1870}
1871
1872/// Matches an And with LHS and RHS in either order.
1873template <typename LHS, typename RHS>
1874inline BinaryOp_match<LHS, RHS, Instruction::And, true> m_c_And(const LHS &L,
1875 const RHS &R) {
1876 return BinaryOp_match<LHS, RHS, Instruction::And, true>(L, R);
1877}
1878
1879/// Matches an Or with LHS and RHS in either order.
1880template <typename LHS, typename RHS>
1881inline BinaryOp_match<LHS, RHS, Instruction::Or, true> m_c_Or(const LHS &L,
1882 const RHS &R) {
1883 return BinaryOp_match<LHS, RHS, Instruction::Or, true>(L, R);
1884}
1885
1886/// Matches an Xor with LHS and RHS in either order.
1887template <typename LHS, typename RHS>
1888inline BinaryOp_match<LHS, RHS, Instruction::Xor, true> m_c_Xor(const LHS &L,
1889 const RHS &R) {
1890 return BinaryOp_match<LHS, RHS, Instruction::Xor, true>(L, R);
1891}
1892
1893/// Matches a 'Neg' as 'sub 0, V'.
1894template <typename ValTy>
1895inline BinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, Instruction::Sub>
1896m_Neg(const ValTy &V) {
1897 return m_Sub(m_ZeroInt(), V);
1898}
1899
1900/// Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
1901template <typename ValTy>
1902inline BinaryOp_match<ValTy, cst_pred_ty<is_all_ones>, Instruction::Xor, true>
1903m_Not(const ValTy &V) {
1904 return m_c_Xor(V, m_AllOnes());
1905}
1906
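Sketch for the negation idioms above (illustrative names; V is a placeholder Value*):

  Value *X;
  if (match(V, m_Neg(m_Value(X)))) {
    // V is "sub 0, X".
  }
  if (match(V, m_Not(m_Value(X)))) {
    // V is "xor X, -1", with the operands in either order.
  }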
1907/// Matches an SMin with LHS and RHS in either order.
1908template <typename LHS, typename RHS>
1909inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>
1910m_c_SMin(const LHS &L, const RHS &R) {
1911 return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>(L, R);
1912}
1913/// Matches an SMax with LHS and RHS in either order.
1914template <typename LHS, typename RHS>
1915inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>
1916m_c_SMax(const LHS &L, const RHS &R) {
1917 return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>(L, R);
1918}
1919/// Matches a UMin with LHS and RHS in either order.
1920template <typename LHS, typename RHS>
1921inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>
1922m_c_UMin(const LHS &L, const RHS &R) {
1923 return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>(L, R);
1924}
1925/// Matches a UMax with LHS and RHS in either order.
1926template <typename LHS, typename RHS>
1927inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>
1928m_c_UMax(const LHS &L, const RHS &R) {
1929 return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>(L, R);
1930}
1931
1932/// Matches FAdd with LHS and RHS in either order.
1933template <typename LHS, typename RHS>
1934inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true>
1935m_c_FAdd(const LHS &L, const RHS &R) {
1936 return BinaryOp_match<LHS, RHS, Instruction::FAdd, true>(L, R);
1937}
1938
1939/// Matches FMul with LHS and RHS in either order.
1940template <typename LHS, typename RHS>
1941inline BinaryOp_match<LHS, RHS, Instruction::FMul, true>
1942m_c_FMul(const LHS &L, const RHS &R) {
1943 return BinaryOp_match<LHS, RHS, Instruction::FMul, true>(L, R);
1944}
1945
1946template <typename Opnd_t> struct Signum_match {
1947 Opnd_t Val;
1948 Signum_match(const Opnd_t &V) : Val(V) {}
1949
1950 template <typename OpTy> bool match(OpTy *V) {
1951 unsigned TypeSize = V->getType()->getScalarSizeInBits();
1952 if (TypeSize == 0)
1953 return false;
1954
1955 unsigned ShiftWidth = TypeSize - 1;
1956 Value *OpL = nullptr, *OpR = nullptr;
1957
1958 // This is the representation of signum we match:
1959 //
1960 // signum(x) == (x >> 63) | (-x >>u 63)
1961 //
1962 // An i1 value is its own signum, so it's correct to match
1963 //
1964 // signum(x) == (x >> 0) | (-x >>u 0)
1965 //
1966 // for i1 values.
1967
1968 auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth));
1969 auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth));
1970 auto Signum = m_Or(LHS, RHS);
1971
1972 return Signum.match(V) && OpL == OpR && Val.match(OpL);
1973 }
1974};
1975
1976/// Matches a signum pattern.
1977///
1978/// signum(x) =
1979/// x > 0 -> 1
1980/// x == 0 -> 0
1981/// x < 0 -> -1
1982template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) {
1983 return Signum_match<Val_t>(V);
1984}
1985
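Sketch for the signum matcher (illustrative names; V is a placeholder Value*):

  Value *X;
  // Matches the "(X >> BW-1) | (-X >>u BW-1)" expansion of signum for a BW-bit X.
  if (match(V, m_Signum(m_Value(X)))) {
    // V evaluates to -1, 0 or 1 according to the sign of X.
  }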
1986template <int Ind, typename Opnd_t> struct ExtractValue_match {
1987 Opnd_t Val;
1988 ExtractValue_match(const Opnd_t &V) : Val(V) {}
1989
1990 template <typename OpTy> bool match(OpTy *V) {
1991 if (auto *I = dyn_cast<ExtractValueInst>(V))
1992 return I->getNumIndices() == 1 && I->getIndices()[0] == Ind &&
1993 Val.match(I->getAggregateOperand());
1994 return false;
1995 }
1996};
1997
1998/// Match a single index ExtractValue instruction.
1999/// For example m_ExtractValue<1>(...)
2000template <int Ind, typename Val_t>
2001inline ExtractValue_match<Ind, Val_t> m_ExtractValue(const Val_t &V) {
2002 return ExtractValue_match<Ind, Val_t>(V);
2003}
2004
2005} // end namespace PatternMatch
2006} // end namespace llvm
2007
2008#endif // LLVM_IR_PATTERNMATCH_H

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Analysis/AliasAnalysis.h

1//===- llvm/Analysis/AliasAnalysis.h - Alias Analysis Interface -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the generic AliasAnalysis interface, which is the common
10// interface used by all clients of alias analysis information, and is
11// implemented by all alias analysis implementations. Mod/Ref information is
12// also captured by this interface.
13//
14// Implementations of this interface must implement the various virtual methods;
15// doing so automatically provides functionality for the entire suite of client
16// APIs.
17//
18// This API identifies memory regions with the MemoryLocation class. The pointer
19// component specifies the base memory address of the region. The Size specifies
20// the maximum size (in address units) of the memory region, or
21// MemoryLocation::UnknownSize if the size is not known. The TBAA tag
22// identifies the "type" of the memory reference; see the
23// TypeBasedAliasAnalysis class for details.
24//
25// Some non-obvious details include:
26// - Pointers that point to two completely different objects in memory never
27// alias, regardless of the value of the Size component.
28// - NoAlias doesn't imply unequal pointers. The most obvious example of this
29// is two pointers to constant memory. Even if they are equal, constant
30// memory is never stored to, so there will never be any dependencies.
31// In this and other situations, the pointers may be both NoAlias and
32// MustAlias at the same time. The current API can only return one result,
33// though this is rarely a problem in practice.
34//
35//===----------------------------------------------------------------------===//
36
37#ifndef LLVM_ANALYSIS_ALIASANALYSIS_H
38#define LLVM_ANALYSIS_ALIASANALYSIS_H
39
40#include "llvm/ADT/DenseMap.h"
41#include "llvm/ADT/None.h"
42#include "llvm/ADT/Optional.h"
43#include "llvm/ADT/SmallVector.h"
44#include "llvm/Analysis/MemoryLocation.h"
45#include "llvm/Analysis/TargetLibraryInfo.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/Instruction.h"
48#include "llvm/IR/Instructions.h"
49#include "llvm/IR/PassManager.h"
50#include "llvm/Pass.h"
51#include <cstdint>
52#include <functional>
53#include <memory>
54#include <vector>
55
56namespace llvm {
57
58class AnalysisUsage;
59class BasicAAResult;
60class BasicBlock;
61class DominatorTree;
62class OrderedBasicBlock;
63class Value;
64
65/// The possible results of an alias query.
66///
67/// These results are always computed between two MemoryLocation objects as
68/// a query to some alias analysis.
69///
70/// Note that these are unscoped enumerations because we would like to support
71/// implicitly testing a result for the existence of any possible aliasing with
72/// a conversion to bool, but an "enum class" doesn't support this. The
73/// canonical names from the literature are suffixed and unique anyways, and so
74/// they serve as global constants in LLVM for these results.
75///
76/// See docs/AliasAnalysis.html for more information on the specific meanings
77/// of these values.
78enum AliasResult : uint8_t {
79 /// The two locations do not alias at all.
80 ///
81 /// This value is arranged to convert to false, while all other values
82 /// convert to true. This allows a boolean context to convert the result to
83 /// a binary flag indicating whether there is the possibility of aliasing.
84 NoAlias = 0,
85 /// The two locations may or may not alias. This is the least precise result.
86 MayAlias,
87 /// The two locations alias, but only due to a partial overlap.
88 PartialAlias,
89 /// The two locations precisely alias each other.
90 MustAlias,
91};
92
93/// << operator for AliasResult.
94raw_ostream &operator<<(raw_ostream &OS, AliasResult AR);
95
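A short sketch of how clients typically consume an AliasResult (illustrative; AA is an AAResults instance and LocA/LocB are MemoryLocations, all placeholders):

  AliasResult AR = AA.alias(LocA, LocB);
  if (AR == MustAlias) {
    // The two locations are provably the same.
  } else if (AR) {
    // Non-zero (MayAlias or PartialAlias): aliasing is possible.
  } else {
    // NoAlias converts to false, as documented above.
  }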
96/// Flags indicating whether a memory access modifies or references memory.
97///
98/// This is no access at all, a modification, a reference, or both
99/// a modification and a reference. These are specifically structured such that
100/// they form a three bit matrix and bit-tests for 'mod' or 'ref' or 'must'
101/// work with any of the possible values.
102enum class ModRefInfo : uint8_t {
103 /// Must is provided for completeness, but no routines will return only
104 /// Must today. See definition of Must below.
105 Must = 0,
106 /// The access may reference the value stored in memory,
107 /// a mustAlias relation was found, and no mayAlias or partialAlias found.
108 MustRef = 1,
109 /// The access may modify the value stored in memory,
110 /// a mustAlias relation was found, and no mayAlias or partialAlias found.
111 MustMod = 2,
112 /// The access may reference, modify or both the value stored in memory,
113 /// a mustAlias relation was found, and no mayAlias or partialAlias found.
114 MustModRef = MustRef | MustMod,
115 /// The access neither references nor modifies the value stored in memory.
116 NoModRef = 4,
117 /// The access may reference the value stored in memory.
118 Ref = NoModRef | MustRef,
119 /// The access may modify the value stored in memory.
120 Mod = NoModRef | MustMod,
121 /// The access may reference and may modify the value stored in memory.
122 ModRef = Ref | Mod,
123
124 /// About Must:
125 /// Must is set in a best effort manner.
126 /// We usually do not try our best to infer Must; instead it is merely
127 /// another piece of "free" information that is presented when available.
128 /// Must being set means a MustAlias relation was certainly found. For calls,
129 /// where multiple arguments are checked (argmemonly), this means that only
130 /// MustAlias or NoAlias results were found.
131 /// Must is not set for RAR accesses, even if the two locations must
132 /// alias. The reason is that two read accesses translate to an early return
133 /// of NoModRef. An additional alias check to set Must may be
134 /// expensive. Other cases may also not set Must (e.g. callCapturesBefore).
135 /// We refer to Must being *set* when the most significant bit is *cleared*.
136 /// Conversely we *clear* Must information by *setting* the Must bit to 1.
137};
138
139LLVM_NODISCARD inline bool isNoModRef(const ModRefInfo MRI) {
140 return (static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustModRef)) ==
141 static_cast<int>(ModRefInfo::Must);
142}
143LLVM_NODISCARD inline bool isModOrRefSet(const ModRefInfo MRI) {
144 return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustModRef);
145}
146LLVM_NODISCARD inline bool isModAndRefSet(const ModRefInfo MRI) {
147 return (static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustModRef)) ==
148 static_cast<int>(ModRefInfo::MustModRef);
149}
150LLVM_NODISCARD inline bool isModSet(const ModRefInfo MRI) {
151 return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustMod);
152}
153LLVM_NODISCARD inline bool isRefSet(const ModRefInfo MRI) {
154 return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustRef);
155}
156LLVM_NODISCARD inline bool isMustSet(const ModRefInfo MRI) {
157 return !(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::NoModRef));
158}
159
160LLVM_NODISCARD inline ModRefInfo setMod(const ModRefInfo MRI) {
161 return ModRefInfo(static_cast<int>(MRI) |
162 static_cast<int>(ModRefInfo::MustMod));
163}
164LLVM_NODISCARD inline ModRefInfo setRef(const ModRefInfo MRI) {
165 return ModRefInfo(static_cast<int>(MRI) |
166 static_cast<int>(ModRefInfo::MustRef));
167}
168LLVM_NODISCARD inline ModRefInfo setMust(const ModRefInfo MRI) {
169 return ModRefInfo(static_cast<int>(MRI) &
170 static_cast<int>(ModRefInfo::MustModRef));
171}
172LLVM_NODISCARD inline ModRefInfo setModAndRef(const ModRefInfo MRI) {
173 return ModRefInfo(static_cast<int>(MRI) |
174 static_cast<int>(ModRefInfo::MustModRef));
175}
176LLVM_NODISCARD inline ModRefInfo clearMod(const ModRefInfo MRI) {
177 return ModRefInfo(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Ref));
178}
179LLVM_NODISCARD inline ModRefInfo clearRef(const ModRefInfo MRI) {
180 return ModRefInfo(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Mod));
181}
182LLVM_NODISCARD inline ModRefInfo clearMust(const ModRefInfo MRI) {
183 return ModRefInfo(static_cast<int>(MRI) |
184 static_cast<int>(ModRefInfo::NoModRef));
185}
186LLVM_NODISCARD inline ModRefInfo unionModRef(const ModRefInfo MRI1,
187 const ModRefInfo MRI2) {
188 return ModRefInfo(static_cast<int>(MRI1) | static_cast<int>(MRI2));
189}
190LLVM_NODISCARD inline ModRefInfo intersectModRef(const ModRefInfo MRI1,
191 const ModRefInfo MRI2) {
192 return ModRefInfo(static_cast<int>(MRI1) & static_cast<int>(MRI2));
193}
194
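A small sketch of the combining and bit-test helpers above (purely illustrative):

  ModRefInfo MRI = unionModRef(ModRefInfo::Ref, ModRefInfo::MustMod);
  // MRI is now ModRefInfo::ModRef: isModSet(MRI) and isRefSet(MRI) are both true,
  // but isMustSet(MRI) is false, because the Ref operand carried no Must
  // information and the union conservatively drops it.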
195/// The locations at which a function might access memory.
196///
197/// These are primarily used in conjunction with the \c AccessKind bits to
198/// describe both the nature of access and the locations of access for a
199/// function call.
200enum FunctionModRefLocation {
201 /// Base case is no access to memory.
202 FMRL_Nowhere = 0,
203 /// Access to memory via argument pointers.
204 FMRL_ArgumentPointees = 8,
205 /// Memory that is inaccessible via LLVM IR.
206 FMRL_InaccessibleMem = 16,
207 /// Access to any memory.
208 FMRL_Anywhere = 32 | FMRL_InaccessibleMem | FMRL_ArgumentPointees
209};
210
211/// Summary of how a function affects memory in the program.
212///
213/// Loads from constant globals are not considered memory accesses for this
214/// interface. Also, functions may freely modify stack space local to their
215/// invocation without having to report it through these interfaces.
216enum FunctionModRefBehavior {
217 /// This function does not perform any non-local loads or stores to memory.
218 ///
219 /// This property corresponds to the GCC 'const' attribute.
220 /// This property corresponds to the LLVM IR 'readnone' attribute.
221 /// This property corresponds to the IntrNoMem LLVM intrinsic flag.
222 FMRB_DoesNotAccessMemory =
223 FMRL_Nowhere | static_cast<int>(ModRefInfo::NoModRef),
224
225 /// The only memory references in this function (if it has any) are
226 /// non-volatile loads from objects pointed to by its pointer-typed
227 /// arguments, with arbitrary offsets.
228 ///
229 /// This property corresponds to the IntrReadArgMem LLVM intrinsic flag.
230 FMRB_OnlyReadsArgumentPointees =
231 FMRL_ArgumentPointees | static_cast<int>(ModRefInfo::Ref),
232
233 /// The only memory references in this function (if it has any) are
234 /// non-volatile loads and stores from objects pointed to by its
235 /// pointer-typed arguments, with arbitrary offsets.
236 ///
237 /// This property corresponds to the IntrArgMemOnly LLVM intrinsic flag.
238 FMRB_OnlyAccessesArgumentPointees =
239 FMRL_ArgumentPointees | static_cast<int>(ModRefInfo::ModRef),
240
241 /// The only memory references in this function (if it has any) are
242 /// references of memory that is otherwise inaccessible via LLVM IR.
243 ///
244 /// This property corresponds to the LLVM IR inaccessiblememonly attribute.
245 FMRB_OnlyAccessesInaccessibleMem =
246 FMRL_InaccessibleMem | static_cast<int>(ModRefInfo::ModRef),
247
248 /// The function may perform non-volatile loads and stores of objects
249 /// pointed to by its pointer-typed arguments, with arbitrary offsets, and
250 /// it may also perform loads and stores of memory that is otherwise
251 /// inaccessible via LLVM IR.
252 ///
253 /// This property corresponds to the LLVM IR
254 /// inaccessiblemem_or_argmemonly attribute.
255 FMRB_OnlyAccessesInaccessibleOrArgMem = FMRL_InaccessibleMem |
256 FMRL_ArgumentPointees |
257 static_cast<int>(ModRefInfo::ModRef),
258
259 /// This function does not perform any non-local stores or volatile loads,
260 /// but may read from any memory location.
261 ///
262 /// This property corresponds to the GCC 'pure' attribute.
263 /// This property corresponds to the LLVM IR 'readonly' attribute.
264 /// This property corresponds to the IntrReadMem LLVM intrinsic flag.
265 FMRB_OnlyReadsMemory = FMRL_Anywhere | static_cast<int>(ModRefInfo::Ref),
266
267 // This function does not read from memory anywhere, but may write to any
268 // memory location.
269 //
270 // This property corresponds to the LLVM IR 'writeonly' attribute.
271 // This property corresponds to the IntrWriteMem LLVM intrinsic flag.
272 FMRB_DoesNotReadMemory = FMRL_Anywhere | static_cast<int>(ModRefInfo::Mod),
273
274 /// This indicates that the function could not be classified into one of the
275 /// behaviors above.
276 FMRB_UnknownModRefBehavior =
277 FMRL_Anywhere | static_cast<int>(ModRefInfo::ModRef)
278};
279
280// Wrapper method strips bits significant only in FunctionModRefBehavior,
281// to obtain a valid ModRefInfo. The benefit of using the wrapper is that if
282// ModRefInfo enum changes, the wrapper can be updated to & with the new enum
283// entry with all bits set to 1.
284LLVM_NODISCARD inline ModRefInfo
285createModRefInfo(const FunctionModRefBehavior FMRB) {
286 return ModRefInfo(FMRB & static_cast<int>(ModRefInfo::ModRef));
287}
288
289/// This class stores info we want to provide to or retain within an alias
290/// query. By default, the root query is stateless and starts with a freshly
291/// constructed info object. Specific alias analyses can use this query info to
292/// store per-query state that is important for recursive or nested queries to
293/// avoid recomputing. To enable preserving this state across multiple queries
294/// where safe (due to the IR not changing), use a `BatchAAResults` wrapper.
295/// The information stored in an `AAQueryInfo` is currently limited to the
296/// caches used by BasicAA, but can further be extended to fit other AA needs.
297class AAQueryInfo {
298public:
299 using LocPair = std::pair<MemoryLocation, MemoryLocation>;
300 using AliasCacheT = SmallDenseMap<LocPair, AliasResult, 8>;
301 AliasCacheT AliasCache;
302
303 using IsCapturedCacheT = SmallDenseMap<const Value *, bool, 8>;
304 IsCapturedCacheT IsCapturedCache;
305
306 AAQueryInfo() : AliasCache(), IsCapturedCache() {}
307};
308
309class BatchAAResults;
310
311class AAResults {
312public:
313 // Make these results default constructible and movable. We have to spell
314 // these out because MSVC won't synthesize them.
315 AAResults(const TargetLibraryInfo &TLI) : TLI(TLI) {}
316 AAResults(AAResults &&Arg);
317 ~AAResults();
318
319 /// Register a specific AA result.
320 template <typename AAResultT> void addAAResult(AAResultT &AAResult) {
321 // FIXME: We should use a much lighter weight system than the usual
322 // polymorphic pattern because we don't own AAResult. It should
323 // ideally involve two pointers and no separate allocation.
324 AAs.emplace_back(new Model<AAResultT>(AAResult, *this));
325 }
326
327 /// Register a function analysis ID that the results aggregation depends on.
328 ///
329 /// This is used in the new pass manager to implement the invalidation logic
330 /// where we must invalidate the results aggregation if any of our component
331 /// analyses become invalid.
332 void addAADependencyID(AnalysisKey *ID) { AADeps.push_back(ID); }
333
334 /// Handle invalidation events in the new pass manager.
335 ///
336 /// The aggregation is invalidated if any of the underlying analyses is
337 /// invalidated.
338 bool invalidate(Function &F, const PreservedAnalyses &PA,
339 FunctionAnalysisManager::Invalidator &Inv);
340
341 //===--------------------------------------------------------------------===//
342 /// \name Alias Queries
343 /// @{
344
345 /// The main low level interface to the alias analysis implementation.
346 /// Returns an AliasResult indicating whether the two pointers are aliased to
347 /// each other. This is the interface that must be implemented by specific
348 /// alias analysis implementations.
349 AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
350
351 /// A convenience wrapper around the primary \c alias interface.
352 AliasResult alias(const Value *V1, LocationSize V1Size, const Value *V2,
353 LocationSize V2Size) {
354 return alias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size));
355 }
356
357 /// A convenience wrapper around the primary \c alias interface.
358 AliasResult alias(const Value *V1, const Value *V2) {
359 return alias(V1, LocationSize::unknown(), V2, LocationSize::unknown());
360 }
361
362 /// A trivial helper function to check to see if the specified pointers are
363 /// no-alias.
364 bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
365 return alias(LocA, LocB) == NoAlias;
366 }
367
368 /// A convenience wrapper around the \c isNoAlias helper interface.
369 bool isNoAlias(const Value *V1, LocationSize V1Size, const Value *V2,
370 LocationSize V2Size) {
371 return isNoAlias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size));
372 }
373
374 /// A convenience wrapper around the \c isNoAlias helper interface.
375 bool isNoAlias(const Value *V1, const Value *V2) {
376 return isNoAlias(MemoryLocation(V1), MemoryLocation(V2));
377 }
378
379 /// A trivial helper function to check to see if the specified pointers are
380 /// must-alias.
381 bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
382 return alias(LocA, LocB) == MustAlias;
383 }
384
385 /// A convenience wrapper around the \c isMustAlias helper interface.
386 bool isMustAlias(const Value *V1, const Value *V2) {
387 return alias(V1, LocationSize::precise(1), V2, LocationSize::precise(1)) ==
388 MustAlias;
389 }
390
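Sketch of a typical no-alias query (illustrative; AA, PtrA and PtrB are placeholders):

  if (AA.isNoAlias(MemoryLocation(PtrA, LocationSize::precise(4)),
                   MemoryLocation(PtrB, LocationSize::precise(4)))) {
    // The two 4-byte accesses are independent and may be reordered.
  }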
391 /// Checks whether the given location points to constant memory, or if
392 /// \p OrLocal is true whether it points to a local alloca.
393 bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false);
394
395 /// A convenience wrapper around the primary \c pointsToConstantMemory
396 /// interface.
397 bool pointsToConstantMemory(const Value *P, bool OrLocal = false) {
398 return pointsToConstantMemory(MemoryLocation(P), OrLocal);
399 }
400
401 /// @}
402 //===--------------------------------------------------------------------===//
403 /// \name Simple mod/ref information
404 /// @{
405
406 /// Get the ModRef info associated with a pointer argument of a call. The
407 /// result's bits are set to indicate the allowed aliasing ModRef kinds. Note
408 /// that these bits do not necessarily account for the overall behavior of
409 /// the function, but rather only provide additional per-argument
410 /// information. This never sets ModRefInfo::Must.
411 ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx);
412
413 /// Return the behavior of the given call site.
414 FunctionModRefBehavior getModRefBehavior(const CallBase *Call);
415
416 /// Return the behavior when calling the given function.
417 FunctionModRefBehavior getModRefBehavior(const Function *F);
418
419 /// Checks if the specified call is known to never read or write memory.
420 ///
421 /// Note that if the call only reads from known-constant memory, it is also
422 /// legal to return true. Also, calls that unwind the stack are legal for
423 /// this predicate.
424 ///
425 /// Many optimizations (such as CSE and LICM) can be performed on such calls
426 /// without worrying about aliasing properties, and many calls have this
427 /// property (e.g. calls to 'sin' and 'cos').
428 ///
429 /// This property corresponds to the GCC 'const' attribute.
430 bool doesNotAccessMemory(const CallBase *Call) {
431 return getModRefBehavior(Call) == FMRB_DoesNotAccessMemory;
432 }
433
434 /// Checks if the specified function is known to never read or write memory.
435 ///
436 /// Note that if the function only reads from known-constant memory, it is
437 /// also legal to return true. Also, function that unwind the stack are legal
438 /// for this predicate.
439 ///
440 /// Many optimizations (such as CSE and LICM) can be performed on such calls
441 /// to such functions without worrying about aliasing properties, and many
442 /// functions have this property (e.g. 'sin' and 'cos').
443 ///
444 /// This property corresponds to the GCC 'const' attribute.
445 bool doesNotAccessMemory(const Function *F) {
446 return getModRefBehavior(F) == FMRB_DoesNotAccessMemory;
447 }
448
449 /// Checks if the specified call is known to only read from non-volatile
450 /// memory (or not access memory at all).
451 ///
452 /// Calls that unwind the stack are legal for this predicate.
453 ///
454 /// This property allows many common optimizations to be performed in the
455 /// absence of interfering store instructions, such as CSE of strlen calls.
456 ///
457 /// This property corresponds to the GCC 'pure' attribute.
458 bool onlyReadsMemory(const CallBase *Call) {
459 return onlyReadsMemory(getModRefBehavior(Call));
460 }
461
462 /// Checks if the specified function is known to only read from non-volatile
463 /// memory (or not access memory at all).
464 ///
465 /// Functions that unwind the stack are legal for this predicate.
466 ///
467 /// This property allows many common optimizations to be performed in the
468 /// absence of interfering store instructions, such as CSE of strlen calls.
469 ///
470 /// This property corresponds to the GCC 'pure' attribute.
471 bool onlyReadsMemory(const Function *F) {
472 return onlyReadsMemory(getModRefBehavior(F));
473 }
474
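Sketch of a common client check on call sites (illustrative; AA and I are placeholders):

  if (const auto *Call = dyn_cast<CallBase>(&I))
    if (AA.onlyReadsMemory(Call)) {
      // The call writes no memory, so e.g. two identical such calls with no
      // intervening stores compute the same value.
    }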
475 /// Checks if functions with the specified behavior are known to only read
476 /// from non-volatile memory (or not access memory at all).
477 static bool onlyReadsMemory(FunctionModRefBehavior MRB) {
478 return !isModSet(createModRefInfo(MRB));
51
Assuming the condition is true
52
Returning the value 1, which participates in a condition later
479 }
480
481 /// Checks if functions with the specified behavior are known to only write
482 /// memory (or not access memory at all).
483 static bool doesNotReadMemory(FunctionModRefBehavior MRB) {
484 return !isRefSet(createModRefInfo(MRB));
485 }
486
487 /// Checks if functions with the specified behavior are known to read and
488 /// write at most from objects pointed to by their pointer-typed arguments
489 /// (with arbitrary offsets).
490 static bool onlyAccessesArgPointees(FunctionModRefBehavior MRB) {
491 return !(MRB & FMRL_Anywhere & ~FMRL_ArgumentPointees);
56
Assuming the condition is false
57
Returning zero, which participates in a condition later
492 }
493
494 /// Checks if functions with the specified behavior are known to potentially
495 /// read or write from objects pointed to by their pointer-typed arguments
496 /// (with arbitrary offsets).
497 static bool doesAccessArgPointees(FunctionModRefBehavior MRB) {
498 return isModOrRefSet(createModRefInfo(MRB)) &&
499 (MRB & FMRL_ArgumentPointees);
500 }
501
502 /// Checks if functions with the specified behavior are known to read and
503 /// write at most from memory that is inaccessible from LLVM IR.
504 static bool onlyAccessesInaccessibleMem(FunctionModRefBehavior MRB) {
505 return !(MRB & FMRL_Anywhere & ~FMRL_InaccessibleMem);
506 }
507
508 /// Checks if functions with the specified behavior are known to potentially
509 /// read or write from memory that is inaccessible from LLVM IR.
510 static bool doesAccessInaccessibleMem(FunctionModRefBehavior MRB) {
511 return isModOrRefSet(createModRefInfo(MRB)) && (MRB & FMRL_InaccessibleMem);
512 }
513
514 /// Checks if functions with the specified behavior are known to read and
515 /// write at most from memory that is inaccessible from LLVM IR or objects
516 /// pointed to by their pointer-typed arguments (with arbitrary offsets).
517 static bool onlyAccessesInaccessibleOrArgMem(FunctionModRefBehavior MRB) {
518 return !(MRB & FMRL_Anywhere &
519 ~(FMRL_InaccessibleMem | FMRL_ArgumentPointees));
520 }
521
522 /// getModRefInfo (for call sites) - Return information about whether
523 /// a particular call site modifies or reads the specified memory location.
524 ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
525
526 /// getModRefInfo (for call sites) - A convenience wrapper.
527 ModRefInfo getModRefInfo(const CallBase *Call, const Value *P,
528 LocationSize Size) {
529 return getModRefInfo(Call, MemoryLocation(P, Size));
530 }
531
532 /// getModRefInfo (for loads) - Return information about whether
533 /// a particular load modifies or reads the specified memory location.
534 ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc);
535
536 /// getModRefInfo (for loads) - A convenience wrapper.
537 ModRefInfo getModRefInfo(const LoadInst *L, const Value *P,
538 LocationSize Size) {
539 return getModRefInfo(L, MemoryLocation(P, Size));
540 }
541
542 /// getModRefInfo (for stores) - Return information about whether
543 /// a particular store modifies or reads the specified memory location.
544 ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc);
545
546 /// getModRefInfo (for stores) - A convenience wrapper.
547 ModRefInfo getModRefInfo(const StoreInst *S, const Value *P,
548 LocationSize Size) {
549 return getModRefInfo(S, MemoryLocation(P, Size));
550 }
551
552 /// getModRefInfo (for fences) - Return information about whether
553 /// a particular store modifies or reads the specified memory location.
554 ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc);
555
556 /// getModRefInfo (for fences) - A convenience wrapper.
557 ModRefInfo getModRefInfo(const FenceInst *S, const Value *P,
558 LocationSize Size) {
559 return getModRefInfo(S, MemoryLocation(P, Size));
560 }
561
562 /// getModRefInfo (for cmpxchges) - Return information about whether
563 /// a particular cmpxchg modifies or reads the specified memory location.
564 ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX,
565 const MemoryLocation &Loc);
566
567 /// getModRefInfo (for cmpxchges) - A convenience wrapper.
568 ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, const Value *P,
569 LocationSize Size) {
570 return getModRefInfo(CX, MemoryLocation(P, Size));
571 }
572
573 /// getModRefInfo (for atomicrmws) - Return information about whether
574 /// a particular atomicrmw modifies or reads the specified memory location.
575 ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc);
576
577 /// getModRefInfo (for atomicrmws) - A convenience wrapper.
578 ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const Value *P,
579 LocationSize Size) {
580 return getModRefInfo(RMW, MemoryLocation(P, Size));
581 }
582
583 /// getModRefInfo (for va_args) - Return information about whether
584 /// a particular va_arg modifies or reads the specified memory location.
585 ModRefInfo getModRefInfo(const VAArgInst *I, const MemoryLocation &Loc);
586
587 /// getModRefInfo (for va_args) - A convenience wrapper.
588 ModRefInfo getModRefInfo(const VAArgInst *I, const Value *P,
589 LocationSize Size) {
590 return getModRefInfo(I, MemoryLocation(P, Size));
591 }
592
593 /// getModRefInfo (for catchpads) - Return information about whether
594 /// a particular catchpad modifies or reads the specified memory location.
595 ModRefInfo getModRefInfo(const CatchPadInst *I, const MemoryLocation &Loc);
596
597 /// getModRefInfo (for catchpads) - A convenience wrapper.
598 ModRefInfo getModRefInfo(const CatchPadInst *I, const Value *P,
599 LocationSize Size) {
600 return getModRefInfo(I, MemoryLocation(P, Size));
601 }
602
603 /// getModRefInfo (for catchrets) - Return information about whether
604 /// a particular catchret modifies or reads the specified memory location.
605 ModRefInfo getModRefInfo(const CatchReturnInst *I, const MemoryLocation &Loc);
606
607 /// getModRefInfo (for catchrets) - A convenience wrapper.
608 ModRefInfo getModRefInfo(const CatchReturnInst *I, const Value *P,
609 LocationSize Size) {
610 return getModRefInfo(I, MemoryLocation(P, Size));
611 }
612
613 /// Check whether or not an instruction may read or write the optionally
614 /// specified memory location.
615 ///
616 ///
617 /// For example, an instruction that doesn't read or write memory may be
618 /// trivially LICM'd.
619 ///
620 /// For function calls, this delegates to the alias-analysis specific
621 /// call-site mod-ref behavior queries. Otherwise it delegates to the specific
622 /// helpers above.
623 ModRefInfo getModRefInfo(const Instruction *I,
624 const Optional<MemoryLocation> &OptLoc) {
625 AAQueryInfo AAQIP;
626 return getModRefInfo(I, OptLoc, AAQIP);
627 }
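A minimal sketch of the query described above, assuming an AAResults &AA and an Instruction &Inst are in scope (the wrapper name is hypothetical); the no-location form is the cheap first test a pass such as LICM makes before trying to hoist an instruction:

static bool touchesNoMemory(AAResults &AA, const Instruction &Inst) {
  // Passing None asks whether the instruction may touch any memory at all.
  return !isModOrRefSet(AA.getModRefInfo(&Inst, None));
}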
628
629 /// A convenience wrapper for constructing the memory location.
630 ModRefInfo getModRefInfo(const Instruction *I, const Value *P,
631 LocationSize Size) {
632 return getModRefInfo(I, MemoryLocation(P, Size));
633 }
634
635 /// Return information about whether a call and an instruction may refer to
636 /// the same memory locations.
637 ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call);
638
639 /// Return information about whether two call sites may refer to the same set
640 /// of memory locations. See the AA documentation for details:
641 /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo
642 ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
643
644 /// Return information about whether a particular call site modifies
645 /// or reads the specified memory location \p MemLoc before instruction \p I
646 /// in a BasicBlock. An ordered basic block \p OBB can be used to speed up
647 /// instruction ordering queries inside the BasicBlock containing \p I.
648 /// Early exits in callCapturesBefore may lead to ModRefInfo::Must not being
649 /// set.
650 ModRefInfo callCapturesBefore(const Instruction *I,
651 const MemoryLocation &MemLoc, DominatorTree *DT,
652 OrderedBasicBlock *OBB = nullptr);
653
654 /// A convenience wrapper to synthesize a memory location.
655 ModRefInfo callCapturesBefore(const Instruction *I, const Value *P,
656 LocationSize Size, DominatorTree *DT,
657 OrderedBasicBlock *OBB = nullptr) {
658 return callCapturesBefore(I, MemoryLocation(P, Size), DT, OBB);
659 }
660
661 /// @}
662 //===--------------------------------------------------------------------===//
663 /// \name Higher level methods for querying mod/ref information.
664 /// @{
665
666 /// Check if it is possible for execution of the specified basic block to
667 /// modify the location Loc.
668 bool canBasicBlockModify(const BasicBlock &BB, const MemoryLocation &Loc);
669
670 /// A convenience wrapper synthesizing a memory location.
671 bool canBasicBlockModify(const BasicBlock &BB, const Value *P,
672 LocationSize Size) {
673 return canBasicBlockModify(BB, MemoryLocation(P, Size));
674 }
675
676 /// Check if it is possible for the execution of the specified instructions
677 /// to mod/ref (according to the mode) the location Loc.
678 ///
679 /// The instructions to consider are all of the instructions in the range of
680 /// [I1,I2] INCLUSIVE. I1 and I2 must be in the same basic block.
681 bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2,
682 const MemoryLocation &Loc,
683 const ModRefInfo Mode);
684
685 /// A convenience wrapper synthesizing a memory location.
686 bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2,
687 const Value *Ptr, LocationSize Size,
688 const ModRefInfo Mode) {
689 return canInstructionRangeModRef(I1, I2, MemoryLocation(Ptr, Size), Mode);
690 }
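A minimal sketch of the range query, assuming Begin and End lie in the same basic block as required above and that MemoryLocation::get(&SI) is available to build the store's location (the helper name is hypothetical):

static bool rangeMayClobberStore(AAResults &AA, const Instruction &Begin,
                                 const Instruction &End, const StoreInst &SI) {
  // Inclusive range [Begin, End]; ModRefInfo::Mod restricts the query to
  // instructions that may write the store's location.
  return AA.canInstructionRangeModRef(Begin, End, MemoryLocation::get(&SI),
                                      ModRefInfo::Mod);
}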
691
692private:
693 AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
694 AAQueryInfo &AAQI);
695 bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
696 bool OrLocal = false);
697 ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call2,
698 AAQueryInfo &AAQIP);
699 ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
700 AAQueryInfo &AAQI);
701 ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
702 AAQueryInfo &AAQI);
703 ModRefInfo getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc,
704 AAQueryInfo &AAQI);
705 ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc,
706 AAQueryInfo &AAQI);
707 ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc,
708 AAQueryInfo &AAQI);
709 ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc,
710 AAQueryInfo &AAQI);
711 ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX,
712 const MemoryLocation &Loc, AAQueryInfo &AAQI);
713 ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc,
714 AAQueryInfo &AAQI);
715 ModRefInfo getModRefInfo(const CatchPadInst *I, const MemoryLocation &Loc,
716 AAQueryInfo &AAQI);
717 ModRefInfo getModRefInfo(const CatchReturnInst *I, const MemoryLocation &Loc,
718 AAQueryInfo &AAQI);
719 ModRefInfo getModRefInfo(const Instruction *I,
720 const Optional<MemoryLocation> &OptLoc,
721 AAQueryInfo &AAQIP) {
722 if (OptLoc == None) {
723 if (const auto *Call = dyn_cast<CallBase>(I)) {
724 return createModRefInfo(getModRefBehavior(Call));
725 }
726 }
727
728 const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation());
729
730 switch (I->getOpcode()) {
731 case Instruction::VAArg:
732 return getModRefInfo((const VAArgInst *)I, Loc, AAQIP);
733 case Instruction::Load:
734 return getModRefInfo((const LoadInst *)I, Loc, AAQIP);
735 case Instruction::Store:
736 return getModRefInfo((const StoreInst *)I, Loc, AAQIP);
737 case Instruction::Fence:
738 return getModRefInfo((const FenceInst *)I, Loc, AAQIP);
739 case Instruction::AtomicCmpXchg:
740 return getModRefInfo((const AtomicCmpXchgInst *)I, Loc, AAQIP);
741 case Instruction::AtomicRMW:
742 return getModRefInfo((const AtomicRMWInst *)I, Loc, AAQIP);
743 case Instruction::Call:
744 return getModRefInfo((const CallInst *)I, Loc, AAQIP);
745 case Instruction::Invoke:
746 return getModRefInfo((const InvokeInst *)I, Loc, AAQIP);
747 case Instruction::CatchPad:
748 return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP);
749 case Instruction::CatchRet:
750 return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP);
751 default:
752 return ModRefInfo::NoModRef;
753 }
754 }
755
756 class Concept;
757
758 template <typename T> class Model;
759
760 template <typename T> friend class AAResultBase;
761
762 const TargetLibraryInfo &TLI;
763
764 std::vector<std::unique_ptr<Concept>> AAs;
765
766 std::vector<AnalysisKey *> AADeps;
767
768 friend class BatchAAResults;
769};
770
771/// This class is a wrapper over an AAResults, and it is intended to be used
772/// only when there are no IR changes in between queries. BatchAAResults
773/// reuses the same `AAQueryInfo` to preserve state across queries,
774/// essentially making AA work in "batch mode". The internal state cannot be
775/// cleared, so to go "out of batch mode", the user must either use AAResults
776/// or create a new BatchAAResults.
777class BatchAAResults {
778 AAResults &AA;
779 AAQueryInfo AAQI;
780
781public:
782 BatchAAResults(AAResults &AAR) : AA(AAR), AAQI() {}
783 AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
784 return AA.alias(LocA, LocB, AAQI);
785 }
786 bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false) {
787 return AA.pointsToConstantMemory(Loc, AAQI, OrLocal);
788 }
789 ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) {
790 return AA.getModRefInfo(Call, Loc, AAQI);
791 }
792 ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) {
793 return AA.getModRefInfo(Call1, Call2, AAQI);
794 }
795 ModRefInfo getModRefInfo(const Instruction *I,
796 const Optional<MemoryLocation> &OptLoc) {
797 return AA.getModRefInfo(I, OptLoc, AAQI);
798 }
799 ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call2) {
800 return AA.getModRefInfo(I, Call2, AAQI);
801 }
802 ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
803 return AA.getArgModRefInfo(Call, ArgIdx);
804 }
805 FunctionModRefBehavior getModRefBehavior(const CallBase *Call) {
806 return AA.getModRefBehavior(Call);
807 }
808};
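A minimal sketch of the batch mode described in the class comment, assuming two loads and a call are in scope and no IR changes happen between the queries (the function and value names are hypothetical):

static void batchQueries(AAResults &AA, const LoadInst *A, const LoadInst *B,
                         const CallBase *Call) {
  BatchAAResults BatchAA(AA);
  // Both queries below share the same internal AAQueryInfo state.
  AliasResult AR = BatchAA.alias(MemoryLocation::get(A),
                                 MemoryLocation::get(B));
  ModRefInfo MRI = BatchAA.getModRefInfo(Call, MemoryLocation::get(A));
  (void)AR;
  (void)MRI;
}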
809
810/// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
811/// pointer or reference.
812using AliasAnalysis = AAResults;
813
814/// A private abstract base class describing the concept of an individual alias
815/// analysis implementation.
816///
817/// This interface is implemented by any \c Model instantiation. It is also the
818/// interface which a type used to instantiate the model must provide.
819///
820/// All of these methods model methods of the same name in the \c
821/// AAResults class. Only the differences, and the specifics of how the
822/// implementations are called, are documented here.
823class AAResults::Concept {
824public:
825 virtual ~Concept() = 0;
826
827 /// An update API used internally by the AAResults to provide
828 /// a handle back to the top level aggregation.
829 virtual void setAAResults(AAResults *NewAAR) = 0;
830
831 //===--------------------------------------------------------------------===//
832 /// \name Alias Queries
833 /// @{
834
835 /// The main low level interface to the alias analysis implementation.
836 /// Returns an AliasResult indicating whether the two pointers are aliased to
837 /// each other. This is the interface that must be implemented by specific
838 /// alias analysis implementations.
839 virtual AliasResult alias(const MemoryLocation &LocA,
840 const MemoryLocation &LocB, AAQueryInfo &AAQI) = 0;
841
842 /// Checks whether the given location points to constant memory, or if
843 /// \p OrLocal is true whether it points to a local alloca.
844 virtual bool pointsToConstantMemory(const MemoryLocation &Loc,
845 AAQueryInfo &AAQI, bool OrLocal) = 0;
846
847 /// @}
848 //===--------------------------------------------------------------------===//
849 /// \name Simple mod/ref information
850 /// @{
851
852 /// Get the ModRef info associated with a pointer argument of a callsite. The
853 /// result's bits are set to indicate the allowed aliasing ModRef kinds. Note
854 /// that these bits do not necessarily account for the overall behavior of
855 /// the function, but rather only provide additional per-argument
856 /// information.
857 virtual ModRefInfo getArgModRefInfo(const CallBase *Call,
858 unsigned ArgIdx) = 0;
859
860 /// Return the behavior of the given call site.
861 virtual FunctionModRefBehavior getModRefBehavior(const CallBase *Call) = 0;
862
863 /// Return the behavior when calling the given function.
864 virtual FunctionModRefBehavior getModRefBehavior(const Function *F) = 0;
865
866 /// getModRefInfo (for call sites) - Return information about whether
867 /// a particular call site modifies or reads the specified memory location.
868 virtual ModRefInfo getModRefInfo(const CallBase *Call,
869 const MemoryLocation &Loc,
870 AAQueryInfo &AAQI) = 0;
871
872 /// Return information about whether two call sites may refer to the same set
873 /// of memory locations. See the AA documentation for details:
874 /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo
875 virtual ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
876 AAQueryInfo &AAQI) = 0;
877
878 /// @}
879};
880
881/// A private class template which derives from \c Concept and wraps some other
882/// type.
883///
884/// This models the concept by directly forwarding each interface point to the
885/// wrapped type which must implement a compatible interface. This provides
886/// a type erased binding.
887template <typename AAResultT> class AAResults::Model final : public Concept {
888 AAResultT &Result;
889
890public:
891 explicit Model(AAResultT &Result, AAResults &AAR) : Result(Result) {
892 Result.setAAResults(&AAR);
893 }
894 ~Model() override = default;
895
896 void setAAResults(AAResults *NewAAR) override { Result.setAAResults(NewAAR); }
897
898 AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
899 AAQueryInfo &AAQI) override {
900 return Result.alias(LocA, LocB, AAQI);
901 }
902
903 bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
904 bool OrLocal) override {
905 return Result.pointsToConstantMemory(Loc, AAQI, OrLocal);
906 }
907
908 ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) override {
909 return Result.getArgModRefInfo(Call, ArgIdx);
910 }
911
912 FunctionModRefBehavior getModRefBehavior(const CallBase *Call) override {
913 return Result.getModRefBehavior(Call);
914 }
915
916 FunctionModRefBehavior getModRefBehavior(const Function *F) override {
917 return Result.getModRefBehavior(F);
918 }
919
920 ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
921 AAQueryInfo &AAQI) override {
922 return Result.getModRefInfo(Call, Loc, AAQI);
923 }
924
925 ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
926 AAQueryInfo &AAQI) override {
927 return Result.getModRefInfo(Call1, Call2, AAQI);
928 }
929};
930
931/// A CRTP-driven "mixin" base class to help implement the function alias
932/// analysis results concept.
933///
934/// Because of the nature of many alias analysis implementations, they often
935/// only implement a subset of the interface. This base class will attempt to
936/// implement the remaining portions of the interface in terms of simpler forms
937/// of the interface where possible, and otherwise provide conservatively
938/// correct fallback implementations.
939///
940/// Implementors of an alias analysis should derive from this CRTP base
941/// class and then override the specific methods that they wish to customize.
942/// There is no need to use virtual anywhere; the CRTP base class does static
943/// dispatch to the derived type passed into it.
944template <typename DerivedT> class AAResultBase {
945 // Expose some parts of the interface only to the AAResults::Model
946 // for wrapping. Specifically, this allows the model to call our
947 // setAAResults method without exposing it as a fully public API.
948 friend class AAResults::Model<DerivedT>;
949
950 /// A pointer to the AAResults object that this AAResult is
951 /// aggregated within. May be null if not aggregated.
952 AAResults *AAR = nullptr;
953
954 /// Helper to dispatch calls back through the derived type.
955 DerivedT &derived() { return static_cast<DerivedT &>(*this); }
956
957 /// A setter for the AAResults pointer, which is used to satisfy the
958 /// AAResults::Model contract.
959 void setAAResults(AAResults *NewAAR) { AAR = NewAAR; }
960
961protected:
962 /// This proxy class models a common pattern where we delegate to either the
963 /// top-level \c AAResults aggregation if one is registered, or to the
964 /// current result if none are registered.
965 class AAResultsProxy {
966 AAResults *AAR;
967 DerivedT &CurrentResult;
968
969 public:
970 AAResultsProxy(AAResults *AAR, DerivedT &CurrentResult)
971 : AAR(AAR), CurrentResult(CurrentResult) {}
972
973 AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
974 AAQueryInfo &AAQI) {
975 return AAR ? AAR->alias(LocA, LocB, AAQI)
976 : CurrentResult.alias(LocA, LocB, AAQI);
977 }
978
979 bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
980 bool OrLocal) {
981 return AAR ? AAR->pointsToConstantMemory(Loc, AAQI, OrLocal)
982 : CurrentResult.pointsToConstantMemory(Loc, AAQI, OrLocal);
983 }
984
985 ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
986 return AAR ? AAR->getArgModRefInfo(Call, ArgIdx)
987 : CurrentResult.getArgModRefInfo(Call, ArgIdx);
988 }
989
990 FunctionModRefBehavior getModRefBehavior(const CallBase *Call) {
991 return AAR ? AAR->getModRefBehavior(Call)
992 : CurrentResult.getModRefBehavior(Call);
993 }
994
995 FunctionModRefBehavior getModRefBehavior(const Function *F) {
996 return AAR ? AAR->getModRefBehavior(F) : CurrentResult.getModRefBehavior(F);
997 }
998
999 ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
1000 AAQueryInfo &AAQI) {
1001 return AAR ? AAR->getModRefInfo(Call, Loc, AAQI)
1002 : CurrentResult.getModRefInfo(Call, Loc, AAQI);
1003 }
1004
1005 ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
1006 AAQueryInfo &AAQI) {
1007 return AAR ? AAR->getModRefInfo(Call1, Call2, AAQI)
1008 : CurrentResult.getModRefInfo(Call1, Call2, AAQI);
1009 }
1010 };
1011
1012 explicit AAResultBase() = default;
1013
1014 // Provide all the copy and move constructors so that derived types aren't
1015 // constrained.
1016 AAResultBase(const AAResultBase &Arg) {}
1017 AAResultBase(AAResultBase &&Arg) {}
1018
1019 /// Get a proxy for the best AA result set to query at this time.
1020 ///
1021 /// When this result is part of a larger aggregation, this will proxy to that
1022 /// aggregation. When this result is used in isolation, it will just delegate
1023 /// back to the derived class's implementation.
1024 ///
1025 /// Note that callers of this need to take considerable care not to cause
1026 /// performance problems when they use this routine: in the case of a large
1027 /// number of alias analyses being aggregated, it can be expensive to walk
1028 /// back across the chain.
1029 AAResultsProxy getBestAAResults() { return AAResultsProxy(AAR, derived()); }
1030
1031public:
1032 AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
1033 AAQueryInfo &AAQI) {
1034 return MayAlias;
1035 }
1036
1037 bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
1038 bool OrLocal) {
1039 return false;
1040 }
1041
1042 ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
1043 return ModRefInfo::ModRef;
1044 }
1045
1046 FunctionModRefBehavior getModRefBehavior(const CallBase *Call) {
1047 return FMRB_UnknownModRefBehavior;
1048 }
1049
1050 FunctionModRefBehavior getModRefBehavior(const Function *F) {
1051 return FMRB_UnknownModRefBehavior;
1052 }
1053
1054 ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
1055 AAQueryInfo &AAQI) {
1056 return ModRefInfo::ModRef;
1057 }
1058
1059 ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
1060 AAQueryInfo &AAQI) {
1061 return ModRefInfo::ModRef;
1062 }
1063};
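A minimal sketch of the CRTP pattern described above: a hypothetical result type that refines only alias() and inherits the conservative defaults for every other query from AAResultBase:

class ExampleAAResult : public AAResultBase<ExampleAAResult> {
  friend AAResultBase<ExampleAAResult>;

public:
  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
                    AAQueryInfo &AAQI) {
    // The same pointer with the same size trivially must-alias; otherwise
    // fall back to the base class, which conservatively answers MayAlias.
    if (LocA.Ptr == LocB.Ptr && LocA.Size == LocB.Size)
      return MustAlias;
    return AAResultBase::alias(LocA, LocB, AAQI);
  }
};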
1064
1065/// Return true if this pointer is returned by a noalias function.
1066bool isNoAliasCall(const Value *V);
1067
1068/// Return true if this is an argument with the noalias attribute.
1069bool isNoAliasArgument(const Value *V);
1070
1071/// Return true if this pointer refers to a distinct and identifiable object.
1072/// This returns true for:
1073/// Global Variables and Functions (but not Global Aliases)
1074/// Allocas
1075/// ByVal and NoAlias Arguments
1076/// NoAlias returns (e.g. calls to malloc)
1077///
1078bool isIdentifiedObject(const Value *V);
1079
1080/// Return true if V is unambiguously identified at the function level.
1081/// Different IdentifiedFunctionLocals can't alias.
1082/// Further, an IdentifiedFunctionLocal cannot alias with any function
1083/// arguments other than itself, which is not necessarily true for
1084/// IdentifiedObjects.
1085bool isIdentifiedFunctionLocal(const Value *V);
1086
1087/// A manager for alias analyses.
1088///
1089/// This class can have analyses registered with it and, when run, it will run
1090/// all of them and aggregate their results into a single AA results interface
1091/// that dispatches across all of the alias analysis results available.
1092///
1093/// Note that the order in which analyses are registered is very significant.
1094/// That is the order in which the results will be aggregated and queried.
1095///
1096/// This manager effectively wraps the AnalysisManager for registering alias
1097/// analyses. When you register your alias analysis with this manager, it will
1098/// ensure the analysis itself is registered with its AnalysisManager.
1099///
1100/// The result of this analysis is only invalidated if one of the particular
1101/// aggregated AA results ends up being invalidated. This removes the need to
1102/// explicitly preserve the results of `AAManager`. Note that analyses should no
1103/// longer be registered once the `AAManager` is run.
1104class AAManager : public AnalysisInfoMixin<AAManager> {
1105public:
1106 using Result = AAResults;
1107
1108 /// Register a specific AA result.
1109 template <typename AnalysisT> void registerFunctionAnalysis() {
1110 ResultGetters.push_back(&getFunctionAAResultImpl<AnalysisT>);
1111 }
1112
1113 /// Register a specific AA result.
1114 template <typename AnalysisT> void registerModuleAnalysis() {
1115 ResultGetters.push_back(&getModuleAAResultImpl<AnalysisT>);
1116 }
1117
1118 Result run(Function &F, FunctionAnalysisManager &AM) {
1119 Result R(AM.getResult<TargetLibraryAnalysis>(F));
1120 for (auto &Getter : ResultGetters)
1121 (*Getter)(F, AM, R);
1122 return R;
1123 }
1124
1125private:
1126 friend AnalysisInfoMixin<AAManager>;
1127
1128 static AnalysisKey Key;
1129
1130 SmallVector<void (*)(Function &F, FunctionAnalysisManager &AM,
1131 AAResults &AAResults),
1132 4> ResultGetters;
1133
1134 template <typename AnalysisT>
1135 static void getFunctionAAResultImpl(Function &F,
1136 FunctionAnalysisManager &AM,
1137 AAResults &AAResults) {
1138 AAResults.addAAResult(AM.template getResult<AnalysisT>(F));
1139 AAResults.addAADependencyID(AnalysisT::ID());
1140 }
1141
1142 template <typename AnalysisT>
1143 static void getModuleAAResultImpl(Function &F, FunctionAnalysisManager &AM,
1144 AAResults &AAResults) {
1145 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1146 auto &MAM = MAMProxy.getManager();
1147 if (auto *R = MAM.template getCachedResult<AnalysisT>(*F.getParent())) {
1148 AAResults.addAAResult(*R);
1149 MAMProxy
1150 .template registerOuterAnalysisInvalidation<AnalysisT, AAManager>();
1151 }
1152 }
1153};
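A minimal sketch of registering analyses with the AAManager described above under the new pass manager; BasicAA and ScopedNoAliasAA are assumed to be available from their usual headers, and the registration order is the query order, per the note in the class comment:

static AAManager buildExampleAAPipeline() {
  AAManager AA;
  // Earlier registrations are queried first when the aggregated result runs.
  AA.registerFunctionAnalysis<BasicAA>();
  AA.registerFunctionAnalysis<ScopedNoAliasAA>();
  return AA;
}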
1154
1155/// A wrapper pass to provide the legacy pass manager access to a suitably
1156/// prepared AAResults object.
1157class AAResultsWrapperPass : public FunctionPass {
1158 std::unique_ptr<AAResults> AAR;
1159
1160public:
1161 static char ID;