File: | llvm/lib/Transforms/Scalar/LICM.cpp |
Warning: | line 1079, column 11 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// | ||||||
2 | // | ||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||
6 | // | ||||||
7 | //===----------------------------------------------------------------------===// | ||||||
8 | // | ||||||
9 | // This pass performs loop invariant code motion, attempting to remove as much | ||||||
10 | // code from the body of a loop as possible. It does this by either hoisting | ||||||
11 | // code into the preheader block, or by sinking code to the exit blocks if it is | ||||||
12 | // safe. This pass also promotes must-aliased memory locations in the loop to | ||||||
13 | // live in registers, thus hoisting and sinking "invariant" loads and stores. | ||||||
14 | // | ||||||
15 | // This pass uses alias analysis for two purposes: | ||||||
16 | // | ||||||
17 | // 1. Moving loop invariant loads and calls out of loops. If we can determine | ||||||
18 | // that a load or call inside of a loop never aliases anything stored to, | ||||||
19 | // we can hoist it or sink it like any other instruction. | ||||||
20 | // 2. Scalar Promotion of Memory - If there is a store instruction inside of | ||||||
21 | // the loop, we try to move the store to happen AFTER the loop instead of | ||||||
22 | // inside of the loop. This can only happen if a few conditions are true: | ||||||
23 | // A. The pointer stored through is loop invariant | ||||||
24 | // B. There are no stores or loads in the loop which _may_ alias the | ||||||
25 | // pointer. There are no calls in the loop which mod/ref the pointer. | ||||||
26 | // If these conditions are true, we can promote the loads and stores in the | ||||||
27 | // loop of the pointer to use a temporary alloca'd variable. We then use | ||||||
28 | // the SSAUpdater to construct the appropriate SSA form for the value. | ||||||
29 | // | ||||||
30 | //===----------------------------------------------------------------------===// | ||||||
31 | |||||||
32 | #include "llvm/Transforms/Scalar/LICM.h" | ||||||
33 | #include "llvm/ADT/SetOperations.h" | ||||||
34 | #include "llvm/ADT/Statistic.h" | ||||||
35 | #include "llvm/Analysis/AliasAnalysis.h" | ||||||
36 | #include "llvm/Analysis/AliasSetTracker.h" | ||||||
37 | #include "llvm/Analysis/BasicAliasAnalysis.h" | ||||||
38 | #include "llvm/Analysis/BlockFrequencyInfo.h" | ||||||
39 | #include "llvm/Analysis/CaptureTracking.h" | ||||||
40 | #include "llvm/Analysis/ConstantFolding.h" | ||||||
41 | #include "llvm/Analysis/GlobalsModRef.h" | ||||||
42 | #include "llvm/Analysis/GuardUtils.h" | ||||||
43 | #include "llvm/Analysis/LazyBlockFrequencyInfo.h" | ||||||
44 | #include "llvm/Analysis/Loads.h" | ||||||
45 | #include "llvm/Analysis/LoopInfo.h" | ||||||
46 | #include "llvm/Analysis/LoopIterator.h" | ||||||
47 | #include "llvm/Analysis/LoopPass.h" | ||||||
48 | #include "llvm/Analysis/MemoryBuiltins.h" | ||||||
49 | #include "llvm/Analysis/MemorySSA.h" | ||||||
50 | #include "llvm/Analysis/MemorySSAUpdater.h" | ||||||
51 | #include "llvm/Analysis/MustExecute.h" | ||||||
52 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | ||||||
53 | #include "llvm/Analysis/ScalarEvolution.h" | ||||||
54 | #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" | ||||||
55 | #include "llvm/Analysis/TargetLibraryInfo.h" | ||||||
56 | #include "llvm/Analysis/ValueTracking.h" | ||||||
57 | #include "llvm/IR/CFG.h" | ||||||
58 | #include "llvm/IR/Constants.h" | ||||||
59 | #include "llvm/IR/DataLayout.h" | ||||||
60 | #include "llvm/IR/DebugInfoMetadata.h" | ||||||
61 | #include "llvm/IR/DerivedTypes.h" | ||||||
62 | #include "llvm/IR/Dominators.h" | ||||||
63 | #include "llvm/IR/Instructions.h" | ||||||
64 | #include "llvm/IR/IntrinsicInst.h" | ||||||
65 | #include "llvm/IR/LLVMContext.h" | ||||||
66 | #include "llvm/IR/Metadata.h" | ||||||
67 | #include "llvm/IR/PatternMatch.h" | ||||||
68 | #include "llvm/IR/PredIteratorCache.h" | ||||||
69 | #include "llvm/InitializePasses.h" | ||||||
70 | #include "llvm/Support/CommandLine.h" | ||||||
71 | #include "llvm/Support/Debug.h" | ||||||
72 | #include "llvm/Support/raw_ostream.h" | ||||||
73 | #include "llvm/Transforms/Scalar.h" | ||||||
74 | #include "llvm/Transforms/Scalar/LoopPassManager.h" | ||||||
75 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" | ||||||
76 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | ||||||
77 | #include "llvm/Transforms/Utils/Local.h" | ||||||
78 | #include "llvm/Transforms/Utils/LoopUtils.h" | ||||||
79 | #include "llvm/Transforms/Utils/SSAUpdater.h" | ||||||
80 | #include <algorithm> | ||||||
81 | #include <utility> | ||||||
82 | using namespace llvm; | ||||||
83 | |||||||
84 | #define DEBUG_TYPE"licm" "licm" | ||||||
85 | |||||||
86 | STATISTIC(NumCreatedBlocks, "Number of blocks created")static llvm::Statistic NumCreatedBlocks = {"licm", "NumCreatedBlocks" , "Number of blocks created"}; | ||||||
87 | STATISTIC(NumClonedBranches, "Number of branches cloned")static llvm::Statistic NumClonedBranches = {"licm", "NumClonedBranches" , "Number of branches cloned"}; | ||||||
88 | STATISTIC(NumSunk, "Number of instructions sunk out of loop")static llvm::Statistic NumSunk = {"licm", "NumSunk", "Number of instructions sunk out of loop" }; | ||||||
89 | STATISTIC(NumHoisted, "Number of instructions hoisted out of loop")static llvm::Statistic NumHoisted = {"licm", "NumHoisted", "Number of instructions hoisted out of loop" }; | ||||||
90 | STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk")static llvm::Statistic NumMovedLoads = {"licm", "NumMovedLoads" , "Number of load insts hoisted or sunk"}; | ||||||
91 | STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk")static llvm::Statistic NumMovedCalls = {"licm", "NumMovedCalls" , "Number of call insts hoisted or sunk"}; | ||||||
92 | STATISTIC(NumPromoted, "Number of memory locations promoted to registers")static llvm::Statistic NumPromoted = {"licm", "NumPromoted", "Number of memory locations promoted to registers" }; | ||||||
93 | |||||||
94 | /// Memory promotion is enabled by default. | ||||||
95 | static cl::opt<bool> | ||||||
96 | DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), | ||||||
97 | cl::desc("Disable memory promotion in LICM pass")); | ||||||
98 | |||||||
99 | static cl::opt<bool> ControlFlowHoisting( | ||||||
100 | "licm-control-flow-hoisting", cl::Hidden, cl::init(false), | ||||||
101 | cl::desc("Enable control flow (and PHI) hoisting in LICM")); | ||||||
102 | |||||||
103 | static cl::opt<unsigned> HoistSinkColdnessThreshold( | ||||||
104 | "licm-coldness-threshold", cl::Hidden, cl::init(4), | ||||||
105 | cl::desc("Relative coldness Threshold of hoisting/sinking destination " | ||||||
106 | "block for LICM to be considered beneficial")); | ||||||
107 | |||||||
108 | static cl::opt<uint32_t> MaxNumUsesTraversed( | ||||||
109 | "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), | ||||||
110 | cl::desc("Max num uses visited for identifying load " | ||||||
111 | "invariance in loop using invariant start (default = 8)")); | ||||||
112 | |||||||
113 | // Default value of zero implies we use the regular alias set tracker mechanism | ||||||
114 | // instead of the cross product using AA to identify aliasing of the memory | ||||||
115 | // location we are interested in. | ||||||
116 | static cl::opt<int> | ||||||
117 | LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), | ||||||
118 | cl::desc("How many instruction to cross product using AA")); | ||||||
119 | |||||||
120 | // Experimental option to allow imprecision in LICM in pathological cases, in | ||||||
121 | // exchange for faster compile. This is to be removed if MemorySSA starts to | ||||||
122 | // address the same issue. This flag applies only when LICM uses MemorySSA | ||||||
123 | // instead on AliasSetTracker. LICM calls MemorySSAWalker's | ||||||
124 | // getClobberingMemoryAccess, up to the value of the Cap, getting perfect | ||||||
125 | // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess, | ||||||
126 | // which may not be precise, since optimizeUses is capped. The result is | ||||||
127 | // correct, but we may not get as "far up" as possible to get which access is | ||||||
128 | // clobbering the one queried. | ||||||
129 | cl::opt<unsigned> llvm::SetLicmMssaOptCap( | ||||||
130 | "licm-mssa-optimization-cap", cl::init(100), cl::Hidden, | ||||||
131 | cl::desc("Enable imprecision in LICM in pathological cases, in exchange " | ||||||
132 | "for faster compile. Caps the MemorySSA clobbering calls.")); | ||||||
133 | |||||||
134 | // Experimentally, memory promotion carries less importance than sinking and | ||||||
135 | // hoisting. Limit when we do promotion when using MemorySSA, in order to save | ||||||
136 | // compile time. | ||||||
137 | cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap( | ||||||
138 | "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden, | ||||||
139 | cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no " | ||||||
140 | "effect. When MSSA in LICM is enabled, then this is the maximum " | ||||||
141 | "number of accesses allowed to be present in a loop in order to " | ||||||
142 | "enable memory promotion.")); | ||||||
143 | |||||||
144 | static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); | ||||||
145 | static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, | ||||||
146 | const LoopSafetyInfo *SafetyInfo, | ||||||
147 | TargetTransformInfo *TTI, bool &FreeInLoop); | ||||||
148 | static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, | ||||||
149 | BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, | ||||||
150 | MemorySSAUpdater *MSSAU, ScalarEvolution *SE, | ||||||
151 | OptimizationRemarkEmitter *ORE); | ||||||
152 | static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, | ||||||
153 | BlockFrequencyInfo *BFI, const Loop *CurLoop, | ||||||
154 | ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, | ||||||
155 | OptimizationRemarkEmitter *ORE); | ||||||
156 | static bool isSafeToExecuteUnconditionally(Instruction &Inst, | ||||||
157 | const DominatorTree *DT, | ||||||
158 | const Loop *CurLoop, | ||||||
159 | const LoopSafetyInfo *SafetyInfo, | ||||||
160 | OptimizationRemarkEmitter *ORE, | ||||||
161 | const Instruction *CtxI = nullptr); | ||||||
162 | static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, | ||||||
163 | AliasSetTracker *CurAST, Loop *CurLoop, | ||||||
164 | AAResults *AA); | ||||||
165 | static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, | ||||||
166 | Loop *CurLoop, | ||||||
167 | SinkAndHoistLICMFlags &Flags); | ||||||
168 | static Instruction *cloneInstructionInExitBlock( | ||||||
169 | Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, | ||||||
170 | const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU); | ||||||
171 | |||||||
172 | static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, | ||||||
173 | AliasSetTracker *AST, MemorySSAUpdater *MSSAU); | ||||||
174 | |||||||
175 | static void moveInstructionBefore(Instruction &I, Instruction &Dest, | ||||||
176 | ICFLoopSafetyInfo &SafetyInfo, | ||||||
177 | MemorySSAUpdater *MSSAU, ScalarEvolution *SE); | ||||||
178 | |||||||
179 | namespace { | ||||||
180 | struct LoopInvariantCodeMotion { | ||||||
181 | bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT, | ||||||
182 | BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, | ||||||
183 | TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA, | ||||||
184 | OptimizationRemarkEmitter *ORE); | ||||||
185 | |||||||
186 | LoopInvariantCodeMotion(unsigned LicmMssaOptCap, | ||||||
187 | unsigned LicmMssaNoAccForPromotionCap) | ||||||
188 | : LicmMssaOptCap(LicmMssaOptCap), | ||||||
189 | LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {} | ||||||
190 | |||||||
191 | private: | ||||||
192 | unsigned LicmMssaOptCap; | ||||||
193 | unsigned LicmMssaNoAccForPromotionCap; | ||||||
194 | |||||||
195 | std::unique_ptr<AliasSetTracker> | ||||||
196 | collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AAResults *AA); | ||||||
197 | std::unique_ptr<AliasSetTracker> | ||||||
198 | collectAliasInfoForLoopWithMSSA(Loop *L, AAResults *AA, | ||||||
199 | MemorySSAUpdater *MSSAU); | ||||||
200 | }; | ||||||
201 | |||||||
202 | struct LegacyLICMPass : public LoopPass { | ||||||
203 | static char ID; // Pass identification, replacement for typeid | ||||||
204 | LegacyLICMPass( | ||||||
205 | unsigned LicmMssaOptCap = SetLicmMssaOptCap, | ||||||
206 | unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap) | ||||||
207 | : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) { | ||||||
208 | initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry()); | ||||||
209 | } | ||||||
210 | |||||||
211 | bool runOnLoop(Loop *L, LPPassManager &LPM) override { | ||||||
212 | if (skipLoop(L)) | ||||||
213 | return false; | ||||||
214 | |||||||
215 | auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); | ||||||
216 | MemorySSA *MSSA = EnableMSSALoopDependency | ||||||
217 | ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA()) | ||||||
218 | : nullptr; | ||||||
219 | bool hasProfileData = L->getHeader()->getParent()->hasProfileData(); | ||||||
220 | BlockFrequencyInfo *BFI = | ||||||
221 | hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() | ||||||
222 | : nullptr; | ||||||
223 | // For the old PM, we can't use OptimizationRemarkEmitter as an analysis | ||||||
224 | // pass. Function analyses need to be preserved across loop transformations | ||||||
225 | // but ORE cannot be preserved (see comment before the pass definition). | ||||||
226 | OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); | ||||||
227 | return LICM.runOnLoop( | ||||||
228 | L, &getAnalysis<AAResultsWrapperPass>().getAAResults(), | ||||||
229 | &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), | ||||||
230 | &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), BFI, | ||||||
231 | &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI( | ||||||
232 | *L->getHeader()->getParent()), | ||||||
233 | &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( | ||||||
234 | *L->getHeader()->getParent()), | ||||||
235 | SE ? &SE->getSE() : nullptr, MSSA, &ORE); | ||||||
236 | } | ||||||
237 | |||||||
238 | /// This transformation requires natural loop information & requires that | ||||||
239 | /// loop preheaders be inserted into the CFG... | ||||||
240 | /// | ||||||
241 | void getAnalysisUsage(AnalysisUsage &AU) const override { | ||||||
242 | AU.addPreserved<DominatorTreeWrapperPass>(); | ||||||
243 | AU.addPreserved<LoopInfoWrapperPass>(); | ||||||
244 | AU.addRequired<TargetLibraryInfoWrapperPass>(); | ||||||
245 | if (EnableMSSALoopDependency) { | ||||||
246 | AU.addRequired<MemorySSAWrapperPass>(); | ||||||
247 | AU.addPreserved<MemorySSAWrapperPass>(); | ||||||
248 | } | ||||||
249 | AU.addRequired<TargetTransformInfoWrapperPass>(); | ||||||
250 | getLoopAnalysisUsage(AU); | ||||||
251 | LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); | ||||||
252 | AU.addPreserved<LazyBlockFrequencyInfoPass>(); | ||||||
253 | AU.addPreserved<LazyBranchProbabilityInfoPass>(); | ||||||
254 | } | ||||||
255 | |||||||
256 | private: | ||||||
257 | LoopInvariantCodeMotion LICM; | ||||||
258 | }; | ||||||
259 | } // namespace | ||||||
260 | |||||||
261 | PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, | ||||||
262 | LoopStandardAnalysisResults &AR, LPMUpdater &) { | ||||||
263 | // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis | ||||||
264 | // pass. Function analyses need to be preserved across loop transformations | ||||||
265 | // but ORE cannot be preserved (see comment before the pass definition). | ||||||
266 | OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); | ||||||
267 | |||||||
268 | LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); | ||||||
269 | if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI, | ||||||
270 | &AR.SE, AR.MSSA, &ORE)) | ||||||
271 | return PreservedAnalyses::all(); | ||||||
272 | |||||||
273 | auto PA = getLoopPassPreservedAnalyses(); | ||||||
274 | |||||||
275 | PA.preserve<DominatorTreeAnalysis>(); | ||||||
276 | PA.preserve<LoopAnalysis>(); | ||||||
277 | if (AR.MSSA) | ||||||
278 | PA.preserve<MemorySSAAnalysis>(); | ||||||
279 | |||||||
280 | return PA; | ||||||
281 | } | ||||||
282 | |||||||
283 | char LegacyLICMPass::ID = 0; | ||||||
284 | INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",static void *initializeLegacyLICMPassPassOnce(PassRegistry & Registry) { | ||||||
285 | false, false)static void *initializeLegacyLICMPassPassOnce(PassRegistry & Registry) { | ||||||
286 | INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry); | ||||||
287 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry); | ||||||
288 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry); | ||||||
289 | INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)initializeMemorySSAWrapperPassPass(Registry); | ||||||
290 | INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)initializeLazyBFIPassPass(Registry); | ||||||
291 | INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,PassInfo *PI = new PassInfo( "Loop Invariant Code Motion", "licm" , &LegacyLICMPass::ID, PassInfo::NormalCtor_t(callDefaultCtor <LegacyLICMPass>), false, false); Registry.registerPass (*PI, true); return PI; } static llvm::once_flag InitializeLegacyLICMPassPassFlag ; void llvm::initializeLegacyLICMPassPass(PassRegistry &Registry ) { llvm::call_once(InitializeLegacyLICMPassPassFlag, initializeLegacyLICMPassPassOnce , std::ref(Registry)); } | ||||||
292 | false)PassInfo *PI = new PassInfo( "Loop Invariant Code Motion", "licm" , &LegacyLICMPass::ID, PassInfo::NormalCtor_t(callDefaultCtor <LegacyLICMPass>), false, false); Registry.registerPass (*PI, true); return PI; } static llvm::once_flag InitializeLegacyLICMPassPassFlag ; void llvm::initializeLegacyLICMPassPass(PassRegistry &Registry ) { llvm::call_once(InitializeLegacyLICMPassPassFlag, initializeLegacyLICMPassPassOnce , std::ref(Registry)); } | ||||||
293 | |||||||
294 | Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } | ||||||
295 | Pass *llvm::createLICMPass(unsigned LicmMssaOptCap, | ||||||
296 | unsigned LicmMssaNoAccForPromotionCap) { | ||||||
297 | return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); | ||||||
298 | } | ||||||
299 | |||||||
300 | /// Hoist expressions out of the specified loop. Note, alias info for inner | ||||||
301 | /// loop is not preserved so it is not a good idea to run LICM multiple | ||||||
302 | /// times on one loop. | ||||||
303 | bool LoopInvariantCodeMotion::runOnLoop( | ||||||
304 | Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT, | ||||||
305 | BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, | ||||||
306 | ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE) { | ||||||
307 | bool Changed = false; | ||||||
308 | |||||||
309 | assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.")((L->isLCSSAForm(*DT) && "Loop is not in LCSSA form." ) ? static_cast<void> (0) : __assert_fail ("L->isLCSSAForm(*DT) && \"Loop is not in LCSSA form.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 309, __PRETTY_FUNCTION__)); | ||||||
310 | |||||||
311 | // If this loop has metadata indicating that LICM is not to be performed then | ||||||
312 | // just exit. | ||||||
313 | if (hasDisableLICMTransformsHint(L)) { | ||||||
314 | return false; | ||||||
315 | } | ||||||
316 | |||||||
317 | std::unique_ptr<AliasSetTracker> CurAST; | ||||||
318 | std::unique_ptr<MemorySSAUpdater> MSSAU; | ||||||
319 | bool NoOfMemAccTooLarge = false; | ||||||
320 | unsigned LicmMssaOptCounter = 0; | ||||||
321 | |||||||
322 | if (!MSSA) { | ||||||
323 | LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM: Using Alias Set Tracker.\n" ; } } while (false); | ||||||
324 | CurAST = collectAliasInfoForLoop(L, LI, AA); | ||||||
325 | } else { | ||||||
326 | LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM: Using MemorySSA.\n"; } } while (false); | ||||||
327 | MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); | ||||||
328 | |||||||
329 | unsigned AccessCapCount = 0; | ||||||
330 | for (auto *BB : L->getBlocks()) { | ||||||
331 | if (auto *Accesses = MSSA->getBlockAccesses(BB)) { | ||||||
332 | for (const auto &MA : *Accesses) { | ||||||
333 | (void)MA; | ||||||
334 | AccessCapCount++; | ||||||
335 | if (AccessCapCount > LicmMssaNoAccForPromotionCap) { | ||||||
336 | NoOfMemAccTooLarge = true; | ||||||
337 | break; | ||||||
338 | } | ||||||
339 | } | ||||||
340 | } | ||||||
341 | if (NoOfMemAccTooLarge) | ||||||
342 | break; | ||||||
343 | } | ||||||
344 | } | ||||||
345 | |||||||
346 | // Get the preheader block to move instructions into... | ||||||
347 | BasicBlock *Preheader = L->getLoopPreheader(); | ||||||
348 | |||||||
349 | // Compute loop safety information. | ||||||
350 | ICFLoopSafetyInfo SafetyInfo; | ||||||
351 | SafetyInfo.computeLoopSafetyInfo(L); | ||||||
352 | |||||||
353 | // We want to visit all of the instructions in this loop... that are not parts | ||||||
354 | // of our subloops (they have already had their invariants hoisted out of | ||||||
355 | // their loop, into this loop, so there is no need to process the BODIES of | ||||||
356 | // the subloops). | ||||||
357 | // | ||||||
358 | // Traverse the body of the loop in depth first order on the dominator tree so | ||||||
359 | // that we are guaranteed to see definitions before we see uses. This allows | ||||||
360 | // us to sink instructions in one pass, without iteration. After sinking | ||||||
361 | // instructions, we perform another pass to hoist them out of the loop. | ||||||
362 | SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter, | ||||||
363 | LicmMssaOptCap, LicmMssaNoAccForPromotionCap, | ||||||
364 | /*IsSink=*/true}; | ||||||
365 | if (L->hasDedicatedExits()) | ||||||
366 | Changed |= | ||||||
367 | sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L, | ||||||
368 | CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE); | ||||||
369 | Flags.IsSink = false; | ||||||
370 | if (Preheader) | ||||||
371 | Changed |= | ||||||
372 | hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L, | ||||||
373 | CurAST.get(), MSSAU.get(), SE, &SafetyInfo, Flags, ORE); | ||||||
374 | |||||||
375 | // Now that all loop invariants have been removed from the loop, promote any | ||||||
376 | // memory references to scalars that we can. | ||||||
377 | // Don't sink stores from loops without dedicated block exits. Exits | ||||||
378 | // containing indirect branches are not transformed by loop simplify, | ||||||
379 | // make sure we catch that. An additional load may be generated in the | ||||||
380 | // preheader for SSA updater, so also avoid sinking when no preheader | ||||||
381 | // is available. | ||||||
382 | if (!DisablePromotion && Preheader && L->hasDedicatedExits() && | ||||||
383 | !NoOfMemAccTooLarge) { | ||||||
384 | // Figure out the loop exits and their insertion points | ||||||
385 | SmallVector<BasicBlock *, 8> ExitBlocks; | ||||||
386 | L->getUniqueExitBlocks(ExitBlocks); | ||||||
387 | |||||||
388 | // We can't insert into a catchswitch. | ||||||
389 | bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) { | ||||||
390 | return isa<CatchSwitchInst>(Exit->getTerminator()); | ||||||
391 | }); | ||||||
392 | |||||||
393 | if (!HasCatchSwitch) { | ||||||
394 | SmallVector<Instruction *, 8> InsertPts; | ||||||
395 | SmallVector<MemoryAccess *, 8> MSSAInsertPts; | ||||||
396 | InsertPts.reserve(ExitBlocks.size()); | ||||||
397 | if (MSSAU) | ||||||
398 | MSSAInsertPts.reserve(ExitBlocks.size()); | ||||||
399 | for (BasicBlock *ExitBlock : ExitBlocks) { | ||||||
400 | InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); | ||||||
401 | if (MSSAU) | ||||||
402 | MSSAInsertPts.push_back(nullptr); | ||||||
403 | } | ||||||
404 | |||||||
405 | PredIteratorCache PIC; | ||||||
406 | |||||||
407 | bool Promoted = false; | ||||||
408 | |||||||
409 | // Build an AST using MSSA. | ||||||
410 | if (!CurAST.get()) | ||||||
411 | CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get()); | ||||||
412 | |||||||
413 | // Loop over all of the alias sets in the tracker object. | ||||||
414 | for (AliasSet &AS : *CurAST) { | ||||||
415 | // We can promote this alias set if it has a store, if it is a "Must" | ||||||
416 | // alias set, if the pointer is loop invariant, and if we are not | ||||||
417 | // eliminating any volatile loads or stores. | ||||||
418 | if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || | ||||||
419 | !L->isLoopInvariant(AS.begin()->getValue())) | ||||||
420 | continue; | ||||||
421 | |||||||
422 | assert(((!AS.empty() && "Must alias set should have at least one pointer element in it!" ) ? static_cast<void> (0) : __assert_fail ("!AS.empty() && \"Must alias set should have at least one pointer element in it!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 424, __PRETTY_FUNCTION__)) | ||||||
423 | !AS.empty() &&((!AS.empty() && "Must alias set should have at least one pointer element in it!" ) ? static_cast<void> (0) : __assert_fail ("!AS.empty() && \"Must alias set should have at least one pointer element in it!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 424, __PRETTY_FUNCTION__)) | ||||||
424 | "Must alias set should have at least one pointer element in it!")((!AS.empty() && "Must alias set should have at least one pointer element in it!" ) ? static_cast<void> (0) : __assert_fail ("!AS.empty() && \"Must alias set should have at least one pointer element in it!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 424, __PRETTY_FUNCTION__)); | ||||||
425 | |||||||
426 | SmallSetVector<Value *, 8> PointerMustAliases; | ||||||
427 | for (const auto &ASI : AS) | ||||||
428 | PointerMustAliases.insert(ASI.getValue()); | ||||||
429 | |||||||
430 | Promoted |= promoteLoopAccessesToScalars( | ||||||
431 | PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, | ||||||
432 | DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE); | ||||||
433 | } | ||||||
434 | |||||||
435 | // Once we have promoted values across the loop body we have to | ||||||
436 | // recursively reform LCSSA as any nested loop may now have values defined | ||||||
437 | // within the loop used in the outer loop. | ||||||
438 | // FIXME: This is really heavy handed. It would be a bit better to use an | ||||||
439 | // SSAUpdater strategy during promotion that was LCSSA aware and reformed | ||||||
440 | // it as it went. | ||||||
441 | if (Promoted) | ||||||
442 | formLCSSARecursively(*L, *DT, LI, SE); | ||||||
443 | |||||||
444 | Changed |= Promoted; | ||||||
445 | } | ||||||
446 | } | ||||||
447 | |||||||
448 | // Check that neither this loop nor its parent have had LCSSA broken. LICM is | ||||||
449 | // specifically moving instructions across the loop boundary and so it is | ||||||
450 | // especially in need of sanity checking here. | ||||||
451 | assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!")((L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!" ) ? static_cast<void> (0) : __assert_fail ("L->isLCSSAForm(*DT) && \"Loop not left in LCSSA form after LICM!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 451, __PRETTY_FUNCTION__)); | ||||||
452 | assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&(((L->isOutermost() || L->getParentLoop()->isLCSSAForm (*DT)) && "Parent loop not left in LCSSA form after LICM!" ) ? static_cast<void> (0) : __assert_fail ("(L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) && \"Parent loop not left in LCSSA form after LICM!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 453, __PRETTY_FUNCTION__)) | ||||||
453 | "Parent loop not left in LCSSA form after LICM!")(((L->isOutermost() || L->getParentLoop()->isLCSSAForm (*DT)) && "Parent loop not left in LCSSA form after LICM!" ) ? static_cast<void> (0) : __assert_fail ("(L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) && \"Parent loop not left in LCSSA form after LICM!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 453, __PRETTY_FUNCTION__)); | ||||||
454 | |||||||
455 | if (MSSAU.get() && VerifyMemorySSA) | ||||||
456 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
457 | |||||||
458 | if (Changed && SE) | ||||||
459 | SE->forgetLoopDispositions(L); | ||||||
460 | return Changed; | ||||||
461 | } | ||||||
462 | |||||||
463 | /// Walk the specified region of the CFG (defined by all blocks dominated by | ||||||
464 | /// the specified block, and that are in the current loop) in reverse depth | ||||||
465 | /// first order w.r.t the DominatorTree. This allows us to visit uses before | ||||||
466 | /// definitions, allowing us to sink a loop body in one pass without iteration. | ||||||
467 | /// | ||||||
468 | bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, | ||||||
469 | DominatorTree *DT, BlockFrequencyInfo *BFI, | ||||||
470 | TargetLibraryInfo *TLI, TargetTransformInfo *TTI, | ||||||
471 | Loop *CurLoop, AliasSetTracker *CurAST, | ||||||
472 | MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, | ||||||
473 | SinkAndHoistLICMFlags &Flags, | ||||||
474 | OptimizationRemarkEmitter *ORE) { | ||||||
475 | |||||||
476 | // Verify inputs. | ||||||
477 | assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&((N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected input to sinkRegion." ) ? static_cast<void> (0) : __assert_fail ("N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected input to sinkRegion.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 479, __PRETTY_FUNCTION__)) | ||||||
478 | CurLoop != nullptr && SafetyInfo != nullptr &&((N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected input to sinkRegion." ) ? static_cast<void> (0) : __assert_fail ("N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected input to sinkRegion.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 479, __PRETTY_FUNCTION__)) | ||||||
479 | "Unexpected input to sinkRegion.")((N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected input to sinkRegion." ) ? static_cast<void> (0) : __assert_fail ("N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected input to sinkRegion.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 479, __PRETTY_FUNCTION__)); | ||||||
480 | assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&((((CurAST != nullptr) ^ (MSSAU != nullptr)) && "Either AliasSetTracker or MemorySSA should be initialized." ) ? static_cast<void> (0) : __assert_fail ("((CurAST != nullptr) ^ (MSSAU != nullptr)) && \"Either AliasSetTracker or MemorySSA should be initialized.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 481, __PRETTY_FUNCTION__)) | ||||||
481 | "Either AliasSetTracker or MemorySSA should be initialized.")((((CurAST != nullptr) ^ (MSSAU != nullptr)) && "Either AliasSetTracker or MemorySSA should be initialized." ) ? static_cast<void> (0) : __assert_fail ("((CurAST != nullptr) ^ (MSSAU != nullptr)) && \"Either AliasSetTracker or MemorySSA should be initialized.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 481, __PRETTY_FUNCTION__)); | ||||||
482 | |||||||
483 | // We want to visit children before parents. We will enque all the parents | ||||||
484 | // before their children in the worklist and process the worklist in reverse | ||||||
485 | // order. | ||||||
486 | SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop); | ||||||
487 | |||||||
488 | bool Changed = false; | ||||||
489 | for (DomTreeNode *DTN : reverse(Worklist)) { | ||||||
490 | BasicBlock *BB = DTN->getBlock(); | ||||||
491 | // Only need to process the contents of this block if it is not part of a | ||||||
492 | // subloop (which would already have been processed). | ||||||
493 | if (inSubLoop(BB, CurLoop, LI)) | ||||||
494 | continue; | ||||||
495 | |||||||
496 | for (BasicBlock::iterator II = BB->end(); II != BB->begin();) { | ||||||
497 | Instruction &I = *--II; | ||||||
498 | |||||||
499 | // If the instruction is dead, we would try to sink it because it isn't | ||||||
500 | // used in the loop, instead, just delete it. | ||||||
501 | if (isInstructionTriviallyDead(&I, TLI)) { | ||||||
502 | LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM deleting dead inst: " << I << '\n'; } } while (false); | ||||||
503 | salvageKnowledge(&I); | ||||||
504 | salvageDebugInfo(I); | ||||||
505 | ++II; | ||||||
506 | eraseInstruction(I, *SafetyInfo, CurAST, MSSAU); | ||||||
507 | Changed = true; | ||||||
508 | continue; | ||||||
509 | } | ||||||
510 | |||||||
511 | // Check to see if we can sink this instruction to the exit blocks | ||||||
512 | // of the loop. We can do this if the all users of the instruction are | ||||||
513 | // outside of the loop. In this case, it doesn't even matter if the | ||||||
514 | // operands of the instruction are loop invariant. | ||||||
515 | // | ||||||
516 | bool FreeInLoop = false; | ||||||
517 | if (!I.mayHaveSideEffects() && | ||||||
518 | isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && | ||||||
519 | canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags, | ||||||
520 | ORE)) { | ||||||
521 | if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) { | ||||||
522 | if (!FreeInLoop) { | ||||||
523 | ++II; | ||||||
524 | salvageDebugInfo(I); | ||||||
525 | eraseInstruction(I, *SafetyInfo, CurAST, MSSAU); | ||||||
526 | } | ||||||
527 | Changed = true; | ||||||
528 | } | ||||||
529 | } | ||||||
530 | } | ||||||
531 | } | ||||||
532 | if (MSSAU && VerifyMemorySSA) | ||||||
533 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
534 | return Changed; | ||||||
535 | } | ||||||
536 | |||||||
537 | namespace { | ||||||
538 | // This is a helper class for hoistRegion to make it able to hoist control flow | ||||||
539 | // in order to be able to hoist phis. The way this works is that we initially | ||||||
540 | // start hoisting to the loop preheader, and when we see a loop invariant branch | ||||||
541 | // we make note of this. When we then come to hoist an instruction that's | ||||||
542 | // conditional on such a branch we duplicate the branch and the relevant control | ||||||
543 | // flow, then hoist the instruction into the block corresponding to its original | ||||||
544 | // block in the duplicated control flow. | ||||||
545 | class ControlFlowHoister { | ||||||
546 | private: | ||||||
547 | // Information about the loop we are hoisting from | ||||||
548 | LoopInfo *LI; | ||||||
549 | DominatorTree *DT; | ||||||
550 | Loop *CurLoop; | ||||||
551 | MemorySSAUpdater *MSSAU; | ||||||
552 | |||||||
553 | // A map of blocks in the loop to the block their instructions will be hoisted | ||||||
554 | // to. | ||||||
555 | DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap; | ||||||
556 | |||||||
557 | // The branches that we can hoist, mapped to the block that marks a | ||||||
558 | // convergence point of their control flow. | ||||||
559 | DenseMap<BranchInst *, BasicBlock *> HoistableBranches; | ||||||
560 | |||||||
561 | public: | ||||||
562 | ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop, | ||||||
563 | MemorySSAUpdater *MSSAU) | ||||||
564 | : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {} | ||||||
565 | |||||||
566 | void registerPossiblyHoistableBranch(BranchInst *BI) { | ||||||
567 | // We can only hoist conditional branches with loop invariant operands. | ||||||
568 | if (!ControlFlowHoisting || !BI->isConditional() || | ||||||
569 | !CurLoop->hasLoopInvariantOperands(BI)) | ||||||
570 | return; | ||||||
571 | |||||||
572 | // The branch destinations need to be in the loop, and we don't gain | ||||||
573 | // anything by duplicating conditional branches with duplicate successors, | ||||||
574 | // as it's essentially the same as an unconditional branch. | ||||||
575 | BasicBlock *TrueDest = BI->getSuccessor(0); | ||||||
576 | BasicBlock *FalseDest = BI->getSuccessor(1); | ||||||
577 | if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) || | ||||||
578 | TrueDest == FalseDest) | ||||||
579 | return; | ||||||
580 | |||||||
581 | // We can hoist BI if one branch destination is the successor of the other, | ||||||
582 | // or both have common successor which we check by seeing if the | ||||||
583 | // intersection of their successors is non-empty. | ||||||
584 | // TODO: This could be expanded to allowing branches where both ends | ||||||
585 | // eventually converge to a single block. | ||||||
586 | SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc; | ||||||
587 | TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest)); | ||||||
588 | FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest)); | ||||||
589 | BasicBlock *CommonSucc = nullptr; | ||||||
590 | if (TrueDestSucc.count(FalseDest)) { | ||||||
591 | CommonSucc = FalseDest; | ||||||
592 | } else if (FalseDestSucc.count(TrueDest)) { | ||||||
593 | CommonSucc = TrueDest; | ||||||
594 | } else { | ||||||
595 | set_intersect(TrueDestSucc, FalseDestSucc); | ||||||
596 | // If there's one common successor use that. | ||||||
597 | if (TrueDestSucc.size() == 1) | ||||||
598 | CommonSucc = *TrueDestSucc.begin(); | ||||||
599 | // If there's more than one pick whichever appears first in the block list | ||||||
600 | // (we can't use the value returned by TrueDestSucc.begin() as it's | ||||||
601 | // unpredicatable which element gets returned). | ||||||
602 | else if (!TrueDestSucc.empty()) { | ||||||
603 | Function *F = TrueDest->getParent(); | ||||||
604 | auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); }; | ||||||
605 | auto It = std::find_if(F->begin(), F->end(), IsSucc); | ||||||
606 | assert(It != F->end() && "Could not find successor in function")((It != F->end() && "Could not find successor in function" ) ? static_cast<void> (0) : __assert_fail ("It != F->end() && \"Could not find successor in function\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 606, __PRETTY_FUNCTION__)); | ||||||
607 | CommonSucc = &*It; | ||||||
608 | } | ||||||
609 | } | ||||||
610 | // The common successor has to be dominated by the branch, as otherwise | ||||||
611 | // there will be some other path to the successor that will not be | ||||||
612 | // controlled by this branch so any phi we hoist would be controlled by the | ||||||
613 | // wrong condition. This also takes care of avoiding hoisting of loop back | ||||||
614 | // edges. | ||||||
615 | // TODO: In some cases this could be relaxed if the successor is dominated | ||||||
616 | // by another block that's been hoisted and we can guarantee that the | ||||||
617 | // control flow has been replicated exactly. | ||||||
618 | if (CommonSucc && DT->dominates(BI, CommonSucc)) | ||||||
619 | HoistableBranches[BI] = CommonSucc; | ||||||
620 | } | ||||||
621 | |||||||
622 | bool canHoistPHI(PHINode *PN) { | ||||||
623 | // The phi must have loop invariant operands. | ||||||
624 | if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN)) | ||||||
625 | return false; | ||||||
626 | // We can hoist phis if the block they are in is the target of hoistable | ||||||
627 | // branches which cover all of the predecessors of the block. | ||||||
628 | SmallPtrSet<BasicBlock *, 8> PredecessorBlocks; | ||||||
629 | BasicBlock *BB = PN->getParent(); | ||||||
630 | for (BasicBlock *PredBB : predecessors(BB)) | ||||||
631 | PredecessorBlocks.insert(PredBB); | ||||||
632 | // If we have less predecessor blocks than predecessors then the phi will | ||||||
633 | // have more than one incoming value for the same block which we can't | ||||||
634 | // handle. | ||||||
635 | // TODO: This could be handled be erasing some of the duplicate incoming | ||||||
636 | // values. | ||||||
637 | if (PredecessorBlocks.size() != pred_size(BB)) | ||||||
638 | return false; | ||||||
639 | for (auto &Pair : HoistableBranches) { | ||||||
640 | if (Pair.second == BB) { | ||||||
641 | // Which blocks are predecessors via this branch depends on if the | ||||||
642 | // branch is triangle-like or diamond-like. | ||||||
643 | if (Pair.first->getSuccessor(0) == BB) { | ||||||
644 | PredecessorBlocks.erase(Pair.first->getParent()); | ||||||
645 | PredecessorBlocks.erase(Pair.first->getSuccessor(1)); | ||||||
646 | } else if (Pair.first->getSuccessor(1) == BB) { | ||||||
647 | PredecessorBlocks.erase(Pair.first->getParent()); | ||||||
648 | PredecessorBlocks.erase(Pair.first->getSuccessor(0)); | ||||||
649 | } else { | ||||||
650 | PredecessorBlocks.erase(Pair.first->getSuccessor(0)); | ||||||
651 | PredecessorBlocks.erase(Pair.first->getSuccessor(1)); | ||||||
652 | } | ||||||
653 | } | ||||||
654 | } | ||||||
655 | // PredecessorBlocks will now be empty if for every predecessor of BB we | ||||||
656 | // found a hoistable branch source. | ||||||
657 | return PredecessorBlocks.empty(); | ||||||
658 | } | ||||||
659 | |||||||
660 | BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) { | ||||||
661 | if (!ControlFlowHoisting) | ||||||
662 | return CurLoop->getLoopPreheader(); | ||||||
663 | // If BB has already been hoisted, return that | ||||||
664 | if (HoistDestinationMap.count(BB)) | ||||||
665 | return HoistDestinationMap[BB]; | ||||||
666 | |||||||
667 | // Check if this block is conditional based on a pending branch | ||||||
668 | auto HasBBAsSuccessor = | ||||||
669 | [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) { | ||||||
670 | return BB != Pair.second && (Pair.first->getSuccessor(0) == BB || | ||||||
671 | Pair.first->getSuccessor(1) == BB); | ||||||
672 | }; | ||||||
673 | auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(), | ||||||
674 | HasBBAsSuccessor); | ||||||
675 | |||||||
676 | // If not involved in a pending branch, hoist to preheader | ||||||
677 | BasicBlock *InitialPreheader = CurLoop->getLoopPreheader(); | ||||||
678 | if (It == HoistableBranches.end()) { | ||||||
679 | LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM using " << InitialPreheader ->getName() << " as hoist destination for " << BB->getName() << "\n"; } } while (false) | ||||||
680 | << " as hoist destination for " << BB->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM using " << InitialPreheader ->getName() << " as hoist destination for " << BB->getName() << "\n"; } } while (false) | ||||||
681 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM using " << InitialPreheader ->getName() << " as hoist destination for " << BB->getName() << "\n"; } } while (false); | ||||||
682 | HoistDestinationMap[BB] = InitialPreheader; | ||||||
683 | return InitialPreheader; | ||||||
684 | } | ||||||
685 | BranchInst *BI = It->first; | ||||||
686 | assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==((std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor ) == HoistableBranches.end() && "BB is expected to be the target of at most one branch" ) ? static_cast<void> (0) : __assert_fail ("std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) == HoistableBranches.end() && \"BB is expected to be the target of at most one branch\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 688, __PRETTY_FUNCTION__)) | ||||||
687 | HoistableBranches.end() &&((std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor ) == HoistableBranches.end() && "BB is expected to be the target of at most one branch" ) ? static_cast<void> (0) : __assert_fail ("std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) == HoistableBranches.end() && \"BB is expected to be the target of at most one branch\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 688, __PRETTY_FUNCTION__)) | ||||||
688 | "BB is expected to be the target of at most one branch")((std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor ) == HoistableBranches.end() && "BB is expected to be the target of at most one branch" ) ? static_cast<void> (0) : __assert_fail ("std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) == HoistableBranches.end() && \"BB is expected to be the target of at most one branch\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 688, __PRETTY_FUNCTION__)); | ||||||
689 | |||||||
690 | LLVMContext &C = BB->getContext(); | ||||||
691 | BasicBlock *TrueDest = BI->getSuccessor(0); | ||||||
692 | BasicBlock *FalseDest = BI->getSuccessor(1); | ||||||
693 | BasicBlock *CommonSucc = HoistableBranches[BI]; | ||||||
694 | BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent()); | ||||||
695 | |||||||
696 | // Create hoisted versions of blocks that currently don't have them | ||||||
697 | auto CreateHoistedBlock = [&](BasicBlock *Orig) { | ||||||
698 | if (HoistDestinationMap.count(Orig)) | ||||||
699 | return HoistDestinationMap[Orig]; | ||||||
700 | BasicBlock *New = | ||||||
701 | BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent()); | ||||||
702 | HoistDestinationMap[Orig] = New; | ||||||
703 | DT->addNewBlock(New, HoistTarget); | ||||||
704 | if (CurLoop->getParentLoop()) | ||||||
705 | CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI); | ||||||
706 | ++NumCreatedBlocks; | ||||||
707 | LLVM_DEBUG(dbgs() << "LICM created " << New->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM created " << New-> getName() << " as hoist destination for " << Orig ->getName() << "\n"; } } while (false) | ||||||
708 | << " as hoist destination for " << Orig->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM created " << New-> getName() << " as hoist destination for " << Orig ->getName() << "\n"; } } while (false) | ||||||
709 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM created " << New-> getName() << " as hoist destination for " << Orig ->getName() << "\n"; } } while (false); | ||||||
710 | return New; | ||||||
711 | }; | ||||||
712 | BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest); | ||||||
713 | BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest); | ||||||
714 | BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc); | ||||||
715 | |||||||
716 | // Link up these blocks with branches. | ||||||
717 | if (!HoistCommonSucc->getTerminator()) { | ||||||
718 | // The new common successor we've generated will branch to whatever that | ||||||
719 | // hoist target branched to. | ||||||
720 | BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor(); | ||||||
721 | assert(TargetSucc && "Expected hoist target to have a single successor")((TargetSucc && "Expected hoist target to have a single successor" ) ? static_cast<void> (0) : __assert_fail ("TargetSucc && \"Expected hoist target to have a single successor\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 721, __PRETTY_FUNCTION__)); | ||||||
722 | HoistCommonSucc->moveBefore(TargetSucc); | ||||||
723 | BranchInst::Create(TargetSucc, HoistCommonSucc); | ||||||
724 | } | ||||||
725 | if (!HoistTrueDest->getTerminator()) { | ||||||
726 | HoistTrueDest->moveBefore(HoistCommonSucc); | ||||||
727 | BranchInst::Create(HoistCommonSucc, HoistTrueDest); | ||||||
728 | } | ||||||
729 | if (!HoistFalseDest->getTerminator()) { | ||||||
730 | HoistFalseDest->moveBefore(HoistCommonSucc); | ||||||
731 | BranchInst::Create(HoistCommonSucc, HoistFalseDest); | ||||||
732 | } | ||||||
733 | |||||||
734 | // If BI is being cloned to what was originally the preheader then | ||||||
735 | // HoistCommonSucc will now be the new preheader. | ||||||
736 | if (HoistTarget == InitialPreheader) { | ||||||
737 | // Phis in the loop header now need to use the new preheader. | ||||||
738 | InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc); | ||||||
739 | if (MSSAU) | ||||||
740 | MSSAU->wireOldPredecessorsToNewImmediatePredecessor( | ||||||
741 | HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget}); | ||||||
742 | // The new preheader dominates the loop header. | ||||||
743 | DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc); | ||||||
744 | DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader()); | ||||||
745 | DT->changeImmediateDominator(HeaderNode, PreheaderNode); | ||||||
746 | // The preheader hoist destination is now the new preheader, with the | ||||||
747 | // exception of the hoist destination of this branch. | ||||||
748 | for (auto &Pair : HoistDestinationMap) | ||||||
749 | if (Pair.second == InitialPreheader && Pair.first != BI->getParent()) | ||||||
750 | Pair.second = HoistCommonSucc; | ||||||
751 | } | ||||||
752 | |||||||
753 | // Now finally clone BI. | ||||||
754 | ReplaceInstWithInst( | ||||||
755 | HoistTarget->getTerminator(), | ||||||
756 | BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition())); | ||||||
757 | ++NumClonedBranches; | ||||||
758 | |||||||
759 | assert(CurLoop->getLoopPreheader() &&((CurLoop->getLoopPreheader() && "Hoisting blocks should not have destroyed preheader" ) ? static_cast<void> (0) : __assert_fail ("CurLoop->getLoopPreheader() && \"Hoisting blocks should not have destroyed preheader\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 760, __PRETTY_FUNCTION__)) | ||||||
760 | "Hoisting blocks should not have destroyed preheader")((CurLoop->getLoopPreheader() && "Hoisting blocks should not have destroyed preheader" ) ? static_cast<void> (0) : __assert_fail ("CurLoop->getLoopPreheader() && \"Hoisting blocks should not have destroyed preheader\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 760, __PRETTY_FUNCTION__)); | ||||||
761 | return HoistDestinationMap[BB]; | ||||||
762 | } | ||||||
763 | }; | ||||||
764 | } // namespace | ||||||
765 | |||||||
766 | // Hoisting/sinking instruction out of a loop isn't always beneficial. It's only | ||||||
767 | // only worthwhile if the destination block is actually colder than current | ||||||
768 | // block. | ||||||
769 | static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock, | ||||||
770 | OptimizationRemarkEmitter *ORE, | ||||||
771 | BlockFrequencyInfo *BFI) { | ||||||
772 | // Check block frequency only when runtime profile is available | ||||||
773 | // to avoid pathological cases. With static profile, lean towards | ||||||
774 | // hosting because it helps canonicalize the loop for vectorizer. | ||||||
775 | if (!DstBlock->getParent()->hasProfileData()) | ||||||
776 | return true; | ||||||
777 | |||||||
778 | if (!HoistSinkColdnessThreshold || !BFI) | ||||||
779 | return true; | ||||||
780 | |||||||
781 | BasicBlock *SrcBlock = I.getParent(); | ||||||
782 | if (BFI->getBlockFreq(DstBlock).getFrequency() / HoistSinkColdnessThreshold > | ||||||
783 | BFI->getBlockFreq(SrcBlock).getFrequency()) { | ||||||
784 | ORE->emit([&]() { | ||||||
785 | return OptimizationRemarkMissed(DEBUG_TYPE"licm", "SinkHoistInst", &I) | ||||||
786 | << "failed to sink or hoist instruction because containing block " | ||||||
787 | "has lower frequency than destination block"; | ||||||
788 | }); | ||||||
789 | return false; | ||||||
790 | } | ||||||
791 | |||||||
792 | return true; | ||||||
793 | } | ||||||
794 | |||||||
795 | /// Walk the specified region of the CFG (defined by all blocks dominated by | ||||||
796 | /// the specified block, and that are in the current loop) in depth first | ||||||
797 | /// order w.r.t the DominatorTree. This allows us to visit definitions before | ||||||
798 | /// uses, allowing us to hoist a loop body in one pass without iteration. | ||||||
799 | /// | ||||||
800 | bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, | ||||||
801 | DominatorTree *DT, BlockFrequencyInfo *BFI, | ||||||
802 | TargetLibraryInfo *TLI, Loop *CurLoop, | ||||||
803 | AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, | ||||||
804 | ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo, | ||||||
805 | SinkAndHoistLICMFlags &Flags, | ||||||
806 | OptimizationRemarkEmitter *ORE) { | ||||||
807 | // Verify inputs. | ||||||
808 | assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&((N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected input to hoistRegion." ) ? static_cast<void> (0) : __assert_fail ("N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected input to hoistRegion.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 810, __PRETTY_FUNCTION__)) | ||||||
809 | CurLoop != nullptr && SafetyInfo != nullptr &&((N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected input to hoistRegion." ) ? static_cast<void> (0) : __assert_fail ("N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected input to hoistRegion.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 810, __PRETTY_FUNCTION__)) | ||||||
810 | "Unexpected input to hoistRegion.")((N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected input to hoistRegion." ) ? static_cast<void> (0) : __assert_fail ("N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected input to hoistRegion.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 810, __PRETTY_FUNCTION__)); | ||||||
811 | assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&((((CurAST != nullptr) ^ (MSSAU != nullptr)) && "Either AliasSetTracker or MemorySSA should be initialized." ) ? static_cast<void> (0) : __assert_fail ("((CurAST != nullptr) ^ (MSSAU != nullptr)) && \"Either AliasSetTracker or MemorySSA should be initialized.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 812, __PRETTY_FUNCTION__)) | ||||||
812 | "Either AliasSetTracker or MemorySSA should be initialized.")((((CurAST != nullptr) ^ (MSSAU != nullptr)) && "Either AliasSetTracker or MemorySSA should be initialized." ) ? static_cast<void> (0) : __assert_fail ("((CurAST != nullptr) ^ (MSSAU != nullptr)) && \"Either AliasSetTracker or MemorySSA should be initialized.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 812, __PRETTY_FUNCTION__)); | ||||||
813 | |||||||
814 | ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU); | ||||||
815 | |||||||
816 | // Keep track of instructions that have been hoisted, as they may need to be | ||||||
817 | // re-hoisted if they end up not dominating all of their uses. | ||||||
818 | SmallVector<Instruction *, 16> HoistedInstructions; | ||||||
819 | |||||||
820 | // For PHI hoisting to work we need to hoist blocks before their successors. | ||||||
821 | // We can do this by iterating through the blocks in the loop in reverse | ||||||
822 | // post-order. | ||||||
823 | LoopBlocksRPO Worklist(CurLoop); | ||||||
824 | Worklist.perform(LI); | ||||||
825 | bool Changed = false; | ||||||
826 | for (BasicBlock *BB : Worklist) { | ||||||
827 | // Only need to process the contents of this block if it is not part of a | ||||||
828 | // subloop (which would already have been processed). | ||||||
829 | if (inSubLoop(BB, CurLoop, LI)) | ||||||
830 | continue; | ||||||
831 | |||||||
832 | for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { | ||||||
833 | Instruction &I = *II++; | ||||||
834 | // Try constant folding this instruction. If all the operands are | ||||||
835 | // constants, it is technically hoistable, but it would be better to | ||||||
836 | // just fold it. | ||||||
837 | if (Constant *C = ConstantFoldInstruction( | ||||||
838 | &I, I.getModule()->getDataLayout(), TLI)) { | ||||||
839 | LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *Cdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'; } } while (false) | ||||||
840 | << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'; } } while (false); | ||||||
841 | if (CurAST) | ||||||
842 | CurAST->copyValue(&I, C); | ||||||
843 | // FIXME MSSA: Such replacements may make accesses unoptimized (D51960). | ||||||
844 | I.replaceAllUsesWith(C); | ||||||
845 | if (isInstructionTriviallyDead(&I, TLI)) | ||||||
846 | eraseInstruction(I, *SafetyInfo, CurAST, MSSAU); | ||||||
847 | Changed = true; | ||||||
848 | continue; | ||||||
849 | } | ||||||
850 | |||||||
851 | // Try hoisting the instruction out to the preheader. We can only do | ||||||
852 | // this if all of the operands of the instruction are loop invariant and | ||||||
853 | // if it is safe to hoist the instruction. We also check block frequency | ||||||
854 | // to make sure instruction only gets hoisted into colder blocks. | ||||||
855 | // TODO: It may be safe to hoist if we are hoisting to a conditional block | ||||||
856 | // and we have accurately duplicated the control flow from the loop header | ||||||
857 | // to that block. | ||||||
858 | if (CurLoop->hasLoopInvariantOperands(&I) && | ||||||
859 | canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags, | ||||||
860 | ORE) && | ||||||
861 | worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) && | ||||||
862 | isSafeToExecuteUnconditionally( | ||||||
863 | I, DT, CurLoop, SafetyInfo, ORE, | ||||||
864 | CurLoop->getLoopPreheader()->getTerminator())) { | ||||||
865 | hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, | ||||||
866 | MSSAU, SE, ORE); | ||||||
867 | HoistedInstructions.push_back(&I); | ||||||
868 | Changed = true; | ||||||
869 | continue; | ||||||
870 | } | ||||||
871 | |||||||
872 | // Attempt to remove floating point division out of the loop by | ||||||
873 | // converting it to a reciprocal multiplication. | ||||||
874 | if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() && | ||||||
875 | CurLoop->isLoopInvariant(I.getOperand(1))) { | ||||||
876 | auto Divisor = I.getOperand(1); | ||||||
877 | auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0); | ||||||
878 | auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor); | ||||||
879 | ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags()); | ||||||
880 | SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent()); | ||||||
881 | ReciprocalDivisor->insertBefore(&I); | ||||||
882 | |||||||
883 | auto Product = | ||||||
884 | BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor); | ||||||
885 | Product->setFastMathFlags(I.getFastMathFlags()); | ||||||
886 | SafetyInfo->insertInstructionTo(Product, I.getParent()); | ||||||
887 | Product->insertAfter(&I); | ||||||
888 | I.replaceAllUsesWith(Product); | ||||||
889 | eraseInstruction(I, *SafetyInfo, CurAST, MSSAU); | ||||||
890 | |||||||
891 | hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), | ||||||
892 | SafetyInfo, MSSAU, SE, ORE); | ||||||
893 | HoistedInstructions.push_back(ReciprocalDivisor); | ||||||
894 | Changed = true; | ||||||
895 | continue; | ||||||
896 | } | ||||||
897 | |||||||
898 | auto IsInvariantStart = [&](Instruction &I) { | ||||||
899 | using namespace PatternMatch; | ||||||
900 | return I.use_empty() && | ||||||
901 | match(&I, m_Intrinsic<Intrinsic::invariant_start>()); | ||||||
902 | }; | ||||||
903 | auto MustExecuteWithoutWritesBefore = [&](Instruction &I) { | ||||||
904 | return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) && | ||||||
905 | SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop); | ||||||
906 | }; | ||||||
907 | if ((IsInvariantStart(I) || isGuard(&I)) && | ||||||
908 | CurLoop->hasLoopInvariantOperands(&I) && | ||||||
909 | MustExecuteWithoutWritesBefore(I)) { | ||||||
910 | hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, | ||||||
911 | MSSAU, SE, ORE); | ||||||
912 | HoistedInstructions.push_back(&I); | ||||||
913 | Changed = true; | ||||||
914 | continue; | ||||||
915 | } | ||||||
916 | |||||||
917 | if (PHINode *PN = dyn_cast<PHINode>(&I)) { | ||||||
918 | if (CFH.canHoistPHI(PN)) { | ||||||
919 | // Redirect incoming blocks first to ensure that we create hoisted | ||||||
920 | // versions of those blocks before we hoist the phi. | ||||||
921 | for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i) | ||||||
922 | PN->setIncomingBlock( | ||||||
923 | i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i))); | ||||||
924 | hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, | ||||||
925 | MSSAU, SE, ORE); | ||||||
926 | assert(DT->dominates(PN, BB) && "Conditional PHIs not expected")((DT->dominates(PN, BB) && "Conditional PHIs not expected" ) ? static_cast<void> (0) : __assert_fail ("DT->dominates(PN, BB) && \"Conditional PHIs not expected\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 926, __PRETTY_FUNCTION__)); | ||||||
927 | Changed = true; | ||||||
928 | continue; | ||||||
929 | } | ||||||
930 | } | ||||||
931 | |||||||
932 | // Remember possibly hoistable branches so we can actually hoist them | ||||||
933 | // later if needed. | ||||||
934 | if (BranchInst *BI = dyn_cast<BranchInst>(&I)) | ||||||
935 | CFH.registerPossiblyHoistableBranch(BI); | ||||||
936 | } | ||||||
937 | } | ||||||
938 | |||||||
939 | // If we hoisted instructions to a conditional block they may not dominate | ||||||
940 | // their uses that weren't hoisted (such as phis where some operands are not | ||||||
941 | // loop invariant). If so make them unconditional by moving them to their | ||||||
942 | // immediate dominator. We iterate through the instructions in reverse order | ||||||
943 | // which ensures that when we rehoist an instruction we rehoist its operands, | ||||||
944 | // and also keep track of where in the block we are rehoisting to to make sure | ||||||
945 | // that we rehoist instructions before the instructions that use them. | ||||||
946 | Instruction *HoistPoint = nullptr; | ||||||
947 | if (ControlFlowHoisting) { | ||||||
948 | for (Instruction *I : reverse(HoistedInstructions)) { | ||||||
949 | if (!llvm::all_of(I->uses(), | ||||||
950 | [&](Use &U) { return DT->dominates(I, U); })) { | ||||||
951 | BasicBlock *Dominator = | ||||||
952 | DT->getNode(I->getParent())->getIDom()->getBlock(); | ||||||
953 | if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) { | ||||||
954 | if (HoistPoint) | ||||||
955 | assert(DT->dominates(Dominator, HoistPoint->getParent()) &&((DT->dominates(Dominator, HoistPoint->getParent()) && "New hoist point expected to dominate old hoist point") ? static_cast <void> (0) : __assert_fail ("DT->dominates(Dominator, HoistPoint->getParent()) && \"New hoist point expected to dominate old hoist point\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 956, __PRETTY_FUNCTION__)) | ||||||
956 | "New hoist point expected to dominate old hoist point")((DT->dominates(Dominator, HoistPoint->getParent()) && "New hoist point expected to dominate old hoist point") ? static_cast <void> (0) : __assert_fail ("DT->dominates(Dominator, HoistPoint->getParent()) && \"New hoist point expected to dominate old hoist point\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 956, __PRETTY_FUNCTION__)); | ||||||
957 | HoistPoint = Dominator->getTerminator(); | ||||||
958 | } | ||||||
959 | LLVM_DEBUG(dbgs() << "LICM rehoisting to "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM rehoisting to " << HoistPoint ->getParent()->getName() << ": " << *I << "\n"; } } while (false) | ||||||
960 | << HoistPoint->getParent()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM rehoisting to " << HoistPoint ->getParent()->getName() << ": " << *I << "\n"; } } while (false) | ||||||
961 | << ": " << *I << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM rehoisting to " << HoistPoint ->getParent()->getName() << ": " << *I << "\n"; } } while (false); | ||||||
962 | moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE); | ||||||
963 | HoistPoint = I; | ||||||
964 | Changed = true; | ||||||
965 | } | ||||||
966 | } | ||||||
967 | } | ||||||
968 | if (MSSAU && VerifyMemorySSA) | ||||||
969 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
970 | |||||||
971 | // Now that we've finished hoisting make sure that LI and DT are still | ||||||
972 | // valid. | ||||||
973 | #ifdef EXPENSIVE_CHECKS | ||||||
974 | if (Changed) { | ||||||
975 | assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&((DT->verify(DominatorTree::VerificationLevel::Fast) && "Dominator tree verification failed") ? static_cast<void> (0) : __assert_fail ("DT->verify(DominatorTree::VerificationLevel::Fast) && \"Dominator tree verification failed\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 976, __PRETTY_FUNCTION__)) | ||||||
976 | "Dominator tree verification failed")((DT->verify(DominatorTree::VerificationLevel::Fast) && "Dominator tree verification failed") ? static_cast<void> (0) : __assert_fail ("DT->verify(DominatorTree::VerificationLevel::Fast) && \"Dominator tree verification failed\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 976, __PRETTY_FUNCTION__)); | ||||||
977 | LI->verify(*DT); | ||||||
978 | } | ||||||
979 | #endif | ||||||
980 | |||||||
981 | return Changed; | ||||||
982 | } | ||||||
983 | |||||||
984 | // Return true if LI is invariant within scope of the loop. LI is invariant if | ||||||
985 | // CurLoop is dominated by an invariant.start representing the same memory | ||||||
986 | // location and size as the memory location LI loads from, and also the | ||||||
987 | // invariant.start has no uses. | ||||||
988 | static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, | ||||||
989 | Loop *CurLoop) { | ||||||
990 | Value *Addr = LI->getOperand(0); | ||||||
991 | const DataLayout &DL = LI->getModule()->getDataLayout(); | ||||||
992 | const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType()); | ||||||
993 | |||||||
994 | // It is not currently possible for clang to generate an invariant.start | ||||||
995 | // intrinsic with scalable vector types because we don't support thread local | ||||||
996 | // sizeless types and we don't permit sizeless types in structs or classes. | ||||||
997 | // Furthermore, even if support is added for this in future the intrinsic | ||||||
998 | // itself is defined to have a size of -1 for variable sized objects. This | ||||||
999 | // makes it impossible to verify if the intrinsic envelops our region of | ||||||
1000 | // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8> | ||||||
1001 | // types would have a -1 parameter, but the former is clearly double the size | ||||||
1002 | // of the latter. | ||||||
1003 | if (LocSizeInBits.isScalable()) | ||||||
1004 | return false; | ||||||
1005 | |||||||
1006 | // if the type is i8 addrspace(x)*, we know this is the type of | ||||||
1007 | // llvm.invariant.start operand | ||||||
1008 | auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()), | ||||||
1009 | LI->getPointerAddressSpace()); | ||||||
1010 | unsigned BitcastsVisited = 0; | ||||||
1011 | // Look through bitcasts until we reach the i8* type (this is invariant.start | ||||||
1012 | // operand type). | ||||||
1013 | while (Addr->getType() != PtrInt8Ty) { | ||||||
1014 | auto *BC = dyn_cast<BitCastInst>(Addr); | ||||||
1015 | // Avoid traversing high number of bitcast uses. | ||||||
1016 | if (++BitcastsVisited > MaxNumUsesTraversed || !BC) | ||||||
1017 | return false; | ||||||
1018 | Addr = BC->getOperand(0); | ||||||
1019 | } | ||||||
1020 | |||||||
1021 | unsigned UsesVisited = 0; | ||||||
1022 | // Traverse all uses of the load operand value, to see if invariant.start is | ||||||
1023 | // one of the uses, and whether it dominates the load instruction. | ||||||
1024 | for (auto *U : Addr->users()) { | ||||||
1025 | // Avoid traversing for Load operand with high number of users. | ||||||
1026 | if (++UsesVisited > MaxNumUsesTraversed) | ||||||
1027 | return false; | ||||||
1028 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); | ||||||
1029 | // If there are escaping uses of invariant.start instruction, the load maybe | ||||||
1030 | // non-invariant. | ||||||
1031 | if (!II || II->getIntrinsicID() != Intrinsic::invariant_start || | ||||||
1032 | !II->use_empty()) | ||||||
1033 | continue; | ||||||
1034 | ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0)); | ||||||
1035 | // The intrinsic supports having a -1 argument for variable sized objects | ||||||
1036 | // so we should check for that here. | ||||||
1037 | if (InvariantSize->isNegative()) | ||||||
1038 | continue; | ||||||
1039 | uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8; | ||||||
1040 | // Confirm the invariant.start location size contains the load operand size | ||||||
1041 | // in bits. Also, the invariant.start should dominate the load, and we | ||||||
1042 | // should not hoist the load out of a loop that contains this dominating | ||||||
1043 | // invariant.start. | ||||||
1044 | if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits && | ||||||
1045 | DT->properlyDominates(II->getParent(), CurLoop->getHeader())) | ||||||
1046 | return true; | ||||||
1047 | } | ||||||
1048 | |||||||
1049 | return false; | ||||||
1050 | } | ||||||
1051 | |||||||
1052 | namespace { | ||||||
1053 | /// Return true if-and-only-if we know how to (mechanically) both hoist and | ||||||
1054 | /// sink a given instruction out of a loop. Does not address legality | ||||||
1055 | /// concerns such as aliasing or speculation safety. | ||||||
1056 | bool isHoistableAndSinkableInst(Instruction &I) { | ||||||
1057 | // Only these instructions are hoistable/sinkable. | ||||||
1058 | return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) || | ||||||
1059 | isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) || | ||||||
1060 | isa<BinaryOperator>(I) || isa<SelectInst>(I) || | ||||||
1061 | isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || | ||||||
1062 | isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || | ||||||
1063 | isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || | ||||||
1064 | isa<InsertValueInst>(I) || isa<FreezeInst>(I)); | ||||||
1065 | } | ||||||
1066 | /// Return true if all of the alias sets within this AST are known not to | ||||||
1067 | /// contain a Mod, or if MSSA knows thare are no MemoryDefs in the loop. | ||||||
1068 | bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU, | ||||||
1069 | const Loop *L) { | ||||||
1070 | if (CurAST) { | ||||||
1071 | for (AliasSet &AS : *CurAST) { | ||||||
1072 | if (!AS.isForwardingAliasSet() && AS.isMod()) { | ||||||
1073 | return false; | ||||||
1074 | } | ||||||
1075 | } | ||||||
1076 | return true; | ||||||
1077 | } else { /*MSSAU*/ | ||||||
1078 | for (auto *BB : L->getBlocks()) | ||||||
1079 | if (MSSAU->getMemorySSA()->getBlockDefs(BB)) | ||||||
| |||||||
1080 | return false; | ||||||
1081 | return true; | ||||||
1082 | } | ||||||
1083 | } | ||||||
1084 | |||||||
1085 | /// Return true if I is the only Instruction with a MemoryAccess in L. | ||||||
1086 | bool isOnlyMemoryAccess(const Instruction *I, const Loop *L, | ||||||
1087 | const MemorySSAUpdater *MSSAU) { | ||||||
1088 | for (auto *BB : L->getBlocks()) | ||||||
1089 | if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) { | ||||||
1090 | int NotAPhi = 0; | ||||||
1091 | for (const auto &Acc : *Accs) { | ||||||
1092 | if (isa<MemoryPhi>(&Acc)) | ||||||
1093 | continue; | ||||||
1094 | const auto *MUD = cast<MemoryUseOrDef>(&Acc); | ||||||
1095 | if (MUD->getMemoryInst() != I || NotAPhi++ == 1) | ||||||
1096 | return false; | ||||||
1097 | } | ||||||
1098 | } | ||||||
1099 | return true; | ||||||
1100 | } | ||||||
1101 | } | ||||||
1102 | |||||||
1103 | bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, | ||||||
1104 | Loop *CurLoop, AliasSetTracker *CurAST, | ||||||
1105 | MemorySSAUpdater *MSSAU, | ||||||
1106 | bool TargetExecutesOncePerLoop, | ||||||
1107 | SinkAndHoistLICMFlags *Flags, | ||||||
1108 | OptimizationRemarkEmitter *ORE) { | ||||||
1109 | // If we don't understand the instruction, bail early. | ||||||
1110 | if (!isHoistableAndSinkableInst(I)) | ||||||
| |||||||
1111 | return false; | ||||||
1112 | |||||||
1113 | MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr; | ||||||
1114 | if (MSSA
| ||||||
1115 | assert(Flags != nullptr && "Flags cannot be null.")((Flags != nullptr && "Flags cannot be null.") ? static_cast <void> (0) : __assert_fail ("Flags != nullptr && \"Flags cannot be null.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1115, __PRETTY_FUNCTION__)); | ||||||
1116 | |||||||
1117 | // Loads have extra constraints we have to verify before we can hoist them. | ||||||
1118 | if (LoadInst *LI
| ||||||
1119 | if (!LI->isUnordered()) | ||||||
1120 | return false; // Don't sink/hoist volatile or ordered atomic loads! | ||||||
1121 | |||||||
1122 | // Loads from constant memory are always safe to move, even if they end up | ||||||
1123 | // in the same alias set as something that ends up being modified. | ||||||
1124 | if (AA->pointsToConstantMemory(LI->getOperand(0))) | ||||||
1125 | return true; | ||||||
1126 | if (LI->hasMetadata(LLVMContext::MD_invariant_load)) | ||||||
1127 | return true; | ||||||
1128 | |||||||
1129 | if (LI->isAtomic() && !TargetExecutesOncePerLoop) | ||||||
1130 | return false; // Don't risk duplicating unordered loads | ||||||
1131 | |||||||
1132 | // This checks for an invariant.start dominating the load. | ||||||
1133 | if (isLoadInvariantInLoop(LI, DT, CurLoop)) | ||||||
1134 | return true; | ||||||
1135 | |||||||
1136 | bool Invalidated; | ||||||
1137 | if (CurAST) | ||||||
1138 | Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST, | ||||||
1139 | CurLoop, AA); | ||||||
1140 | else | ||||||
1141 | Invalidated = pointerInvalidatedByLoopWithMSSA( | ||||||
1142 | MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, *Flags); | ||||||
1143 | // Check loop-invariant address because this may also be a sinkable load | ||||||
1144 | // whose address is not necessarily loop-invariant. | ||||||
1145 | if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) | ||||||
1146 | ORE->emit([&]() { | ||||||
1147 | return OptimizationRemarkMissed( | ||||||
1148 | DEBUG_TYPE"licm", "LoadWithLoopInvariantAddressInvalidated", LI) | ||||||
1149 | << "failed to move load with loop-invariant address " | ||||||
1150 | "because the loop may invalidate its value"; | ||||||
1151 | }); | ||||||
1152 | |||||||
1153 | return !Invalidated; | ||||||
1154 | } else if (CallInst *CI
| ||||||
1155 | // Don't sink or hoist dbg info; it's legal, but not useful. | ||||||
1156 | if (isa<DbgInfoIntrinsic>(I)) | ||||||
1157 | return false; | ||||||
1158 | |||||||
1159 | // Don't sink calls which can throw. | ||||||
1160 | if (CI->mayThrow()) | ||||||
1161 | return false; | ||||||
1162 | |||||||
1163 | using namespace PatternMatch; | ||||||
1164 | if (match(CI, m_Intrinsic<Intrinsic::assume>())) | ||||||
1165 | // Assumes don't actually alias anything or throw | ||||||
1166 | return true; | ||||||
1167 | |||||||
1168 | if (match(CI, m_Intrinsic<Intrinsic::experimental_widenable_condition>())) | ||||||
1169 | // Widenable conditions don't actually alias anything or throw | ||||||
1170 | return true; | ||||||
1171 | |||||||
1172 | // Handle simple cases by querying alias analysis. | ||||||
1173 | FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); | ||||||
1174 | if (Behavior == FMRB_DoesNotAccessMemory) | ||||||
1175 | return true; | ||||||
1176 | if (AAResults::onlyReadsMemory(Behavior)) { | ||||||
1177 | // A readonly argmemonly function only reads from memory pointed to by | ||||||
1178 | // it's arguments with arbitrary offsets. If we can prove there are no | ||||||
1179 | // writes to this memory in the loop, we can hoist or sink. | ||||||
1180 | if (AAResults::onlyAccessesArgPointees(Behavior)) { | ||||||
1181 | // TODO: expand to writeable arguments | ||||||
1182 | for (Value *Op : CI->arg_operands()) | ||||||
1183 | if (Op->getType()->isPointerTy()) { | ||||||
1184 | bool Invalidated; | ||||||
1185 | if (CurAST) | ||||||
1186 | Invalidated = pointerInvalidatedByLoop( | ||||||
1187 | MemoryLocation(Op, LocationSize::unknown(), AAMDNodes()), | ||||||
1188 | CurAST, CurLoop, AA); | ||||||
1189 | else | ||||||
1190 | Invalidated = pointerInvalidatedByLoopWithMSSA( | ||||||
1191 | MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, | ||||||
1192 | *Flags); | ||||||
1193 | if (Invalidated) | ||||||
1194 | return false; | ||||||
1195 | } | ||||||
1196 | return true; | ||||||
1197 | } | ||||||
1198 | |||||||
1199 | // If this call only reads from memory and there are no writes to memory | ||||||
1200 | // in the loop, we can hoist or sink the call as appropriate. | ||||||
1201 | if (isReadOnly(CurAST, MSSAU, CurLoop)) | ||||||
1202 | return true; | ||||||
1203 | } | ||||||
1204 | |||||||
1205 | // FIXME: This should use mod/ref information to see if we can hoist or | ||||||
1206 | // sink the call. | ||||||
1207 | |||||||
1208 | return false; | ||||||
1209 | } else if (auto *FI = dyn_cast<FenceInst>(&I)) { | ||||||
1210 | // Fences alias (most) everything to provide ordering. For the moment, | ||||||
1211 | // just give up if there are any other memory operations in the loop. | ||||||
1212 | if (CurAST) { | ||||||
1213 | auto Begin = CurAST->begin(); | ||||||
1214 | assert(Begin != CurAST->end() && "must contain FI")((Begin != CurAST->end() && "must contain FI") ? static_cast <void> (0) : __assert_fail ("Begin != CurAST->end() && \"must contain FI\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1214, __PRETTY_FUNCTION__)); | ||||||
1215 | if (std::next(Begin) != CurAST->end()) | ||||||
1216 | // constant memory for instance, TODO: handle better | ||||||
1217 | return false; | ||||||
1218 | auto *UniqueI = Begin->getUniqueInstruction(); | ||||||
1219 | if (!UniqueI) | ||||||
1220 | // other memory op, give up | ||||||
1221 | return false; | ||||||
1222 | (void)FI; // suppress unused variable warning | ||||||
1223 | assert(UniqueI == FI && "AS must contain FI")((UniqueI == FI && "AS must contain FI") ? static_cast <void> (0) : __assert_fail ("UniqueI == FI && \"AS must contain FI\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1223, __PRETTY_FUNCTION__)); | ||||||
1224 | return true; | ||||||
1225 | } else // MSSAU | ||||||
1226 | return isOnlyMemoryAccess(FI, CurLoop, MSSAU); | ||||||
1227 | } else if (auto *SI = dyn_cast<StoreInst>(&I)) { | ||||||
1228 | if (!SI->isUnordered()) | ||||||
1229 | return false; // Don't sink/hoist volatile or ordered atomic store! | ||||||
1230 | |||||||
1231 | // We can only hoist a store that we can prove writes a value which is not | ||||||
1232 | // read or overwritten within the loop. For those cases, we fallback to | ||||||
1233 | // load store promotion instead. TODO: We can extend this to cases where | ||||||
1234 | // there is exactly one write to the location and that write dominates an | ||||||
1235 | // arbitrary number of reads in the loop. | ||||||
1236 | if (CurAST) { | ||||||
1237 | auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI)); | ||||||
1238 | |||||||
1239 | if (AS.isRef() || !AS.isMustAlias()) | ||||||
1240 | // Quick exit test, handled by the full path below as well. | ||||||
1241 | return false; | ||||||
1242 | auto *UniqueI = AS.getUniqueInstruction(); | ||||||
1243 | if (!UniqueI) | ||||||
1244 | // other memory op, give up | ||||||
1245 | return false; | ||||||
1246 | assert(UniqueI == SI && "AS must contain SI")((UniqueI == SI && "AS must contain SI") ? static_cast <void> (0) : __assert_fail ("UniqueI == SI && \"AS must contain SI\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1246, __PRETTY_FUNCTION__)); | ||||||
1247 | return true; | ||||||
1248 | } else { // MSSAU | ||||||
1249 | if (isOnlyMemoryAccess(SI, CurLoop, MSSAU)) | ||||||
1250 | return true; | ||||||
1251 | // If there are more accesses than the Promotion cap, give up, we're not | ||||||
1252 | // walking a list that long. | ||||||
1253 | if (Flags->NoOfMemAccTooLarge) | ||||||
1254 | return false; | ||||||
1255 | // Check store only if there's still "quota" to check clobber. | ||||||
1256 | if (Flags->LicmMssaOptCounter >= Flags->LicmMssaOptCap) | ||||||
1257 | return false; | ||||||
1258 | // If there are interfering Uses (i.e. their defining access is in the | ||||||
1259 | // loop), or ordered loads (stored as Defs!), don't move this store. | ||||||
1260 | // Could do better here, but this is conservatively correct. | ||||||
1261 | // TODO: Cache set of Uses on the first walk in runOnLoop, update when | ||||||
1262 | // moving accesses. Can also extend to dominating uses. | ||||||
1263 | auto *SIMD = MSSA->getMemoryAccess(SI); | ||||||
1264 | for (auto *BB : CurLoop->getBlocks()) | ||||||
1265 | if (auto *Accesses = MSSA->getBlockAccesses(BB)) { | ||||||
1266 | for (const auto &MA : *Accesses) | ||||||
1267 | if (const auto *MU = dyn_cast<MemoryUse>(&MA)) { | ||||||
1268 | auto *MD = MU->getDefiningAccess(); | ||||||
1269 | if (!MSSA->isLiveOnEntryDef(MD) && | ||||||
1270 | CurLoop->contains(MD->getBlock())) | ||||||
1271 | return false; | ||||||
1272 | // Disable hoisting past potentially interfering loads. Optimized | ||||||
1273 | // Uses may point to an access outside the loop, as getClobbering | ||||||
1274 | // checks the previous iteration when walking the backedge. | ||||||
1275 | // FIXME: More precise: no Uses that alias SI. | ||||||
1276 | if (!Flags->IsSink && !MSSA->dominates(SIMD, MU)) | ||||||
1277 | return false; | ||||||
1278 | } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) { | ||||||
1279 | if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) { | ||||||
1280 | (void)LI; // Silence warning. | ||||||
1281 | assert(!LI->isUnordered() && "Expected unordered load")((!LI->isUnordered() && "Expected unordered load") ? static_cast<void> (0) : __assert_fail ("!LI->isUnordered() && \"Expected unordered load\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1281, __PRETTY_FUNCTION__)); | ||||||
1282 | return false; | ||||||
1283 | } | ||||||
1284 | // Any call, while it may not be clobbering SI, it may be a use. | ||||||
1285 | if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) { | ||||||
1286 | // Check if the call may read from the memory locattion written | ||||||
1287 | // to by SI. Check CI's attributes and arguments; the number of | ||||||
1288 | // such checks performed is limited above by NoOfMemAccTooLarge. | ||||||
1289 | ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI)); | ||||||
1290 | if (isModOrRefSet(MRI)) | ||||||
1291 | return false; | ||||||
1292 | } | ||||||
1293 | } | ||||||
1294 | } | ||||||
1295 | |||||||
1296 | auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI); | ||||||
1297 | Flags->LicmMssaOptCounter++; | ||||||
1298 | // If there are no clobbering Defs in the loop, store is safe to hoist. | ||||||
1299 | return MSSA->isLiveOnEntryDef(Source) || | ||||||
1300 | !CurLoop->contains(Source->getBlock()); | ||||||
1301 | } | ||||||
1302 | } | ||||||
1303 | |||||||
1304 | assert(!I.mayReadOrWriteMemory() && "unhandled aliasing")((!I.mayReadOrWriteMemory() && "unhandled aliasing") ? static_cast<void> (0) : __assert_fail ("!I.mayReadOrWriteMemory() && \"unhandled aliasing\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1304, __PRETTY_FUNCTION__)); | ||||||
1305 | |||||||
1306 | // We've established mechanical ability and aliasing, it's up to the caller | ||||||
1307 | // to check fault safety | ||||||
1308 | return true; | ||||||
1309 | } | ||||||
1310 | |||||||
1311 | /// Returns true if a PHINode is a trivially replaceable with an | ||||||
1312 | /// Instruction. | ||||||
1313 | /// This is true when all incoming values are that instruction. | ||||||
1314 | /// This pattern occurs most often with LCSSA PHI nodes. | ||||||
1315 | /// | ||||||
1316 | static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) { | ||||||
1317 | for (const Value *IncValue : PN.incoming_values()) | ||||||
1318 | if (IncValue != &I) | ||||||
1319 | return false; | ||||||
1320 | |||||||
1321 | return true; | ||||||
1322 | } | ||||||
1323 | |||||||
1324 | /// Return true if the instruction is free in the loop. | ||||||
1325 | static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, | ||||||
1326 | const TargetTransformInfo *TTI) { | ||||||
1327 | |||||||
1328 | if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) { | ||||||
1329 | if (TTI->getUserCost(GEP, TargetTransformInfo::TCK_SizeAndLatency) != | ||||||
1330 | TargetTransformInfo::TCC_Free) | ||||||
1331 | return false; | ||||||
1332 | // For a GEP, we cannot simply use getUserCost because currently it | ||||||
1333 | // optimistically assume that a GEP will fold into addressing mode | ||||||
1334 | // regardless of its users. | ||||||
1335 | const BasicBlock *BB = GEP->getParent(); | ||||||
1336 | for (const User *U : GEP->users()) { | ||||||
1337 | const Instruction *UI = cast<Instruction>(U); | ||||||
1338 | if (CurLoop->contains(UI) && | ||||||
1339 | (BB != UI->getParent() || | ||||||
1340 | (!isa<StoreInst>(UI) && !isa<LoadInst>(UI)))) | ||||||
1341 | return false; | ||||||
1342 | } | ||||||
1343 | return true; | ||||||
1344 | } else | ||||||
1345 | return TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == | ||||||
1346 | TargetTransformInfo::TCC_Free; | ||||||
1347 | } | ||||||
1348 | |||||||
1349 | /// Return true if the only users of this instruction are outside of | ||||||
1350 | /// the loop. If this is true, we can sink the instruction to the exit | ||||||
1351 | /// blocks of the loop. | ||||||
1352 | /// | ||||||
1353 | /// We also return true if the instruction could be folded away in lowering. | ||||||
1354 | /// (e.g., a GEP can be folded into a load as an addressing mode in the loop). | ||||||
1355 | static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, | ||||||
1356 | const LoopSafetyInfo *SafetyInfo, | ||||||
1357 | TargetTransformInfo *TTI, bool &FreeInLoop) { | ||||||
1358 | const auto &BlockColors = SafetyInfo->getBlockColors(); | ||||||
1359 | bool IsFree = isFreeInLoop(I, CurLoop, TTI); | ||||||
1360 | for (const User *U : I.users()) { | ||||||
1361 | const Instruction *UI = cast<Instruction>(U); | ||||||
1362 | if (const PHINode *PN = dyn_cast<PHINode>(UI)) { | ||||||
1363 | const BasicBlock *BB = PN->getParent(); | ||||||
1364 | // We cannot sink uses in catchswitches. | ||||||
1365 | if (isa<CatchSwitchInst>(BB->getTerminator())) | ||||||
1366 | return false; | ||||||
1367 | |||||||
1368 | // We need to sink a callsite to a unique funclet. Avoid sinking if the | ||||||
1369 | // phi use is too muddled. | ||||||
1370 | if (isa<CallInst>(I)) | ||||||
1371 | if (!BlockColors.empty() && | ||||||
1372 | BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1) | ||||||
1373 | return false; | ||||||
1374 | } | ||||||
1375 | |||||||
1376 | if (CurLoop->contains(UI)) { | ||||||
1377 | if (IsFree) { | ||||||
1378 | FreeInLoop = true; | ||||||
1379 | continue; | ||||||
1380 | } | ||||||
1381 | return false; | ||||||
1382 | } | ||||||
1383 | } | ||||||
1384 | return true; | ||||||
1385 | } | ||||||
1386 | |||||||
1387 | static Instruction *cloneInstructionInExitBlock( | ||||||
1388 | Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, | ||||||
1389 | const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) { | ||||||
1390 | Instruction *New; | ||||||
1391 | if (auto *CI = dyn_cast<CallInst>(&I)) { | ||||||
1392 | const auto &BlockColors = SafetyInfo->getBlockColors(); | ||||||
1393 | |||||||
1394 | // Sinking call-sites need to be handled differently from other | ||||||
1395 | // instructions. The cloned call-site needs a funclet bundle operand | ||||||
1396 | // appropriate for its location in the CFG. | ||||||
1397 | SmallVector<OperandBundleDef, 1> OpBundles; | ||||||
1398 | for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles(); | ||||||
1399 | BundleIdx != BundleEnd; ++BundleIdx) { | ||||||
1400 | OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx); | ||||||
1401 | if (Bundle.getTagID() == LLVMContext::OB_funclet) | ||||||
1402 | continue; | ||||||
1403 | |||||||
1404 | OpBundles.emplace_back(Bundle); | ||||||
1405 | } | ||||||
1406 | |||||||
1407 | if (!BlockColors.empty()) { | ||||||
1408 | const ColorVector &CV = BlockColors.find(&ExitBlock)->second; | ||||||
1409 | assert(CV.size() == 1 && "non-unique color for exit block!")((CV.size() == 1 && "non-unique color for exit block!" ) ? static_cast<void> (0) : __assert_fail ("CV.size() == 1 && \"non-unique color for exit block!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1409, __PRETTY_FUNCTION__)); | ||||||
1410 | BasicBlock *BBColor = CV.front(); | ||||||
1411 | Instruction *EHPad = BBColor->getFirstNonPHI(); | ||||||
1412 | if (EHPad->isEHPad()) | ||||||
1413 | OpBundles.emplace_back("funclet", EHPad); | ||||||
1414 | } | ||||||
1415 | |||||||
1416 | New = CallInst::Create(CI, OpBundles); | ||||||
1417 | } else { | ||||||
1418 | New = I.clone(); | ||||||
1419 | } | ||||||
1420 | |||||||
1421 | ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New); | ||||||
1422 | if (!I.getName().empty()) | ||||||
1423 | New->setName(I.getName() + ".le"); | ||||||
1424 | |||||||
1425 | if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) { | ||||||
1426 | // Create a new MemoryAccess and let MemorySSA set its defining access. | ||||||
1427 | MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB( | ||||||
1428 | New, nullptr, New->getParent(), MemorySSA::Beginning); | ||||||
1429 | if (NewMemAcc) { | ||||||
1430 | if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc)) | ||||||
1431 | MSSAU->insertDef(MemDef, /*RenameUses=*/true); | ||||||
1432 | else { | ||||||
1433 | auto *MemUse = cast<MemoryUse>(NewMemAcc); | ||||||
1434 | MSSAU->insertUse(MemUse, /*RenameUses=*/true); | ||||||
1435 | } | ||||||
1436 | } | ||||||
1437 | } | ||||||
1438 | |||||||
1439 | // Build LCSSA PHI nodes for any in-loop operands. Note that this is | ||||||
1440 | // particularly cheap because we can rip off the PHI node that we're | ||||||
1441 | // replacing for the number and blocks of the predecessors. | ||||||
1442 | // OPT: If this shows up in a profile, we can instead finish sinking all | ||||||
1443 | // invariant instructions, and then walk their operands to re-establish | ||||||
1444 | // LCSSA. That will eliminate creating PHI nodes just to nuke them when | ||||||
1445 | // sinking bottom-up. | ||||||
1446 | for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; | ||||||
1447 | ++OI) | ||||||
1448 | if (Instruction *OInst = dyn_cast<Instruction>(*OI)) | ||||||
1449 | if (Loop *OLoop = LI->getLoopFor(OInst->getParent())) | ||||||
1450 | if (!OLoop->contains(&PN)) { | ||||||
1451 | PHINode *OpPN = | ||||||
1452 | PHINode::Create(OInst->getType(), PN.getNumIncomingValues(), | ||||||
1453 | OInst->getName() + ".lcssa", &ExitBlock.front()); | ||||||
1454 | for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) | ||||||
1455 | OpPN->addIncoming(OInst, PN.getIncomingBlock(i)); | ||||||
1456 | *OI = OpPN; | ||||||
1457 | } | ||||||
1458 | return New; | ||||||
1459 | } | ||||||
1460 | |||||||
1461 | static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, | ||||||
1462 | AliasSetTracker *AST, MemorySSAUpdater *MSSAU) { | ||||||
1463 | if (AST) | ||||||
1464 | AST->deleteValue(&I); | ||||||
1465 | if (MSSAU) | ||||||
1466 | MSSAU->removeMemoryAccess(&I); | ||||||
1467 | SafetyInfo.removeInstruction(&I); | ||||||
1468 | I.eraseFromParent(); | ||||||
1469 | } | ||||||
1470 | |||||||
1471 | static void moveInstructionBefore(Instruction &I, Instruction &Dest, | ||||||
1472 | ICFLoopSafetyInfo &SafetyInfo, | ||||||
1473 | MemorySSAUpdater *MSSAU, | ||||||
1474 | ScalarEvolution *SE) { | ||||||
1475 | SafetyInfo.removeInstruction(&I); | ||||||
1476 | SafetyInfo.insertInstructionTo(&I, Dest.getParent()); | ||||||
1477 | I.moveBefore(&Dest); | ||||||
1478 | if (MSSAU) | ||||||
1479 | if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>( | ||||||
1480 | MSSAU->getMemorySSA()->getMemoryAccess(&I))) | ||||||
1481 | MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), | ||||||
1482 | MemorySSA::BeforeTerminator); | ||||||
1483 | if (SE) | ||||||
1484 | SE->forgetValue(&I); | ||||||
1485 | } | ||||||
1486 | |||||||
1487 | static Instruction *sinkThroughTriviallyReplaceablePHI( | ||||||
1488 | PHINode *TPN, Instruction *I, LoopInfo *LI, | ||||||
1489 | SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies, | ||||||
1490 | const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop, | ||||||
1491 | MemorySSAUpdater *MSSAU) { | ||||||
1492 | assert(isTriviallyReplaceablePHI(*TPN, *I) &&((isTriviallyReplaceablePHI(*TPN, *I) && "Expect only trivially replaceable PHI" ) ? static_cast<void> (0) : __assert_fail ("isTriviallyReplaceablePHI(*TPN, *I) && \"Expect only trivially replaceable PHI\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1493, __PRETTY_FUNCTION__)) | ||||||
1493 | "Expect only trivially replaceable PHI")((isTriviallyReplaceablePHI(*TPN, *I) && "Expect only trivially replaceable PHI" ) ? static_cast<void> (0) : __assert_fail ("isTriviallyReplaceablePHI(*TPN, *I) && \"Expect only trivially replaceable PHI\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1493, __PRETTY_FUNCTION__)); | ||||||
1494 | BasicBlock *ExitBlock = TPN->getParent(); | ||||||
1495 | Instruction *New; | ||||||
1496 | auto It = SunkCopies.find(ExitBlock); | ||||||
1497 | if (It != SunkCopies.end()) | ||||||
1498 | New = It->second; | ||||||
1499 | else | ||||||
1500 | New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock( | ||||||
1501 | *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU); | ||||||
1502 | return New; | ||||||
1503 | } | ||||||
1504 | |||||||
1505 | static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) { | ||||||
1506 | BasicBlock *BB = PN->getParent(); | ||||||
1507 | if (!BB->canSplitPredecessors()) | ||||||
1508 | return false; | ||||||
1509 | // It's not impossible to split EHPad blocks, but if BlockColors already exist | ||||||
1510 | // it require updating BlockColors for all offspring blocks accordingly. By | ||||||
1511 | // skipping such corner case, we can make updating BlockColors after splitting | ||||||
1512 | // predecessor fairly simple. | ||||||
1513 | if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad()) | ||||||
1514 | return false; | ||||||
1515 | for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { | ||||||
1516 | BasicBlock *BBPred = *PI; | ||||||
1517 | if (isa<IndirectBrInst>(BBPred->getTerminator()) || | ||||||
1518 | isa<CallBrInst>(BBPred->getTerminator())) | ||||||
1519 | return false; | ||||||
1520 | } | ||||||
1521 | return true; | ||||||
1522 | } | ||||||
1523 | |||||||
1524 | static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, | ||||||
1525 | LoopInfo *LI, const Loop *CurLoop, | ||||||
1526 | LoopSafetyInfo *SafetyInfo, | ||||||
1527 | MemorySSAUpdater *MSSAU) { | ||||||
1528 | #ifndef NDEBUG | ||||||
1529 | SmallVector<BasicBlock *, 32> ExitBlocks; | ||||||
1530 | CurLoop->getUniqueExitBlocks(ExitBlocks); | ||||||
1531 | SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), | ||||||
1532 | ExitBlocks.end()); | ||||||
1533 | #endif | ||||||
1534 | BasicBlock *ExitBB = PN->getParent(); | ||||||
1535 | assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.")((ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block." ) ? static_cast<void> (0) : __assert_fail ("ExitBlockSet.count(ExitBB) && \"Expect the PHI is in an exit block.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1535, __PRETTY_FUNCTION__)); | ||||||
1536 | |||||||
1537 | // Split predecessors of the loop exit to make instructions in the loop are | ||||||
1538 | // exposed to exit blocks through trivially replaceable PHIs while keeping the | ||||||
1539 | // loop in the canonical form where each predecessor of each exit block should | ||||||
1540 | // be contained within the loop. For example, this will convert the loop below | ||||||
1541 | // from | ||||||
1542 | // | ||||||
1543 | // LB1: | ||||||
1544 | // %v1 = | ||||||
1545 | // br %LE, %LB2 | ||||||
1546 | // LB2: | ||||||
1547 | // %v2 = | ||||||
1548 | // br %LE, %LB1 | ||||||
1549 | // LE: | ||||||
1550 | // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable | ||||||
1551 | // | ||||||
1552 | // to | ||||||
1553 | // | ||||||
1554 | // LB1: | ||||||
1555 | // %v1 = | ||||||
1556 | // br %LE.split, %LB2 | ||||||
1557 | // LB2: | ||||||
1558 | // %v2 = | ||||||
1559 | // br %LE.split2, %LB1 | ||||||
1560 | // LE.split: | ||||||
1561 | // %p1 = phi [%v1, %LB1] <-- trivially replaceable | ||||||
1562 | // br %LE | ||||||
1563 | // LE.split2: | ||||||
1564 | // %p2 = phi [%v2, %LB2] <-- trivially replaceable | ||||||
1565 | // br %LE | ||||||
1566 | // LE: | ||||||
1567 | // %p = phi [%p1, %LE.split], [%p2, %LE.split2] | ||||||
1568 | // | ||||||
1569 | const auto &BlockColors = SafetyInfo->getBlockColors(); | ||||||
1570 | SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB)); | ||||||
1571 | while (!PredBBs.empty()) { | ||||||
1572 | BasicBlock *PredBB = *PredBBs.begin(); | ||||||
1573 | assert(CurLoop->contains(PredBB) &&((CurLoop->contains(PredBB) && "Expect all predecessors are in the loop" ) ? static_cast<void> (0) : __assert_fail ("CurLoop->contains(PredBB) && \"Expect all predecessors are in the loop\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1574, __PRETTY_FUNCTION__)) | ||||||
1574 | "Expect all predecessors are in the loop")((CurLoop->contains(PredBB) && "Expect all predecessors are in the loop" ) ? static_cast<void> (0) : __assert_fail ("CurLoop->contains(PredBB) && \"Expect all predecessors are in the loop\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1574, __PRETTY_FUNCTION__)); | ||||||
1575 | if (PN->getBasicBlockIndex(PredBB) >= 0) { | ||||||
1576 | BasicBlock *NewPred = SplitBlockPredecessors( | ||||||
1577 | ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true); | ||||||
1578 | // Since we do not allow splitting EH-block with BlockColors in | ||||||
1579 | // canSplitPredecessors(), we can simply assign predecessor's color to | ||||||
1580 | // the new block. | ||||||
1581 | if (!BlockColors.empty()) | ||||||
1582 | // Grab a reference to the ColorVector to be inserted before getting the | ||||||
1583 | // reference to the vector we are copying because inserting the new | ||||||
1584 | // element in BlockColors might cause the map to be reallocated. | ||||||
1585 | SafetyInfo->copyColors(NewPred, PredBB); | ||||||
1586 | } | ||||||
1587 | PredBBs.remove(PredBB); | ||||||
1588 | } | ||||||
1589 | } | ||||||
1590 | |||||||
1591 | /// When an instruction is found to only be used outside of the loop, this | ||||||
1592 | /// function moves it to the exit blocks and patches up SSA form as needed. | ||||||
1593 | /// This method is guaranteed to remove the original instruction from its | ||||||
1594 | /// position, and may either delete it or move it to outside of the loop. | ||||||
1595 | /// | ||||||
1596 | static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, | ||||||
1597 | BlockFrequencyInfo *BFI, const Loop *CurLoop, | ||||||
1598 | ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, | ||||||
1599 | OptimizationRemarkEmitter *ORE) { | ||||||
1600 | LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM sinking instruction: " << I << "\n"; } } while (false); | ||||||
1601 | ORE->emit([&]() { | ||||||
1602 | return OptimizationRemark(DEBUG_TYPE"licm", "InstSunk", &I) | ||||||
1603 | << "sinking " << ore::NV("Inst", &I); | ||||||
1604 | }); | ||||||
1605 | bool Changed = false; | ||||||
1606 | if (isa<LoadInst>(I)) | ||||||
1607 | ++NumMovedLoads; | ||||||
1608 | else if (isa<CallInst>(I)) | ||||||
1609 | ++NumMovedCalls; | ||||||
1610 | ++NumSunk; | ||||||
1611 | |||||||
1612 | // Iterate over users to be ready for actual sinking. Replace users via | ||||||
1613 | // unreachable blocks with undef and make all user PHIs trivially replaceable. | ||||||
1614 | SmallPtrSet<Instruction *, 8> VisitedUsers; | ||||||
1615 | for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) { | ||||||
1616 | auto *User = cast<Instruction>(*UI); | ||||||
1617 | Use &U = UI.getUse(); | ||||||
1618 | ++UI; | ||||||
1619 | |||||||
1620 | if (VisitedUsers.count(User) || CurLoop->contains(User)) | ||||||
1621 | continue; | ||||||
1622 | |||||||
1623 | if (!DT->isReachableFromEntry(User->getParent())) { | ||||||
1624 | U = UndefValue::get(I.getType()); | ||||||
1625 | Changed = true; | ||||||
1626 | continue; | ||||||
1627 | } | ||||||
1628 | |||||||
1629 | // The user must be a PHI node. | ||||||
1630 | PHINode *PN = cast<PHINode>(User); | ||||||
1631 | |||||||
1632 | // Surprisingly, instructions can be used outside of loops without any | ||||||
1633 | // exits. This can only happen in PHI nodes if the incoming block is | ||||||
1634 | // unreachable. | ||||||
1635 | BasicBlock *BB = PN->getIncomingBlock(U); | ||||||
1636 | if (!DT->isReachableFromEntry(BB)) { | ||||||
1637 | U = UndefValue::get(I.getType()); | ||||||
1638 | Changed = true; | ||||||
1639 | continue; | ||||||
1640 | } | ||||||
1641 | |||||||
1642 | VisitedUsers.insert(PN); | ||||||
1643 | if (isTriviallyReplaceablePHI(*PN, I)) | ||||||
1644 | continue; | ||||||
1645 | |||||||
1646 | if (!canSplitPredecessors(PN, SafetyInfo)) | ||||||
1647 | return Changed; | ||||||
1648 | |||||||
1649 | // Split predecessors of the PHI so that we can make users trivially | ||||||
1650 | // replaceable. | ||||||
1651 | splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU); | ||||||
1652 | |||||||
1653 | // Should rebuild the iterators, as they may be invalidated by | ||||||
1654 | // splitPredecessorsOfLoopExit(). | ||||||
1655 | UI = I.user_begin(); | ||||||
1656 | UE = I.user_end(); | ||||||
1657 | } | ||||||
1658 | |||||||
1659 | if (VisitedUsers.empty()) | ||||||
1660 | return Changed; | ||||||
1661 | |||||||
1662 | #ifndef NDEBUG | ||||||
1663 | SmallVector<BasicBlock *, 32> ExitBlocks; | ||||||
1664 | CurLoop->getUniqueExitBlocks(ExitBlocks); | ||||||
1665 | SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), | ||||||
1666 | ExitBlocks.end()); | ||||||
1667 | #endif | ||||||
1668 | |||||||
1669 | // Clones of this instruction. Don't create more than one per exit block! | ||||||
1670 | SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies; | ||||||
1671 | |||||||
1672 | // If this instruction is only used outside of the loop, then all users are | ||||||
1673 | // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of | ||||||
1674 | // the instruction. | ||||||
1675 | // First check if I is worth sinking for all uses. Sink only when it is worth | ||||||
1676 | // across all uses. | ||||||
1677 | SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end()); | ||||||
1678 | SmallVector<PHINode *, 8> ExitPNs; | ||||||
1679 | for (auto *UI : Users) { | ||||||
1680 | auto *User = cast<Instruction>(UI); | ||||||
1681 | |||||||
1682 | if (CurLoop->contains(User)) | ||||||
1683 | continue; | ||||||
1684 | |||||||
1685 | PHINode *PN = cast<PHINode>(User); | ||||||
1686 | assert(ExitBlockSet.count(PN->getParent()) &&((ExitBlockSet.count(PN->getParent()) && "The LCSSA PHI is not in an exit block!" ) ? static_cast<void> (0) : __assert_fail ("ExitBlockSet.count(PN->getParent()) && \"The LCSSA PHI is not in an exit block!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1687, __PRETTY_FUNCTION__)) | ||||||
1687 | "The LCSSA PHI is not in an exit block!")((ExitBlockSet.count(PN->getParent()) && "The LCSSA PHI is not in an exit block!" ) ? static_cast<void> (0) : __assert_fail ("ExitBlockSet.count(PN->getParent()) && \"The LCSSA PHI is not in an exit block!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1687, __PRETTY_FUNCTION__)); | ||||||
1688 | if (!worthSinkOrHoistInst(I, PN->getParent(), ORE, BFI)) { | ||||||
1689 | return Changed; | ||||||
1690 | } | ||||||
1691 | |||||||
1692 | ExitPNs.push_back(PN); | ||||||
1693 | } | ||||||
1694 | |||||||
1695 | for (auto *PN : ExitPNs) { | ||||||
1696 | |||||||
1697 | // The PHI must be trivially replaceable. | ||||||
1698 | Instruction *New = sinkThroughTriviallyReplaceablePHI( | ||||||
1699 | PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU); | ||||||
1700 | PN->replaceAllUsesWith(New); | ||||||
1701 | eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr); | ||||||
1702 | Changed = true; | ||||||
1703 | } | ||||||
1704 | return Changed; | ||||||
1705 | } | ||||||
1706 | |||||||
1707 | /// When an instruction is found to only use loop invariant operands that | ||||||
1708 | /// is safe to hoist, this instruction is called to do the dirty work. | ||||||
1709 | /// | ||||||
1710 | static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, | ||||||
1711 | BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, | ||||||
1712 | MemorySSAUpdater *MSSAU, ScalarEvolution *SE, | ||||||
1713 | OptimizationRemarkEmitter *ORE) { | ||||||
1714 | LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << Ido { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM hoisting to " << Dest ->getName() << ": " << I << "\n"; } } while (false) | ||||||
1715 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM hoisting to " << Dest ->getName() << ": " << I << "\n"; } } while (false); | ||||||
1716 | ORE->emit([&]() { | ||||||
1717 | return OptimizationRemark(DEBUG_TYPE"licm", "Hoisted", &I) << "hoisting " | ||||||
1718 | << ore::NV("Inst", &I); | ||||||
1719 | }); | ||||||
1720 | |||||||
1721 | // Metadata can be dependent on conditions we are hoisting above. | ||||||
1722 | // Conservatively strip all metadata on the instruction unless we were | ||||||
1723 | // guaranteed to execute I if we entered the loop, in which case the metadata | ||||||
1724 | // is valid in the loop preheader. | ||||||
1725 | if (I.hasMetadataOtherThanDebugLoc() && | ||||||
1726 | // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning | ||||||
1727 | // time in isGuaranteedToExecute if we don't actually have anything to | ||||||
1728 | // drop. It is a compile time optimization, not required for correctness. | ||||||
1729 | !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop)) | ||||||
1730 | I.dropUnknownNonDebugMetadata(); | ||||||
1731 | |||||||
1732 | if (isa<PHINode>(I)) | ||||||
1733 | // Move the new node to the end of the phi list in the destination block. | ||||||
1734 | moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE); | ||||||
1735 | else | ||||||
1736 | // Move the new node to the destination block, before its terminator. | ||||||
1737 | moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE); | ||||||
1738 | |||||||
1739 | I.updateLocationAfterHoist(); | ||||||
1740 | |||||||
1741 | if (isa<LoadInst>(I)) | ||||||
1742 | ++NumMovedLoads; | ||||||
1743 | else if (isa<CallInst>(I)) | ||||||
1744 | ++NumMovedCalls; | ||||||
1745 | ++NumHoisted; | ||||||
1746 | } | ||||||
1747 | |||||||
1748 | /// Only sink or hoist an instruction if it is not a trapping instruction, | ||||||
1749 | /// or if the instruction is known not to trap when moved to the preheader. | ||||||
1750 | /// or if it is a trapping instruction and is guaranteed to execute. | ||||||
1751 | static bool isSafeToExecuteUnconditionally(Instruction &Inst, | ||||||
1752 | const DominatorTree *DT, | ||||||
1753 | const Loop *CurLoop, | ||||||
1754 | const LoopSafetyInfo *SafetyInfo, | ||||||
1755 | OptimizationRemarkEmitter *ORE, | ||||||
1756 | const Instruction *CtxI) { | ||||||
1757 | if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT)) | ||||||
1758 | return true; | ||||||
1759 | |||||||
1760 | bool GuaranteedToExecute = | ||||||
1761 | SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop); | ||||||
1762 | |||||||
1763 | if (!GuaranteedToExecute) { | ||||||
1764 | auto *LI = dyn_cast<LoadInst>(&Inst); | ||||||
1765 | if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand())) | ||||||
1766 | ORE->emit([&]() { | ||||||
1767 | return OptimizationRemarkMissed( | ||||||
1768 | DEBUG_TYPE"licm", "LoadWithLoopInvariantAddressCondExecuted", LI) | ||||||
1769 | << "failed to hoist load with loop-invariant address " | ||||||
1770 | "because load is conditionally executed"; | ||||||
1771 | }); | ||||||
1772 | } | ||||||
1773 | |||||||
1774 | return GuaranteedToExecute; | ||||||
1775 | } | ||||||
1776 | |||||||
1777 | namespace { | ||||||
1778 | class LoopPromoter : public LoadAndStorePromoter { | ||||||
1779 | Value *SomePtr; // Designated pointer to store to. | ||||||
1780 | const SmallSetVector<Value *, 8> &PointerMustAliases; | ||||||
1781 | SmallVectorImpl<BasicBlock *> &LoopExitBlocks; | ||||||
1782 | SmallVectorImpl<Instruction *> &LoopInsertPts; | ||||||
1783 | SmallVectorImpl<MemoryAccess *> &MSSAInsertPts; | ||||||
1784 | PredIteratorCache &PredCache; | ||||||
1785 | AliasSetTracker *AST; | ||||||
1786 | MemorySSAUpdater *MSSAU; | ||||||
1787 | LoopInfo &LI; | ||||||
1788 | DebugLoc DL; | ||||||
1789 | int Alignment; | ||||||
1790 | bool UnorderedAtomic; | ||||||
1791 | AAMDNodes AATags; | ||||||
1792 | ICFLoopSafetyInfo &SafetyInfo; | ||||||
1793 | |||||||
1794 | Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const { | ||||||
1795 | if (Instruction *I = dyn_cast<Instruction>(V)) | ||||||
1796 | if (Loop *L = LI.getLoopFor(I->getParent())) | ||||||
1797 | if (!L->contains(BB)) { | ||||||
1798 | // We need to create an LCSSA PHI node for the incoming value and | ||||||
1799 | // store that. | ||||||
1800 | PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB), | ||||||
1801 | I->getName() + ".lcssa", &BB->front()); | ||||||
1802 | for (BasicBlock *Pred : PredCache.get(BB)) | ||||||
1803 | PN->addIncoming(I, Pred); | ||||||
1804 | return PN; | ||||||
1805 | } | ||||||
1806 | return V; | ||||||
1807 | } | ||||||
1808 | |||||||
1809 | public: | ||||||
1810 | LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S, | ||||||
1811 | const SmallSetVector<Value *, 8> &PMA, | ||||||
1812 | SmallVectorImpl<BasicBlock *> &LEB, | ||||||
1813 | SmallVectorImpl<Instruction *> &LIP, | ||||||
1814 | SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC, | ||||||
1815 | AliasSetTracker *ast, MemorySSAUpdater *MSSAU, LoopInfo &li, | ||||||
1816 | DebugLoc dl, int alignment, bool UnorderedAtomic, | ||||||
1817 | const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo) | ||||||
1818 | : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), | ||||||
1819 | LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), | ||||||
1820 | PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)), | ||||||
1821 | Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags), | ||||||
1822 | SafetyInfo(SafetyInfo) {} | ||||||
1823 | |||||||
1824 | bool isInstInList(Instruction *I, | ||||||
1825 | const SmallVectorImpl<Instruction *> &) const override { | ||||||
1826 | Value *Ptr; | ||||||
1827 | if (LoadInst *LI = dyn_cast<LoadInst>(I)) | ||||||
1828 | Ptr = LI->getOperand(0); | ||||||
1829 | else | ||||||
1830 | Ptr = cast<StoreInst>(I)->getPointerOperand(); | ||||||
1831 | return PointerMustAliases.count(Ptr); | ||||||
1832 | } | ||||||
1833 | |||||||
1834 | void doExtraRewritesBeforeFinalDeletion() override { | ||||||
1835 | // Insert stores after in the loop exit blocks. Each exit block gets a | ||||||
1836 | // store of the live-out values that feed them. Since we've already told | ||||||
1837 | // the SSA updater about the defs in the loop and the preheader | ||||||
1838 | // definition, it is all set and we can start using it. | ||||||
1839 | for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { | ||||||
1840 | BasicBlock *ExitBlock = LoopExitBlocks[i]; | ||||||
1841 | Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); | ||||||
1842 | LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock); | ||||||
1843 | Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock); | ||||||
1844 | Instruction *InsertPos = LoopInsertPts[i]; | ||||||
1845 | StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); | ||||||
1846 | if (UnorderedAtomic) | ||||||
1847 | NewSI->setOrdering(AtomicOrdering::Unordered); | ||||||
1848 | NewSI->setAlignment(Align(Alignment)); | ||||||
1849 | NewSI->setDebugLoc(DL); | ||||||
1850 | if (AATags) | ||||||
1851 | NewSI->setAAMetadata(AATags); | ||||||
1852 | |||||||
1853 | if (MSSAU) { | ||||||
1854 | MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i]; | ||||||
1855 | MemoryAccess *NewMemAcc; | ||||||
1856 | if (!MSSAInsertPoint) { | ||||||
1857 | NewMemAcc = MSSAU->createMemoryAccessInBB( | ||||||
1858 | NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning); | ||||||
1859 | } else { | ||||||
1860 | NewMemAcc = | ||||||
1861 | MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint); | ||||||
1862 | } | ||||||
1863 | MSSAInsertPts[i] = NewMemAcc; | ||||||
1864 | MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true); | ||||||
1865 | // FIXME: true for safety, false may still be correct. | ||||||
1866 | } | ||||||
1867 | } | ||||||
1868 | } | ||||||
1869 | |||||||
1870 | void replaceLoadWithValue(LoadInst *LI, Value *V) const override { | ||||||
1871 | // Update alias analysis. | ||||||
1872 | if (AST) | ||||||
1873 | AST->copyValue(LI, V); | ||||||
1874 | } | ||||||
1875 | void instructionDeleted(Instruction *I) const override { | ||||||
1876 | SafetyInfo.removeInstruction(I); | ||||||
1877 | if (AST) | ||||||
1878 | AST->deleteValue(I); | ||||||
1879 | if (MSSAU) | ||||||
1880 | MSSAU->removeMemoryAccess(I); | ||||||
1881 | } | ||||||
1882 | }; | ||||||
1883 | |||||||
1884 | |||||||
1885 | /// Return true iff we can prove that a caller of this function can not inspect | ||||||
1886 | /// the contents of the provided object in a well defined program. | ||||||
1887 | bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) { | ||||||
1888 | if (isa<AllocaInst>(Object)) | ||||||
1889 | // Since the alloca goes out of scope, we know the caller can't retain a | ||||||
1890 | // reference to it and be well defined. Thus, we don't need to check for | ||||||
1891 | // capture. | ||||||
1892 | return true; | ||||||
1893 | |||||||
1894 | // For all other objects we need to know that the caller can't possibly | ||||||
1895 | // have gotten a reference to the object. There are two components of | ||||||
1896 | // that: | ||||||
1897 | // 1) Object can't be escaped by this function. This is what | ||||||
1898 | // PointerMayBeCaptured checks. | ||||||
1899 | // 2) Object can't have been captured at definition site. For this, we | ||||||
1900 | // need to know the return value is noalias. At the moment, we use a | ||||||
1901 | // weaker condition and handle only AllocLikeFunctions (which are | ||||||
1902 | // known to be noalias). TODO | ||||||
1903 | return isAllocLikeFn(Object, TLI) && | ||||||
1904 | !PointerMayBeCaptured(Object, true, true); | ||||||
1905 | } | ||||||
1906 | |||||||
1907 | } // namespace | ||||||
1908 | |||||||
1909 | /// Try to promote memory values to scalars by sinking stores out of the | ||||||
1910 | /// loop and moving loads to before the loop. We do this by looping over | ||||||
1911 | /// the stores in the loop, looking for stores to Must pointers which are | ||||||
1912 | /// loop invariant. | ||||||
1913 | /// | ||||||
1914 | bool llvm::promoteLoopAccessesToScalars( | ||||||
1915 | const SmallSetVector<Value *, 8> &PointerMustAliases, | ||||||
1916 | SmallVectorImpl<BasicBlock *> &ExitBlocks, | ||||||
1917 | SmallVectorImpl<Instruction *> &InsertPts, | ||||||
1918 | SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC, | ||||||
1919 | LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, | ||||||
1920 | Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, | ||||||
1921 | ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { | ||||||
1922 | // Verify inputs. | ||||||
1923 | assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&((LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected Input to promoteLoopAccessesToScalars" ) ? static_cast<void> (0) : __assert_fail ("LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected Input to promoteLoopAccessesToScalars\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1925, __PRETTY_FUNCTION__)) | ||||||
1924 | SafetyInfo != nullptr &&((LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected Input to promoteLoopAccessesToScalars" ) ? static_cast<void> (0) : __assert_fail ("LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected Input to promoteLoopAccessesToScalars\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1925, __PRETTY_FUNCTION__)) | ||||||
1925 | "Unexpected Input to promoteLoopAccessesToScalars")((LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected Input to promoteLoopAccessesToScalars" ) ? static_cast<void> (0) : __assert_fail ("LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected Input to promoteLoopAccessesToScalars\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1925, __PRETTY_FUNCTION__)); | ||||||
1926 | |||||||
1927 | Value *SomePtr = *PointerMustAliases.begin(); | ||||||
1928 | BasicBlock *Preheader = CurLoop->getLoopPreheader(); | ||||||
1929 | |||||||
1930 | // It is not safe to promote a load/store from the loop if the load/store is | ||||||
1931 | // conditional. For example, turning: | ||||||
1932 | // | ||||||
1933 | // for () { if (c) *P += 1; } | ||||||
1934 | // | ||||||
1935 | // into: | ||||||
1936 | // | ||||||
1937 | // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; | ||||||
1938 | // | ||||||
1939 | // is not safe, because *P may only be valid to access if 'c' is true. | ||||||
1940 | // | ||||||
1941 | // The safety property divides into two parts: | ||||||
1942 | // p1) The memory may not be dereferenceable on entry to the loop. In this | ||||||
1943 | // case, we can't insert the required load in the preheader. | ||||||
1944 | // p2) The memory model does not allow us to insert a store along any dynamic | ||||||
1945 | // path which did not originally have one. | ||||||
1946 | // | ||||||
1947 | // If at least one store is guaranteed to execute, both properties are | ||||||
1948 | // satisfied, and promotion is legal. | ||||||
1949 | // | ||||||
1950 | // This, however, is not a necessary condition. Even if no store/load is | ||||||
1951 | // guaranteed to execute, we can still establish these properties. | ||||||
1952 | // We can establish (p1) by proving that hoisting the load into the preheader | ||||||
1953 | // is safe (i.e. proving dereferenceability on all paths through the loop). We | ||||||
1954 | // can use any access within the alias set to prove dereferenceability, | ||||||
1955 | // since they're all must alias. | ||||||
1956 | // | ||||||
1957 | // There are two ways establish (p2): | ||||||
1958 | // a) Prove the location is thread-local. In this case the memory model | ||||||
1959 | // requirement does not apply, and stores are safe to insert. | ||||||
1960 | // b) Prove a store dominates every exit block. In this case, if an exit | ||||||
1961 | // blocks is reached, the original dynamic path would have taken us through | ||||||
1962 | // the store, so inserting a store into the exit block is safe. Note that this | ||||||
1963 | // is different from the store being guaranteed to execute. For instance, | ||||||
1964 | // if an exception is thrown on the first iteration of the loop, the original | ||||||
1965 | // store is never executed, but the exit blocks are not executed either. | ||||||
1966 | |||||||
1967 | bool DereferenceableInPH = false; | ||||||
1968 | bool SafeToInsertStore = false; | ||||||
1969 | |||||||
1970 | SmallVector<Instruction *, 64> LoopUses; | ||||||
1971 | |||||||
1972 | // We start with an alignment of one and try to find instructions that allow | ||||||
1973 | // us to prove better alignment. | ||||||
1974 | Align Alignment; | ||||||
1975 | // Keep track of which types of access we see | ||||||
1976 | bool SawUnorderedAtomic = false; | ||||||
1977 | bool SawNotAtomic = false; | ||||||
1978 | AAMDNodes AATags; | ||||||
1979 | |||||||
1980 | const DataLayout &MDL = Preheader->getModule()->getDataLayout(); | ||||||
1981 | |||||||
1982 | bool IsKnownThreadLocalObject = false; | ||||||
1983 | if (SafetyInfo->anyBlockMayThrow()) { | ||||||
1984 | // If a loop can throw, we have to insert a store along each unwind edge. | ||||||
1985 | // That said, we can't actually make the unwind edge explicit. Therefore, | ||||||
1986 | // we have to prove that the store is dead along the unwind edge. We do | ||||||
1987 | // this by proving that the caller can't have a reference to the object | ||||||
1988 | // after return and thus can't possibly load from the object. | ||||||
1989 | Value *Object = getUnderlyingObject(SomePtr); | ||||||
1990 | if (!isKnownNonEscaping(Object, TLI)) | ||||||
1991 | return false; | ||||||
1992 | // Subtlety: Alloca's aren't visible to callers, but *are* potentially | ||||||
1993 | // visible to other threads if captured and used during their lifetimes. | ||||||
1994 | IsKnownThreadLocalObject = !isa<AllocaInst>(Object); | ||||||
1995 | } | ||||||
1996 | |||||||
1997 | // Check that all of the pointers in the alias set have the same type. We | ||||||
1998 | // cannot (yet) promote a memory location that is loaded and stored in | ||||||
1999 | // different sizes. While we are at it, collect alignment and AA info. | ||||||
2000 | for (Value *ASIV : PointerMustAliases) { | ||||||
2001 | // Check that all of the pointers in the alias set have the same type. We | ||||||
2002 | // cannot (yet) promote a memory location that is loaded and stored in | ||||||
2003 | // different sizes. | ||||||
2004 | if (SomePtr->getType() != ASIV->getType()) | ||||||
2005 | return false; | ||||||
2006 | |||||||
2007 | for (User *U : ASIV->users()) { | ||||||
2008 | // Ignore instructions that are outside the loop. | ||||||
2009 | Instruction *UI = dyn_cast<Instruction>(U); | ||||||
2010 | if (!UI || !CurLoop->contains(UI)) | ||||||
2011 | continue; | ||||||
2012 | |||||||
2013 | // If there is an non-load/store instruction in the loop, we can't promote | ||||||
2014 | // it. | ||||||
2015 | if (LoadInst *Load = dyn_cast<LoadInst>(UI)) { | ||||||
2016 | if (!Load->isUnordered()) | ||||||
2017 | return false; | ||||||
2018 | |||||||
2019 | SawUnorderedAtomic |= Load->isAtomic(); | ||||||
2020 | SawNotAtomic |= !Load->isAtomic(); | ||||||
2021 | |||||||
2022 | Align InstAlignment = Load->getAlign(); | ||||||
2023 | |||||||
2024 | // Note that proving a load safe to speculate requires proving | ||||||
2025 | // sufficient alignment at the target location. Proving it guaranteed | ||||||
2026 | // to execute does as well. Thus we can increase our guaranteed | ||||||
2027 | // alignment as well. | ||||||
2028 | if (!DereferenceableInPH || (InstAlignment > Alignment)) | ||||||
2029 | if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo, | ||||||
2030 | ORE, Preheader->getTerminator())) { | ||||||
2031 | DereferenceableInPH = true; | ||||||
2032 | Alignment = std::max(Alignment, InstAlignment); | ||||||
2033 | } | ||||||
2034 | } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) { | ||||||
2035 | // Stores *of* the pointer are not interesting, only stores *to* the | ||||||
2036 | // pointer. | ||||||
2037 | if (UI->getOperand(1) != ASIV) | ||||||
2038 | continue; | ||||||
2039 | if (!Store->isUnordered()) | ||||||
2040 | return false; | ||||||
2041 | |||||||
2042 | SawUnorderedAtomic |= Store->isAtomic(); | ||||||
2043 | SawNotAtomic |= !Store->isAtomic(); | ||||||
2044 | |||||||
2045 | // If the store is guaranteed to execute, both properties are satisfied. | ||||||
2046 | // We may want to check if a store is guaranteed to execute even if we | ||||||
2047 | // already know that promotion is safe, since it may have higher | ||||||
2048 | // alignment than any other guaranteed stores, in which case we can | ||||||
2049 | // raise the alignment on the promoted store. | ||||||
2050 | Align InstAlignment = Store->getAlign(); | ||||||
2051 | |||||||
2052 | if (!DereferenceableInPH || !SafeToInsertStore || | ||||||
2053 | (InstAlignment > Alignment)) { | ||||||
2054 | if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) { | ||||||
2055 | DereferenceableInPH = true; | ||||||
2056 | SafeToInsertStore = true; | ||||||
2057 | Alignment = std::max(Alignment, InstAlignment); | ||||||
2058 | } | ||||||
2059 | } | ||||||
2060 | |||||||
2061 | // If a store dominates all exit blocks, it is safe to sink. | ||||||
2062 | // As explained above, if an exit block was executed, a dominating | ||||||
2063 | // store must have been executed at least once, so we are not | ||||||
2064 | // introducing stores on paths that did not have them. | ||||||
2065 | // Note that this only looks at explicit exit blocks. If we ever | ||||||
2066 | // start sinking stores into unwind edges (see above), this will break. | ||||||
2067 | if (!SafeToInsertStore) | ||||||
2068 | SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { | ||||||
2069 | return DT->dominates(Store->getParent(), Exit); | ||||||
2070 | }); | ||||||
2071 | |||||||
2072 | // If the store is not guaranteed to execute, we may still get | ||||||
2073 | // deref info through it. | ||||||
2074 | if (!DereferenceableInPH) { | ||||||
2075 | DereferenceableInPH = isDereferenceableAndAlignedPointer( | ||||||
2076 | Store->getPointerOperand(), Store->getValueOperand()->getType(), | ||||||
2077 | Store->getAlign(), MDL, Preheader->getTerminator(), DT); | ||||||
2078 | } | ||||||
2079 | } else | ||||||
2080 | return false; // Not a load or store. | ||||||
2081 | |||||||
2082 | // Merge the AA tags. | ||||||
2083 | if (LoopUses.empty()) { | ||||||
2084 | // On the first load/store, just take its AA tags. | ||||||
2085 | UI->getAAMetadata(AATags); | ||||||
2086 | } else if (AATags) { | ||||||
2087 | UI->getAAMetadata(AATags, /* Merge = */ true); | ||||||
2088 | } | ||||||
2089 | |||||||
2090 | LoopUses.push_back(UI); | ||||||
2091 | } | ||||||
2092 | } | ||||||
2093 | |||||||
2094 | // If we found both an unordered atomic instruction and a non-atomic memory | ||||||
2095 | // access, bail. We can't blindly promote non-atomic to atomic since we | ||||||
2096 | // might not be able to lower the result. We can't downgrade since that | ||||||
2097 | // would violate memory model. Also, align 0 is an error for atomics. | ||||||
2098 | if (SawUnorderedAtomic && SawNotAtomic) | ||||||
2099 | return false; | ||||||
2100 | |||||||
2101 | // If we're inserting an atomic load in the preheader, we must be able to | ||||||
2102 | // lower it. We're only guaranteed to be able to lower naturally aligned | ||||||
2103 | // atomics. | ||||||
2104 | auto *SomePtrElemType = SomePtr->getType()->getPointerElementType(); | ||||||
2105 | if (SawUnorderedAtomic && | ||||||
2106 | Alignment < MDL.getTypeStoreSize(SomePtrElemType)) | ||||||
2107 | return false; | ||||||
2108 | |||||||
2109 | // If we couldn't prove we can hoist the load, bail. | ||||||
2110 | if (!DereferenceableInPH) | ||||||
2111 | return false; | ||||||
2112 | |||||||
2113 | // We know we can hoist the load, but don't have a guaranteed store. | ||||||
2114 | // Check whether the location is thread-local. If it is, then we can insert | ||||||
2115 | // stores along paths which originally didn't have them without violating the | ||||||
2116 | // memory model. | ||||||
2117 | if (!SafeToInsertStore) { | ||||||
2118 | if (IsKnownThreadLocalObject) | ||||||
2119 | SafeToInsertStore = true; | ||||||
2120 | else { | ||||||
2121 | Value *Object = getUnderlyingObject(SomePtr); | ||||||
2122 | SafeToInsertStore = | ||||||
2123 | (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) && | ||||||
2124 | !PointerMayBeCaptured(Object, true, true); | ||||||
2125 | } | ||||||
2126 | } | ||||||
2127 | |||||||
2128 | // If we've still failed to prove we can sink the store, give up. | ||||||
2129 | if (!SafeToInsertStore) | ||||||
2130 | return false; | ||||||
2131 | |||||||
2132 | // Otherwise, this is safe to promote, lets do it! | ||||||
2133 | LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtrdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr << '\n'; } } while (false) | ||||||
2134 | << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr << '\n'; } } while (false); | ||||||
2135 | ORE->emit([&]() { | ||||||
2136 | return OptimizationRemark(DEBUG_TYPE"licm", "PromoteLoopAccessesToScalar", | ||||||
2137 | LoopUses[0]) | ||||||
2138 | << "Moving accesses to memory location out of the loop"; | ||||||
2139 | }); | ||||||
2140 | ++NumPromoted; | ||||||
2141 | |||||||
2142 | // Look at all the loop uses, and try to merge their locations. | ||||||
2143 | std::vector<const DILocation *> LoopUsesLocs; | ||||||
2144 | for (auto U : LoopUses) | ||||||
2145 | LoopUsesLocs.push_back(U->getDebugLoc().get()); | ||||||
2146 | auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs)); | ||||||
2147 | |||||||
2148 | // We use the SSAUpdater interface to insert phi nodes as required. | ||||||
2149 | SmallVector<PHINode *, 16> NewPHIs; | ||||||
2150 | SSAUpdater SSA(&NewPHIs); | ||||||
2151 | LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, | ||||||
2152 | InsertPts, MSSAInsertPts, PIC, CurAST, MSSAU, *LI, DL, | ||||||
2153 | Alignment.value(), SawUnorderedAtomic, AATags, | ||||||
2154 | *SafetyInfo); | ||||||
2155 | |||||||
2156 | // Set up the preheader to have a definition of the value. It is the live-out | ||||||
2157 | // value from the preheader that uses in the loop will use. | ||||||
2158 | LoadInst *PreheaderLoad = new LoadInst( | ||||||
2159 | SomePtr->getType()->getPointerElementType(), SomePtr, | ||||||
2160 | SomePtr->getName() + ".promoted", Preheader->getTerminator()); | ||||||
2161 | if (SawUnorderedAtomic) | ||||||
2162 | PreheaderLoad->setOrdering(AtomicOrdering::Unordered); | ||||||
2163 | PreheaderLoad->setAlignment(Alignment); | ||||||
2164 | PreheaderLoad->setDebugLoc(DebugLoc()); | ||||||
2165 | if (AATags) | ||||||
2166 | PreheaderLoad->setAAMetadata(AATags); | ||||||
2167 | SSA.AddAvailableValue(Preheader, PreheaderLoad); | ||||||
2168 | |||||||
2169 | if (MSSAU) { | ||||||
2170 | MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB( | ||||||
2171 | PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); | ||||||
2172 | MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess); | ||||||
2173 | MSSAU->insertUse(NewMemUse, /*RenameUses=*/true); | ||||||
2174 | } | ||||||
2175 | |||||||
2176 | if (MSSAU && VerifyMemorySSA) | ||||||
2177 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
2178 | // Rewrite all the loads in the loop and remember all the definitions from | ||||||
2179 | // stores in the loop. | ||||||
2180 | Promoter.run(LoopUses); | ||||||
2181 | |||||||
2182 | if (MSSAU && VerifyMemorySSA) | ||||||
2183 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
2184 | // If the SSAUpdater didn't use the load in the preheader, just zap it now. | ||||||
2185 | if (PreheaderLoad->use_empty()) | ||||||
2186 | eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU); | ||||||
2187 | |||||||
2188 | return true; | ||||||
2189 | } | ||||||
2190 | |||||||
2191 | /// Returns an owning pointer to an alias set which incorporates aliasing info | ||||||
2192 | /// from L and all subloops of L. | ||||||
2193 | std::unique_ptr<AliasSetTracker> | ||||||
2194 | LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI, | ||||||
2195 | AAResults *AA) { | ||||||
2196 | auto CurAST = std::make_unique<AliasSetTracker>(*AA); | ||||||
2197 | |||||||
2198 | // Add everything from all the sub loops. | ||||||
2199 | for (Loop *InnerL : L->getSubLoops()) | ||||||
2200 | for (BasicBlock *BB : InnerL->blocks()) | ||||||
2201 | CurAST->add(*BB); | ||||||
2202 | |||||||
2203 | // And merge in this loop (without anything from inner loops). | ||||||
2204 | for (BasicBlock *BB : L->blocks()) | ||||||
2205 | if (LI->getLoopFor(BB) == L) | ||||||
2206 | CurAST->add(*BB); | ||||||
2207 | |||||||
2208 | return CurAST; | ||||||
2209 | } | ||||||
2210 | |||||||
2211 | std::unique_ptr<AliasSetTracker> | ||||||
2212 | LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA( | ||||||
2213 | Loop *L, AAResults *AA, MemorySSAUpdater *MSSAU) { | ||||||
2214 | auto *MSSA = MSSAU->getMemorySSA(); | ||||||
2215 | auto CurAST = std::make_unique<AliasSetTracker>(*AA, MSSA, L); | ||||||
2216 | CurAST->addAllInstructionsInLoopUsingMSSA(); | ||||||
2217 | return CurAST; | ||||||
2218 | } | ||||||
2219 | |||||||
2220 | static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, | ||||||
2221 | AliasSetTracker *CurAST, Loop *CurLoop, | ||||||
2222 | AAResults *AA) { | ||||||
2223 | // First check to see if any of the basic blocks in CurLoop invalidate *V. | ||||||
2224 | bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod(); | ||||||
2225 | |||||||
2226 | if (!isInvalidatedAccordingToAST || !LICMN2Theshold) | ||||||
2227 | return isInvalidatedAccordingToAST; | ||||||
2228 | |||||||
2229 | // Check with a diagnostic analysis if we can refine the information above. | ||||||
2230 | // This is to identify the limitations of using the AST. | ||||||
2231 | // The alias set mechanism used by LICM has a major weakness in that it | ||||||
2232 | // combines all things which may alias into a single set *before* asking | ||||||
2233 | // modref questions. As a result, a single readonly call within a loop will | ||||||
2234 | // collapse all loads and stores into a single alias set and report | ||||||
2235 | // invalidation if the loop contains any store. For example, readonly calls | ||||||
2236 | // with deopt states have this form and create a general alias set with all | ||||||
2237 | // loads and stores. In order to get any LICM in loops containing possible | ||||||
2238 | // deopt states we need a more precise invalidation of checking the mod ref | ||||||
2239 | // info of each instruction within the loop and LI. This has a complexity of | ||||||
2240 | // O(N^2), so currently, it is used only as a diagnostic tool since the | ||||||
2241 | // default value of LICMN2Threshold is zero. | ||||||
2242 | |||||||
2243 | // Don't look at nested loops. | ||||||
2244 | if (CurLoop->begin() != CurLoop->end()) | ||||||
2245 | return true; | ||||||
2246 | |||||||
2247 | int N = 0; | ||||||
2248 | for (BasicBlock *BB : CurLoop->getBlocks()) | ||||||
2249 | for (Instruction &I : *BB) { | ||||||
2250 | if (N >= LICMN2Theshold) { | ||||||
2251 | LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "Alasing N2 threshold exhausted for " << *(MemLoc.Ptr) << "\n"; } } while (false) | ||||||
2252 | << *(MemLoc.Ptr) << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "Alasing N2 threshold exhausted for " << *(MemLoc.Ptr) << "\n"; } } while (false); | ||||||
2253 | return true; | ||||||
2254 | } | ||||||
2255 | N++; | ||||||
2256 | auto Res = AA->getModRefInfo(&I, MemLoc); | ||||||
2257 | if (isModSet(Res)) { | ||||||
2258 | LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "Aliasing failed on " << I << " for " << *(MemLoc.Ptr) << "\n"; } } while (false ) | ||||||
2259 | << *(MemLoc.Ptr) << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "Aliasing failed on " << I << " for " << *(MemLoc.Ptr) << "\n"; } } while (false ); | ||||||
2260 | return true; | ||||||
2261 | } | ||||||
2262 | } | ||||||
2263 | LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "Aliasing okay for " << *(MemLoc .Ptr) << "\n"; } } while (false); | ||||||
2264 | return false; | ||||||
2265 | } | ||||||
2266 | |||||||
2267 | static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, | ||||||
2268 | Loop *CurLoop, | ||||||
2269 | SinkAndHoistLICMFlags &Flags) { | ||||||
2270 | // For hoisting, use the walker to determine safety | ||||||
2271 | if (!Flags.IsSink) { | ||||||
2272 | MemoryAccess *Source; | ||||||
2273 | // See declaration of SetLicmMssaOptCap for usage details. | ||||||
2274 | if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap) | ||||||
2275 | Source = MU->getDefiningAccess(); | ||||||
2276 | else { | ||||||
2277 | Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU); | ||||||
2278 | Flags.LicmMssaOptCounter++; | ||||||
2279 | } | ||||||
2280 | return !MSSA->isLiveOnEntryDef(Source) && | ||||||
2281 | CurLoop->contains(Source->getBlock()); | ||||||
2282 | } | ||||||
2283 | |||||||
2284 | // For sinking, we'd need to check all Defs below this use. The getClobbering | ||||||
2285 | // call will look on the backedge of the loop, but will check aliasing with | ||||||
2286 | // the instructions on the previous iteration. | ||||||
2287 | // For example: | ||||||
2288 | // for (i ... ) | ||||||
2289 | // load a[i] ( Use (LoE) | ||||||
2290 | // store a[i] ( 1 = Def (2), with 2 = Phi for the loop. | ||||||
2291 | // i++; | ||||||
2292 | // The load sees no clobbering inside the loop, as the backedge alias check | ||||||
2293 | // does phi translation, and will check aliasing against store a[i-1]. | ||||||
2294 | // However sinking the load outside the loop, below the store is incorrect. | ||||||
2295 | |||||||
2296 | // For now, only sink if there are no Defs in the loop, and the existing ones | ||||||
2297 | // precede the use and are in the same block. | ||||||
2298 | // FIXME: Increase precision: Safe to sink if Use post dominates the Def; | ||||||
2299 | // needs PostDominatorTreeAnalysis. | ||||||
2300 | // FIXME: More precise: no Defs that alias this Use. | ||||||
2301 | if (Flags.NoOfMemAccTooLarge) | ||||||
2302 | return true; | ||||||
2303 | for (auto *BB : CurLoop->getBlocks()) | ||||||
2304 | if (auto *Accesses = MSSA->getBlockDefs(BB)) | ||||||
2305 | for (const auto &MA : *Accesses) | ||||||
2306 | if (const auto *MD = dyn_cast<MemoryDef>(&MA)) | ||||||
2307 | if (MU->getBlock() != MD->getBlock() || | ||||||
2308 | !MSSA->locallyDominates(MD, MU)) | ||||||
2309 | return true; | ||||||
2310 | return false; | ||||||
2311 | } | ||||||
2312 | |||||||
2313 | /// Little predicate that returns true if the specified basic block is in | ||||||
2314 | /// a subloop of the current one, not the current one itself. | ||||||
2315 | /// | ||||||
2316 | static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) { | ||||||
2317 | assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop")((CurLoop->contains(BB) && "Only valid if BB is IN the loop" ) ? static_cast<void> (0) : __assert_fail ("CurLoop->contains(BB) && \"Only valid if BB is IN the loop\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 2317, __PRETTY_FUNCTION__)); | ||||||
2318 | return LI->getLoopFor(BB) != CurLoop; | ||||||
2319 | } |
1 | //===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===// | ||||||||||||
2 | // | ||||||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||||||
6 | // | ||||||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||||||
8 | // | ||||||||||||
9 | // This file provides a simple and efficient mechanism for performing general | ||||||||||||
10 | // tree-based pattern matches on the LLVM IR. The power of these routines is | ||||||||||||
11 | // that it allows you to write concise patterns that are expressive and easy to | ||||||||||||
12 | // understand. The other major advantage of this is that it allows you to | ||||||||||||
13 | // trivially capture/bind elements in the pattern to variables. For example, | ||||||||||||
14 | // you can do something like this: | ||||||||||||
15 | // | ||||||||||||
16 | // Value *Exp = ... | ||||||||||||
17 | // Value *X, *Y; ConstantInt *C1, *C2; // (X & C1) | (Y & C2) | ||||||||||||
18 | // if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)), | ||||||||||||
19 | // m_And(m_Value(Y), m_ConstantInt(C2))))) { | ||||||||||||
20 | // ... Pattern is matched and variables are bound ... | ||||||||||||
21 | // } | ||||||||||||
22 | // | ||||||||||||
23 | // This is primarily useful to things like the instruction combiner, but can | ||||||||||||
24 | // also be useful for static analysis tools or code generators. | ||||||||||||
25 | // | ||||||||||||
26 | //===----------------------------------------------------------------------===// | ||||||||||||
27 | |||||||||||||
28 | #ifndef LLVM_IR_PATTERNMATCH_H | ||||||||||||
29 | #define LLVM_IR_PATTERNMATCH_H | ||||||||||||
30 | |||||||||||||
31 | #include "llvm/ADT/APFloat.h" | ||||||||||||
32 | #include "llvm/ADT/APInt.h" | ||||||||||||
33 | #include "llvm/IR/Constant.h" | ||||||||||||
34 | #include "llvm/IR/Constants.h" | ||||||||||||
35 | #include "llvm/IR/DataLayout.h" | ||||||||||||
36 | #include "llvm/IR/InstrTypes.h" | ||||||||||||
37 | #include "llvm/IR/Instruction.h" | ||||||||||||
38 | #include "llvm/IR/Instructions.h" | ||||||||||||
39 | #include "llvm/IR/IntrinsicInst.h" | ||||||||||||
40 | #include "llvm/IR/Intrinsics.h" | ||||||||||||
41 | #include "llvm/IR/Operator.h" | ||||||||||||
42 | #include "llvm/IR/Value.h" | ||||||||||||
43 | #include "llvm/Support/Casting.h" | ||||||||||||
44 | #include <cstdint> | ||||||||||||
45 | |||||||||||||
46 | namespace llvm { | ||||||||||||
47 | namespace PatternMatch { | ||||||||||||
48 | |||||||||||||
49 | template <typename Val, typename Pattern> bool match(Val *V, const Pattern &P) { | ||||||||||||
50 | return const_cast<Pattern &>(P).match(V); | ||||||||||||
51 | } | ||||||||||||
52 | |||||||||||||
53 | template <typename Pattern> bool match(ArrayRef<int> Mask, const Pattern &P) { | ||||||||||||
54 | return const_cast<Pattern &>(P).match(Mask); | ||||||||||||
55 | } | ||||||||||||
56 | |||||||||||||
57 | template <typename SubPattern_t> struct OneUse_match { | ||||||||||||
58 | SubPattern_t SubPattern; | ||||||||||||
59 | |||||||||||||
60 | OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {} | ||||||||||||
61 | |||||||||||||
62 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
63 | return V->hasOneUse() && SubPattern.match(V); | ||||||||||||
64 | } | ||||||||||||
65 | }; | ||||||||||||
66 | |||||||||||||
67 | template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) { | ||||||||||||
68 | return SubPattern; | ||||||||||||
69 | } | ||||||||||||
70 | |||||||||||||
71 | template <typename Class> struct class_match { | ||||||||||||
72 | template <typename ITy> bool match(ITy *V) { return isa<Class>(V); } | ||||||||||||
73 | }; | ||||||||||||
74 | |||||||||||||
75 | /// Match an arbitrary value and ignore it. | ||||||||||||
76 | inline class_match<Value> m_Value() { return class_match<Value>(); } | ||||||||||||
77 | |||||||||||||
78 | /// Match an arbitrary unary operation and ignore it. | ||||||||||||
79 | inline class_match<UnaryOperator> m_UnOp() { | ||||||||||||
80 | return class_match<UnaryOperator>(); | ||||||||||||
81 | } | ||||||||||||
82 | |||||||||||||
83 | /// Match an arbitrary binary operation and ignore it. | ||||||||||||
84 | inline class_match<BinaryOperator> m_BinOp() { | ||||||||||||
85 | return class_match<BinaryOperator>(); | ||||||||||||
86 | } | ||||||||||||
87 | |||||||||||||
88 | /// Matches any compare instruction and ignore it. | ||||||||||||
89 | inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); } | ||||||||||||
90 | |||||||||||||
91 | /// Match an arbitrary ConstantInt and ignore it. | ||||||||||||
92 | inline class_match<ConstantInt> m_ConstantInt() { | ||||||||||||
93 | return class_match<ConstantInt>(); | ||||||||||||
94 | } | ||||||||||||
95 | |||||||||||||
96 | /// Match an arbitrary undef constant. | ||||||||||||
97 | inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); } | ||||||||||||
98 | |||||||||||||
99 | /// Match an arbitrary Constant and ignore it. | ||||||||||||
100 | inline class_match<Constant> m_Constant() { return class_match<Constant>(); } | ||||||||||||
101 | |||||||||||||
102 | /// Match an arbitrary basic block value and ignore it. | ||||||||||||
103 | inline class_match<BasicBlock> m_BasicBlock() { | ||||||||||||
104 | return class_match<BasicBlock>(); | ||||||||||||
105 | } | ||||||||||||
106 | |||||||||||||
107 | /// Inverting matcher | ||||||||||||
108 | template <typename Ty> struct match_unless { | ||||||||||||
109 | Ty M; | ||||||||||||
110 | |||||||||||||
111 | match_unless(const Ty &Matcher) : M(Matcher) {} | ||||||||||||
112 | |||||||||||||
113 | template <typename ITy> bool match(ITy *V) { return !M.match(V); } | ||||||||||||
114 | }; | ||||||||||||
115 | |||||||||||||
116 | /// Match if the inner matcher does *NOT* match. | ||||||||||||
117 | template <typename Ty> inline match_unless<Ty> m_Unless(const Ty &M) { | ||||||||||||
118 | return match_unless<Ty>(M); | ||||||||||||
119 | } | ||||||||||||
120 | |||||||||||||
121 | /// Matching combinators | ||||||||||||
122 | template <typename LTy, typename RTy> struct match_combine_or { | ||||||||||||
123 | LTy L; | ||||||||||||
124 | RTy R; | ||||||||||||
125 | |||||||||||||
126 | match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) {} | ||||||||||||
127 | |||||||||||||
128 | template <typename ITy> bool match(ITy *V) { | ||||||||||||
129 | if (L.match(V)) | ||||||||||||
130 | return true; | ||||||||||||
131 | if (R.match(V)) | ||||||||||||
132 | return true; | ||||||||||||
133 | return false; | ||||||||||||
134 | } | ||||||||||||
135 | }; | ||||||||||||
136 | |||||||||||||
137 | template <typename LTy, typename RTy> struct match_combine_and { | ||||||||||||
138 | LTy L; | ||||||||||||
139 | RTy R; | ||||||||||||
140 | |||||||||||||
141 | match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) {} | ||||||||||||
142 | |||||||||||||
143 | template <typename ITy> bool match(ITy *V) { | ||||||||||||
144 | if (L.match(V)) | ||||||||||||
145 | if (R.match(V)) | ||||||||||||
146 | return true; | ||||||||||||
147 | return false; | ||||||||||||
148 | } | ||||||||||||
149 | }; | ||||||||||||
150 | |||||||||||||
151 | /// Combine two pattern matchers matching L || R | ||||||||||||
152 | template <typename LTy, typename RTy> | ||||||||||||
153 | inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) { | ||||||||||||
154 | return match_combine_or<LTy, RTy>(L, R); | ||||||||||||
155 | } | ||||||||||||
156 | |||||||||||||
157 | /// Combine two pattern matchers matching L && R | ||||||||||||
158 | template <typename LTy, typename RTy> | ||||||||||||
159 | inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) { | ||||||||||||
160 | return match_combine_and<LTy, RTy>(L, R); | ||||||||||||
161 | } | ||||||||||||
162 | |||||||||||||
163 | struct apint_match { | ||||||||||||
164 | const APInt *&Res; | ||||||||||||
165 | bool AllowUndef; | ||||||||||||
166 | |||||||||||||
167 | apint_match(const APInt *&Res, bool AllowUndef) | ||||||||||||
168 | : Res(Res), AllowUndef(AllowUndef) {} | ||||||||||||
169 | |||||||||||||
170 | template <typename ITy> bool match(ITy *V) { | ||||||||||||
171 | if (auto *CI = dyn_cast<ConstantInt>(V)) { | ||||||||||||
172 | Res = &CI->getValue(); | ||||||||||||
173 | return true; | ||||||||||||
174 | } | ||||||||||||
175 | if (V->getType()->isVectorTy()) | ||||||||||||
176 | if (const auto *C = dyn_cast<Constant>(V)) | ||||||||||||
177 | if (auto *CI = dyn_cast_or_null<ConstantInt>( | ||||||||||||
178 | C->getSplatValue(AllowUndef))) { | ||||||||||||
179 | Res = &CI->getValue(); | ||||||||||||
180 | return true; | ||||||||||||
181 | } | ||||||||||||
182 | return false; | ||||||||||||
183 | } | ||||||||||||
184 | }; | ||||||||||||
185 | // Either constexpr if or renaming ConstantFP::getValueAPF to | ||||||||||||
186 | // ConstantFP::getValue is needed to do it via single template | ||||||||||||
187 | // function for both apint/apfloat. | ||||||||||||
188 | struct apfloat_match { | ||||||||||||
189 | const APFloat *&Res; | ||||||||||||
190 | bool AllowUndef; | ||||||||||||
191 | |||||||||||||
192 | apfloat_match(const APFloat *&Res, bool AllowUndef) | ||||||||||||
193 | : Res(Res), AllowUndef(AllowUndef) {} | ||||||||||||
194 | |||||||||||||
195 | template <typename ITy> bool match(ITy *V) { | ||||||||||||
196 | if (auto *CI = dyn_cast<ConstantFP>(V)) { | ||||||||||||
197 | Res = &CI->getValueAPF(); | ||||||||||||
198 | return true; | ||||||||||||
199 | } | ||||||||||||
200 | if (V->getType()->isVectorTy()) | ||||||||||||
201 | if (const auto *C = dyn_cast<Constant>(V)) | ||||||||||||
202 | if (auto *CI = dyn_cast_or_null<ConstantFP>( | ||||||||||||
203 | C->getSplatValue(AllowUndef))) { | ||||||||||||
204 | Res = &CI->getValueAPF(); | ||||||||||||
205 | return true; | ||||||||||||
206 | } | ||||||||||||
207 | return false; | ||||||||||||
208 | } | ||||||||||||
209 | }; | ||||||||||||
210 | |||||||||||||
211 | /// Match a ConstantInt or splatted ConstantVector, binding the | ||||||||||||
212 | /// specified pointer to the contained APInt. | ||||||||||||
213 | inline apint_match m_APInt(const APInt *&Res) { | ||||||||||||
214 | // Forbid undefs by default to maintain previous behavior. | ||||||||||||
215 | return apint_match(Res, /* AllowUndef */ false); | ||||||||||||
216 | } | ||||||||||||
217 | |||||||||||||
218 | /// Match APInt while allowing undefs in splat vector constants. | ||||||||||||
219 | inline apint_match m_APIntAllowUndef(const APInt *&Res) { | ||||||||||||
220 | return apint_match(Res, /* AllowUndef */ true); | ||||||||||||
221 | } | ||||||||||||
222 | |||||||||||||
223 | /// Match APInt while forbidding undefs in splat vector constants. | ||||||||||||
224 | inline apint_match m_APIntForbidUndef(const APInt *&Res) { | ||||||||||||
225 | return apint_match(Res, /* AllowUndef */ false); | ||||||||||||
226 | } | ||||||||||||
227 | |||||||||||||
228 | /// Match a ConstantFP or splatted ConstantVector, binding the | ||||||||||||
229 | /// specified pointer to the contained APFloat. | ||||||||||||
230 | inline apfloat_match m_APFloat(const APFloat *&Res) { | ||||||||||||
231 | // Forbid undefs by default to maintain previous behavior. | ||||||||||||
232 | return apfloat_match(Res, /* AllowUndef */ false); | ||||||||||||
233 | } | ||||||||||||
234 | |||||||||||||
235 | /// Match APFloat while allowing undefs in splat vector constants. | ||||||||||||
236 | inline apfloat_match m_APFloatAllowUndef(const APFloat *&Res) { | ||||||||||||
237 | return apfloat_match(Res, /* AllowUndef */ true); | ||||||||||||
238 | } | ||||||||||||
239 | |||||||||||||
240 | /// Match APFloat while forbidding undefs in splat vector constants. | ||||||||||||
241 | inline apfloat_match m_APFloatForbidUndef(const APFloat *&Res) { | ||||||||||||
242 | return apfloat_match(Res, /* AllowUndef */ false); | ||||||||||||
243 | } | ||||||||||||
244 | |||||||||||||
245 | template <int64_t Val> struct constantint_match { | ||||||||||||
246 | template <typename ITy> bool match(ITy *V) { | ||||||||||||
247 | if (const auto *CI = dyn_cast<ConstantInt>(V)) { | ||||||||||||
248 | const APInt &CIV = CI->getValue(); | ||||||||||||
249 | if (Val >= 0) | ||||||||||||
250 | return CIV == static_cast<uint64_t>(Val); | ||||||||||||
251 | // If Val is negative, and CI is shorter than it, truncate to the right | ||||||||||||
252 | // number of bits. If it is larger, then we have to sign extend. Just | ||||||||||||
253 | // compare their negated values. | ||||||||||||
254 | return -CIV == -Val; | ||||||||||||
255 | } | ||||||||||||
256 | return false; | ||||||||||||
257 | } | ||||||||||||
258 | }; | ||||||||||||
259 | |||||||||||||
260 | /// Match a ConstantInt with a specific value. | ||||||||||||
261 | template <int64_t Val> inline constantint_match<Val> m_ConstantInt() { | ||||||||||||
262 | return constantint_match<Val>(); | ||||||||||||
263 | } | ||||||||||||
264 | |||||||||||||
265 | /// This helper class is used to match constant scalars, vector splats, | ||||||||||||
266 | /// and fixed width vectors that satisfy a specified predicate. | ||||||||||||
267 | /// For fixed width vector constants, undefined elements are ignored. | ||||||||||||
268 | template <typename Predicate, typename ConstantVal> | ||||||||||||
269 | struct cstval_pred_ty : public Predicate { | ||||||||||||
270 | template <typename ITy> bool match(ITy *V) { | ||||||||||||
271 | if (const auto *CV = dyn_cast<ConstantVal>(V)) | ||||||||||||
272 | return this->isValue(CV->getValue()); | ||||||||||||
273 | if (const auto *VTy = dyn_cast<VectorType>(V->getType())) { | ||||||||||||
274 | if (const auto *C = dyn_cast<Constant>(V)) { | ||||||||||||
275 | if (const auto *CV = dyn_cast_or_null<ConstantVal>(C->getSplatValue())) | ||||||||||||
276 | return this->isValue(CV->getValue()); | ||||||||||||
277 | |||||||||||||
278 | // Number of elements of a scalable vector unknown at compile time | ||||||||||||
279 | auto *FVTy = dyn_cast<FixedVectorType>(VTy); | ||||||||||||
280 | if (!FVTy) | ||||||||||||
281 | return false; | ||||||||||||
282 | |||||||||||||
283 | // Non-splat vector constant: check each element for a match. | ||||||||||||
284 | unsigned NumElts = FVTy->getNumElements(); | ||||||||||||
285 | assert(NumElts != 0 && "Constant vector with no elements?")((NumElts != 0 && "Constant vector with no elements?" ) ? static_cast<void> (0) : __assert_fail ("NumElts != 0 && \"Constant vector with no elements?\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/include/llvm/IR/PatternMatch.h" , 285, __PRETTY_FUNCTION__)); | ||||||||||||
286 | bool HasNonUndefElements = false; | ||||||||||||
287 | for (unsigned i = 0; i != NumElts; ++i) { | ||||||||||||
288 | Constant *Elt = C->getAggregateElement(i); | ||||||||||||
289 | if (!Elt) | ||||||||||||
290 | return false; | ||||||||||||
291 | if (isa<UndefValue>(Elt)) | ||||||||||||
292 | continue; | ||||||||||||
293 | auto *CV = dyn_cast<ConstantVal>(Elt); | ||||||||||||
294 | if (!CV || !this->isValue(CV->getValue())) | ||||||||||||
295 | return false; | ||||||||||||
296 | HasNonUndefElements = true; | ||||||||||||
297 | } | ||||||||||||
298 | return HasNonUndefElements; | ||||||||||||
299 | } | ||||||||||||
300 | } | ||||||||||||
301 | return false; | ||||||||||||
302 | } | ||||||||||||
303 | }; | ||||||||||||
304 | |||||||||||||
305 | /// specialization of cstval_pred_ty for ConstantInt | ||||||||||||
306 | template <typename Predicate> | ||||||||||||
307 | using cst_pred_ty = cstval_pred_ty<Predicate, ConstantInt>; | ||||||||||||
308 | |||||||||||||
309 | /// specialization of cstval_pred_ty for ConstantFP | ||||||||||||
310 | template <typename Predicate> | ||||||||||||
311 | using cstfp_pred_ty = cstval_pred_ty<Predicate, ConstantFP>; | ||||||||||||
312 | |||||||||||||
313 | /// This helper class is used to match scalar and vector constants that | ||||||||||||
314 | /// satisfy a specified predicate, and bind them to an APInt. | ||||||||||||
315 | template <typename Predicate> struct api_pred_ty : public Predicate { | ||||||||||||
316 | const APInt *&Res; | ||||||||||||
317 | |||||||||||||
318 | api_pred_ty(const APInt *&R) : Res(R) {} | ||||||||||||
319 | |||||||||||||
320 | template <typename ITy> bool match(ITy *V) { | ||||||||||||
321 | if (const auto *CI = dyn_cast<ConstantInt>(V)) | ||||||||||||
322 | if (this->isValue(CI->getValue())) { | ||||||||||||
323 | Res = &CI->getValue(); | ||||||||||||
324 | return true; | ||||||||||||
325 | } | ||||||||||||
326 | if (V->getType()->isVectorTy()) | ||||||||||||
327 | if (const auto *C = dyn_cast<Constant>(V)) | ||||||||||||
328 | if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) | ||||||||||||
329 | if (this->isValue(CI->getValue())) { | ||||||||||||
330 | Res = &CI->getValue(); | ||||||||||||
331 | return true; | ||||||||||||
332 | } | ||||||||||||
333 | |||||||||||||
334 | return false; | ||||||||||||
335 | } | ||||||||||||
336 | }; | ||||||||||||
337 | |||||||||||||
338 | /// This helper class is used to match scalar and vector constants that | ||||||||||||
339 | /// satisfy a specified predicate, and bind them to an APFloat. | ||||||||||||
340 | /// Undefs are allowed in splat vector constants. | ||||||||||||
341 | template <typename Predicate> struct apf_pred_ty : public Predicate { | ||||||||||||
342 | const APFloat *&Res; | ||||||||||||
343 | |||||||||||||
344 | apf_pred_ty(const APFloat *&R) : Res(R) {} | ||||||||||||
345 | |||||||||||||
346 | template <typename ITy> bool match(ITy *V) { | ||||||||||||
347 | if (const auto *CI = dyn_cast<ConstantFP>(V)) | ||||||||||||
348 | if (this->isValue(CI->getValue())) { | ||||||||||||
349 | Res = &CI->getValue(); | ||||||||||||
350 | return true; | ||||||||||||
351 | } | ||||||||||||
352 | if (V->getType()->isVectorTy()) | ||||||||||||
353 | if (const auto *C = dyn_cast<Constant>(V)) | ||||||||||||
354 | if (auto *CI = dyn_cast_or_null<ConstantFP>( | ||||||||||||
355 | C->getSplatValue(/* AllowUndef */ true))) | ||||||||||||
356 | if (this->isValue(CI->getValue())) { | ||||||||||||
357 | Res = &CI->getValue(); | ||||||||||||
358 | return true; | ||||||||||||
359 | } | ||||||||||||
360 | |||||||||||||
361 | return false; | ||||||||||||
362 | } | ||||||||||||
363 | }; | ||||||||||||
364 | |||||||||||||
365 | /////////////////////////////////////////////////////////////////////////////// | ||||||||||||
366 | // | ||||||||||||
367 | // Encapsulate constant value queries for use in templated predicate matchers. | ||||||||||||
368 | // This allows checking if constants match using compound predicates and works | ||||||||||||
369 | // with vector constants, possibly with relaxed constraints. For example, ignore | ||||||||||||
370 | // undef values. | ||||||||||||
371 | // | ||||||||||||
372 | /////////////////////////////////////////////////////////////////////////////// | ||||||||||||
373 | |||||||||||||
374 | struct is_any_apint { | ||||||||||||
375 | bool isValue(const APInt &C) { return true; } | ||||||||||||
376 | }; | ||||||||||||
377 | /// Match an integer or vector with any integral constant. | ||||||||||||
378 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
379 | inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() { | ||||||||||||
380 | return cst_pred_ty<is_any_apint>(); | ||||||||||||
381 | } | ||||||||||||
382 | |||||||||||||
383 | struct is_all_ones { | ||||||||||||
384 | bool isValue(const APInt &C) { return C.isAllOnesValue(); } | ||||||||||||
385 | }; | ||||||||||||
386 | /// Match an integer or vector with all bits set. | ||||||||||||
387 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
388 | inline cst_pred_ty<is_all_ones> m_AllOnes() { | ||||||||||||
389 | return cst_pred_ty<is_all_ones>(); | ||||||||||||
390 | } | ||||||||||||
391 | |||||||||||||
392 | struct is_maxsignedvalue { | ||||||||||||
393 | bool isValue(const APInt &C) { return C.isMaxSignedValue(); } | ||||||||||||
394 | }; | ||||||||||||
395 | /// Match an integer or vector with values having all bits except for the high | ||||||||||||
396 | /// bit set (0x7f...). | ||||||||||||
397 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
398 | inline cst_pred_ty<is_maxsignedvalue> m_MaxSignedValue() { | ||||||||||||
399 | return cst_pred_ty<is_maxsignedvalue>(); | ||||||||||||
400 | } | ||||||||||||
401 | inline api_pred_ty<is_maxsignedvalue> m_MaxSignedValue(const APInt *&V) { | ||||||||||||
402 | return V; | ||||||||||||
403 | } | ||||||||||||
404 | |||||||||||||
405 | struct is_negative { | ||||||||||||
406 | bool isValue(const APInt &C) { return C.isNegative(); } | ||||||||||||
407 | }; | ||||||||||||
408 | /// Match an integer or vector of negative values. | ||||||||||||
409 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
410 | inline cst_pred_ty<is_negative> m_Negative() { | ||||||||||||
411 | return cst_pred_ty<is_negative>(); | ||||||||||||
412 | } | ||||||||||||
413 | inline api_pred_ty<is_negative> m_Negative(const APInt *&V) { | ||||||||||||
414 | return V; | ||||||||||||
415 | } | ||||||||||||
416 | |||||||||||||
417 | struct is_nonnegative { | ||||||||||||
418 | bool isValue(const APInt &C) { return C.isNonNegative(); } | ||||||||||||
419 | }; | ||||||||||||
420 | /// Match an integer or vector of non-negative values. | ||||||||||||
421 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
422 | inline cst_pred_ty<is_nonnegative> m_NonNegative() { | ||||||||||||
423 | return cst_pred_ty<is_nonnegative>(); | ||||||||||||
424 | } | ||||||||||||
425 | inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) { | ||||||||||||
426 | return V; | ||||||||||||
427 | } | ||||||||||||
428 | |||||||||||||
429 | struct is_strictlypositive { | ||||||||||||
430 | bool isValue(const APInt &C) { return C.isStrictlyPositive(); } | ||||||||||||
431 | }; | ||||||||||||
432 | /// Match an integer or vector of strictly positive values. | ||||||||||||
433 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
434 | inline cst_pred_ty<is_strictlypositive> m_StrictlyPositive() { | ||||||||||||
435 | return cst_pred_ty<is_strictlypositive>(); | ||||||||||||
436 | } | ||||||||||||
437 | inline api_pred_ty<is_strictlypositive> m_StrictlyPositive(const APInt *&V) { | ||||||||||||
438 | return V; | ||||||||||||
439 | } | ||||||||||||
440 | |||||||||||||
441 | struct is_nonpositive { | ||||||||||||
442 | bool isValue(const APInt &C) { return C.isNonPositive(); } | ||||||||||||
443 | }; | ||||||||||||
444 | /// Match an integer or vector of non-positive values. | ||||||||||||
445 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
446 | inline cst_pred_ty<is_nonpositive> m_NonPositive() { | ||||||||||||
447 | return cst_pred_ty<is_nonpositive>(); | ||||||||||||
448 | } | ||||||||||||
449 | inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; } | ||||||||||||
450 | |||||||||||||
451 | struct is_one { | ||||||||||||
452 | bool isValue(const APInt &C) { return C.isOneValue(); } | ||||||||||||
453 | }; | ||||||||||||
454 | /// Match an integer 1 or a vector with all elements equal to 1. | ||||||||||||
455 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
456 | inline cst_pred_ty<is_one> m_One() { | ||||||||||||
457 | return cst_pred_ty<is_one>(); | ||||||||||||
458 | } | ||||||||||||
459 | |||||||||||||
460 | struct is_zero_int { | ||||||||||||
461 | bool isValue(const APInt &C) { return C.isNullValue(); } | ||||||||||||
462 | }; | ||||||||||||
463 | /// Match an integer 0 or a vector with all elements equal to 0. | ||||||||||||
464 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
465 | inline cst_pred_ty<is_zero_int> m_ZeroInt() { | ||||||||||||
466 | return cst_pred_ty<is_zero_int>(); | ||||||||||||
467 | } | ||||||||||||
468 | |||||||||||||
469 | struct is_zero { | ||||||||||||
470 | template <typename ITy> bool match(ITy *V) { | ||||||||||||
471 | auto *C = dyn_cast<Constant>(V); | ||||||||||||
472 | // FIXME: this should be able to do something for scalable vectors | ||||||||||||
473 | return C && (C->isNullValue() || cst_pred_ty<is_zero_int>().match(C)); | ||||||||||||
474 | } | ||||||||||||
475 | }; | ||||||||||||
476 | /// Match any null constant or a vector with all elements equal to 0. | ||||||||||||
477 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
478 | inline is_zero m_Zero() { | ||||||||||||
479 | return is_zero(); | ||||||||||||
480 | } | ||||||||||||
481 | |||||||||||||
482 | struct is_power2 { | ||||||||||||
483 | bool isValue(const APInt &C) { return C.isPowerOf2(); } | ||||||||||||
484 | }; | ||||||||||||
485 | /// Match an integer or vector power-of-2. | ||||||||||||
486 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
487 | inline cst_pred_ty<is_power2> m_Power2() { | ||||||||||||
488 | return cst_pred_ty<is_power2>(); | ||||||||||||
489 | } | ||||||||||||
490 | inline api_pred_ty<is_power2> m_Power2(const APInt *&V) { | ||||||||||||
491 | return V; | ||||||||||||
492 | } | ||||||||||||
493 | |||||||||||||
494 | struct is_negated_power2 { | ||||||||||||
495 | bool isValue(const APInt &C) { return (-C).isPowerOf2(); } | ||||||||||||
496 | }; | ||||||||||||
497 | /// Match a integer or vector negated power-of-2. | ||||||||||||
498 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
499 | inline cst_pred_ty<is_negated_power2> m_NegatedPower2() { | ||||||||||||
500 | return cst_pred_ty<is_negated_power2>(); | ||||||||||||
501 | } | ||||||||||||
502 | inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) { | ||||||||||||
503 | return V; | ||||||||||||
504 | } | ||||||||||||
505 | |||||||||||||
506 | struct is_power2_or_zero { | ||||||||||||
507 | bool isValue(const APInt &C) { return !C || C.isPowerOf2(); } | ||||||||||||
508 | }; | ||||||||||||
509 | /// Match an integer or vector of 0 or power-of-2 values. | ||||||||||||
510 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
511 | inline cst_pred_ty<is_power2_or_zero> m_Power2OrZero() { | ||||||||||||
512 | return cst_pred_ty<is_power2_or_zero>(); | ||||||||||||
513 | } | ||||||||||||
514 | inline api_pred_ty<is_power2_or_zero> m_Power2OrZero(const APInt *&V) { | ||||||||||||
515 | return V; | ||||||||||||
516 | } | ||||||||||||
517 | |||||||||||||
518 | struct is_sign_mask { | ||||||||||||
519 | bool isValue(const APInt &C) { return C.isSignMask(); } | ||||||||||||
520 | }; | ||||||||||||
521 | /// Match an integer or vector with only the sign bit(s) set. | ||||||||||||
522 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
523 | inline cst_pred_ty<is_sign_mask> m_SignMask() { | ||||||||||||
524 | return cst_pred_ty<is_sign_mask>(); | ||||||||||||
525 | } | ||||||||||||
526 | |||||||||||||
527 | struct is_lowbit_mask { | ||||||||||||
528 | bool isValue(const APInt &C) { return C.isMask(); } | ||||||||||||
529 | }; | ||||||||||||
530 | /// Match an integer or vector with only the low bit(s) set. | ||||||||||||
531 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
532 | inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() { | ||||||||||||
533 | return cst_pred_ty<is_lowbit_mask>(); | ||||||||||||
534 | } | ||||||||||||
535 | |||||||||||||
536 | struct icmp_pred_with_threshold { | ||||||||||||
537 | ICmpInst::Predicate Pred; | ||||||||||||
538 | const APInt *Thr; | ||||||||||||
539 | bool isValue(const APInt &C) { | ||||||||||||
540 | switch (Pred) { | ||||||||||||
541 | case ICmpInst::Predicate::ICMP_EQ: | ||||||||||||
542 | return C.eq(*Thr); | ||||||||||||
543 | case ICmpInst::Predicate::ICMP_NE: | ||||||||||||
544 | return C.ne(*Thr); | ||||||||||||
545 | case ICmpInst::Predicate::ICMP_UGT: | ||||||||||||
546 | return C.ugt(*Thr); | ||||||||||||
547 | case ICmpInst::Predicate::ICMP_UGE: | ||||||||||||
548 | return C.uge(*Thr); | ||||||||||||
549 | case ICmpInst::Predicate::ICMP_ULT: | ||||||||||||
550 | return C.ult(*Thr); | ||||||||||||
551 | case ICmpInst::Predicate::ICMP_ULE: | ||||||||||||
552 | return C.ule(*Thr); | ||||||||||||
553 | case ICmpInst::Predicate::ICMP_SGT: | ||||||||||||
554 | return C.sgt(*Thr); | ||||||||||||
555 | case ICmpInst::Predicate::ICMP_SGE: | ||||||||||||
556 | return C.sge(*Thr); | ||||||||||||
557 | case ICmpInst::Predicate::ICMP_SLT: | ||||||||||||
558 | return C.slt(*Thr); | ||||||||||||
559 | case ICmpInst::Predicate::ICMP_SLE: | ||||||||||||
560 | return C.sle(*Thr); | ||||||||||||
561 | default: | ||||||||||||
562 | llvm_unreachable("Unhandled ICmp predicate")::llvm::llvm_unreachable_internal("Unhandled ICmp predicate", "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/include/llvm/IR/PatternMatch.h" , 562); | ||||||||||||
563 | } | ||||||||||||
564 | } | ||||||||||||
565 | }; | ||||||||||||
566 | /// Match an integer or vector with every element comparing 'pred' (eg/ne/...) | ||||||||||||
567 | /// to Threshold. For vectors, this includes constants with undefined elements. | ||||||||||||
568 | inline cst_pred_ty<icmp_pred_with_threshold> | ||||||||||||
569 | m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) { | ||||||||||||
570 | cst_pred_ty<icmp_pred_with_threshold> P; | ||||||||||||
571 | P.Pred = Predicate; | ||||||||||||
572 | P.Thr = &Threshold; | ||||||||||||
573 | return P; | ||||||||||||
574 | } | ||||||||||||
575 | |||||||||||||
576 | struct is_nan { | ||||||||||||
577 | bool isValue(const APFloat &C) { return C.isNaN(); } | ||||||||||||
578 | }; | ||||||||||||
579 | /// Match an arbitrary NaN constant. This includes quiet and signalling nans. | ||||||||||||
580 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
581 | inline cstfp_pred_ty<is_nan> m_NaN() { | ||||||||||||
582 | return cstfp_pred_ty<is_nan>(); | ||||||||||||
583 | } | ||||||||||||
584 | |||||||||||||
585 | struct is_nonnan { | ||||||||||||
586 | bool isValue(const APFloat &C) { return !C.isNaN(); } | ||||||||||||
587 | }; | ||||||||||||
588 | /// Match a non-NaN FP constant. | ||||||||||||
589 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
590 | inline cstfp_pred_ty<is_nonnan> m_NonNaN() { | ||||||||||||
591 | return cstfp_pred_ty<is_nonnan>(); | ||||||||||||
592 | } | ||||||||||||
593 | |||||||||||||
594 | struct is_inf { | ||||||||||||
595 | bool isValue(const APFloat &C) { return C.isInfinity(); } | ||||||||||||
596 | }; | ||||||||||||
597 | /// Match a positive or negative infinity FP constant. | ||||||||||||
598 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
599 | inline cstfp_pred_ty<is_inf> m_Inf() { | ||||||||||||
600 | return cstfp_pred_ty<is_inf>(); | ||||||||||||
601 | } | ||||||||||||
602 | |||||||||||||
603 | struct is_noninf { | ||||||||||||
604 | bool isValue(const APFloat &C) { return !C.isInfinity(); } | ||||||||||||
605 | }; | ||||||||||||
606 | /// Match a non-infinity FP constant, i.e. finite or NaN. | ||||||||||||
607 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
608 | inline cstfp_pred_ty<is_noninf> m_NonInf() { | ||||||||||||
609 | return cstfp_pred_ty<is_noninf>(); | ||||||||||||
610 | } | ||||||||||||
611 | |||||||||||||
612 | struct is_finite { | ||||||||||||
613 | bool isValue(const APFloat &C) { return C.isFinite(); } | ||||||||||||
614 | }; | ||||||||||||
615 | /// Match a finite FP constant, i.e. not infinity or NaN. | ||||||||||||
616 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
617 | inline cstfp_pred_ty<is_finite> m_Finite() { | ||||||||||||
618 | return cstfp_pred_ty<is_finite>(); | ||||||||||||
619 | } | ||||||||||||
620 | inline apf_pred_ty<is_finite> m_Finite(const APFloat *&V) { return V; } | ||||||||||||
621 | |||||||||||||
622 | struct is_finitenonzero { | ||||||||||||
623 | bool isValue(const APFloat &C) { return C.isFiniteNonZero(); } | ||||||||||||
624 | }; | ||||||||||||
625 | /// Match a finite non-zero FP constant. | ||||||||||||
626 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
627 | inline cstfp_pred_ty<is_finitenonzero> m_FiniteNonZero() { | ||||||||||||
628 | return cstfp_pred_ty<is_finitenonzero>(); | ||||||||||||
629 | } | ||||||||||||
630 | inline apf_pred_ty<is_finitenonzero> m_FiniteNonZero(const APFloat *&V) { | ||||||||||||
631 | return V; | ||||||||||||
632 | } | ||||||||||||
633 | |||||||||||||
634 | struct is_any_zero_fp { | ||||||||||||
635 | bool isValue(const APFloat &C) { return C.isZero(); } | ||||||||||||
636 | }; | ||||||||||||
637 | /// Match a floating-point negative zero or positive zero. | ||||||||||||
638 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
639 | inline cstfp_pred_ty<is_any_zero_fp> m_AnyZeroFP() { | ||||||||||||
640 | return cstfp_pred_ty<is_any_zero_fp>(); | ||||||||||||
641 | } | ||||||||||||
642 | |||||||||||||
643 | struct is_pos_zero_fp { | ||||||||||||
644 | bool isValue(const APFloat &C) { return C.isPosZero(); } | ||||||||||||
645 | }; | ||||||||||||
646 | /// Match a floating-point positive zero. | ||||||||||||
647 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
648 | inline cstfp_pred_ty<is_pos_zero_fp> m_PosZeroFP() { | ||||||||||||
649 | return cstfp_pred_ty<is_pos_zero_fp>(); | ||||||||||||
650 | } | ||||||||||||
651 | |||||||||||||
652 | struct is_neg_zero_fp { | ||||||||||||
653 | bool isValue(const APFloat &C) { return C.isNegZero(); } | ||||||||||||
654 | }; | ||||||||||||
655 | /// Match a floating-point negative zero. | ||||||||||||
656 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
657 | inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() { | ||||||||||||
658 | return cstfp_pred_ty<is_neg_zero_fp>(); | ||||||||||||
659 | } | ||||||||||||
660 | |||||||||||||
661 | struct is_non_zero_fp { | ||||||||||||
662 | bool isValue(const APFloat &C) { return C.isNonZero(); } | ||||||||||||
663 | }; | ||||||||||||
664 | /// Match a floating-point non-zero. | ||||||||||||
665 | /// For vectors, this includes constants with undefined elements. | ||||||||||||
666 | inline cstfp_pred_ty<is_non_zero_fp> m_NonZeroFP() { | ||||||||||||
667 | return cstfp_pred_ty<is_non_zero_fp>(); | ||||||||||||
668 | } | ||||||||||||
669 | |||||||||||||
670 | /////////////////////////////////////////////////////////////////////////////// | ||||||||||||
671 | |||||||||||||
672 | template <typename Class> struct bind_ty { | ||||||||||||
673 | Class *&VR; | ||||||||||||
674 | |||||||||||||
675 | bind_ty(Class *&V) : VR(V) {} | ||||||||||||
676 | |||||||||||||
677 | template <typename ITy> bool match(ITy *V) { | ||||||||||||
678 | if (auto *CV = dyn_cast<Class>(V)) { | ||||||||||||
679 | VR = CV; | ||||||||||||
680 | return true; | ||||||||||||
681 | } | ||||||||||||
682 | return false; | ||||||||||||
683 | } | ||||||||||||
684 | }; | ||||||||||||
685 | |||||||||||||
686 | /// Match a value, capturing it if we match. | ||||||||||||
687 | inline bind_ty<Value> m_Value(Value *&V) { return V; } | ||||||||||||
688 | inline bind_ty<const Value> m_Value(const Value *&V) { return V; } | ||||||||||||
689 | |||||||||||||
690 | /// Match an instruction, capturing it if we match. | ||||||||||||
691 | inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; } | ||||||||||||
692 | /// Match a unary operator, capturing it if we match. | ||||||||||||
693 | inline bind_ty<UnaryOperator> m_UnOp(UnaryOperator *&I) { return I; } | ||||||||||||
694 | /// Match a binary operator, capturing it if we match. | ||||||||||||
695 | inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; } | ||||||||||||
696 | /// Match a with overflow intrinsic, capturing it if we match. | ||||||||||||
697 | inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; } | ||||||||||||
698 | |||||||||||||
699 | /// Match a ConstantInt, capturing the value if we match. | ||||||||||||
700 | inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; } | ||||||||||||
701 | |||||||||||||
702 | /// Match a Constant, capturing the value if we match. | ||||||||||||
703 | inline bind_ty<Constant> m_Constant(Constant *&C) { return C; } | ||||||||||||
704 | |||||||||||||
705 | /// Match a ConstantFP, capturing the value if we match. | ||||||||||||
706 | inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; } | ||||||||||||
707 | |||||||||||||
708 | /// Match a basic block value, capturing it if we match. | ||||||||||||
709 | inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; } | ||||||||||||
710 | inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) { | ||||||||||||
711 | return V; | ||||||||||||
712 | } | ||||||||||||
713 | |||||||||||||
714 | /// Match a specified Value*. | ||||||||||||
715 | struct specificval_ty { | ||||||||||||
716 | const Value *Val; | ||||||||||||
717 | |||||||||||||
718 | specificval_ty(const Value *V) : Val(V) {} | ||||||||||||
719 | |||||||||||||
720 | template <typename ITy> bool match(ITy *V) { return V == Val; } | ||||||||||||
721 | }; | ||||||||||||
722 | |||||||||||||
723 | /// Match if we have a specific specified value. | ||||||||||||
724 | inline specificval_ty m_Specific(const Value *V) { return V; } | ||||||||||||
725 | |||||||||||||
726 | /// Stores a reference to the Value *, not the Value * itself, | ||||||||||||
727 | /// thus can be used in commutative matchers. | ||||||||||||
728 | template <typename Class> struct deferredval_ty { | ||||||||||||
729 | Class *const &Val; | ||||||||||||
730 | |||||||||||||
731 | deferredval_ty(Class *const &V) : Val(V) {} | ||||||||||||
732 | |||||||||||||
733 | template <typename ITy> bool match(ITy *const V) { return V == Val; } | ||||||||||||
734 | }; | ||||||||||||
735 | |||||||||||||
736 | /// A commutative-friendly version of m_Specific(). | ||||||||||||
737 | inline deferredval_ty<Value> m_Deferred(Value *const &V) { return V; } | ||||||||||||
738 | inline deferredval_ty<const Value> m_Deferred(const Value *const &V) { | ||||||||||||
739 | return V; | ||||||||||||
740 | } | ||||||||||||
741 | |||||||||||||
742 | /// Match a specified floating point value or vector of all elements of | ||||||||||||
743 | /// that value. | ||||||||||||
744 | struct specific_fpval { | ||||||||||||
745 | double Val; | ||||||||||||
746 | |||||||||||||
747 | specific_fpval(double V) : Val(V) {} | ||||||||||||
748 | |||||||||||||
749 | template <typename ITy> bool match(ITy *V) { | ||||||||||||
750 | if (const auto *CFP = dyn_cast<ConstantFP>(V)) | ||||||||||||
751 | return CFP->isExactlyValue(Val); | ||||||||||||
752 | if (V->getType()->isVectorTy()) | ||||||||||||
753 | if (const auto *C = dyn_cast<Constant>(V)) | ||||||||||||
754 | if (auto *CFP = dyn_cast_or_null<ConstantFP>(C->getSplatValue())) | ||||||||||||
755 | return CFP->isExactlyValue(Val); | ||||||||||||
756 | return false; | ||||||||||||
757 | } | ||||||||||||
758 | }; | ||||||||||||
759 | |||||||||||||
760 | /// Match a specific floating point value or vector with all elements | ||||||||||||
761 | /// equal to the value. | ||||||||||||
762 | inline specific_fpval m_SpecificFP(double V) { return specific_fpval(V); } | ||||||||||||
763 | |||||||||||||
764 | /// Match a float 1.0 or vector with all elements equal to 1.0. | ||||||||||||
765 | inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); } | ||||||||||||
766 | |||||||||||||
767 | struct bind_const_intval_ty { | ||||||||||||
768 | uint64_t &VR; | ||||||||||||
769 | |||||||||||||
770 | bind_const_intval_ty(uint64_t &V) : VR(V) {} | ||||||||||||
771 | |||||||||||||
772 | template <typename ITy> bool match(ITy *V) { | ||||||||||||
773 | if (const auto *CV = dyn_cast<ConstantInt>(V)) | ||||||||||||
774 | if (CV->getValue().ule(UINT64_MAX(18446744073709551615UL))) { | ||||||||||||
775 | VR = CV->getZExtValue(); | ||||||||||||
776 | return true; | ||||||||||||
777 | } | ||||||||||||
778 | return false; | ||||||||||||
779 | } | ||||||||||||
780 | }; | ||||||||||||
781 | |||||||||||||
782 | /// Match a specified integer value or vector of all elements of that | ||||||||||||
783 | /// value. | ||||||||||||
784 | template <bool AllowUndefs> | ||||||||||||
785 | struct specific_intval { | ||||||||||||
786 | APInt Val; | ||||||||||||
787 | |||||||||||||
788 | specific_intval(APInt V) : Val(std::move(V)) {} | ||||||||||||
789 | |||||||||||||
790 | template <typename ITy> bool match(ITy *V) { | ||||||||||||
791 | const auto *CI = dyn_cast<ConstantInt>(V); | ||||||||||||
792 | if (!CI && V->getType()->isVectorTy()) | ||||||||||||
793 | if (const auto *C = dyn_cast<Constant>(V)) | ||||||||||||
794 | CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue(AllowUndefs)); | ||||||||||||
795 | |||||||||||||
796 | return CI && APInt::isSameValue(CI->getValue(), Val); | ||||||||||||
797 | } | ||||||||||||
798 | }; | ||||||||||||
799 | |||||||||||||
800 | /// Match a specific integer value or vector with all elements equal to | ||||||||||||
801 | /// the value. | ||||||||||||
802 | inline specific_intval<false> m_SpecificInt(APInt V) { | ||||||||||||
803 | return specific_intval<false>(std::move(V)); | ||||||||||||
804 | } | ||||||||||||
805 | |||||||||||||
806 | inline specific_intval<false> m_SpecificInt(uint64_t V) { | ||||||||||||
807 | return m_SpecificInt(APInt(64, V)); | ||||||||||||
808 | } | ||||||||||||
809 | |||||||||||||
810 | inline specific_intval<true> m_SpecificIntAllowUndef(APInt V) { | ||||||||||||
811 | return specific_intval<true>(std::move(V)); | ||||||||||||
812 | } | ||||||||||||
813 | |||||||||||||
814 | inline specific_intval<true> m_SpecificIntAllowUndef(uint64_t V) { | ||||||||||||
815 | return m_SpecificIntAllowUndef(APInt(64, V)); | ||||||||||||
816 | } | ||||||||||||
817 | |||||||||||||
818 | /// Match a ConstantInt and bind to its value. This does not match | ||||||||||||
819 | /// ConstantInts wider than 64-bits. | ||||||||||||
820 | inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; } | ||||||||||||
821 | |||||||||||||
822 | /// Match a specified basic block value. | ||||||||||||
823 | struct specific_bbval { | ||||||||||||
824 | BasicBlock *Val; | ||||||||||||
825 | |||||||||||||
826 | specific_bbval(BasicBlock *Val) : Val(Val) {} | ||||||||||||
827 | |||||||||||||
828 | template <typename ITy> bool match(ITy *V) { | ||||||||||||
829 | const auto *BB = dyn_cast<BasicBlock>(V); | ||||||||||||
830 | return BB && BB == Val; | ||||||||||||
831 | } | ||||||||||||
832 | }; | ||||||||||||
833 | |||||||||||||
834 | /// Match a specific basic block value. | ||||||||||||
835 | inline specific_bbval m_SpecificBB(BasicBlock *BB) { | ||||||||||||
836 | return specific_bbval(BB); | ||||||||||||
837 | } | ||||||||||||
838 | |||||||||||||
839 | /// A commutative-friendly version of m_Specific(). | ||||||||||||
840 | inline deferredval_ty<BasicBlock> m_Deferred(BasicBlock *const &BB) { | ||||||||||||
841 | return BB; | ||||||||||||
842 | } | ||||||||||||
843 | inline deferredval_ty<const BasicBlock> | ||||||||||||
844 | m_Deferred(const BasicBlock *const &BB) { | ||||||||||||
845 | return BB; | ||||||||||||
846 | } | ||||||||||||
847 | |||||||||||||
848 | //===----------------------------------------------------------------------===// | ||||||||||||
849 | // Matcher for any binary operator. | ||||||||||||
850 | // | ||||||||||||
851 | template <typename LHS_t, typename RHS_t, bool Commutable = false> | ||||||||||||
852 | struct AnyBinaryOp_match { | ||||||||||||
853 | LHS_t L; | ||||||||||||
854 | RHS_t R; | ||||||||||||
855 | |||||||||||||
856 | // The evaluation order is always stable, regardless of Commutability. | ||||||||||||
857 | // The LHS is always matched first. | ||||||||||||
858 | AnyBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} | ||||||||||||
859 | |||||||||||||
860 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
861 | if (auto *I = dyn_cast<BinaryOperator>(V)) | ||||||||||||
862 | return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || | ||||||||||||
863 | (Commutable && L.match(I->getOperand(1)) && | ||||||||||||
864 | R.match(I->getOperand(0))); | ||||||||||||
865 | return false; | ||||||||||||
866 | } | ||||||||||||
867 | }; | ||||||||||||
868 | |||||||||||||
869 | template <typename LHS, typename RHS> | ||||||||||||
870 | inline AnyBinaryOp_match<LHS, RHS> m_BinOp(const LHS &L, const RHS &R) { | ||||||||||||
871 | return AnyBinaryOp_match<LHS, RHS>(L, R); | ||||||||||||
872 | } | ||||||||||||
873 | |||||||||||||
874 | //===----------------------------------------------------------------------===// | ||||||||||||
875 | // Matcher for any unary operator. | ||||||||||||
876 | // TODO fuse unary, binary matcher into n-ary matcher | ||||||||||||
877 | // | ||||||||||||
878 | template <typename OP_t> struct AnyUnaryOp_match { | ||||||||||||
879 | OP_t X; | ||||||||||||
880 | |||||||||||||
881 | AnyUnaryOp_match(const OP_t &X) : X(X) {} | ||||||||||||
882 | |||||||||||||
883 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
884 | if (auto *I = dyn_cast<UnaryOperator>(V)) | ||||||||||||
885 | return X.match(I->getOperand(0)); | ||||||||||||
886 | return false; | ||||||||||||
887 | } | ||||||||||||
888 | }; | ||||||||||||
889 | |||||||||||||
890 | template <typename OP_t> inline AnyUnaryOp_match<OP_t> m_UnOp(const OP_t &X) { | ||||||||||||
891 | return AnyUnaryOp_match<OP_t>(X); | ||||||||||||
892 | } | ||||||||||||
893 | |||||||||||||
894 | //===----------------------------------------------------------------------===// | ||||||||||||
895 | // Matchers for specific binary operators. | ||||||||||||
896 | // | ||||||||||||
897 | |||||||||||||
898 | template <typename LHS_t, typename RHS_t, unsigned Opcode, | ||||||||||||
899 | bool Commutable = false> | ||||||||||||
900 | struct BinaryOp_match { | ||||||||||||
901 | LHS_t L; | ||||||||||||
902 | RHS_t R; | ||||||||||||
903 | |||||||||||||
904 | // The evaluation order is always stable, regardless of Commutability. | ||||||||||||
905 | // The LHS is always matched first. | ||||||||||||
906 | BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} | ||||||||||||
907 | |||||||||||||
908 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
909 | if (V->getValueID() == Value::InstructionVal + Opcode) { | ||||||||||||
910 | auto *I = cast<BinaryOperator>(V); | ||||||||||||
911 | return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || | ||||||||||||
912 | (Commutable && L.match(I->getOperand(1)) && | ||||||||||||
913 | R.match(I->getOperand(0))); | ||||||||||||
914 | } | ||||||||||||
915 | if (auto *CE = dyn_cast<ConstantExpr>(V)) | ||||||||||||
916 | return CE->getOpcode() == Opcode && | ||||||||||||
917 | ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) || | ||||||||||||
918 | (Commutable && L.match(CE->getOperand(1)) && | ||||||||||||
919 | R.match(CE->getOperand(0)))); | ||||||||||||
920 | return false; | ||||||||||||
921 | } | ||||||||||||
922 | }; | ||||||||||||
923 | |||||||||||||
924 | template <typename LHS, typename RHS> | ||||||||||||
925 | inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L, | ||||||||||||
926 | const RHS &R) { | ||||||||||||
927 | return BinaryOp_match<LHS, RHS, Instruction::Add>(L, R); | ||||||||||||
928 | } | ||||||||||||
929 | |||||||||||||
930 | template <typename LHS, typename RHS> | ||||||||||||
931 | inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L, | ||||||||||||
932 | const RHS &R) { | ||||||||||||
933 | return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R); | ||||||||||||
934 | } | ||||||||||||
935 | |||||||||||||
936 | template <typename LHS, typename RHS> | ||||||||||||
937 | inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L, | ||||||||||||
938 | const RHS &R) { | ||||||||||||
939 | return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R); | ||||||||||||
940 | } | ||||||||||||
941 | |||||||||||||
942 | template <typename LHS, typename RHS> | ||||||||||||
943 | inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L, | ||||||||||||
944 | const RHS &R) { | ||||||||||||
945 | return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R); | ||||||||||||
946 | } | ||||||||||||
947 | |||||||||||||
948 | template <typename Op_t> struct FNeg_match { | ||||||||||||
949 | Op_t X; | ||||||||||||
950 | |||||||||||||
951 | FNeg_match(const Op_t &Op) : X(Op) {} | ||||||||||||
952 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
953 | auto *FPMO = dyn_cast<FPMathOperator>(V); | ||||||||||||
954 | if (!FPMO) return false; | ||||||||||||
955 | |||||||||||||
956 | if (FPMO->getOpcode() == Instruction::FNeg) | ||||||||||||
957 | return X.match(FPMO->getOperand(0)); | ||||||||||||
958 | |||||||||||||
959 | if (FPMO->getOpcode() == Instruction::FSub) { | ||||||||||||
960 | if (FPMO->hasNoSignedZeros()) { | ||||||||||||
961 | // With 'nsz', any zero goes. | ||||||||||||
962 | if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0))) | ||||||||||||
963 | return false; | ||||||||||||
964 | } else { | ||||||||||||
965 | // Without 'nsz', we need fsub -0.0, X exactly. | ||||||||||||
966 | if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0))) | ||||||||||||
967 | return false; | ||||||||||||
968 | } | ||||||||||||
969 | |||||||||||||
970 | return X.match(FPMO->getOperand(1)); | ||||||||||||
971 | } | ||||||||||||
972 | |||||||||||||
973 | return false; | ||||||||||||
974 | } | ||||||||||||
975 | }; | ||||||||||||
976 | |||||||||||||
977 | /// Match 'fneg X' as 'fsub -0.0, X'. | ||||||||||||
978 | template <typename OpTy> | ||||||||||||
979 | inline FNeg_match<OpTy> | ||||||||||||
980 | m_FNeg(const OpTy &X) { | ||||||||||||
981 | return FNeg_match<OpTy>(X); | ||||||||||||
982 | } | ||||||||||||
983 | |||||||||||||
984 | /// Match 'fneg X' as 'fsub +-0.0, X'. | ||||||||||||
985 | template <typename RHS> | ||||||||||||
986 | inline BinaryOp_match<cstfp_pred_ty<is_any_zero_fp>, RHS, Instruction::FSub> | ||||||||||||
987 | m_FNegNSZ(const RHS &X) { | ||||||||||||
988 | return m_FSub(m_AnyZeroFP(), X); | ||||||||||||
989 | } | ||||||||||||
990 | |||||||||||||
991 | template <typename LHS, typename RHS> | ||||||||||||
992 | inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L, | ||||||||||||
993 | const RHS &R) { | ||||||||||||
994 | return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R); | ||||||||||||
995 | } | ||||||||||||
996 | |||||||||||||
997 | template <typename LHS, typename RHS> | ||||||||||||
998 | inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L, | ||||||||||||
999 | const RHS &R) { | ||||||||||||
1000 | return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R); | ||||||||||||
1001 | } | ||||||||||||
1002 | |||||||||||||
1003 | template <typename LHS, typename RHS> | ||||||||||||
1004 | inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L, | ||||||||||||
1005 | const RHS &R) { | ||||||||||||
1006 | return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R); | ||||||||||||
1007 | } | ||||||||||||
1008 | |||||||||||||
1009 | template <typename LHS, typename RHS> | ||||||||||||
1010 | inline BinaryOp_match<LHS, RHS, Instruction::SDiv> m_SDiv(const LHS &L, | ||||||||||||
1011 | const RHS &R) { | ||||||||||||
1012 | return BinaryOp_match<LHS, RHS, Instruction::SDiv>(L, R); | ||||||||||||
1013 | } | ||||||||||||
1014 | |||||||||||||
1015 | template <typename LHS, typename RHS> | ||||||||||||
1016 | inline BinaryOp_match<LHS, RHS, Instruction::FDiv> m_FDiv(const LHS &L, | ||||||||||||
1017 | const RHS &R) { | ||||||||||||
1018 | return BinaryOp_match<LHS, RHS, Instruction::FDiv>(L, R); | ||||||||||||
1019 | } | ||||||||||||
1020 | |||||||||||||
1021 | template <typename LHS, typename RHS> | ||||||||||||
1022 | inline BinaryOp_match<LHS, RHS, Instruction::URem> m_URem(const LHS &L, | ||||||||||||
1023 | const RHS &R) { | ||||||||||||
1024 | return BinaryOp_match<LHS, RHS, Instruction::URem>(L, R); | ||||||||||||
1025 | } | ||||||||||||
1026 | |||||||||||||
1027 | template <typename LHS, typename RHS> | ||||||||||||
1028 | inline BinaryOp_match<LHS, RHS, Instruction::SRem> m_SRem(const LHS &L, | ||||||||||||
1029 | const RHS &R) { | ||||||||||||
1030 | return BinaryOp_match<LHS, RHS, Instruction::SRem>(L, R); | ||||||||||||
1031 | } | ||||||||||||
1032 | |||||||||||||
1033 | template <typename LHS, typename RHS> | ||||||||||||
1034 | inline BinaryOp_match<LHS, RHS, Instruction::FRem> m_FRem(const LHS &L, | ||||||||||||
1035 | const RHS &R) { | ||||||||||||
1036 | return BinaryOp_match<LHS, RHS, Instruction::FRem>(L, R); | ||||||||||||
1037 | } | ||||||||||||
1038 | |||||||||||||
1039 | template <typename LHS, typename RHS> | ||||||||||||
1040 | inline BinaryOp_match<LHS, RHS, Instruction::And> m_And(const LHS &L, | ||||||||||||
1041 | const RHS &R) { | ||||||||||||
1042 | return BinaryOp_match<LHS, RHS, Instruction::And>(L, R); | ||||||||||||
1043 | } | ||||||||||||
1044 | |||||||||||||
1045 | template <typename LHS, typename RHS> | ||||||||||||
1046 | inline BinaryOp_match<LHS, RHS, Instruction::Or> m_Or(const LHS &L, | ||||||||||||
1047 | const RHS &R) { | ||||||||||||
1048 | return BinaryOp_match<LHS, RHS, Instruction::Or>(L, R); | ||||||||||||
1049 | } | ||||||||||||
1050 | |||||||||||||
1051 | template <typename LHS, typename RHS> | ||||||||||||
1052 | inline BinaryOp_match<LHS, RHS, Instruction::Xor> m_Xor(const LHS &L, | ||||||||||||
1053 | const RHS &R) { | ||||||||||||
1054 | return BinaryOp_match<LHS, RHS, Instruction::Xor>(L, R); | ||||||||||||
1055 | } | ||||||||||||
1056 | |||||||||||||
1057 | template <typename LHS, typename RHS> | ||||||||||||
1058 | inline BinaryOp_match<LHS, RHS, Instruction::Shl> m_Shl(const LHS &L, | ||||||||||||
1059 | const RHS &R) { | ||||||||||||
1060 | return BinaryOp_match<LHS, RHS, Instruction::Shl>(L, R); | ||||||||||||
1061 | } | ||||||||||||
1062 | |||||||||||||
1063 | template <typename LHS, typename RHS> | ||||||||||||
1064 | inline BinaryOp_match<LHS, RHS, Instruction::LShr> m_LShr(const LHS &L, | ||||||||||||
1065 | const RHS &R) { | ||||||||||||
1066 | return BinaryOp_match<LHS, RHS, Instruction::LShr>(L, R); | ||||||||||||
1067 | } | ||||||||||||
1068 | |||||||||||||
1069 | template <typename LHS, typename RHS> | ||||||||||||
1070 | inline BinaryOp_match<LHS, RHS, Instruction::AShr> m_AShr(const LHS &L, | ||||||||||||
1071 | const RHS &R) { | ||||||||||||
1072 | return BinaryOp_match<LHS, RHS, Instruction::AShr>(L, R); | ||||||||||||
1073 | } | ||||||||||||
1074 | |||||||||||||
1075 | template <typename LHS_t, typename RHS_t, unsigned Opcode, | ||||||||||||
1076 | unsigned WrapFlags = 0> | ||||||||||||
1077 | struct OverflowingBinaryOp_match { | ||||||||||||
1078 | LHS_t L; | ||||||||||||
1079 | RHS_t R; | ||||||||||||
1080 | |||||||||||||
1081 | OverflowingBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) | ||||||||||||
1082 | : L(LHS), R(RHS) {} | ||||||||||||
1083 | |||||||||||||
1084 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
1085 | if (auto *Op = dyn_cast<OverflowingBinaryOperator>(V)) { | ||||||||||||
1086 | if (Op->getOpcode() != Opcode) | ||||||||||||
1087 | return false; | ||||||||||||
1088 | if (WrapFlags & OverflowingBinaryOperator::NoUnsignedWrap && | ||||||||||||
1089 | !Op->hasNoUnsignedWrap()) | ||||||||||||
1090 | return false; | ||||||||||||
1091 | if (WrapFlags & OverflowingBinaryOperator::NoSignedWrap && | ||||||||||||
1092 | !Op->hasNoSignedWrap()) | ||||||||||||
1093 | return false; | ||||||||||||
1094 | return L.match(Op->getOperand(0)) && R.match(Op->getOperand(1)); | ||||||||||||
1095 | } | ||||||||||||
1096 | return false; | ||||||||||||
1097 | } | ||||||||||||
1098 | }; | ||||||||||||
1099 | |||||||||||||
1100 | template <typename LHS, typename RHS> | ||||||||||||
1101 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, | ||||||||||||
1102 | OverflowingBinaryOperator::NoSignedWrap> | ||||||||||||
1103 | m_NSWAdd(const LHS &L, const RHS &R) { | ||||||||||||
1104 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, | ||||||||||||
1105 | OverflowingBinaryOperator::NoSignedWrap>( | ||||||||||||
1106 | L, R); | ||||||||||||
1107 | } | ||||||||||||
1108 | template <typename LHS, typename RHS> | ||||||||||||
1109 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, | ||||||||||||
1110 | OverflowingBinaryOperator::NoSignedWrap> | ||||||||||||
1111 | m_NSWSub(const LHS &L, const RHS &R) { | ||||||||||||
1112 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, | ||||||||||||
1113 | OverflowingBinaryOperator::NoSignedWrap>( | ||||||||||||
1114 | L, R); | ||||||||||||
1115 | } | ||||||||||||
1116 | template <typename LHS, typename RHS> | ||||||||||||
1117 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, | ||||||||||||
1118 | OverflowingBinaryOperator::NoSignedWrap> | ||||||||||||
1119 | m_NSWMul(const LHS &L, const RHS &R) { | ||||||||||||
1120 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, | ||||||||||||
1121 | OverflowingBinaryOperator::NoSignedWrap>( | ||||||||||||
1122 | L, R); | ||||||||||||
1123 | } | ||||||||||||
1124 | template <typename LHS, typename RHS> | ||||||||||||
1125 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, | ||||||||||||
1126 | OverflowingBinaryOperator::NoSignedWrap> | ||||||||||||
1127 | m_NSWShl(const LHS &L, const RHS &R) { | ||||||||||||
1128 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, | ||||||||||||
1129 | OverflowingBinaryOperator::NoSignedWrap>( | ||||||||||||
1130 | L, R); | ||||||||||||
1131 | } | ||||||||||||
1132 | |||||||||||||
1133 | template <typename LHS, typename RHS> | ||||||||||||
1134 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, | ||||||||||||
1135 | OverflowingBinaryOperator::NoUnsignedWrap> | ||||||||||||
1136 | m_NUWAdd(const LHS &L, const RHS &R) { | ||||||||||||
1137 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, | ||||||||||||
1138 | OverflowingBinaryOperator::NoUnsignedWrap>( | ||||||||||||
1139 | L, R); | ||||||||||||
1140 | } | ||||||||||||
1141 | template <typename LHS, typename RHS> | ||||||||||||
1142 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, | ||||||||||||
1143 | OverflowingBinaryOperator::NoUnsignedWrap> | ||||||||||||
1144 | m_NUWSub(const LHS &L, const RHS &R) { | ||||||||||||
1145 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, | ||||||||||||
1146 | OverflowingBinaryOperator::NoUnsignedWrap>( | ||||||||||||
1147 | L, R); | ||||||||||||
1148 | } | ||||||||||||
1149 | template <typename LHS, typename RHS> | ||||||||||||
1150 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, | ||||||||||||
1151 | OverflowingBinaryOperator::NoUnsignedWrap> | ||||||||||||
1152 | m_NUWMul(const LHS &L, const RHS &R) { | ||||||||||||
1153 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, | ||||||||||||
1154 | OverflowingBinaryOperator::NoUnsignedWrap>( | ||||||||||||
1155 | L, R); | ||||||||||||
1156 | } | ||||||||||||
1157 | template <typename LHS, typename RHS> | ||||||||||||
1158 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, | ||||||||||||
1159 | OverflowingBinaryOperator::NoUnsignedWrap> | ||||||||||||
1160 | m_NUWShl(const LHS &L, const RHS &R) { | ||||||||||||
1161 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, | ||||||||||||
1162 | OverflowingBinaryOperator::NoUnsignedWrap>( | ||||||||||||
1163 | L, R); | ||||||||||||
1164 | } | ||||||||||||
1165 | |||||||||||||
1166 | //===----------------------------------------------------------------------===// | ||||||||||||
1167 | // Class that matches a group of binary opcodes. | ||||||||||||
1168 | // | ||||||||||||
1169 | template <typename LHS_t, typename RHS_t, typename Predicate> | ||||||||||||
1170 | struct BinOpPred_match : Predicate { | ||||||||||||
1171 | LHS_t L; | ||||||||||||
1172 | RHS_t R; | ||||||||||||
1173 | |||||||||||||
1174 | BinOpPred_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} | ||||||||||||
1175 | |||||||||||||
1176 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
1177 | if (auto *I = dyn_cast<Instruction>(V)) | ||||||||||||
1178 | return this->isOpType(I->getOpcode()) && L.match(I->getOperand(0)) && | ||||||||||||
1179 | R.match(I->getOperand(1)); | ||||||||||||
1180 | if (auto *CE = dyn_cast<ConstantExpr>(V)) | ||||||||||||
1181 | return this->isOpType(CE->getOpcode()) && L.match(CE->getOperand(0)) && | ||||||||||||
1182 | R.match(CE->getOperand(1)); | ||||||||||||
1183 | return false; | ||||||||||||
1184 | } | ||||||||||||
1185 | }; | ||||||||||||
1186 | |||||||||||||
1187 | struct is_shift_op { | ||||||||||||
1188 | bool isOpType(unsigned Opcode) { return Instruction::isShift(Opcode); } | ||||||||||||
1189 | }; | ||||||||||||
1190 | |||||||||||||
1191 | struct is_right_shift_op { | ||||||||||||
1192 | bool isOpType(unsigned Opcode) { | ||||||||||||
1193 | return Opcode == Instruction::LShr || Opcode == Instruction::AShr; | ||||||||||||
1194 | } | ||||||||||||
1195 | }; | ||||||||||||
1196 | |||||||||||||
1197 | struct is_logical_shift_op { | ||||||||||||
1198 | bool isOpType(unsigned Opcode) { | ||||||||||||
1199 | return Opcode == Instruction::LShr || Opcode == Instruction::Shl; | ||||||||||||
1200 | } | ||||||||||||
1201 | }; | ||||||||||||
1202 | |||||||||||||
1203 | struct is_bitwiselogic_op { | ||||||||||||
1204 | bool isOpType(unsigned Opcode) { | ||||||||||||
1205 | return Instruction::isBitwiseLogicOp(Opcode); | ||||||||||||
1206 | } | ||||||||||||
1207 | }; | ||||||||||||
1208 | |||||||||||||
1209 | struct is_idiv_op { | ||||||||||||
1210 | bool isOpType(unsigned Opcode) { | ||||||||||||
1211 | return Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; | ||||||||||||
1212 | } | ||||||||||||
1213 | }; | ||||||||||||
1214 | |||||||||||||
1215 | struct is_irem_op { | ||||||||||||
1216 | bool isOpType(unsigned Opcode) { | ||||||||||||
1217 | return Opcode == Instruction::SRem || Opcode == Instruction::URem; | ||||||||||||
1218 | } | ||||||||||||
1219 | }; | ||||||||||||
1220 | |||||||||||||
1221 | /// Matches shift operations. | ||||||||||||
1222 | template <typename LHS, typename RHS> | ||||||||||||
1223 | inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L, | ||||||||||||
1224 | const RHS &R) { | ||||||||||||
1225 | return BinOpPred_match<LHS, RHS, is_shift_op>(L, R); | ||||||||||||
1226 | } | ||||||||||||
1227 | |||||||||||||
1228 | /// Matches logical shift operations. | ||||||||||||
1229 | template <typename LHS, typename RHS> | ||||||||||||
1230 | inline BinOpPred_match<LHS, RHS, is_right_shift_op> m_Shr(const LHS &L, | ||||||||||||
1231 | const RHS &R) { | ||||||||||||
1232 | return BinOpPred_match<LHS, RHS, is_right_shift_op>(L, R); | ||||||||||||
1233 | } | ||||||||||||
1234 | |||||||||||||
1235 | /// Matches logical shift operations. | ||||||||||||
1236 | template <typename LHS, typename RHS> | ||||||||||||
1237 | inline BinOpPred_match<LHS, RHS, is_logical_shift_op> | ||||||||||||
1238 | m_LogicalShift(const LHS &L, const RHS &R) { | ||||||||||||
1239 | return BinOpPred_match<LHS, RHS, is_logical_shift_op>(L, R); | ||||||||||||
1240 | } | ||||||||||||
1241 | |||||||||||||
1242 | /// Matches bitwise logic operations. | ||||||||||||
1243 | template <typename LHS, typename RHS> | ||||||||||||
1244 | inline BinOpPred_match<LHS, RHS, is_bitwiselogic_op> | ||||||||||||
1245 | m_BitwiseLogic(const LHS &L, const RHS &R) { | ||||||||||||
1246 | return BinOpPred_match<LHS, RHS, is_bitwiselogic_op>(L, R); | ||||||||||||
1247 | } | ||||||||||||
1248 | |||||||||||||
1249 | /// Matches integer division operations. | ||||||||||||
1250 | template <typename LHS, typename RHS> | ||||||||||||
1251 | inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L, | ||||||||||||
1252 | const RHS &R) { | ||||||||||||
1253 | return BinOpPred_match<LHS, RHS, is_idiv_op>(L, R); | ||||||||||||
1254 | } | ||||||||||||
1255 | |||||||||||||
1256 | /// Matches integer remainder operations. | ||||||||||||
1257 | template <typename LHS, typename RHS> | ||||||||||||
1258 | inline BinOpPred_match<LHS, RHS, is_irem_op> m_IRem(const LHS &L, | ||||||||||||
1259 | const RHS &R) { | ||||||||||||
1260 | return BinOpPred_match<LHS, RHS, is_irem_op>(L, R); | ||||||||||||
1261 | } | ||||||||||||
1262 | |||||||||||||
1263 | //===----------------------------------------------------------------------===// | ||||||||||||
1264 | // Class that matches exact binary ops. | ||||||||||||
1265 | // | ||||||||||||
1266 | template <typename SubPattern_t> struct Exact_match { | ||||||||||||
1267 | SubPattern_t SubPattern; | ||||||||||||
1268 | |||||||||||||
1269 | Exact_match(const SubPattern_t &SP) : SubPattern(SP) {} | ||||||||||||
1270 | |||||||||||||
1271 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
1272 | if (auto *PEO = dyn_cast<PossiblyExactOperator>(V)) | ||||||||||||
1273 | return PEO->isExact() && SubPattern.match(V); | ||||||||||||
1274 | return false; | ||||||||||||
1275 | } | ||||||||||||
1276 | }; | ||||||||||||
1277 | |||||||||||||
1278 | template <typename T> inline Exact_match<T> m_Exact(const T &SubPattern) { | ||||||||||||
1279 | return SubPattern; | ||||||||||||
1280 | } | ||||||||||||
1281 | |||||||||||||
1282 | //===----------------------------------------------------------------------===// | ||||||||||||
1283 | // Matchers for CmpInst classes | ||||||||||||
1284 | // | ||||||||||||
1285 | |||||||||||||
1286 | template <typename LHS_t, typename RHS_t, typename Class, typename PredicateTy, | ||||||||||||
1287 | bool Commutable = false> | ||||||||||||
1288 | struct CmpClass_match { | ||||||||||||
1289 | PredicateTy &Predicate; | ||||||||||||
1290 | LHS_t L; | ||||||||||||
1291 | RHS_t R; | ||||||||||||
1292 | |||||||||||||
1293 | // The evaluation order is always stable, regardless of Commutability. | ||||||||||||
1294 | // The LHS is always matched first. | ||||||||||||
1295 | CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS) | ||||||||||||
1296 | : Predicate(Pred), L(LHS), R(RHS) {} | ||||||||||||
1297 | |||||||||||||
1298 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
1299 | if (auto *I = dyn_cast<Class>(V)) { | ||||||||||||
1300 | if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) { | ||||||||||||
1301 | Predicate = I->getPredicate(); | ||||||||||||
1302 | return true; | ||||||||||||
1303 | } else if (Commutable && L.match(I->getOperand(1)) && | ||||||||||||
1304 | R.match(I->getOperand(0))) { | ||||||||||||
1305 | Predicate = I->getSwappedPredicate(); | ||||||||||||
1306 | return true; | ||||||||||||
1307 | } | ||||||||||||
1308 | } | ||||||||||||
1309 | return false; | ||||||||||||
1310 | } | ||||||||||||
1311 | }; | ||||||||||||
1312 | |||||||||||||
1313 | template <typename LHS, typename RHS> | ||||||||||||
1314 | inline CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate> | ||||||||||||
1315 | m_Cmp(CmpInst::Predicate &Pred, const LHS &L, const RHS &R) { | ||||||||||||
1316 | return CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>(Pred, L, R); | ||||||||||||
1317 | } | ||||||||||||
1318 | |||||||||||||
1319 | template <typename LHS, typename RHS> | ||||||||||||
1320 | inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate> | ||||||||||||
1321 | m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { | ||||||||||||
1322 | return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>(Pred, L, R); | ||||||||||||
1323 | } | ||||||||||||
1324 | |||||||||||||
1325 | template <typename LHS, typename RHS> | ||||||||||||
1326 | inline CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate> | ||||||||||||
1327 | m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) { | ||||||||||||
1328 | return CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>(Pred, L, R); | ||||||||||||
1329 | } | ||||||||||||
1330 | |||||||||||||
1331 | //===----------------------------------------------------------------------===// | ||||||||||||
1332 | // Matchers for instructions with a given opcode and number of operands. | ||||||||||||
1333 | // | ||||||||||||
1334 | |||||||||||||
1335 | /// Matches instructions with Opcode and three operands. | ||||||||||||
1336 | template <typename T0, unsigned Opcode> struct OneOps_match { | ||||||||||||
1337 | T0 Op1; | ||||||||||||
1338 | |||||||||||||
1339 | OneOps_match(const T0 &Op1) : Op1(Op1) {} | ||||||||||||
1340 | |||||||||||||
1341 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
1342 | if (V->getValueID() == Value::InstructionVal + Opcode) { | ||||||||||||
1343 | auto *I = cast<Instruction>(V); | ||||||||||||
1344 | return Op1.match(I->getOperand(0)); | ||||||||||||
1345 | } | ||||||||||||
1346 | return false; | ||||||||||||
1347 | } | ||||||||||||
1348 | }; | ||||||||||||
1349 | |||||||||||||
1350 | /// Matches instructions with Opcode and three operands. | ||||||||||||
1351 | template <typename T0, typename T1, unsigned Opcode> struct TwoOps_match { | ||||||||||||
1352 | T0 Op1; | ||||||||||||
1353 | T1 Op2; | ||||||||||||
1354 | |||||||||||||
1355 | TwoOps_match(const T0 &Op1, const T1 &Op2) : Op1(Op1), Op2(Op2) {} | ||||||||||||
1356 | |||||||||||||
1357 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
1358 | if (V->getValueID() == Value::InstructionVal + Opcode) { | ||||||||||||
1359 | auto *I = cast<Instruction>(V); | ||||||||||||
1360 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)); | ||||||||||||
1361 | } | ||||||||||||
1362 | return false; | ||||||||||||
1363 | } | ||||||||||||
1364 | }; | ||||||||||||
1365 | |||||||||||||
1366 | /// Matches instructions with Opcode and three operands. | ||||||||||||
1367 | template <typename T0, typename T1, typename T2, unsigned Opcode> | ||||||||||||
1368 | struct ThreeOps_match { | ||||||||||||
1369 | T0 Op1; | ||||||||||||
1370 | T1 Op2; | ||||||||||||
1371 | T2 Op3; | ||||||||||||
1372 | |||||||||||||
1373 | ThreeOps_match(const T0 &Op1, const T1 &Op2, const T2 &Op3) | ||||||||||||
1374 | : Op1(Op1), Op2(Op2), Op3(Op3) {} | ||||||||||||
1375 | |||||||||||||
1376 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
1377 | if (V->getValueID() == Value::InstructionVal + Opcode) { | ||||||||||||
1378 | auto *I = cast<Instruction>(V); | ||||||||||||
1379 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) && | ||||||||||||
1380 | Op3.match(I->getOperand(2)); | ||||||||||||
1381 | } | ||||||||||||
1382 | return false; | ||||||||||||
1383 | } | ||||||||||||
1384 | }; | ||||||||||||
1385 | |||||||||||||
1386 | /// Matches SelectInst. | ||||||||||||
1387 | template <typename Cond, typename LHS, typename RHS> | ||||||||||||
1388 | inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select> | ||||||||||||
1389 | m_Select(const Cond &C, const LHS &L, const RHS &R) { | ||||||||||||
1390 | return ThreeOps_match<Cond, LHS, RHS, Instruction::Select>(C, L, R); | ||||||||||||
1391 | } | ||||||||||||
1392 | |||||||||||||
1393 | /// This matches a select of two constants, e.g.: | ||||||||||||
1394 | /// m_SelectCst<-1, 0>(m_Value(V)) | ||||||||||||
1395 | template <int64_t L, int64_t R, typename Cond> | ||||||||||||
1396 | inline ThreeOps_match<Cond, constantint_match<L>, constantint_match<R>, | ||||||||||||
1397 | Instruction::Select> | ||||||||||||
1398 | m_SelectCst(const Cond &C) { | ||||||||||||
1399 | return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>()); | ||||||||||||
1400 | } | ||||||||||||
1401 | |||||||||||||
1402 | /// Matches FreezeInst. | ||||||||||||
1403 | template <typename OpTy> | ||||||||||||
1404 | inline OneOps_match<OpTy, Instruction::Freeze> m_Freeze(const OpTy &Op) { | ||||||||||||
1405 | return OneOps_match<OpTy, Instruction::Freeze>(Op); | ||||||||||||
1406 | } | ||||||||||||
1407 | |||||||||||||
1408 | /// Matches InsertElementInst. | ||||||||||||
1409 | template <typename Val_t, typename Elt_t, typename Idx_t> | ||||||||||||
1410 | inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement> | ||||||||||||
1411 | m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) { | ||||||||||||
1412 | return ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>( | ||||||||||||
1413 | Val, Elt, Idx); | ||||||||||||
1414 | } | ||||||||||||
1415 | |||||||||||||
1416 | /// Matches ExtractElementInst. | ||||||||||||
1417 | template <typename Val_t, typename Idx_t> | ||||||||||||
1418 | inline TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement> | ||||||||||||
1419 | m_ExtractElt(const Val_t &Val, const Idx_t &Idx) { | ||||||||||||
1420 | return TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>(Val, Idx); | ||||||||||||
1421 | } | ||||||||||||
1422 | |||||||||||||
1423 | /// Matches shuffle. | ||||||||||||
1424 | template <typename T0, typename T1, typename T2> struct Shuffle_match { | ||||||||||||
1425 | T0 Op1; | ||||||||||||
1426 | T1 Op2; | ||||||||||||
1427 | T2 Mask; | ||||||||||||
1428 | |||||||||||||
1429 | Shuffle_match(const T0 &Op1, const T1 &Op2, const T2 &Mask) | ||||||||||||
1430 | : Op1(Op1), Op2(Op2), Mask(Mask) {} | ||||||||||||
1431 | |||||||||||||
1432 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
1433 | if (auto *I = dyn_cast<ShuffleVectorInst>(V)) { | ||||||||||||
1434 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) && | ||||||||||||
1435 | Mask.match(I->getShuffleMask()); | ||||||||||||
1436 | } | ||||||||||||
1437 | return false; | ||||||||||||
1438 | } | ||||||||||||
1439 | }; | ||||||||||||
1440 | |||||||||||||
1441 | struct m_Mask { | ||||||||||||
1442 | ArrayRef<int> &MaskRef; | ||||||||||||
1443 | m_Mask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {} | ||||||||||||
1444 | bool match(ArrayRef<int> Mask) { | ||||||||||||
1445 | MaskRef = Mask; | ||||||||||||
1446 | return true; | ||||||||||||
1447 | } | ||||||||||||
1448 | }; | ||||||||||||
1449 | |||||||||||||
1450 | struct m_ZeroMask { | ||||||||||||
1451 | bool match(ArrayRef<int> Mask) { | ||||||||||||
1452 | return all_of(Mask, [](int Elem) { return Elem == 0 || Elem == -1; }); | ||||||||||||
1453 | } | ||||||||||||
1454 | }; | ||||||||||||
1455 | |||||||||||||
1456 | struct m_SpecificMask { | ||||||||||||
1457 | ArrayRef<int> &MaskRef; | ||||||||||||
1458 | m_SpecificMask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {} | ||||||||||||
1459 | bool match(ArrayRef<int> Mask) { return MaskRef == Mask; } | ||||||||||||
1460 | }; | ||||||||||||
1461 | |||||||||||||
1462 | struct m_SplatOrUndefMask { | ||||||||||||
1463 | int &SplatIndex; | ||||||||||||
1464 | m_SplatOrUndefMask(int &SplatIndex) : SplatIndex(SplatIndex) {} | ||||||||||||
1465 | bool match(ArrayRef<int> Mask) { | ||||||||||||
1466 | auto First = find_if(Mask, [](int Elem) { return Elem != -1; }); | ||||||||||||
1467 | if (First == Mask.end()) | ||||||||||||
1468 | return false; | ||||||||||||
1469 | SplatIndex = *First; | ||||||||||||
1470 | return all_of(Mask, | ||||||||||||
1471 | [First](int Elem) { return Elem == *First || Elem == -1; }); | ||||||||||||
1472 | } | ||||||||||||
1473 | }; | ||||||||||||
1474 | |||||||||||||
1475 | /// Matches ShuffleVectorInst independently of mask value. | ||||||||||||
1476 | template <typename V1_t, typename V2_t> | ||||||||||||
1477 | inline TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector> | ||||||||||||
1478 | m_Shuffle(const V1_t &v1, const V2_t &v2) { | ||||||||||||
1479 | return TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector>(v1, v2); | ||||||||||||
1480 | } | ||||||||||||
1481 | |||||||||||||
1482 | template <typename V1_t, typename V2_t, typename Mask_t> | ||||||||||||
1483 | inline Shuffle_match<V1_t, V2_t, Mask_t> | ||||||||||||
1484 | m_Shuffle(const V1_t &v1, const V2_t &v2, const Mask_t &mask) { | ||||||||||||
1485 | return Shuffle_match<V1_t, V2_t, Mask_t>(v1, v2, mask); | ||||||||||||
1486 | } | ||||||||||||
1487 | |||||||||||||
1488 | /// Matches LoadInst. | ||||||||||||
1489 | template <typename OpTy> | ||||||||||||
1490 | inline OneOps_match<OpTy, Instruction::Load> m_Load(const OpTy &Op) { | ||||||||||||
1491 | return OneOps_match<OpTy, Instruction::Load>(Op); | ||||||||||||
1492 | } | ||||||||||||
1493 | |||||||||||||
1494 | /// Matches StoreInst. | ||||||||||||
1495 | template <typename ValueOpTy, typename PointerOpTy> | ||||||||||||
1496 | inline TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store> | ||||||||||||
1497 | m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) { | ||||||||||||
1498 | return TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>(ValueOp, | ||||||||||||
1499 | PointerOp); | ||||||||||||
1500 | } | ||||||||||||
1501 | |||||||||||||
1502 | //===----------------------------------------------------------------------===// | ||||||||||||
1503 | // Matchers for CastInst classes | ||||||||||||
1504 | // | ||||||||||||
1505 | |||||||||||||
1506 | template <typename Op_t, unsigned Opcode> struct CastClass_match { | ||||||||||||
1507 | Op_t Op; | ||||||||||||
1508 | |||||||||||||
1509 | CastClass_match(const Op_t &OpMatch) : Op(OpMatch) {} | ||||||||||||
1510 | |||||||||||||
1511 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
1512 | if (auto *O = dyn_cast<Operator>(V)) | ||||||||||||
1513 | return O->getOpcode() == Opcode && Op.match(O->getOperand(0)); | ||||||||||||
1514 | return false; | ||||||||||||
1515 | } | ||||||||||||
1516 | }; | ||||||||||||
1517 | |||||||||||||
1518 | /// Matches BitCast. | ||||||||||||
1519 | template <typename OpTy> | ||||||||||||
1520 | inline CastClass_match<OpTy, Instruction::BitCast> m_BitCast(const OpTy &Op) { | ||||||||||||
1521 | return CastClass_match<OpTy, Instruction::BitCast>(Op); | ||||||||||||
1522 | } | ||||||||||||
1523 | |||||||||||||
1524 | /// Matches PtrToInt. | ||||||||||||
1525 | template <typename OpTy> | ||||||||||||
1526 | inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) { | ||||||||||||
1527 | return CastClass_match<OpTy, Instruction::PtrToInt>(Op); | ||||||||||||
1528 | } | ||||||||||||
1529 | |||||||||||||
1530 | /// Matches IntToPtr. | ||||||||||||
1531 | template <typename OpTy> | ||||||||||||
1532 | inline CastClass_match<OpTy, Instruction::IntToPtr> m_IntToPtr(const OpTy &Op) { | ||||||||||||
1533 | return CastClass_match<OpTy, Instruction::IntToPtr>(Op); | ||||||||||||
1534 | } | ||||||||||||
1535 | |||||||||||||
1536 | /// Matches Trunc. | ||||||||||||
1537 | template <typename OpTy> | ||||||||||||
1538 | inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) { | ||||||||||||
1539 | return CastClass_match<OpTy, Instruction::Trunc>(Op); | ||||||||||||
1540 | } | ||||||||||||
1541 | |||||||||||||
1542 | template <typename OpTy> | ||||||||||||
1543 | inline match_combine_or<CastClass_match<OpTy, Instruction::Trunc>, OpTy> | ||||||||||||
1544 | m_TruncOrSelf(const OpTy &Op) { | ||||||||||||
1545 | return m_CombineOr(m_Trunc(Op), Op); | ||||||||||||
1546 | } | ||||||||||||
1547 | |||||||||||||
1548 | /// Matches SExt. | ||||||||||||
1549 | template <typename OpTy> | ||||||||||||
1550 | inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) { | ||||||||||||
1551 | return CastClass_match<OpTy, Instruction::SExt>(Op); | ||||||||||||
1552 | } | ||||||||||||
1553 | |||||||||||||
1554 | /// Matches ZExt. | ||||||||||||
1555 | template <typename OpTy> | ||||||||||||
1556 | inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) { | ||||||||||||
1557 | return CastClass_match<OpTy, Instruction::ZExt>(Op); | ||||||||||||
1558 | } | ||||||||||||
1559 | |||||||||||||
1560 | template <typename OpTy> | ||||||||||||
1561 | inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy> | ||||||||||||
1562 | m_ZExtOrSelf(const OpTy &Op) { | ||||||||||||
1563 | return m_CombineOr(m_ZExt(Op), Op); | ||||||||||||
1564 | } | ||||||||||||
1565 | |||||||||||||
1566 | template <typename OpTy> | ||||||||||||
1567 | inline match_combine_or<CastClass_match<OpTy, Instruction::SExt>, OpTy> | ||||||||||||
1568 | m_SExtOrSelf(const OpTy &Op) { | ||||||||||||
1569 | return m_CombineOr(m_SExt(Op), Op); | ||||||||||||
1570 | } | ||||||||||||
1571 | |||||||||||||
1572 | template <typename OpTy> | ||||||||||||
1573 | inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, | ||||||||||||
1574 | CastClass_match<OpTy, Instruction::SExt>> | ||||||||||||
1575 | m_ZExtOrSExt(const OpTy &Op) { | ||||||||||||
1576 | return m_CombineOr(m_ZExt(Op), m_SExt(Op)); | ||||||||||||
1577 | } | ||||||||||||
1578 | |||||||||||||
1579 | template <typename OpTy> | ||||||||||||
1580 | inline match_combine_or< | ||||||||||||
1581 | match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, | ||||||||||||
1582 | CastClass_match<OpTy, Instruction::SExt>>, | ||||||||||||
1583 | OpTy> | ||||||||||||
1584 | m_ZExtOrSExtOrSelf(const OpTy &Op) { | ||||||||||||
1585 | return m_CombineOr(m_ZExtOrSExt(Op), Op); | ||||||||||||
1586 | } | ||||||||||||
1587 | |||||||||||||
1588 | template <typename OpTy> | ||||||||||||
1589 | inline CastClass_match<OpTy, Instruction::UIToFP> m_UIToFP(const OpTy &Op) { | ||||||||||||
1590 | return CastClass_match<OpTy, Instruction::UIToFP>(Op); | ||||||||||||
1591 | } | ||||||||||||
1592 | |||||||||||||
1593 | template <typename OpTy> | ||||||||||||
1594 | inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) { | ||||||||||||
1595 | return CastClass_match<OpTy, Instruction::SIToFP>(Op); | ||||||||||||
1596 | } | ||||||||||||
1597 | |||||||||||||
1598 | template <typename OpTy> | ||||||||||||
1599 | inline CastClass_match<OpTy, Instruction::FPToUI> m_FPToUI(const OpTy &Op) { | ||||||||||||
1600 | return CastClass_match<OpTy, Instruction::FPToUI>(Op); | ||||||||||||
1601 | } | ||||||||||||
1602 | |||||||||||||
1603 | template <typename OpTy> | ||||||||||||
1604 | inline CastClass_match<OpTy, Instruction::FPToSI> m_FPToSI(const OpTy &Op) { | ||||||||||||
1605 | return CastClass_match<OpTy, Instruction::FPToSI>(Op); | ||||||||||||
1606 | } | ||||||||||||
1607 | |||||||||||||
1608 | template <typename OpTy> | ||||||||||||
1609 | inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) { | ||||||||||||
1610 | return CastClass_match<OpTy, Instruction::FPTrunc>(Op); | ||||||||||||
1611 | } | ||||||||||||
1612 | |||||||||||||
1613 | template <typename OpTy> | ||||||||||||
1614 | inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) { | ||||||||||||
1615 | return CastClass_match<OpTy, Instruction::FPExt>(Op); | ||||||||||||
1616 | } | ||||||||||||
1617 | |||||||||||||
1618 | //===----------------------------------------------------------------------===// | ||||||||||||
1619 | // Matchers for control flow. | ||||||||||||
1620 | // | ||||||||||||
1621 | |||||||||||||
1622 | struct br_match { | ||||||||||||
1623 | BasicBlock *&Succ; | ||||||||||||
1624 | |||||||||||||
1625 | br_match(BasicBlock *&Succ) : Succ(Succ) {} | ||||||||||||
1626 | |||||||||||||
1627 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
1628 | if (auto *BI = dyn_cast<BranchInst>(V)) | ||||||||||||
1629 | if (BI->isUnconditional()) { | ||||||||||||
1630 | Succ = BI->getSuccessor(0); | ||||||||||||
1631 | return true; | ||||||||||||
1632 | } | ||||||||||||
1633 | return false; | ||||||||||||
1634 | } | ||||||||||||
1635 | }; | ||||||||||||
1636 | |||||||||||||
1637 | inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); } | ||||||||||||
1638 | |||||||||||||
1639 | template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t> | ||||||||||||
1640 | struct brc_match { | ||||||||||||
1641 | Cond_t Cond; | ||||||||||||
1642 | TrueBlock_t T; | ||||||||||||
1643 | FalseBlock_t F; | ||||||||||||
1644 | |||||||||||||
1645 | brc_match(const Cond_t &C, const TrueBlock_t &t, const FalseBlock_t &f) | ||||||||||||
1646 | : Cond(C), T(t), F(f) {} | ||||||||||||
1647 | |||||||||||||
1648 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
1649 | if (auto *BI = dyn_cast<BranchInst>(V)) | ||||||||||||
1650 | if (BI->isConditional() && Cond.match(BI->getCondition())) | ||||||||||||
1651 | return T.match(BI->getSuccessor(0)) && F.match(BI->getSuccessor(1)); | ||||||||||||
1652 | return false; | ||||||||||||
1653 | } | ||||||||||||
1654 | }; | ||||||||||||
1655 | |||||||||||||
1656 | template <typename Cond_t> | ||||||||||||
1657 | inline brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>> | ||||||||||||
1658 | m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) { | ||||||||||||
1659 | return brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>( | ||||||||||||
1660 | C, m_BasicBlock(T), m_BasicBlock(F)); | ||||||||||||
1661 | } | ||||||||||||
1662 | |||||||||||||
1663 | template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t> | ||||||||||||
1664 | inline brc_match<Cond_t, TrueBlock_t, FalseBlock_t> | ||||||||||||
1665 | m_Br(const Cond_t &C, const TrueBlock_t &T, const FalseBlock_t &F) { | ||||||||||||
1666 | return brc_match<Cond_t, TrueBlock_t, FalseBlock_t>(C, T, F); | ||||||||||||
1667 | } | ||||||||||||
1668 | |||||||||||||
1669 | //===----------------------------------------------------------------------===// | ||||||||||||
1670 | // Matchers for max/min idioms, eg: "select (sgt x, y), x, y" -> smax(x,y). | ||||||||||||
1671 | // | ||||||||||||
1672 | |||||||||||||
1673 | template <typename CmpInst_t, typename LHS_t, typename RHS_t, typename Pred_t, | ||||||||||||
1674 | bool Commutable = false> | ||||||||||||
1675 | struct MaxMin_match { | ||||||||||||
1676 | LHS_t L; | ||||||||||||
1677 | RHS_t R; | ||||||||||||
1678 | |||||||||||||
1679 | // The evaluation order is always stable, regardless of Commutability. | ||||||||||||
1680 | // The LHS is always matched first. | ||||||||||||
1681 | MaxMin_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} | ||||||||||||
1682 | |||||||||||||
1683 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
1684 | if (auto *II = dyn_cast<IntrinsicInst>(V)) { | ||||||||||||
1685 | Intrinsic::ID IID = II->getIntrinsicID(); | ||||||||||||
1686 | if ((IID == Intrinsic::smax && Pred_t::match(ICmpInst::ICMP_SGT)) || | ||||||||||||
1687 | (IID == Intrinsic::smin && Pred_t::match(ICmpInst::ICMP_SLT)) || | ||||||||||||
1688 | (IID == Intrinsic::umax && Pred_t::match(ICmpInst::ICMP_UGT)) || | ||||||||||||
1689 | (IID == Intrinsic::umin && Pred_t::match(ICmpInst::ICMP_ULT))) { | ||||||||||||
1690 | Value *LHS = II->getOperand(0), *RHS = II->getOperand(1); | ||||||||||||
1691 | return (L.match(LHS) && R.match(RHS)) || | ||||||||||||
1692 | (Commutable && L.match(RHS) && R.match(LHS)); | ||||||||||||
1693 | } | ||||||||||||
1694 | } | ||||||||||||
1695 | // Look for "(x pred y) ? x : y" or "(x pred y) ? y : x". | ||||||||||||
1696 | auto *SI = dyn_cast<SelectInst>(V); | ||||||||||||
1697 | if (!SI) | ||||||||||||
1698 | return false; | ||||||||||||
1699 | auto *Cmp = dyn_cast<CmpInst_t>(SI->getCondition()); | ||||||||||||
1700 | if (!Cmp) | ||||||||||||
1701 | return false; | ||||||||||||
1702 | // At this point we have a select conditioned on a comparison. Check that | ||||||||||||
1703 | // it is the values returned by the select that are being compared. | ||||||||||||
1704 | Value *TrueVal = SI->getTrueValue(); | ||||||||||||
1705 | Value *FalseVal = SI->getFalseValue(); | ||||||||||||
1706 | Value *LHS = Cmp->getOperand(0); | ||||||||||||
1707 | Value *RHS = Cmp->getOperand(1); | ||||||||||||
1708 | if ((TrueVal != LHS || FalseVal != RHS) && | ||||||||||||
1709 | (TrueVal != RHS || FalseVal != LHS)) | ||||||||||||
1710 | return false; | ||||||||||||
1711 | typename CmpInst_t::Predicate Pred = | ||||||||||||
1712 | LHS == TrueVal ? Cmp->getPredicate() : Cmp->getInversePredicate(); | ||||||||||||
1713 | // Does "(x pred y) ? x : y" represent the desired max/min operation? | ||||||||||||
1714 | if (!Pred_t::match(Pred)) | ||||||||||||
1715 | return false; | ||||||||||||
1716 | // It does! Bind the operands. | ||||||||||||
1717 | return (L.match(LHS) && R.match(RHS)) || | ||||||||||||
1718 | (Commutable && L.match(RHS) && R.match(LHS)); | ||||||||||||
1719 | } | ||||||||||||
1720 | }; | ||||||||||||
1721 | |||||||||||||
1722 | /// Helper class for identifying signed max predicates. | ||||||||||||
1723 | struct smax_pred_ty { | ||||||||||||
1724 | static bool match(ICmpInst::Predicate Pred) { | ||||||||||||
1725 | return Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE; | ||||||||||||
1726 | } | ||||||||||||
1727 | }; | ||||||||||||
1728 | |||||||||||||
1729 | /// Helper class for identifying signed min predicates. | ||||||||||||
1730 | struct smin_pred_ty { | ||||||||||||
1731 | static bool match(ICmpInst::Predicate Pred) { | ||||||||||||
1732 | return Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE; | ||||||||||||
1733 | } | ||||||||||||
1734 | }; | ||||||||||||
1735 | |||||||||||||
1736 | /// Helper class for identifying unsigned max predicates. | ||||||||||||
1737 | struct umax_pred_ty { | ||||||||||||
1738 | static bool match(ICmpInst::Predicate Pred) { | ||||||||||||
1739 | return Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE; | ||||||||||||
1740 | } | ||||||||||||
1741 | }; | ||||||||||||
1742 | |||||||||||||
1743 | /// Helper class for identifying unsigned min predicates. | ||||||||||||
1744 | struct umin_pred_ty { | ||||||||||||
1745 | static bool match(ICmpInst::Predicate Pred) { | ||||||||||||
1746 | return Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE; | ||||||||||||
1747 | } | ||||||||||||
1748 | }; | ||||||||||||
1749 | |||||||||||||
1750 | /// Helper class for identifying ordered max predicates. | ||||||||||||
1751 | struct ofmax_pred_ty { | ||||||||||||
1752 | static bool match(FCmpInst::Predicate Pred) { | ||||||||||||
1753 | return Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_OGE; | ||||||||||||
1754 | } | ||||||||||||
1755 | }; | ||||||||||||
1756 | |||||||||||||
1757 | /// Helper class for identifying ordered min predicates. | ||||||||||||
1758 | struct ofmin_pred_ty { | ||||||||||||
1759 | static bool match(FCmpInst::Predicate Pred) { | ||||||||||||
1760 | return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE; | ||||||||||||
1761 | } | ||||||||||||
1762 | }; | ||||||||||||
1763 | |||||||||||||
1764 | /// Helper class for identifying unordered max predicates. | ||||||||||||
1765 | struct ufmax_pred_ty { | ||||||||||||
1766 | static bool match(FCmpInst::Predicate Pred) { | ||||||||||||
1767 | return Pred == CmpInst::FCMP_UGT || Pred == CmpInst::FCMP_UGE; | ||||||||||||
1768 | } | ||||||||||||
1769 | }; | ||||||||||||
1770 | |||||||||||||
1771 | /// Helper class for identifying unordered min predicates. | ||||||||||||
1772 | struct ufmin_pred_ty { | ||||||||||||
1773 | static bool match(FCmpInst::Predicate Pred) { | ||||||||||||
1774 | return Pred == CmpInst::FCMP_ULT || Pred == CmpInst::FCMP_ULE; | ||||||||||||
1775 | } | ||||||||||||
1776 | }; | ||||||||||||
1777 | |||||||||||||
1778 | template <typename LHS, typename RHS> | ||||||||||||
1779 | inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty> m_SMax(const LHS &L, | ||||||||||||
1780 | const RHS &R) { | ||||||||||||
1781 | return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>(L, R); | ||||||||||||
1782 | } | ||||||||||||
1783 | |||||||||||||
1784 | template <typename LHS, typename RHS> | ||||||||||||
1785 | inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty> m_SMin(const LHS &L, | ||||||||||||
1786 | const RHS &R) { | ||||||||||||
1787 | return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>(L, R); | ||||||||||||
1788 | } | ||||||||||||
1789 | |||||||||||||
1790 | template <typename LHS, typename RHS> | ||||||||||||
1791 | inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty> m_UMax(const LHS &L, | ||||||||||||
1792 | const RHS &R) { | ||||||||||||
1793 | return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>(L, R); | ||||||||||||
1794 | } | ||||||||||||
1795 | |||||||||||||
1796 | template <typename LHS, typename RHS> | ||||||||||||
1797 | inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty> m_UMin(const LHS &L, | ||||||||||||
1798 | const RHS &R) { | ||||||||||||
1799 | return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R); | ||||||||||||
1800 | } | ||||||||||||
1801 | |||||||||||||
1802 | template <typename LHS, typename RHS> | ||||||||||||
1803 | inline match_combine_or< | ||||||||||||
1804 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>, | ||||||||||||
1805 | MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>>, | ||||||||||||
1806 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>, | ||||||||||||
1807 | MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>>> | ||||||||||||
1808 | m_MaxOrMin(const LHS &L, const RHS &R) { | ||||||||||||
1809 | return m_CombineOr(m_CombineOr(m_SMax(L, R), m_SMin(L, R)), | ||||||||||||
1810 | m_CombineOr(m_UMax(L, R), m_UMin(L, R))); | ||||||||||||
1811 | } | ||||||||||||
1812 | |||||||||||||
1813 | /// Match an 'ordered' floating point maximum function. | ||||||||||||
1814 | /// Floating point has one special value 'NaN'. Therefore, there is no total | ||||||||||||
1815 | /// order. However, if we can ignore the 'NaN' value (for example, because of a | ||||||||||||
1816 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum' | ||||||||||||
1817 | /// semantics. In the presence of 'NaN' we have to preserve the original | ||||||||||||
1818 | /// select(fcmp(ogt/ge, L, R), L, R) semantics matched by this predicate. | ||||||||||||
1819 | /// | ||||||||||||
1820 | /// max(L, R) iff L and R are not NaN | ||||||||||||
1821 | /// m_OrdFMax(L, R) = R iff L or R are NaN | ||||||||||||
1822 | template <typename LHS, typename RHS> | ||||||||||||
1823 | inline MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty> m_OrdFMax(const LHS &L, | ||||||||||||
1824 | const RHS &R) { | ||||||||||||
1825 | return MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty>(L, R); | ||||||||||||
1826 | } | ||||||||||||
1827 | |||||||||||||
1828 | /// Match an 'ordered' floating point minimum function. | ||||||||||||
1829 | /// Floating point has one special value 'NaN'. Therefore, there is no total | ||||||||||||
1830 | /// order. However, if we can ignore the 'NaN' value (for example, because of a | ||||||||||||
1831 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' | ||||||||||||
1832 | /// semantics. In the presence of 'NaN' we have to preserve the original | ||||||||||||
1833 | /// select(fcmp(olt/le, L, R), L, R) semantics matched by this predicate. | ||||||||||||
1834 | /// | ||||||||||||
1835 | /// min(L, R) iff L and R are not NaN | ||||||||||||
1836 | /// m_OrdFMin(L, R) = R iff L or R are NaN | ||||||||||||
1837 | template <typename LHS, typename RHS> | ||||||||||||
1838 | inline MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty> m_OrdFMin(const LHS &L, | ||||||||||||
1839 | const RHS &R) { | ||||||||||||
1840 | return MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty>(L, R); | ||||||||||||
1841 | } | ||||||||||||
1842 | |||||||||||||
1843 | /// Match an 'unordered' floating point maximum function. | ||||||||||||
1844 | /// Floating point has one special value 'NaN'. Therefore, there is no total | ||||||||||||
1845 | /// order. However, if we can ignore the 'NaN' value (for example, because of a | ||||||||||||
1846 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum' | ||||||||||||
1847 | /// semantics. In the presence of 'NaN' we have to preserve the original | ||||||||||||
1848 | /// select(fcmp(ugt/ge, L, R), L, R) semantics matched by this predicate. | ||||||||||||
1849 | /// | ||||||||||||
1850 | /// max(L, R) iff L and R are not NaN | ||||||||||||
1851 | /// m_UnordFMax(L, R) = L iff L or R are NaN | ||||||||||||
1852 | template <typename LHS, typename RHS> | ||||||||||||
1853 | inline MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty> | ||||||||||||
1854 | m_UnordFMax(const LHS &L, const RHS &R) { | ||||||||||||
1855 | return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R); | ||||||||||||
1856 | } | ||||||||||||
1857 | |||||||||||||
1858 | /// Match an 'unordered' floating point minimum function. | ||||||||||||
1859 | /// Floating point has one special value 'NaN'. Therefore, there is no total | ||||||||||||
1860 | /// order. However, if we can ignore the 'NaN' value (for example, because of a | ||||||||||||
1861 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' | ||||||||||||
1862 | /// semantics. In the presence of 'NaN' we have to preserve the original | ||||||||||||
1863 | /// select(fcmp(ult/le, L, R), L, R) semantics matched by this predicate. | ||||||||||||
1864 | /// | ||||||||||||
1865 | /// min(L, R) iff L and R are not NaN | ||||||||||||
1866 | /// m_UnordFMin(L, R) = L iff L or R are NaN | ||||||||||||
1867 | template <typename LHS, typename RHS> | ||||||||||||
1868 | inline MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty> | ||||||||||||
1869 | m_UnordFMin(const LHS &L, const RHS &R) { | ||||||||||||
1870 | return MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>(L, R); | ||||||||||||
1871 | } | ||||||||||||
1872 | |||||||||||||
1873 | //===----------------------------------------------------------------------===// | ||||||||||||
1874 | // Matchers for overflow check patterns: e.g. (a + b) u< a, (a ^ -1) <u b | ||||||||||||
1875 | // Note that S might be matched to other instructions than AddInst. | ||||||||||||
1876 | // | ||||||||||||
1877 | |||||||||||||
1878 | template <typename LHS_t, typename RHS_t, typename Sum_t> | ||||||||||||
1879 | struct UAddWithOverflow_match { | ||||||||||||
1880 | LHS_t L; | ||||||||||||
1881 | RHS_t R; | ||||||||||||
1882 | Sum_t S; | ||||||||||||
1883 | |||||||||||||
1884 | UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S) | ||||||||||||
1885 | : L(L), R(R), S(S) {} | ||||||||||||
1886 | |||||||||||||
1887 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
1888 | Value *ICmpLHS, *ICmpRHS; | ||||||||||||
1889 | ICmpInst::Predicate Pred; | ||||||||||||
1890 | if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V)) | ||||||||||||
1891 | return false; | ||||||||||||
1892 | |||||||||||||
1893 | Value *AddLHS, *AddRHS; | ||||||||||||
1894 | auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS)); | ||||||||||||
1895 | |||||||||||||
1896 | // (a + b) u< a, (a + b) u< b | ||||||||||||
1897 | if (Pred == ICmpInst::ICMP_ULT) | ||||||||||||
1898 | if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS)) | ||||||||||||
1899 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS); | ||||||||||||
1900 | |||||||||||||
1901 | // a >u (a + b), b >u (a + b) | ||||||||||||
1902 | if (Pred == ICmpInst::ICMP_UGT) | ||||||||||||
1903 | if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS)) | ||||||||||||
1904 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS); | ||||||||||||
1905 | |||||||||||||
1906 | Value *Op1; | ||||||||||||
1907 | auto XorExpr = m_OneUse(m_Xor(m_Value(Op1), m_AllOnes())); | ||||||||||||
1908 | // (a ^ -1) <u b | ||||||||||||
1909 | if (Pred == ICmpInst::ICMP_ULT) { | ||||||||||||
1910 | if (XorExpr.match(ICmpLHS)) | ||||||||||||
1911 | return L.match(Op1) && R.match(ICmpRHS) && S.match(ICmpLHS); | ||||||||||||
1912 | } | ||||||||||||
1913 | // b > u (a ^ -1) | ||||||||||||
1914 | if (Pred == ICmpInst::ICMP_UGT) { | ||||||||||||
1915 | if (XorExpr.match(ICmpRHS)) | ||||||||||||
1916 | return L.match(Op1) && R.match(ICmpLHS) && S.match(ICmpRHS); | ||||||||||||
1917 | } | ||||||||||||
1918 | |||||||||||||
1919 | // Match special-case for increment-by-1. | ||||||||||||
1920 | if (Pred == ICmpInst::ICMP_EQ) { | ||||||||||||
1921 | // (a + 1) == 0 | ||||||||||||
1922 | // (1 + a) == 0 | ||||||||||||
1923 | if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) && | ||||||||||||
1924 | (m_One().match(AddLHS) || m_One().match(AddRHS))) | ||||||||||||
1925 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS); | ||||||||||||
1926 | // 0 == (a + 1) | ||||||||||||
1927 | // 0 == (1 + a) | ||||||||||||
1928 | if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) && | ||||||||||||
1929 | (m_One().match(AddLHS) || m_One().match(AddRHS))) | ||||||||||||
1930 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS); | ||||||||||||
1931 | } | ||||||||||||
1932 | |||||||||||||
1933 | return false; | ||||||||||||
1934 | } | ||||||||||||
1935 | }; | ||||||||||||
1936 | |||||||||||||
1937 | /// Match an icmp instruction checking for unsigned overflow on addition. | ||||||||||||
1938 | /// | ||||||||||||
1939 | /// S is matched to the addition whose result is being checked for overflow, and | ||||||||||||
1940 | /// L and R are matched to the LHS and RHS of S. | ||||||||||||
1941 | template <typename LHS_t, typename RHS_t, typename Sum_t> | ||||||||||||
1942 | UAddWithOverflow_match<LHS_t, RHS_t, Sum_t> | ||||||||||||
1943 | m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) { | ||||||||||||
1944 | return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S); | ||||||||||||
1945 | } | ||||||||||||
1946 | |||||||||||||
1947 | template <typename Opnd_t> struct Argument_match { | ||||||||||||
1948 | unsigned OpI; | ||||||||||||
1949 | Opnd_t Val; | ||||||||||||
1950 | |||||||||||||
1951 | Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {} | ||||||||||||
1952 | |||||||||||||
1953 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
1954 | // FIXME: Should likely be switched to use `CallBase`. | ||||||||||||
1955 | if (const auto *CI = dyn_cast<CallInst>(V)) | ||||||||||||
1956 | return Val.match(CI->getArgOperand(OpI)); | ||||||||||||
1957 | return false; | ||||||||||||
1958 | } | ||||||||||||
1959 | }; | ||||||||||||
1960 | |||||||||||||
1961 | /// Match an argument. | ||||||||||||
1962 | template <unsigned OpI, typename Opnd_t> | ||||||||||||
1963 | inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) { | ||||||||||||
1964 | return Argument_match<Opnd_t>(OpI, Op); | ||||||||||||
1965 | } | ||||||||||||
1966 | |||||||||||||
1967 | /// Intrinsic matchers. | ||||||||||||
1968 | struct IntrinsicID_match { | ||||||||||||
1969 | unsigned ID; | ||||||||||||
1970 | |||||||||||||
1971 | IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {} | ||||||||||||
1972 | |||||||||||||
1973 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
1974 | if (const auto *CI
| ||||||||||||
1975 | if (const auto *F = CI->getCalledFunction()) | ||||||||||||
1976 | return F->getIntrinsicID() == ID; | ||||||||||||
1977 | return false; | ||||||||||||
1978 | } | ||||||||||||
1979 | }; | ||||||||||||
1980 | |||||||||||||
1981 | /// Intrinsic matches are combinations of ID matchers, and argument | ||||||||||||
1982 | /// matchers. Higher arity matcher are defined recursively in terms of and-ing | ||||||||||||
1983 | /// them with lower arity matchers. Here's some convenient typedefs for up to | ||||||||||||
1984 | /// several arguments, and more can be added as needed | ||||||||||||
1985 | template <typename T0 = void, typename T1 = void, typename T2 = void, | ||||||||||||
1986 | typename T3 = void, typename T4 = void, typename T5 = void, | ||||||||||||
1987 | typename T6 = void, typename T7 = void, typename T8 = void, | ||||||||||||
1988 | typename T9 = void, typename T10 = void> | ||||||||||||
1989 | struct m_Intrinsic_Ty; | ||||||||||||
1990 | template <typename T0> struct m_Intrinsic_Ty<T0> { | ||||||||||||
1991 | using Ty = match_combine_and<IntrinsicID_match, Argument_match<T0>>; | ||||||||||||
1992 | }; | ||||||||||||
1993 | template <typename T0, typename T1> struct m_Intrinsic_Ty<T0, T1> { | ||||||||||||
1994 | using Ty = | ||||||||||||
1995 | match_combine_and<typename m_Intrinsic_Ty<T0>::Ty, Argument_match<T1>>; | ||||||||||||
1996 | }; | ||||||||||||
1997 | template <typename T0, typename T1, typename T2> | ||||||||||||
1998 | struct m_Intrinsic_Ty<T0, T1, T2> { | ||||||||||||
1999 | using Ty = | ||||||||||||
2000 | match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty, | ||||||||||||
2001 | Argument_match<T2>>; | ||||||||||||
2002 | }; | ||||||||||||
2003 | template <typename T0, typename T1, typename T2, typename T3> | ||||||||||||
2004 | struct m_Intrinsic_Ty<T0, T1, T2, T3> { | ||||||||||||
2005 | using Ty = | ||||||||||||
2006 | match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty, | ||||||||||||
2007 | Argument_match<T3>>; | ||||||||||||
2008 | }; | ||||||||||||
2009 | |||||||||||||
2010 | template <typename T0, typename T1, typename T2, typename T3, typename T4> | ||||||||||||
2011 | struct m_Intrinsic_Ty<T0, T1, T2, T3, T4> { | ||||||||||||
2012 | using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty, | ||||||||||||
2013 | Argument_match<T4>>; | ||||||||||||
2014 | }; | ||||||||||||
2015 | |||||||||||||
2016 | template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5> | ||||||||||||
2017 | struct m_Intrinsic_Ty<T0, T1, T2, T3, T4, T5> { | ||||||||||||
2018 | using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty, | ||||||||||||
2019 | Argument_match<T5>>; | ||||||||||||
2020 | }; | ||||||||||||
2021 | |||||||||||||
2022 | /// Match intrinsic calls like this: | ||||||||||||
2023 | /// m_Intrinsic<Intrinsic::fabs>(m_Value(X)) | ||||||||||||
2024 | template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() { | ||||||||||||
2025 | return IntrinsicID_match(IntrID); | ||||||||||||
2026 | } | ||||||||||||
2027 | |||||||||||||
2028 | template <Intrinsic::ID IntrID, typename T0> | ||||||||||||
2029 | inline typename m_Intrinsic_Ty<T0>::Ty m_Intrinsic(const T0 &Op0) { | ||||||||||||
2030 | return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0)); | ||||||||||||
2031 | } | ||||||||||||
2032 | |||||||||||||
2033 | template <Intrinsic::ID IntrID, typename T0, typename T1> | ||||||||||||
2034 | inline typename m_Intrinsic_Ty<T0, T1>::Ty m_Intrinsic(const T0 &Op0, | ||||||||||||
2035 | const T1 &Op1) { | ||||||||||||
2036 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0), m_Argument<1>(Op1)); | ||||||||||||
2037 | } | ||||||||||||
2038 | |||||||||||||
2039 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2> | ||||||||||||
2040 | inline typename m_Intrinsic_Ty<T0, T1, T2>::Ty | ||||||||||||
2041 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2) { | ||||||||||||
2042 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1), m_Argument<2>(Op2)); | ||||||||||||
2043 | } | ||||||||||||
2044 | |||||||||||||
2045 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, | ||||||||||||
2046 | typename T3> | ||||||||||||
2047 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty | ||||||||||||
2048 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) { | ||||||||||||
2049 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3)); | ||||||||||||
2050 | } | ||||||||||||
2051 | |||||||||||||
2052 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, | ||||||||||||
2053 | typename T3, typename T4> | ||||||||||||
2054 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty | ||||||||||||
2055 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3, | ||||||||||||
2056 | const T4 &Op4) { | ||||||||||||
2057 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3), | ||||||||||||
2058 | m_Argument<4>(Op4)); | ||||||||||||
2059 | } | ||||||||||||
2060 | |||||||||||||
2061 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, | ||||||||||||
2062 | typename T3, typename T4, typename T5> | ||||||||||||
2063 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4, T5>::Ty | ||||||||||||
2064 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3, | ||||||||||||
2065 | const T4 &Op4, const T5 &Op5) { | ||||||||||||
2066 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3, Op4), | ||||||||||||
2067 | m_Argument<5>(Op5)); | ||||||||||||
2068 | } | ||||||||||||
2069 | |||||||||||||
2070 | // Helper intrinsic matching specializations. | ||||||||||||
2071 | template <typename Opnd0> | ||||||||||||
2072 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BitReverse(const Opnd0 &Op0) { | ||||||||||||
2073 | return m_Intrinsic<Intrinsic::bitreverse>(Op0); | ||||||||||||
2074 | } | ||||||||||||
2075 | |||||||||||||
2076 | template <typename Opnd0> | ||||||||||||
2077 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) { | ||||||||||||
2078 | return m_Intrinsic<Intrinsic::bswap>(Op0); | ||||||||||||
2079 | } | ||||||||||||
2080 | |||||||||||||
2081 | template <typename Opnd0> | ||||||||||||
2082 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) { | ||||||||||||
2083 | return m_Intrinsic<Intrinsic::fabs>(Op0); | ||||||||||||
2084 | } | ||||||||||||
2085 | |||||||||||||
2086 | template <typename Opnd0> | ||||||||||||
2087 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FCanonicalize(const Opnd0 &Op0) { | ||||||||||||
2088 | return m_Intrinsic<Intrinsic::canonicalize>(Op0); | ||||||||||||
2089 | } | ||||||||||||
2090 | |||||||||||||
2091 | template <typename Opnd0, typename Opnd1> | ||||||||||||
2092 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMin(const Opnd0 &Op0, | ||||||||||||
2093 | const Opnd1 &Op1) { | ||||||||||||
2094 | return m_Intrinsic<Intrinsic::minnum>(Op0, Op1); | ||||||||||||
2095 | } | ||||||||||||
2096 | |||||||||||||
2097 | template <typename Opnd0, typename Opnd1> | ||||||||||||
2098 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0, | ||||||||||||
2099 | const Opnd1 &Op1) { | ||||||||||||
2100 | return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1); | ||||||||||||
2101 | } | ||||||||||||
2102 | |||||||||||||
2103 | template <typename Opnd0, typename Opnd1, typename Opnd2> | ||||||||||||
2104 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty | ||||||||||||
2105 | m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) { | ||||||||||||
2106 | return m_Intrinsic<Intrinsic::fshl>(Op0, Op1, Op2); | ||||||||||||
2107 | } | ||||||||||||
2108 | |||||||||||||
2109 | template <typename Opnd0, typename Opnd1, typename Opnd2> | ||||||||||||
2110 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty | ||||||||||||
2111 | m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) { | ||||||||||||
2112 | return m_Intrinsic<Intrinsic::fshr>(Op0, Op1, Op2); | ||||||||||||
2113 | } | ||||||||||||
2114 | |||||||||||||
2115 | //===----------------------------------------------------------------------===// | ||||||||||||
2116 | // Matchers for two-operands operators with the operators in either order | ||||||||||||
2117 | // | ||||||||||||
2118 | |||||||||||||
2119 | /// Matches a BinaryOperator with LHS and RHS in either order. | ||||||||||||
2120 | template <typename LHS, typename RHS> | ||||||||||||
2121 | inline AnyBinaryOp_match<LHS, RHS, true> m_c_BinOp(const LHS &L, const RHS &R) { | ||||||||||||
2122 | return AnyBinaryOp_match<LHS, RHS, true>(L, R); | ||||||||||||
2123 | } | ||||||||||||
2124 | |||||||||||||
2125 | /// Matches an ICmp with a predicate over LHS and RHS in either order. | ||||||||||||
2126 | /// Swaps the predicate if operands are commuted. | ||||||||||||
2127 | template <typename LHS, typename RHS> | ||||||||||||
2128 | inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true> | ||||||||||||
2129 | m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { | ||||||||||||
2130 | return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>(Pred, L, | ||||||||||||
2131 | R); | ||||||||||||
2132 | } | ||||||||||||
2133 | |||||||||||||
2134 | /// Matches a Add with LHS and RHS in either order. | ||||||||||||
2135 | template <typename LHS, typename RHS> | ||||||||||||
2136 | inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L, | ||||||||||||
2137 | const RHS &R) { | ||||||||||||
2138 | return BinaryOp_match<LHS, RHS, Instruction::Add, true>(L, R); | ||||||||||||
2139 | } | ||||||||||||
2140 | |||||||||||||
2141 | /// Matches a Mul with LHS and RHS in either order. | ||||||||||||
2142 | template <typename LHS, typename RHS> | ||||||||||||
2143 | inline BinaryOp_match<LHS, RHS, Instruction::Mul, true> m_c_Mul(const LHS &L, | ||||||||||||
2144 | const RHS &R) { | ||||||||||||
2145 | return BinaryOp_match<LHS, RHS, Instruction::Mul, true>(L, R); | ||||||||||||
2146 | } | ||||||||||||
2147 | |||||||||||||
2148 | /// Matches an And with LHS and RHS in either order. | ||||||||||||
2149 | template <typename LHS, typename RHS> | ||||||||||||
2150 | inline BinaryOp_match<LHS, RHS, Instruction::And, true> m_c_And(const LHS &L, | ||||||||||||
2151 | const RHS &R) { | ||||||||||||
2152 | return BinaryOp_match<LHS, RHS, Instruction::And, true>(L, R); | ||||||||||||
2153 | } | ||||||||||||
2154 | |||||||||||||
2155 | /// Matches an Or with LHS and RHS in either order. | ||||||||||||
2156 | template <typename LHS, typename RHS> | ||||||||||||
2157 | inline BinaryOp_match<LHS, RHS, Instruction::Or, true> m_c_Or(const LHS &L, | ||||||||||||
2158 | const RHS &R) { | ||||||||||||
2159 | return BinaryOp_match<LHS, RHS, Instruction::Or, true>(L, R); | ||||||||||||
2160 | } | ||||||||||||
2161 | |||||||||||||
2162 | /// Matches an Xor with LHS and RHS in either order. | ||||||||||||
2163 | template <typename LHS, typename RHS> | ||||||||||||
2164 | inline BinaryOp_match<LHS, RHS, Instruction::Xor, true> m_c_Xor(const LHS &L, | ||||||||||||
2165 | const RHS &R) { | ||||||||||||
2166 | return BinaryOp_match<LHS, RHS, Instruction::Xor, true>(L, R); | ||||||||||||
2167 | } | ||||||||||||
2168 | |||||||||||||
2169 | /// Matches a 'Neg' as 'sub 0, V'. | ||||||||||||
2170 | template <typename ValTy> | ||||||||||||
2171 | inline BinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, Instruction::Sub> | ||||||||||||
2172 | m_Neg(const ValTy &V) { | ||||||||||||
2173 | return m_Sub(m_ZeroInt(), V); | ||||||||||||
2174 | } | ||||||||||||
2175 | |||||||||||||
2176 | /// Matches a 'Neg' as 'sub nsw 0, V'. | ||||||||||||
2177 | template <typename ValTy> | ||||||||||||
2178 | inline OverflowingBinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, | ||||||||||||
2179 | Instruction::Sub, | ||||||||||||
2180 | OverflowingBinaryOperator::NoSignedWrap> | ||||||||||||
2181 | m_NSWNeg(const ValTy &V) { | ||||||||||||
2182 | return m_NSWSub(m_ZeroInt(), V); | ||||||||||||
2183 | } | ||||||||||||
2184 | |||||||||||||
2185 | /// Matches a 'Not' as 'xor V, -1' or 'xor -1, V'. | ||||||||||||
2186 | template <typename ValTy> | ||||||||||||
2187 | inline BinaryOp_match<ValTy, cst_pred_ty<is_all_ones>, Instruction::Xor, true> | ||||||||||||
2188 | m_Not(const ValTy &V) { | ||||||||||||
2189 | return m_c_Xor(V, m_AllOnes()); | ||||||||||||
2190 | } | ||||||||||||
2191 | |||||||||||||
2192 | /// Matches an SMin with LHS and RHS in either order. | ||||||||||||
2193 | template <typename LHS, typename RHS> | ||||||||||||
2194 | inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true> | ||||||||||||
2195 | m_c_SMin(const LHS &L, const RHS &R) { | ||||||||||||
2196 | return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>(L, R); | ||||||||||||
2197 | } | ||||||||||||
2198 | /// Matches an SMax with LHS and RHS in either order. | ||||||||||||
2199 | template <typename LHS, typename RHS> | ||||||||||||
2200 | inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true> | ||||||||||||
2201 | m_c_SMax(const LHS &L, const RHS &R) { | ||||||||||||
2202 | return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>(L, R); | ||||||||||||
2203 | } | ||||||||||||
2204 | /// Matches a UMin with LHS and RHS in either order. | ||||||||||||
2205 | template <typename LHS, typename RHS> | ||||||||||||
2206 | inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true> | ||||||||||||
2207 | m_c_UMin(const LHS &L, const RHS &R) { | ||||||||||||
2208 | return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>(L, R); | ||||||||||||
2209 | } | ||||||||||||
2210 | /// Matches a UMax with LHS and RHS in either order. | ||||||||||||
2211 | template <typename LHS, typename RHS> | ||||||||||||
2212 | inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true> | ||||||||||||
2213 | m_c_UMax(const LHS &L, const RHS &R) { | ||||||||||||
2214 | return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>(L, R); | ||||||||||||
2215 | } | ||||||||||||
2216 | |||||||||||||
2217 | template <typename LHS, typename RHS> | ||||||||||||
2218 | inline match_combine_or< | ||||||||||||
2219 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>, | ||||||||||||
2220 | MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>>, | ||||||||||||
2221 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>, | ||||||||||||
2222 | MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>>> | ||||||||||||
2223 | m_c_MaxOrMin(const LHS &L, const RHS &R) { | ||||||||||||
2224 | return m_CombineOr(m_CombineOr(m_c_SMax(L, R), m_c_SMin(L, R)), | ||||||||||||
2225 | m_CombineOr(m_c_UMax(L, R), m_c_UMin(L, R))); | ||||||||||||
2226 | } | ||||||||||||
2227 | |||||||||||||
2228 | /// Matches FAdd with LHS and RHS in either order. | ||||||||||||
2229 | template <typename LHS, typename RHS> | ||||||||||||
2230 | inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true> | ||||||||||||
2231 | m_c_FAdd(const LHS &L, const RHS &R) { | ||||||||||||
2232 | return BinaryOp_match<LHS, RHS, Instruction::FAdd, true>(L, R); | ||||||||||||
2233 | } | ||||||||||||
2234 | |||||||||||||
2235 | /// Matches FMul with LHS and RHS in either order. | ||||||||||||
2236 | template <typename LHS, typename RHS> | ||||||||||||
2237 | inline BinaryOp_match<LHS, RHS, Instruction::FMul, true> | ||||||||||||
2238 | m_c_FMul(const LHS &L, const RHS &R) { | ||||||||||||
2239 | return BinaryOp_match<LHS, RHS, Instruction::FMul, true>(L, R); | ||||||||||||
2240 | } | ||||||||||||
2241 | |||||||||||||
2242 | template <typename Opnd_t> struct Signum_match { | ||||||||||||
2243 | Opnd_t Val; | ||||||||||||
2244 | Signum_match(const Opnd_t &V) : Val(V) {} | ||||||||||||
2245 | |||||||||||||
2246 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
2247 | unsigned TypeSize = V->getType()->getScalarSizeInBits(); | ||||||||||||
2248 | if (TypeSize == 0) | ||||||||||||
2249 | return false; | ||||||||||||
2250 | |||||||||||||
2251 | unsigned ShiftWidth = TypeSize - 1; | ||||||||||||
2252 | Value *OpL = nullptr, *OpR = nullptr; | ||||||||||||
2253 | |||||||||||||
2254 | // This is the representation of signum we match: | ||||||||||||
2255 | // | ||||||||||||
2256 | // signum(x) == (x >> 63) | (-x >>u 63) | ||||||||||||
2257 | // | ||||||||||||
2258 | // An i1 value is its own signum, so it's correct to match | ||||||||||||
2259 | // | ||||||||||||
2260 | // signum(x) == (x >> 0) | (-x >>u 0) | ||||||||||||
2261 | // | ||||||||||||
2262 | // for i1 values. | ||||||||||||
2263 | |||||||||||||
2264 | auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth)); | ||||||||||||
2265 | auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth)); | ||||||||||||
2266 | auto Signum = m_Or(LHS, RHS); | ||||||||||||
2267 | |||||||||||||
2268 | return Signum.match(V) && OpL == OpR && Val.match(OpL); | ||||||||||||
2269 | } | ||||||||||||
2270 | }; | ||||||||||||
2271 | |||||||||||||
2272 | /// Matches a signum pattern. | ||||||||||||
2273 | /// | ||||||||||||
2274 | /// signum(x) = | ||||||||||||
2275 | /// x > 0 -> 1 | ||||||||||||
2276 | /// x == 0 -> 0 | ||||||||||||
2277 | /// x < 0 -> -1 | ||||||||||||
2278 | template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) { | ||||||||||||
2279 | return Signum_match<Val_t>(V); | ||||||||||||
2280 | } | ||||||||||||
2281 | |||||||||||||
2282 | template <int Ind, typename Opnd_t> struct ExtractValue_match { | ||||||||||||
2283 | Opnd_t Val; | ||||||||||||
2284 | ExtractValue_match(const Opnd_t &V) : Val(V) {} | ||||||||||||
2285 | |||||||||||||
2286 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||
2287 | if (auto *I = dyn_cast<ExtractValueInst>(V)) | ||||||||||||
2288 | return I->getNumIndices() == 1 && I->getIndices()[0] == Ind && | ||||||||||||
2289 | Val.match(I->getAggregateOperand()); | ||||||||||||
2290 | return false; | ||||||||||||
2291 | } | ||||||||||||
2292 | }; | ||||||||||||
2293 | |||||||||||||
2294 | /// Match a single index ExtractValue instruction. | ||||||||||||
2295 | /// For example m_ExtractValue<1>(...) | ||||||||||||
2296 | template <int Ind, typename Val_t> | ||||||||||||
2297 | inline ExtractValue_match<Ind, Val_t> m_ExtractValue(const Val_t &V) { | ||||||||||||
2298 | return ExtractValue_match<Ind, Val_t>(V); | ||||||||||||
2299 | } | ||||||||||||
2300 | |||||||||||||
2301 | /// Matches patterns for `vscale`. This can either be a call to `llvm.vscale` or | ||||||||||||
2302 | /// the constant expression | ||||||||||||
2303 | /// `ptrtoint(gep <vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 1>` | ||||||||||||
2304 | /// under the right conditions determined by DataLayout. | ||||||||||||
2305 | struct VScaleVal_match { | ||||||||||||
2306 | private: | ||||||||||||
2307 | template <typename Base, typename Offset> | ||||||||||||
2308 | inline BinaryOp_match<Base, Offset, Instruction::GetElementPtr> | ||||||||||||
2309 | m_OffsetGep(const Base &B, const Offset &O) { | ||||||||||||
2310 | return BinaryOp_match<Base, Offset, Instruction::GetElementPtr>(B, O); | ||||||||||||
2311 | } | ||||||||||||
2312 | |||||||||||||
2313 | public: | ||||||||||||
2314 | const DataLayout &DL; | ||||||||||||
2315 | VScaleVal_match(const DataLayout &DL) : DL(DL) {} | ||||||||||||
2316 | |||||||||||||
2317 | template <typename ITy> bool match(ITy *V) { | ||||||||||||
2318 | if (m_Intrinsic<Intrinsic::vscale>().match(V)) | ||||||||||||
2319 | return true; | ||||||||||||
2320 | |||||||||||||
2321 | if (m_PtrToInt(m_OffsetGep(m_Zero(), m_SpecificInt(1))).match(V)) { | ||||||||||||
2322 | Type *PtrTy = cast<Operator>(V)->getOperand(0)->getType(); | ||||||||||||
2323 | auto *DerefTy = PtrTy->getPointerElementType(); | ||||||||||||
2324 | if (isa<ScalableVectorType>(DerefTy) && | ||||||||||||
2325 | DL.getTypeAllocSizeInBits(DerefTy).getKnownMinSize() == 8) | ||||||||||||
2326 | return true; | ||||||||||||
2327 | } | ||||||||||||
2328 | |||||||||||||
2329 | return false; | ||||||||||||
2330 | } | ||||||||||||
2331 | }; | ||||||||||||
2332 | |||||||||||||
2333 | inline VScaleVal_match m_VScale(const DataLayout &DL) { | ||||||||||||
2334 | return VScaleVal_match(DL); | ||||||||||||
2335 | } | ||||||||||||
2336 | |||||||||||||
2337 | } // end namespace PatternMatch | ||||||||||||
2338 | } // end namespace llvm | ||||||||||||
2339 | |||||||||||||
2340 | #endif // LLVM_IR_PATTERNMATCH_H |
1 | //===- llvm/Analysis/AliasAnalysis.h - Alias Analysis Interface -*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the generic AliasAnalysis interface, which is used as the |
10 | // common interface used by all clients of alias analysis information, and |
11 | // implemented by all alias analysis implementations. Mod/Ref information is |
12 | // also captured by this interface. |
13 | // |
14 | // Implementations of this interface must implement the various virtual methods, |
15 | // which automatically provides functionality for the entire suite of client |
16 | // APIs. |
17 | // |
18 | // This API identifies memory regions with the MemoryLocation class. The pointer |
19 | // component specifies the base memory address of the region. The Size specifies |
20 | // the maximum size (in address units) of the memory region, or |
21 | // MemoryLocation::UnknownSize if the size is not known. The TBAA tag |
22 | // identifies the "type" of the memory reference; see the |
23 | // TypeBasedAliasAnalysis class for details. |
24 | // |
25 | // Some non-obvious details include: |
26 | // - Pointers that point to two completely different objects in memory never |
27 | // alias, regardless of the value of the Size component. |
28 | // - NoAlias doesn't imply inequal pointers. The most obvious example of this |
29 | // is two pointers to constant memory. Even if they are equal, constant |
30 | // memory is never stored to, so there will never be any dependencies. |
31 | // In this and other situations, the pointers may be both NoAlias and |
32 | // MustAlias at the same time. The current API can only return one result, |
33 | // though this is rarely a problem in practice. |
34 | // |
35 | //===----------------------------------------------------------------------===// |
36 | |
37 | #ifndef LLVM_ANALYSIS_ALIASANALYSIS_H |
38 | #define LLVM_ANALYSIS_ALIASANALYSIS_H |
39 | |
40 | #include "llvm/ADT/DenseMap.h" |
41 | #include "llvm/ADT/None.h" |
42 | #include "llvm/ADT/Optional.h" |
43 | #include "llvm/ADT/SmallVector.h" |
44 | #include "llvm/Analysis/MemoryLocation.h" |
45 | #include "llvm/Analysis/TargetLibraryInfo.h" |
46 | #include "llvm/IR/Function.h" |
47 | #include "llvm/IR/Instruction.h" |
48 | #include "llvm/IR/Instructions.h" |
49 | #include "llvm/IR/PassManager.h" |
50 | #include "llvm/Pass.h" |
51 | #include <cstdint> |
52 | #include <functional> |
53 | #include <memory> |
54 | #include <vector> |
55 | |
56 | namespace llvm { |
57 | |
58 | class AnalysisUsage; |
59 | class BasicAAResult; |
60 | class BasicBlock; |
61 | class DominatorTree; |
62 | class Value; |
63 | |
64 | /// The possible results of an alias query. |
65 | /// |
66 | /// These results are always computed between two MemoryLocation objects as |
67 | /// a query to some alias analysis. |
68 | /// |
69 | /// Note that these are unscoped enumerations because we would like to support |
70 | /// implicitly testing a result for the existence of any possible aliasing with |
71 | /// a conversion to bool, but an "enum class" doesn't support this. The |
72 | /// canonical names from the literature are suffixed and unique anyways, and so |
73 | /// they serve as global constants in LLVM for these results. |
74 | /// |
75 | /// See docs/AliasAnalysis.html for more information on the specific meanings |
76 | /// of these values. |
77 | enum AliasResult : uint8_t { |
78 | /// The two locations do not alias at all. |
79 | /// |
80 | /// This value is arranged to convert to false, while all other values |
81 | /// convert to true. This allows a boolean context to convert the result to |
82 | /// a binary flag indicating whether there is the possibility of aliasing. |
83 | NoAlias = 0, |
84 | /// The two locations may or may not alias. This is the least precise result. |
85 | MayAlias, |
86 | /// The two locations alias, but only due to a partial overlap. |
87 | PartialAlias, |
88 | /// The two locations precisely alias each other. |
89 | MustAlias, |
90 | }; |
91 | |
92 | /// << operator for AliasResult. |
93 | raw_ostream &operator<<(raw_ostream &OS, AliasResult AR); |
94 | |
95 | /// Flags indicating whether a memory access modifies or references memory. |
96 | /// |
97 | /// This is no access at all, a modification, a reference, or both |
98 | /// a modification and a reference. These are specifically structured such that |
99 | /// they form a three bit matrix and bit-tests for 'mod' or 'ref' or 'must' |
100 | /// work with any of the possible values. |
101 | enum class ModRefInfo : uint8_t { |
102 | /// Must is provided for completeness, but no routines will return only |
103 | /// Must today. See definition of Must below. |
104 | Must = 0, |
105 | /// The access may reference the value stored in memory, |
106 | /// a mustAlias relation was found, and no mayAlias or partialAlias found. |
107 | MustRef = 1, |
108 | /// The access may modify the value stored in memory, |
109 | /// a mustAlias relation was found, and no mayAlias or partialAlias found. |
110 | MustMod = 2, |
111 | /// The access may reference, modify or both the value stored in memory, |
112 | /// a mustAlias relation was found, and no mayAlias or partialAlias found. |
113 | MustModRef = MustRef | MustMod, |
114 | /// The access neither references nor modifies the value stored in memory. |
115 | NoModRef = 4, |
116 | /// The access may reference the value stored in memory. |
117 | Ref = NoModRef | MustRef, |
118 | /// The access may modify the value stored in memory. |
119 | Mod = NoModRef | MustMod, |
120 | /// The access may reference and may modify the value stored in memory. |
121 | ModRef = Ref | Mod, |
122 | |
123 | /// About Must: |
124 | /// Must is set in a best effort manner. |
125 | /// We usually do not try our best to infer Must, instead it is merely |
126 | /// another piece of "free" information that is presented when available. |
127 | /// Must set means there was certainly a MustAlias found. For calls, |
128 | /// where multiple arguments are checked (argmemonly), this translates to |
129 | /// only MustAlias or NoAlias was found. |
130 | /// Must is not set for RAR accesses, even if the two locations must |
131 | /// alias. The reason is that two read accesses translate to an early return |
132 | /// of NoModRef. An additional alias check to set Must may be |
133 | /// expensive. Other cases may also not set Must(e.g. callCapturesBefore). |
134 | /// We refer to Must being *set* when the most significant bit is *cleared*. |
135 | /// Conversely we *clear* Must information by *setting* the Must bit to 1. |
136 | }; |
137 | |
138 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isNoModRef(const ModRefInfo MRI) { |
139 | return (static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustModRef)) == |
140 | static_cast<int>(ModRefInfo::Must); |
141 | } |
142 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isModOrRefSet(const ModRefInfo MRI) { |
143 | return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustModRef); |
144 | } |
145 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isModAndRefSet(const ModRefInfo MRI) { |
146 | return (static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustModRef)) == |
147 | static_cast<int>(ModRefInfo::MustModRef); |
148 | } |
149 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isModSet(const ModRefInfo MRI) { |
150 | return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustMod); |
151 | } |
152 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isRefSet(const ModRefInfo MRI) { |
153 | return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustRef); |
154 | } |
155 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isMustSet(const ModRefInfo MRI) { |
156 | return !(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::NoModRef)); |
157 | } |
158 | |
159 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo setMod(const ModRefInfo MRI) { |
160 | return ModRefInfo(static_cast<int>(MRI) | |
161 | static_cast<int>(ModRefInfo::MustMod)); |
162 | } |
163 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo setRef(const ModRefInfo MRI) { |
164 | return ModRefInfo(static_cast<int>(MRI) | |
165 | static_cast<int>(ModRefInfo::MustRef)); |
166 | } |
167 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo setMust(const ModRefInfo MRI) { |
168 | return ModRefInfo(static_cast<int>(MRI) & |
169 | static_cast<int>(ModRefInfo::MustModRef)); |
170 | } |
171 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo setModAndRef(const ModRefInfo MRI) { |
172 | return ModRefInfo(static_cast<int>(MRI) | |
173 | static_cast<int>(ModRefInfo::MustModRef)); |
174 | } |
175 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo clearMod(const ModRefInfo MRI) { |
176 | return ModRefInfo(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Ref)); |
177 | } |
178 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo clearRef(const ModRefInfo MRI) { |
179 | return ModRefInfo(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Mod)); |
180 | } |
181 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo clearMust(const ModRefInfo MRI) { |
182 | return ModRefInfo(static_cast<int>(MRI) | |
183 | static_cast<int>(ModRefInfo::NoModRef)); |
184 | } |
185 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo unionModRef(const ModRefInfo MRI1, |
186 | const ModRefInfo MRI2) { |
187 | return ModRefInfo(static_cast<int>(MRI1) | static_cast<int>(MRI2)); |
188 | } |
189 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo intersectModRef(const ModRefInfo MRI1, |
190 | const ModRefInfo MRI2) { |
191 | return ModRefInfo(static_cast<int>(MRI1) & static_cast<int>(MRI2)); |
192 | } |
193 | |
194 | /// The locations at which a function might access memory. |
195 | /// |
196 | /// These are primarily used in conjunction with the \c AccessKind bits to |
197 | /// describe both the nature of access and the locations of access for a |
198 | /// function call. |
199 | enum FunctionModRefLocation { |
200 | /// Base case is no access to memory. |
201 | FMRL_Nowhere = 0, |
202 | /// Access to memory via argument pointers. |
203 | FMRL_ArgumentPointees = 8, |
204 | /// Memory that is inaccessible via LLVM IR. |
205 | FMRL_InaccessibleMem = 16, |
206 | /// Access to any memory. |
207 | FMRL_Anywhere = 32 | FMRL_InaccessibleMem | FMRL_ArgumentPointees |
208 | }; |
209 | |
210 | /// Summary of how a function affects memory in the program. |
211 | /// |
212 | /// Loads from constant globals are not considered memory accesses for this |
213 | /// interface. Also, functions may freely modify stack space local to their |
214 | /// invocation without having to report it through these interfaces. |
215 | enum FunctionModRefBehavior { |
216 | /// This function does not perform any non-local loads or stores to memory. |
217 | /// |
218 | /// This property corresponds to the GCC 'const' attribute. |
219 | /// This property corresponds to the LLVM IR 'readnone' attribute. |
220 | /// This property corresponds to the IntrNoMem LLVM intrinsic flag. |
221 | FMRB_DoesNotAccessMemory = |
222 | FMRL_Nowhere | static_cast<int>(ModRefInfo::NoModRef), |
223 | |
224 | /// The only memory references in this function (if it has any) are |
225 | /// non-volatile loads from objects pointed to by its pointer-typed |
226 | /// arguments, with arbitrary offsets. |
227 | /// |
228 | /// This property corresponds to the combination of the IntrReadMem |
229 | /// and IntrArgMemOnly LLVM intrinsic flags. |
230 | FMRB_OnlyReadsArgumentPointees = |
231 | FMRL_ArgumentPointees | static_cast<int>(ModRefInfo::Ref), |
232 | |
233 | /// The only memory references in this function (if it has any) are |
234 | /// non-volatile stores from objects pointed to by its pointer-typed |
235 | /// arguments, with arbitrary offsets. |
236 | /// |
237 | /// This property corresponds to the combination of the IntrWriteMem |
238 | /// and IntrArgMemOnly LLVM intrinsic flags. |
239 | FMRB_OnlyWritesArgumentPointees = |
240 | FMRL_ArgumentPointees | static_cast<int>(ModRefInfo::Mod), |
241 | |
242 | /// The only memory references in this function (if it has any) are |
243 | /// non-volatile loads and stores from objects pointed to by its |
244 | /// pointer-typed arguments, with arbitrary offsets. |
245 | /// |
246 | /// This property corresponds to the IntrArgMemOnly LLVM intrinsic flag. |
247 | FMRB_OnlyAccessesArgumentPointees = |
248 | FMRL_ArgumentPointees | static_cast<int>(ModRefInfo::ModRef), |
249 | |
250 | /// The only memory references in this function (if it has any) are |
251 | /// reads of memory that is otherwise inaccessible via LLVM IR. |
252 | /// |
253 | /// This property corresponds to the LLVM IR inaccessiblememonly attribute. |
254 | FMRB_OnlyReadsInaccessibleMem = |
255 | FMRL_InaccessibleMem | static_cast<int>(ModRefInfo::Ref), |
256 | |
257 | /// The only memory references in this function (if it has any) are |
258 | /// writes to memory that is otherwise inaccessible via LLVM IR. |
259 | /// |
260 | /// This property corresponds to the LLVM IR inaccessiblememonly attribute. |
261 | FMRB_OnlyWritesInaccessibleMem = |
262 | FMRL_InaccessibleMem | static_cast<int>(ModRefInfo::Mod), |
263 | |
264 | /// The only memory references in this function (if it has any) are |
265 | /// references of memory that is otherwise inaccessible via LLVM IR. |
266 | /// |
267 | /// This property corresponds to the LLVM IR inaccessiblememonly attribute. |
268 | FMRB_OnlyAccessesInaccessibleMem = |
269 | FMRL_InaccessibleMem | static_cast<int>(ModRefInfo::ModRef), |
270 | |
271 | /// The function may perform non-volatile loads from objects pointed |
272 | /// to by its pointer-typed arguments, with arbitrary offsets, and |
273 | /// it may also perform loads of memory that is otherwise |
274 | /// inaccessible via LLVM IR. |
275 | /// |
276 | /// This property corresponds to the LLVM IR |
277 | /// inaccessiblemem_or_argmemonly attribute. |
278 | FMRB_OnlyReadsInaccessibleOrArgMem = FMRL_InaccessibleMem | |
279 | FMRL_ArgumentPointees | |
280 | static_cast<int>(ModRefInfo::Ref), |
281 | |
282 | /// The function may perform non-volatile stores to objects pointed |
283 | /// to by its pointer-typed arguments, with arbitrary offsets, and |
284 | /// it may also perform stores of memory that is otherwise |
285 | /// inaccessible via LLVM IR. |
286 | /// |
287 | /// This property corresponds to the LLVM IR |
288 | /// inaccessiblemem_or_argmemonly attribute. |
289 | FMRB_OnlyWritesInaccessibleOrArgMem = FMRL_InaccessibleMem | |
290 | FMRL_ArgumentPointees | |
291 | static_cast<int>(ModRefInfo::Mod), |
292 | |
293 | /// The function may perform non-volatile loads and stores of objects |
294 | /// pointed to by its pointer-typed arguments, with arbitrary offsets, and |
295 | /// it may also perform loads and stores of memory that is otherwise |
296 | /// inaccessible via LLVM IR. |
297 | /// |
298 | /// This property corresponds to the LLVM IR |
299 | /// inaccessiblemem_or_argmemonly attribute. |
300 | FMRB_OnlyAccessesInaccessibleOrArgMem = FMRL_InaccessibleMem | |
301 | FMRL_ArgumentPointees | |
302 | static_cast<int>(ModRefInfo::ModRef), |
303 | |
304 | /// This function does not perform any non-local stores or volatile loads, |
305 | /// but may read from any memory location. |
306 | /// |
307 | /// This property corresponds to the GCC 'pure' attribute. |
308 | /// This property corresponds to the LLVM IR 'readonly' attribute. |
309 | /// This property corresponds to the IntrReadMem LLVM intrinsic flag. |
310 | FMRB_OnlyReadsMemory = FMRL_Anywhere | static_cast<int>(ModRefInfo::Ref), |
311 | |
312 | // This function does not read from memory anywhere, but may write to any |
313 | // memory location. |
314 | // |
315 | // This property corresponds to the LLVM IR 'writeonly' attribute. |
316 | // This property corresponds to the IntrWriteMem LLVM intrinsic flag. |
317 | FMRB_OnlyWritesMemory = FMRL_Anywhere | static_cast<int>(ModRefInfo::Mod), |
318 | |
319 | /// This indicates that the function could not be classified into one of the |
320 | /// behaviors above. |
321 | FMRB_UnknownModRefBehavior = |
322 | FMRL_Anywhere | static_cast<int>(ModRefInfo::ModRef) |
323 | }; |
324 | |
325 | // Wrapper method strips bits significant only in FunctionModRefBehavior, |
326 | // to obtain a valid ModRefInfo. The benefit of using the wrapper is that if |
327 | // ModRefInfo enum changes, the wrapper can be updated to & with the new enum |
328 | // entry with all bits set to 1. |
329 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo |
330 | createModRefInfo(const FunctionModRefBehavior FMRB) { |
331 | return ModRefInfo(FMRB & static_cast<int>(ModRefInfo::ModRef)); |
332 | } |
333 | |
334 | /// This class stores info we want to provide to or retain within an alias |
335 | /// query. By default, the root query is stateless and starts with a freshly |
336 | /// constructed info object. Specific alias analyses can use this query info to |
337 | /// store per-query state that is important for recursive or nested queries to |
338 | /// avoid recomputing. To enable preserving this state across multiple queries |
339 | /// where safe (due to the IR not changing), use a `BatchAAResults` wrapper. |
340 | /// The information stored in an `AAQueryInfo` is currently limitted to the |
341 | /// caches used by BasicAA, but can further be extended to fit other AA needs. |
342 | class AAQueryInfo { |
343 | public: |
344 | using LocPair = std::pair<MemoryLocation, MemoryLocation>; |
345 | using AliasCacheT = SmallDenseMap<LocPair, AliasResult, 8>; |
346 | AliasCacheT AliasCache; |
347 | |
348 | using IsCapturedCacheT = SmallDenseMap<const Value *, bool, 8>; |
349 | IsCapturedCacheT IsCapturedCache; |
350 | |
351 | AAQueryInfo() : AliasCache(), IsCapturedCache() {} |
352 | |
353 | AliasResult updateResult(const LocPair &Locs, AliasResult Result) { |
354 | auto It = AliasCache.find(Locs); |
355 | assert(It != AliasCache.end() && "Entry must have existed")((It != AliasCache.end() && "Entry must have existed" ) ? static_cast<void> (0) : __assert_fail ("It != AliasCache.end() && \"Entry must have existed\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/include/llvm/Analysis/AliasAnalysis.h" , 355, __PRETTY_FUNCTION__)); |
356 | return It->second = Result; |
357 | } |
358 | }; |
359 | |
360 | class BatchAAResults; |
361 | |
362 | class AAResults { |
363 | public: |
364 | // Make these results default constructable and movable. We have to spell |
365 | // these out because MSVC won't synthesize them. |
366 | AAResults(const TargetLibraryInfo &TLI) : TLI(TLI) {} |
367 | AAResults(AAResults &&Arg); |
368 | ~AAResults(); |
369 | |
370 | /// Register a specific AA result. |
371 | template <typename AAResultT> void addAAResult(AAResultT &AAResult) { |
372 | // FIXME: We should use a much lighter weight system than the usual |
373 | // polymorphic pattern because we don't own AAResult. It should |
374 | // ideally involve two pointers and no separate allocation. |
375 | AAs.emplace_back(new Model<AAResultT>(AAResult, *this)); |
376 | } |
377 | |
378 | /// Register a function analysis ID that the results aggregation depends on. |
379 | /// |
380 | /// This is used in the new pass manager to implement the invalidation logic |
381 | /// where we must invalidate the results aggregation if any of our component |
382 | /// analyses become invalid. |
383 | void addAADependencyID(AnalysisKey *ID) { AADeps.push_back(ID); } |
384 | |
385 | /// Handle invalidation events in the new pass manager. |
386 | /// |
387 | /// The aggregation is invalidated if any of the underlying analyses is |
388 | /// invalidated. |
389 | bool invalidate(Function &F, const PreservedAnalyses &PA, |
390 | FunctionAnalysisManager::Invalidator &Inv); |
391 | |
392 | //===--------------------------------------------------------------------===// |
393 | /// \name Alias Queries |
394 | /// @{ |
395 | |
396 | /// The main low level interface to the alias analysis implementation. |
397 | /// Returns an AliasResult indicating whether the two pointers are aliased to |
398 | /// each other. This is the interface that must be implemented by specific |
399 | /// alias analysis implementations. |
400 | AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); |
401 | |
402 | /// A convenience wrapper around the primary \c alias interface. |
403 | AliasResult alias(const Value *V1, LocationSize V1Size, const Value *V2, |
404 | LocationSize V2Size) { |
405 | return alias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size)); |
406 | } |
407 | |
408 | /// A convenience wrapper around the primary \c alias interface. |
409 | AliasResult alias(const Value *V1, const Value *V2) { |
410 | return alias(V1, LocationSize::unknown(), V2, LocationSize::unknown()); |
411 | } |
412 | |
413 | /// A trivial helper function to check to see if the specified pointers are |
414 | /// no-alias. |
415 | bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { |
416 | return alias(LocA, LocB) == NoAlias; |
417 | } |
418 | |
419 | /// A convenience wrapper around the \c isNoAlias helper interface. |
420 | bool isNoAlias(const Value *V1, LocationSize V1Size, const Value *V2, |
421 | LocationSize V2Size) { |
422 | return isNoAlias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size)); |
423 | } |
424 | |
425 | /// A convenience wrapper around the \c isNoAlias helper interface. |
426 | bool isNoAlias(const Value *V1, const Value *V2) { |
427 | return isNoAlias(MemoryLocation(V1), MemoryLocation(V2)); |
428 | } |
429 | |
430 | /// A trivial helper function to check to see if the specified pointers are |
431 | /// must-alias. |
432 | bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { |
433 | return alias(LocA, LocB) == MustAlias; |
434 | } |
435 | |
436 | /// A convenience wrapper around the \c isMustAlias helper interface. |
437 | bool isMustAlias(const Value *V1, const Value *V2) { |
438 | return alias(V1, LocationSize::precise(1), V2, LocationSize::precise(1)) == |
439 | MustAlias; |
440 | } |
441 | |
442 | /// Checks whether the given location points to constant memory, or if |
443 | /// \p OrLocal is true whether it points to a local alloca. |
444 | bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false); |
445 | |
446 | /// A convenience wrapper around the primary \c pointsToConstantMemory |
447 | /// interface. |
448 | bool pointsToConstantMemory(const Value *P, bool OrLocal = false) { |
449 | return pointsToConstantMemory(MemoryLocation(P), OrLocal); |
450 | } |
451 | |
452 | /// @} |
453 | //===--------------------------------------------------------------------===// |
454 | /// \name Simple mod/ref information |
455 | /// @{ |
456 | |
457 | /// Get the ModRef info associated with a pointer argument of a call. The |
458 | /// result's bits are set to indicate the allowed aliasing ModRef kinds. Note |
459 | /// that these bits do not necessarily account for the overall behavior of |
460 | /// the function, but rather only provide additional per-argument |
461 | /// information. This never sets ModRefInfo::Must. |
462 | ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx); |
463 | |
464 | /// Return the behavior of the given call site. |
465 | FunctionModRefBehavior getModRefBehavior(const CallBase *Call); |
466 | |
467 | /// Return the behavior when calling the given function. |
468 | FunctionModRefBehavior getModRefBehavior(const Function *F); |
469 | |
470 | /// Checks if the specified call is known to never read or write memory. |
471 | /// |
472 | /// Note that if the call only reads from known-constant memory, it is also |
473 | /// legal to return true. Also, calls that unwind the stack are legal for |
474 | /// this predicate. |
475 | /// |
476 | /// Many optimizations (such as CSE and LICM) can be performed on such calls |
477 | /// without worrying about aliasing properties, and many calls have this |
478 | /// property (e.g. calls to 'sin' and 'cos'). |
479 | /// |
480 | /// This property corresponds to the GCC 'const' attribute. |
481 | bool doesNotAccessMemory(const CallBase *Call) { |
482 | return getModRefBehavior(Call) == FMRB_DoesNotAccessMemory; |
483 | } |
484 | |
485 | /// Checks if the specified function is known to never read or write memory. |
486 | /// |
487 | /// Note that if the function only reads from known-constant memory, it is |
488 | /// also legal to return true. Also, function that unwind the stack are legal |
489 | /// for this predicate. |
490 | /// |
491 | /// Many optimizations (such as CSE and LICM) can be performed on such calls |
492 | /// to such functions without worrying about aliasing properties, and many |
493 | /// functions have this property (e.g. 'sin' and 'cos'). |
494 | /// |
495 | /// This property corresponds to the GCC 'const' attribute. |
496 | bool doesNotAccessMemory(const Function *F) { |
497 | return getModRefBehavior(F) == FMRB_DoesNotAccessMemory; |
498 | } |
499 | |
500 | /// Checks if the specified call is known to only read from non-volatile |
501 | /// memory (or not access memory at all). |
502 | /// |
503 | /// Calls that unwind the stack are legal for this predicate. |
504 | /// |
505 | /// This property allows many common optimizations to be performed in the |
506 | /// absence of interfering store instructions, such as CSE of strlen calls. |
507 | /// |
508 | /// This property corresponds to the GCC 'pure' attribute. |
509 | bool onlyReadsMemory(const CallBase *Call) { |
510 | return onlyReadsMemory(getModRefBehavior(Call)); |
511 | } |
512 | |
513 | /// Checks if the specified function is known to only read from non-volatile |
514 | /// memory (or not access memory at all). |
515 | /// |
516 | /// Functions that unwind the stack are legal for this predicate. |
517 | /// |
518 | /// This property allows many common optimizations to be performed in the |
519 | /// absence of interfering store instructions, such as CSE of strlen calls. |
520 | /// |
521 | /// This property corresponds to the GCC 'pure' attribute. |
522 | bool onlyReadsMemory(const Function *F) { |
523 | return onlyReadsMemory(getModRefBehavior(F)); |
524 | } |
525 | |
526 | /// Checks if functions with the specified behavior are known to only read |
527 | /// from non-volatile memory (or not access memory at all). |
528 | static bool onlyReadsMemory(FunctionModRefBehavior MRB) { |
529 | return !isModSet(createModRefInfo(MRB)); |
530 | } |
531 | |
532 | /// Checks if functions with the specified behavior are known to only write |
533 | /// memory (or not access memory at all). |
534 | static bool doesNotReadMemory(FunctionModRefBehavior MRB) { |
535 | return !isRefSet(createModRefInfo(MRB)); |
536 | } |
537 | |
538 | /// Checks if functions with the specified behavior are known to read and |
539 | /// write at most from objects pointed to by their pointer-typed arguments |
540 | /// (with arbitrary offsets). |
541 | static bool onlyAccessesArgPointees(FunctionModRefBehavior MRB) { |
542 | return !(MRB & FMRL_Anywhere & ~FMRL_ArgumentPointees); |
543 | } |
544 | |
545 | /// Checks if functions with the specified behavior are known to potentially |
546 | /// read or write from objects pointed to be their pointer-typed arguments |
547 | /// (with arbitrary offsets). |
548 | static bool doesAccessArgPointees(FunctionModRefBehavior MRB) { |
549 | return isModOrRefSet(createModRefInfo(MRB)) && |
550 | (MRB & FMRL_ArgumentPointees); |
551 | } |
552 | |
553 | /// Checks if functions with the specified behavior are known to read and |
554 | /// write at most from memory that is inaccessible from LLVM IR. |
555 | static bool onlyAccessesInaccessibleMem(FunctionModRefBehavior MRB) { |
556 | return !(MRB & FMRL_Anywhere & ~FMRL_InaccessibleMem); |
557 | } |
558 | |
559 | /// Checks if functions with the specified behavior are known to potentially |
560 | /// read or write from memory that is inaccessible from LLVM IR. |
561 | static bool doesAccessInaccessibleMem(FunctionModRefBehavior MRB) { |
562 | return isModOrRefSet(createModRefInfo(MRB)) && (MRB & FMRL_InaccessibleMem); |
563 | } |
564 | |
565 | /// Checks if functions with the specified behavior are known to read and |
566 | /// write at most from memory that is inaccessible from LLVM IR or objects |
567 | /// pointed to by their pointer-typed arguments (with arbitrary offsets). |
568 | static bool onlyAccessesInaccessibleOrArgMem(FunctionModRefBehavior MRB) { |
569 | return !(MRB & FMRL_Anywhere & |
570 | ~(FMRL_InaccessibleMem | FMRL_ArgumentPointees)); |
571 | } |
572 | |
573 | /// getModRefInfo (for call sites) - Return information about whether |
574 | /// a particular call site modifies or reads the specified memory location. |
575 | ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc); |
576 | |
577 | /// getModRefInfo (for call sites) - A convenience wrapper. |
578 | ModRefInfo getModRefInfo(const CallBase *Call, const Value *P, |
579 | LocationSize Size) { |
580 | return getModRefInfo(Call, MemoryLocation(P, Size)); |
581 | } |
582 | |
583 | /// getModRefInfo (for loads) - Return information about whether |
584 | /// a particular load modifies or reads the specified memory location. |
585 | ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc); |
586 | |
587 | /// getModRefInfo (for loads) - A convenience wrapper. |
588 | ModRefInfo getModRefInfo(const LoadInst *L, const Value *P, |
589 | LocationSize Size) { |
590 | return getModRefInfo(L, MemoryLocation(P, Size)); |
591 | } |
592 | |
593 | /// getModRefInfo (for stores) - Return information about whether |
594 | /// a particular store modifies or reads the specified memory location. |
595 | ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc); |
596 | |
597 | /// getModRefInfo (for stores) - A convenience wrapper. |
598 | ModRefInfo getModRefInfo(const StoreInst *S, const Value *P, |
599 | LocationSize Size) { |
600 | return getModRefInfo(S, MemoryLocation(P, Size)); |
601 | } |
602 | |
603 | /// getModRefInfo (for fences) - Return information about whether |
604 | /// a particular store modifies or reads the specified memory location. |
605 | ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc); |
606 | |
607 | /// getModRefInfo (for fences) - A convenience wrapper. |
608 | ModRefInfo getModRefInfo(const FenceInst *S, const Value *P, |
609 | LocationSize Size) { |
610 | return getModRefInfo(S, MemoryLocation(P, Size)); |
611 | } |
612 | |
613 | /// getModRefInfo (for cmpxchges) - Return information about whether |
614 | /// a particular cmpxchg modifies or reads the specified memory location. |
615 | ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, |
616 | const MemoryLocation &Loc); |
617 | |
618 | /// getModRefInfo (for cmpxchges) - A convenience wrapper. |
619 | ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, const Value *P, |
620 | LocationSize Size) { |
621 | return getModRefInfo(CX, MemoryLocation(P, Size)); |
622 | } |
623 | |
624 | /// getModRefInfo (for atomicrmws) - Return information about whether |
625 | /// a particular atomicrmw modifies or reads the specified memory location. |
626 | ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc); |
627 | |
628 | /// getModRefInfo (for atomicrmws) - A convenience wrapper. |
629 | ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const Value *P, |
630 | LocationSize Size) { |
631 | return getModRefInfo(RMW, MemoryLocation(P, Size)); |
632 | } |
633 | |
634 | /// getModRefInfo (for va_args) - Return information about whether |
635 | /// a particular va_arg modifies or reads the specified memory location. |
636 | ModRefInfo getModRefInfo(const VAArgInst *I, const MemoryLocation &Loc); |
637 | |
638 | /// getModRefInfo (for va_args) - A convenience wrapper. |
639 | ModRefInfo getModRefInfo(const VAArgInst *I, const Value *P, |
640 | LocationSize Size) { |
641 | return getModRefInfo(I, MemoryLocation(P, Size)); |
642 | } |
643 | |
644 | /// getModRefInfo (for catchpads) - Return information about whether |
645 | /// a particular catchpad modifies or reads the specified memory location. |
646 | ModRefInfo getModRefInfo(const CatchPadInst *I, const MemoryLocation &Loc); |
647 | |
648 | /// getModRefInfo (for catchpads) - A convenience wrapper. |
649 | ModRefInfo getModRefInfo(const CatchPadInst *I, const Value *P, |
650 | LocationSize Size) { |
651 | return getModRefInfo(I, MemoryLocation(P, Size)); |
652 | } |
653 | |
654 | /// getModRefInfo (for catchrets) - Return information about whether |
655 | /// a particular catchret modifies or reads the specified memory location. |
656 | ModRefInfo getModRefInfo(const CatchReturnInst *I, const MemoryLocation &Loc); |
657 | |
658 | /// getModRefInfo (for catchrets) - A convenience wrapper. |
659 | ModRefInfo getModRefInfo(const CatchReturnInst *I, const Value *P, |
660 | LocationSize Size) { |
661 | return getModRefInfo(I, MemoryLocation(P, Size)); |
662 | } |
663 | |
664 | /// Check whether or not an instruction may read or write the optionally |
665 | /// specified memory location. |
666 | /// |
667 | /// |
668 | /// An instruction that doesn't read or write memory may be trivially LICM'd |
669 | /// for example. |
670 | /// |
671 | /// For function calls, this delegates to the alias-analysis specific |
672 | /// call-site mod-ref behavior queries. Otherwise it delegates to the specific |
673 | /// helpers above. |
674 | ModRefInfo getModRefInfo(const Instruction *I, |
675 | const Optional<MemoryLocation> &OptLoc) { |
676 | AAQueryInfo AAQIP; |
677 | return getModRefInfo(I, OptLoc, AAQIP); |
678 | } |
679 | |
680 | /// A convenience wrapper for constructing the memory location. |
681 | ModRefInfo getModRefInfo(const Instruction *I, const Value *P, |
682 | LocationSize Size) { |
683 | return getModRefInfo(I, MemoryLocation(P, Size)); |
684 | } |
685 | |
686 | /// Return information about whether a call and an instruction may refer to |
687 | /// the same memory locations. |
688 | ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call); |
689 | |
690 | /// Return information about whether two call sites may refer to the same set |
691 | /// of memory locations. See the AA documentation for details: |
692 | /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo |
693 | ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2); |
694 | |
695 | /// Return information about whether a particular call site modifies |
696 | /// or reads the specified memory location \p MemLoc before instruction \p I |
697 | /// in a BasicBlock. |
698 | /// Early exits in callCapturesBefore may lead to ModRefInfo::Must not being |
699 | /// set. |
700 | ModRefInfo callCapturesBefore(const Instruction *I, |
701 | const MemoryLocation &MemLoc, DominatorTree *DT); |
702 | |
703 | /// A convenience wrapper to synthesize a memory location. |
704 | ModRefInfo callCapturesBefore(const Instruction *I, const Value *P, |
705 | LocationSize Size, DominatorTree *DT) { |
706 | return callCapturesBefore(I, MemoryLocation(P, Size), DT); |
707 | } |
708 | |
709 | /// @} |
710 | //===--------------------------------------------------------------------===// |
711 | /// \name Higher level methods for querying mod/ref information. |
712 | /// @{ |
713 | |
714 | /// Check if it is possible for execution of the specified basic block to |
715 | /// modify the location Loc. |
716 | bool canBasicBlockModify(const BasicBlock &BB, const MemoryLocation &Loc); |
717 | |
718 | /// A convenience wrapper synthesizing a memory location. |
719 | bool canBasicBlockModify(const BasicBlock &BB, const Value *P, |
720 | LocationSize Size) { |
721 | return canBasicBlockModify(BB, MemoryLocation(P, Size)); |
722 | } |
723 | |
724 | /// Check if it is possible for the execution of the specified instructions |
725 | /// to mod\ref (according to the mode) the location Loc. |
726 | /// |
727 | /// The instructions to consider are all of the instructions in the range of |
728 | /// [I1,I2] INCLUSIVE. I1 and I2 must be in the same basic block. |
729 | bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2, |
730 | const MemoryLocation &Loc, |
731 | const ModRefInfo Mode); |
732 | |
733 | /// A convenience wrapper synthesizing a memory location. |
734 | bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2, |
735 | const Value *Ptr, LocationSize Size, |
736 | const ModRefInfo Mode) { |
737 | return canInstructionRangeModRef(I1, I2, MemoryLocation(Ptr, Size), Mode); |
738 | } |
739 | |
740 | private: |
741 | AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, |
742 | AAQueryInfo &AAQI); |
743 | bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, |
744 | bool OrLocal = false); |
745 | ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call2, |
746 | AAQueryInfo &AAQIP); |
747 | ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc, |
748 | AAQueryInfo &AAQI); |
749 | ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, |
750 | AAQueryInfo &AAQI); |
751 | ModRefInfo getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc, |
752 | AAQueryInfo &AAQI); |
753 | ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc, |
754 | AAQueryInfo &AAQI); |
755 | ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc, |
756 | AAQueryInfo &AAQI); |
757 | ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc, |
758 | AAQueryInfo &AAQI); |
759 | ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, |
760 | const MemoryLocation &Loc, AAQueryInfo &AAQI); |
761 | ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc, |
762 | AAQueryInfo &AAQI); |
763 | ModRefInfo getModRefInfo(const CatchPadInst *I, const MemoryLocation &Loc, |
764 | AAQueryInfo &AAQI); |
765 | ModRefInfo getModRefInfo(const CatchReturnInst *I, const MemoryLocation &Loc, |
766 | AAQueryInfo &AAQI); |
767 | ModRefInfo getModRefInfo(const Instruction *I, |
768 | const Optional<MemoryLocation> &OptLoc, |
769 | AAQueryInfo &AAQIP) { |
770 | if (OptLoc == None) { |
771 | if (const auto *Call = dyn_cast<CallBase>(I)) { |
772 | return createModRefInfo(getModRefBehavior(Call)); |
773 | } |
774 | } |
775 | |
776 | const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation()); |
777 | |
778 | switch (I->getOpcode()) { |
779 | case Instruction::VAArg: |
780 | return getModRefInfo((const VAArgInst *)I, Loc, AAQIP); |
781 | case Instruction::Load: |
782 | return getModRefInfo((const LoadInst *)I, Loc, AAQIP); |
783 | case Instruction::Store: |
784 | return getModRefInfo((const StoreInst *)I, Loc, AAQIP); |
785 | case Instruction::Fence: |
786 | return getModRefInfo((const FenceInst *)I, Loc, AAQIP); |
787 | case Instruction::AtomicCmpXchg: |
788 | return getModRefInfo((const AtomicCmpXchgInst *)I, Loc, AAQIP); |
789 | case Instruction::AtomicRMW: |
790 | return getModRefInfo((const AtomicRMWInst *)I, Loc, AAQIP); |
791 | case Instruction::Call: |
792 | return getModRefInfo((const CallInst *)I, Loc, AAQIP); |
793 | case Instruction::Invoke: |
794 | return getModRefInfo((const InvokeInst *)I, Loc, AAQIP); |
795 | case Instruction::CatchPad: |
796 | return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP); |
797 | case Instruction::CatchRet: |
798 | return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP); |
799 | default: |
800 | return ModRefInfo::NoModRef; |
801 | } |
802 | } |
803 | |
804 | class Concept; |
805 | |
806 | template <typename T> class Model; |
807 | |
808 | template <typename T> friend class AAResultBase; |
809 | |
810 | const TargetLibraryInfo &TLI; |
811 | |
812 | std::vector<std::unique_ptr<Concept>> AAs; |
813 | |
814 | std::vector<AnalysisKey *> AADeps; |
815 | |
816 | friend class BatchAAResults; |
817 | }; |
818 | |
819 | /// This class is a wrapper over an AAResults, and it is intended to be used |
820 | /// only when there are no IR changes inbetween queries. BatchAAResults is |
821 | /// reusing the same `AAQueryInfo` to preserve the state across queries, |
822 | /// esentially making AA work in "batch mode". The internal state cannot be |
823 | /// cleared, so to go "out-of-batch-mode", the user must either use AAResults, |
824 | /// or create a new BatchAAResults. |
825 | class BatchAAResults { |
826 | AAResults &AA; |
827 | AAQueryInfo AAQI; |
828 | |
829 | public: |
830 | BatchAAResults(AAResults &AAR) : AA(AAR), AAQI() {} |
831 | AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { |
832 | return AA.alias(LocA, LocB, AAQI); |
833 | } |
834 | bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false) { |
835 | return AA.pointsToConstantMemory(Loc, AAQI, OrLocal); |
836 | } |
837 | ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) { |
838 | return AA.getModRefInfo(Call, Loc, AAQI); |
839 | } |
840 | ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) { |
841 | return AA.getModRefInfo(Call1, Call2, AAQI); |
842 | } |
843 | ModRefInfo getModRefInfo(const Instruction *I, |
844 | const Optional<MemoryLocation> &OptLoc) { |
845 | return AA.getModRefInfo(I, OptLoc, AAQI); |
846 | } |
847 | ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call2) { |
848 | return AA.getModRefInfo(I, Call2, AAQI); |
849 | } |
850 | ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) { |
851 | return AA.getArgModRefInfo(Call, ArgIdx); |
852 | } |
853 | FunctionModRefBehavior getModRefBehavior(const CallBase *Call) { |
854 | return AA.getModRefBehavior(Call); |
855 | } |
856 | bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { |
857 | return alias(LocA, LocB) == MustAlias; |
858 | } |
859 | bool isMustAlias(const Value *V1, const Value *V2) { |
860 | return alias(MemoryLocation(V1, LocationSize::precise(1)), |
861 | MemoryLocation(V2, LocationSize::precise(1))) == MustAlias; |
862 | } |
863 | }; |
864 | |
865 | /// Temporary typedef for legacy code that uses a generic \c AliasAnalysis |
866 | /// pointer or reference. |
867 | using AliasAnalysis = AAResults; |
868 | |
869 | /// A private abstract base class describing the concept of an individual alias |
870 | /// analysis implementation. |
871 | /// |
872 | /// This interface is implemented by any \c Model instantiation. It is also the |
873 | /// interface which a type used to instantiate the model must provide. |
874 | /// |
875 | /// All of these methods model methods by the same name in the \c |
876 | /// AAResults class. Only differences and specifics to how the |
877 | /// implementations are called are documented here. |
878 | class AAResults::Concept { |
879 | public: |
880 | virtual ~Concept() = 0; |
881 | |
882 | /// An update API used internally by the AAResults to provide |
883 | /// a handle back to the top level aggregation. |
884 | virtual void setAAResults(AAResults *NewAAR) = 0; |
885 | |
886 | //===--------------------------------------------------------------------===// |
887 | /// \name Alias Queries |
888 | /// @{ |
889 | |
890 | /// The main low level interface to the alias analysis implementation. |
891 | /// Returns an AliasResult indicating whether the two pointers are aliased to |
892 | /// each other. This is the interface that must be implemented by specific |
893 | /// alias analysis implementations. |
894 | virtual AliasResult alias(const MemoryLocation &LocA, |
895 | const MemoryLocation &LocB, AAQueryInfo &AAQI) = 0; |
896 | |
897 | /// Checks whether the given location points to constant memory, or if |
898 | /// \p OrLocal is true whether it points to a local alloca. |
899 | virtual bool pointsToConstantMemory(const MemoryLocation &Loc, |
900 | AAQueryInfo &AAQI, bool OrLocal) = 0; |
901 | |
902 | /// @} |
903 | //===--------------------------------------------------------------------===// |
904 | /// \name Simple mod/ref information |
905 | /// @{ |
906 | |
907 | /// Get the ModRef info associated with a pointer argument of a callsite. The |
908 | /// result's bits are set to indicate the allowed aliasing ModRef kinds. Note |
909 | /// that these bits do not necessarily account for the overall behavior of |
910 | /// the function, but rather only provide additional per-argument |
911 | /// information. |
912 | virtual ModRefInfo getArgModRefInfo(const CallBase *Call, |
913 | unsigned ArgIdx) = 0; |
914 | |
915 | /// Return the behavior of the given call site. |
916 | virtual FunctionModRefBehavior getModRefBehavior(const CallBase *Call) = 0; |
917 | |
918 | /// Return the behavior when calling the given function. |
919 | virtual FunctionModRefBehavior getModRefBehavior(const Function *F) = 0; |
920 | |
921 | /// getModRefInfo (for call sites) - Return information about whether |
922 | /// a particular call site modifies or reads the specified memory location. |
923 | virtual ModRefInfo getModRefInfo(const CallBase *Call, |
924 | const MemoryLocation &Loc, |
925 | AAQueryInfo &AAQI) = 0; |
926 | |
927 | /// Return information about whether two call sites may refer to the same set |
928 | /// of memory locations. See the AA documentation for details: |
929 | /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo |
930 | virtual ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, |
931 | AAQueryInfo &AAQI) = 0; |
932 | |
933 | /// @} |
934 | }; |
935 | |
936 | /// A private class template which derives from \c Concept and wraps some other |
937 | /// type. |
938 | /// |
939 | /// This models the concept by directly forwarding each interface point to the |
940 | /// wrapped type which must implement a compatible interface. This provides |
941 | /// a type erased binding. |
942 | template <typename AAResultT> class AAResults::Model final : public Concept { |
943 | AAResultT &Result; |
944 | |
945 | public: |
946 | explicit Model(AAResultT &Result, AAResults &AAR) : Result(Result) { |
947 | Result.setAAResults(&AAR); |
948 | } |
949 | ~Model() override = default; |
950 | |
951 | void setAAResults(AAResults *NewAAR) override { Result.setAAResults(NewAAR); } |
952 | |
953 | AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, |
954 | AAQueryInfo &AAQI) override { |
955 | return Result.alias(LocA, LocB, AAQI); |
956 | } |
957 | |
958 | bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, |
959 | bool OrLocal) override { |
960 | return Result.pointsToConstantMemory(Loc, AAQI, OrLocal); |
961 | } |
962 | |
963 | ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) override { |
964 | return Result.getArgModRefInfo(Call, ArgIdx); |
965 | } |
966 | |
967 | FunctionModRefBehavior getModRefBehavior(const CallBase *Call) override { |
968 | return Result.getModRefBehavior(Call); |
969 | } |
970 | |
971 | FunctionModRefBehavior getModRefBehavior(const Function *F) override { |
972 | return Result.getModRefBehavior(F); |
973 | } |
974 | |
975 | ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc, |
976 | AAQueryInfo &AAQI) override { |
977 | return Result.getModRefInfo(Call, Loc, AAQI); |
978 | } |
979 | |
980 | ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, |
981 | AAQueryInfo &AAQI) override { |
982 | return Result.getModRefInfo(Call1, Call2, AAQI); |
983 | } |
984 | }; |
985 | |
986 | /// A CRTP-driven "mixin" base class to help implement the function alias |
987 | /// analysis results concept. |
988 | /// |
989 | /// Because of the nature of many alias analysis implementations, they often |
990 | /// only implement a subset of the interface. This base class will attempt to |
991 | /// implement the remaining portions of the interface in terms of simpler forms |
992 | /// of the interface where possible, and otherwise provide conservatively |
993 | /// correct fallback implementations. |
994 | /// |
995 | /// Implementors of an alias analysis should derive from this CRTP, and then |
996 | /// override specific methods that they wish to customize. There is no need to |
997 | /// use virtual anywhere, the CRTP base class does static dispatch to the |
998 | /// derived type passed into it. |
999 | template <typename DerivedT> class AAResultBase { |
1000 | // Expose some parts of the interface only to the AAResults::Model |
1001 | // for wrapping. Specifically, this allows the model to call our |
1002 | // setAAResults method without exposing it as a fully public API. |
1003 | friend class AAResults::Model<DerivedT>; |
1004 | |
1005 | /// A pointer to the AAResults object that this AAResult is |
1006 | /// aggregated within. May be null if not aggregated. |
1007 | AAResults *AAR = nullptr; |
1008 | |
1009 | /// Helper to dispatch calls back through the derived type. |
1010 | DerivedT &derived() { return static_cast<DerivedT &>(*this); } |
1011 | |
1012 | /// A setter for the AAResults pointer, which is used to satisfy the |
1013 | /// AAResults::Model contract. |
1014 | void setAAResults(AAResults *NewAAR) { AAR = NewAAR; } |
1015 | |
1016 | protected: |
1017 | /// This proxy class models a common pattern where we delegate to either the |
1018 | /// top-level \c AAResults aggregation if one is registered, or to the |
1019 | /// current result if none are registered. |
1020 | class AAResultsProxy { |
1021 | AAResults *AAR; |
1022 | DerivedT &CurrentResult; |
1023 | |
1024 | public: |
1025 | AAResultsProxy(AAResults *AAR, DerivedT &CurrentResult) |
1026 | : AAR(AAR), CurrentResult(CurrentResult) {} |
1027 | |
1028 | AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, |
1029 | AAQueryInfo &AAQI) { |
1030 | return AAR ? AAR->alias(LocA, LocB, AAQI) |
1031 | : CurrentResult.alias(LocA, LocB, AAQI); |
1032 | } |
1033 | |
1034 | bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, |
1035 | bool OrLocal) { |
1036 | return AAR ? AAR->pointsToConstantMemory(Loc, AAQI, OrLocal) |
1037 | : CurrentResult.pointsToConstantMemory(Loc, AAQI, OrLocal); |
1038 | } |
1039 | |
1040 | ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) { |
1041 | return AAR ? AAR->getArgModRefInfo(Call, ArgIdx) |
1042 | : CurrentResult.getArgModRefInfo(Call, ArgIdx); |
1043 | } |
1044 | |
1045 | FunctionModRefBehavior getModRefBehavior(const CallBase *Call) { |
1046 | return AAR ? AAR->getModRefBehavior(Call) |
1047 | : CurrentResult.getModRefBehavior(Call); |
1048 | } |
1049 | |
1050 | FunctionModRefBehavior getModRefBehavior(const Function *F) { |
1051 | return AAR ? AAR->getModRefBehavior(F) : CurrentResult.getModRefBehavior(F); |
1052 | } |
1053 | |
1054 | ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc, |
1055 | AAQueryInfo &AAQI) { |
1056 | return AAR ? AAR->getModRefInfo(Call, Loc, AAQI) |
1057 | : CurrentResult.getModRefInfo(Call, Loc, AAQI); |
1058 | } |
1059 | |
1060 | ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, |
1061 | AAQueryInfo &AAQI) { |
1062 | return AAR ? AAR->getModRefInfo(Call1, Call2, AAQI) |
1063 | : CurrentResult.getModRefInfo(Call1, Call2, AAQI); |
1064 | } |
1065 | }; |
1066 | |
1067 | explicit AAResultBase() = default; |
1068 | |
1069 | // Provide all the copy and move constructors so that derived types aren't |
1070 | // constrained. |
1071 | AAResultBase(const AAResultBase &Arg) {} |
1072 | AAResultBase(AAResultBase &&Arg) {} |
1073 | |
1074 | /// Get a proxy for the best AA result set to query at this time. |
1075 | /// |
1076 | /// When this result is part of a larger aggregation, this will proxy to that |
1077 | /// aggregation. When this result is used in isolation, it will just delegate |
1078 | /// back to the derived class's implementation. |
1079 | /// |
1080 | /// Note that callers of this need to take considerable care to not cause |
1081 | /// performance problems when they use this routine, in the case of a large |
1082 | /// number of alias analyses being aggregated, it can be expensive to walk |
1083 | /// back across the chain. |
1084 | AAResultsProxy getBestAAResults() { return AAResultsProxy(AAR, derived()); } |
1085 | |
1086 | public: |
1087 | AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, |
1088 | AAQueryInfo &AAQI) { |
1089 | return MayAlias; |
1090 | } |
1091 | |
1092 | bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, |
1093 | bool OrLocal) { |
1094 | return false; |
1095 | } |
1096 | |
1097 | ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) { |
1098 | return ModRefInfo::ModRef; |
1099 | } |
1100 | |
1101 | FunctionModRefBehavior getModRefBehavior(const CallBase *Call) { |
1102 | return FMRB_UnknownModRefBehavior; |
1103 | } |
1104 | |
1105 | FunctionModRefBehavior getModRefBehavior(const Function *F) { |
1106 | return FMRB_UnknownModRefBehavior; |
1107 | } |
1108 | |
1109 | ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc, |
1110 | AAQueryInfo &AAQI) { |
1111 | return ModRefInfo::ModRef; |
1112 | } |
1113 | |
1114 | ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, |
1115 | AAQueryInfo &AAQI) { |
1116 | return ModRefInfo::ModRef; |
1117 | } |
1118 | }; |
1119 | |
1120 | /// Return true if this pointer is returned by a noalias function. |
1121 | bool isNoAliasCall(const Value *V); |
1122 | |
1123 | /// Return true if this is an argument with the noalias attribute. |
1124 | bool isNoAliasArgument(const Value *V); |
1125 | |
1126 | /// Return true if this pointer refers to a distinct and identifiable object. |
1127 | /// This returns true for: |
1128 | /// Global Variables and Functions (but not Global Aliases) |
1129 | /// Allocas |
1130 | /// ByVal and NoAlias Arguments |
1131 | /// NoAlias returns (e.g. calls to malloc) |
1132 | /// |
1133 | bool isIdentifiedObject(const Value *V); |
1134 | |
1135 | /// Return true if V is umabigously identified at the function-level. |
1136 | /// Different IdentifiedFunctionLocals can't alias. |
1137 | /// Further, an IdentifiedFunctionLocal can not alias with any function |
1138 | /// arguments other than itself, which is not necessarily true for |
1139 | /// IdentifiedObjects. |
1140 | bool isIdentifiedFunctionLocal(const Value *V); |
1141 | |
1142 | /// A manager for alias analyses. |
1143 | /// |
1144 | /// This class can have analyses registered with it and when run, it will run |
1145 | /// all of them and aggregate their results into single AA results interface |
1146 | /// that dispatches across all of the alias analysis results available. |
1147 | /// |
1148 | /// Note that the order in which analyses are registered is very significant. |
1149 | /// That is the order in which the results will be aggregated and queried. |
1150 | /// |
1151 | /// This manager effectively wraps the AnalysisManager for registering alias |
1152 | /// analyses. When you register your alias analysis with this manager, it will |
1153 | /// ensure the analysis itself is registered with its AnalysisManager. |
1154 | /// |
1155 | /// The result of this analysis is only invalidated if one of the particular |
1156 | /// aggregated AA results end up being invalidated. This removes the need to |
1157 | /// explicitly preserve the results of `AAManager`. Note that analyses should no |
1158 | /// longer be registered once the `AAManager` is run. |
1159 | class AAManager : public AnalysisInfoMixin<AAManager> { |
1160 | public: |
1161 | using Result = AAResults; |
1162 | |
1163 | /// Register a specific AA result. |
1164 | template <typename AnalysisT> void registerFunctionAnalysis() { |
1165 | ResultGetters.push_back(&getFunctionAAResultImpl<AnalysisT>); |
1166 | } |
1167 | |
1168 | /// Register a specific AA result. |
1169 | template <typename AnalysisT> void registerModuleAnalysis() { |
1170 | ResultGetters.push_back(&getModuleAAResultImpl<AnalysisT>); |
1171 | } |
1172 | |
1173 | Result run(Function &F, FunctionAnalysisManager &AM) { |
1174 | Result R(AM.getResult<TargetLibraryAnalysis>(F)); |
1175 | for (auto &Getter : ResultGetters) |
1176 | (*Getter)(F, AM, R); |
1177 | return R; |
1178 | } |
1179 | |
1180 | private: |
1181 | friend AnalysisInfoMixin<AAManager>; |
1182 | |
1183 | static AnalysisKey Key; |
1184 | |
1185 | SmallVector<void (*)(Function &F, FunctionAnalysisManager &AM, |
1186 | AAResults &AAResults), |
1187 | 4> ResultGetters; |
1188 | |
1189 | template <typename AnalysisT> |
1190 | static void getFunctionAAResultImpl(Function &F, |
1191 | FunctionAnalysisManager &AM, |
1192 | AAResults &AAResults) { |
1193 | AAResults.addAAResult(AM.template getResult<AnalysisT>(F)); |
1194 | AAResults.addAADependencyID(AnalysisT::ID()); |
1195 | } |
1196 | |
1197 | template <typename AnalysisT> |
1198 | static void getModuleAAResultImpl(Function &F, FunctionAnalysisManager &AM, |
1199 | AAResults &AAResults) { |
1200 | auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F); |
1201 | if (auto *R = |
1202 | MAMProxy.template getCachedResult<AnalysisT>(*F.getParent())) { |
1203 | AAResults.addAAResult(*R); |
1204 | MAMProxy |
1205 | .template registerOuterAnalysisInvalidation<AnalysisT, AAManager>(); |
1206 | } |
1207 | } |
1208 | }; |
1209 | |
1210 | /// A wrapper pass to provide the legacy pass manager access to a suitably |
1211 | /// prepared AAResults object. |
1212 | class AAResultsWrapperPass : public FunctionPass { |
1213 | std::unique_ptr<AAResults> AAR; |
1214 | |
1215 | public: |
1216 | static char ID; |
1217 | |
1218 | AAResultsWrapperPass(); |
1219 | |
1220 | AAResults &getAAResults() { return *AAR; } |
1221 | const AAResults &getAAResults() const { return *AAR; } |
1222 | |
1223 | bool runOnFunction(Function &F) override; |
1224 | |
1225 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
1226 | }; |
1227 | |
1228 | /// A wrapper pass for external alias analyses. This just squirrels away the |
1229 | /// callback used to run any analyses and register their results. |
1230 | struct ExternalAAWrapperPass : ImmutablePass { |
1231 | using CallbackT = std::function<void(Pass &, Function &, AAResults &)>; |
1232 | |
1233 | CallbackT CB; |
1234 | |
1235 | static char ID; |
1236 | |
1237 | ExternalAAWrapperPass(); |
1238 | |
1239 | explicit ExternalAAWrapperPass(CallbackT CB); |
1240 | |
1241 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
1242 | AU.setPreservesAll(); |
1243 | } |
1244 | }; |
1245 | |
1246 | FunctionPass *createAAResultsWrapperPass(); |
1247 | |
1248 | /// A wrapper pass around a callback which can be used to populate the |
1249 | /// AAResults in the AAResultsWrapperPass from an external AA. |
1250 | /// |
1251 | /// The callback provided here will be used each time we prepare an AAResults |
1252 | /// object, and will receive a reference to the function wrapper pass, the |
1253 | /// function, and the AAResults object to populate. This should be used when |
1254 | /// setting up a custom pass pipeline to inject a hook into the AA results. |
1255 | ImmutablePass *createExternalAAWrapperPass( |
1256 | std::function<void(Pass &, Function &, AAResults &)> Callback); |
1257 | |
1258 | /// A helper for the legacy pass manager to create a \c AAResults |
1259 | /// object populated to the best of our ability for a particular function when |
1260 | /// inside of a \c ModulePass or a \c CallGraphSCCPass. |
1261 | /// |
1262 | /// If a \c ModulePass or a \c CallGraphSCCPass calls \p |
1263 | /// createLegacyPMAAResults, it also needs to call \p addUsedAAAnalyses in \p |
1264 | /// getAnalysisUsage. |
1265 | AAResults createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR); |
1266 | |
1267 | /// A helper for the legacy pass manager to populate \p AU to add uses to make |
1268 | /// sure the analyses required by \p createLegacyPMAAResults are available. |
1269 | void getAAResultsAnalysisUsage(AnalysisUsage &AU); |
1270 | |
1271 | } // end namespace llvm |
1272 | |
1273 | #endif // LLVM_ANALYSIS_ALIASANALYSIS_H |